24. ファイル参照の抽出
記事から参照されているメディアファイルをすべて抜き出せ.
import json
import re
def wiki_selection(path="D:\\nlp100\\jawiki-country.json", title="イギリス"):
    """Return the text of one article from a JSON Lines dump.

    Every non-blank line of the file is parsed as a JSON object. The
    "text" value of the first object whose "title" equals *title* is
    returned.

    Args:
        path: Location of the dump file, opened as UTF-8. Defaults to the
            path the script originally hard-coded.
        title: Value of the "title" key to look for. Defaults to the
            article the script originally hard-coded.

    Returns:
        The "text" value of the first matching record, or None when no
        record has the requested title.
    """
    with open(path, encoding="UTF-8") as fr:
        # Iterating the file object streams one line at a time, so the
        # whole dump is never held in memory.
        for wiki_line in fr:
            # A whitespace-only line is not valid JSON; skip it instead of
            # letting json.loads raise.
            if not wiki_line.strip():
                continue
            wiki_dic = json.loads(wiki_line)
            if wiki_dic["title"] == title:
                return wiki_dic["text"]
    # No record matched the requested title.
    return None
# Load the article text selected by wiki_selection().
text = wiki_selection()
# Raw string literal: in a normal string "\|" is an invalid escape sequence
# and triggers a warning on recent Python versions. The pattern itself is
# unchanged: a non-greedy match from "File:" up to and including the next
# "|" on the same line, so each printed match keeps its trailing "|".
for line in re.findall(r"File:.*?\|", text):
    print(line)
<結果>
File:Battle of Waterloo 1815.PNG|
File:The British Empire.png|
File:Uk topo en.jpg|
File:BenNevis2005.jpg|
File:Elizabeth II greets NASA GSFC employees, May 8, 2007 edit.jpg|
File:Palace of Westminster, London - Feb 2007.jpg|
File:David Cameron and Barack Obama at the G20 Summit in Toronto.jpg|
File:Soldiers Trooping the Colour, 16th June 2007.jpg|
File:Scotland Parliament Holyrood.jpg|
File:London.bankofengland.arp.jpg|
File:City of London skyline from London City Hall - Oct 2008.jpg|
File:Oil platform in the North SeaPros.jpg|
File:Eurostar at St Pancras Jan 2008.jpg|
File:Heathrow T5.jpg|
File:Anglospeak.svg|