40. 係り受け解析結果の読み込み(形態素)
形態素を表すクラスMorphを実装せよ.このクラスは表層形(surface),基本形(base),品詞(pos),品詞細分類1(pos1)をメンバ変数に持つこととする.さらに,CaboChaの解析結果(neko.txt.cabocha)を読み込み,各文をMorphオブジェクトのリストとして表現し,3文目の形態素列を表示せよ.
import MeCab
import CaboCha
import re
class Morph:
    """A single morpheme from a morphological / dependency analysis result.

    Attributes:
        surface: Surface form, i.e. the token as it appears in the text.
        base: Base (dictionary) form of the token.
        pos: Part of speech.
        pos1: Part-of-speech subcategory 1.
    """

    def __init__(self, surface: str, base: str, pos: str, pos1: str) -> None:
        self.surface = surface  # surface form
        self.base = base  # base form
        self.pos = pos  # part of speech
        self.pos1 = pos1  # part-of-speech subcategory 1

    def __repr__(self) -> str:
        # Make lists of morphemes readable when printed or inspected,
        # instead of showing bare object addresses.
        return "{}(surface={!r}, base={!r}, pos={!r}, pos1={!r})".format(
            type(self).__name__, self.surface, self.base, self.pos, self.pos1
        )
def parse_neko(in_text_name, out_text_name):
    """Parse a text file with CaboCha and save the lattice-format result.

    Every line of the input file is passed to the CaboCha parser on its
    own, and the lattice-format string of each resulting tree is appended
    to the output file.

    Args:
        in_text_name: Path of the raw text file to analyse.
        out_text_name: Path of the file that receives the CaboCha output.
            An existing file is overwritten.
    """
    c = CaboCha.Parser()
    # Both files are opened explicitly as UTF-8 so the result does not depend
    # on the platform's default locale encoding. The input is opened first so
    # that a missing input file does not truncate an existing output file.
    with open(in_text_name, encoding="utf-8") as in_text, \
            open(out_text_name, mode="w", encoding="utf-8") as out_text:
        for line in in_text:
            tree = c.parse(line)
            out_text.write(tree.toString(CaboCha.FORMAT_LATTICE))
#parse_neko("./neko.txt","./neko.txt.cabocha")
# Load the CaboCha lattice output and represent each sentence as a list of
# Morph objects.
sent = []  # morphemes of the sentence currently being read
text = []  # every sentence read so far, each one a list of Morph objects
with open("./neko.txt.cabocha", encoding="utf-8") as parse_text:
    for line in parse_text:
        # Drop only the line terminator; other whitespace can be real data
        # (a surface form may itself be a whitespace character).
        line = line.rstrip("\n")
        if line == "EOS":
            # End-of-sentence marker: close the current sentence. Comparing
            # after stripping the newline also handles a final "EOS" that has
            # no trailing newline.
            text.append(sent)
            sent = []
            continue
        # Skip blank lines and dependency (chunk) header lines. Headers start
        # with "* "; a morpheme whose surface is "*" is followed by a tab
        # instead, so it is not mistaken for a header.
        if not line or line.startswith("* "):
            continue
        # A morpheme line is "<surface>\t<comma-separated features>". Split on
        # the tab first so that a surface form that is itself a comma does not
        # shift the feature columns.
        surface, feature = line.split("\t", 1)
        fields = feature.split(",")
        # Feature columns used: 0 = part of speech, 1 = subcategory 1,
        # 6 = base form.
        sent.append(Morph(surface, fields[6], fields[0], fields[1]))

# Show the surface forms of the third sentence.
for word in text[2]:
    print(word.surface)
<結果>
吾輩
は
猫
で
ある
。