/** * @version $Id: SyntaxStructure.java 1839 2014-04-16 02:33:51Z yukihiro-kinjyo $ * * 2011/09/02 20:34:11 * @author imai * * Copyright 2011-2014 TIDAコンソーシアム All Rights Reserved. */ package com.tida_okinawa.corona.correction.morphem; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.eclipse.ui.views.properties.IPropertyDescriptor; import org.eclipse.ui.views.properties.IPropertySource; import org.eclipse.ui.views.properties.TextPropertyDescriptor; import com.tida_okinawa.corona.correction.common.StringUtil; import com.tida_okinawa.corona.io.model.MorphemeElement; import com.tida_okinawa.corona.io.model.dic.TermClass; /** * 形態素・係り受け解析結果 * * @author imai * */ public class SyntaxStructure extends ArrayList<ISyntaxStructureElement> { /** * オブジェクトのシリアライズ用ID */ private static final long serialVersionUID = -1797704075541229412L; /** * Juman-KNP の出力のまま(デバッグ用) */ final List<String> original; /** * 係り先 */ private final Map<Integer, Relation> destsMap = new HashMap<Integer, Relation>(); /** * 係り元 */ private final Map<Integer, List<Relation>> sourcesMap = new HashMap<Integer, List<Relation>>(); /** * 1つのテキストを、句読点で区切ってknpにかけるのに対応 */ int index0 = 0; int indexSentence = 0; /** * 句/文節を作る * * @param text * KNPの結果のテキスト ex. *1 <文頭> ... * @return 文節/句 */ private ISyntaxStructureElement createElement(String text) { int index = size(); ISyntaxStructureElement newElement = new SyntaxStructureElement(index, text); super.add(newElement); return newElement; } /** * コンストラクタ * * @param text * KNPの解析結果 */ public SyntaxStructure(String text) { this(StringUtil.splitFast(text)); // note: DBには 解析結果(List<String>)を連結したものを入れている } /** * コンストラクタ * * @param texts * KNPの解析結果 */ public SyntaxStructure(List<String> texts) { this(texts, "+"); //$NON-NLS-1$ } /** * 形態素のリストを取得 * * @return 形態素 */ public List<MorphemeElement> getMorphemeElemsnts() { List<MorphemeElement> list = new ArrayList<MorphemeElement>(); for (ISyntaxStructureElement element : this) { list.addAll(element.getMorphemes()); } return list; } /** * 形態素→文節/句 * * @param me * @return 文節/句 */ public ISyntaxStructureElement getSyntaxStructureElement(MorphemeElement me) { for (ISyntaxStructureElement element : this) { if (element.getMorphemes().contains(me)) { return element; } } // never return null; } /** * コンストラクタ * * @param texts * KNPの解析結果 * @param type * 対象 "*" or "+" */ private SyntaxStructure(List<String> texts, String type) { original = texts; /** * 処理中の句/文節 */ ISyntaxStructureElement lastElement = null; for (String text : texts) { if (text.startsWith("#")) { //$NON-NLS-1$ // コメント行 } else if (text.startsWith("@")) { //$NON-NLS-1$ // Juman 次点候補 (無視する) } else if (text.startsWith(type)) { lastElement = createElement(text); Relation relation = createRelation(lastElement, text); if (relation.getDest() >= 0) { // 係り先がなければ -1 destsMap.put(lastElement.getIndex(), relation); List<Relation> srcs = sourcesMap.get(relation.getDest()); if (srcs == null) { srcs = new ArrayList<Relation>(); sourcesMap.put(relation.getDest(), srcs); } srcs.add(relation); } } else if (text.startsWith("*") || text.startsWith("+") || text.trim().equals("")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ // 対象外の行, 空白行 // type ("*" or "+") は先に処理される } else if (text.equals("EOS")) { //$NON-NLS-1$ // 次の文の文節の開始番号 index0 = size(); } else { // 形態素 if (lastElement == null) { lastElement = createElement(""); //$NON-NLS-1$ // 形態素だけを処理した場合 } MorphemeElement morpheme = new MorphemeElement(text); lastElement.getMorphemes().add(morpheme); morpheme.indexSentence = indexSentence; if (morpheme.getHinshiSaibunrui().equals(TermClass.PERIOD.getName())) { indexSentence++; // 句点で分割 } } } } /** * {@link Relation}を作る * * @param srcElement * @param text * @return 係り受けリレーション */ private Relation createRelation(ISyntaxStructureElement srcElement, String text) { String s = text.substring(1).trim(); // #, * を除く // 10D の D を除く for (int pos = 0; pos < s.length(); pos++) { char c = s.charAt(pos); if ((c != '-') && (c < '0' || '9' < c)) { int dst = Integer.parseInt(s.substring(0, pos)); if (dst > 0) { // dst==-1 : 係り先なし dst += index0; // 前の文の番号だけずらす } String type = s.substring(pos, pos + 1); return new Relation(srcElement.getIndex(), dst, type); } } throw new IllegalArgumentException(text); } /** * 表記をつなげて原文を取得 * * @return 原文 */ public String getText() { StringBuilder text = new StringBuilder(3000); for (ISyntaxStructureElement element : this) { text.append(element.getHyouki()); } return text.toString(); } @Override public String toString() { // DB格納用の文字列を作る // EOS はなくなるが、構文パターンに影響ないのでそのまま StringBuilder sb = new StringBuilder(1000000); // 100KB for (ISyntaxStructureElement element : this) { sb.append(element.toString()); } return sb.toString(); } /** * 係り受け先/元 * */ static public class Relation { int src; int dest; String type; Relation(int src, int dest, String type) { this.src = src; this.dest = dest; this.type = type; } /** * 係先の文節/句のID * * @return 係り元ID */ public int getSrc() { return src; } /** * 係り元の文節/句のID * * @return 係り先ID */ public int getDest() { return dest; } /** * 係先のタイプ * "D", "P" * * @return 係り先のタイプ */ public String getType() { return type; } @Override public String toString() { return src + "-" + type + "->" + dest; //$NON-NLS-1$ //$NON-NLS-2$ } } /** * 句/文節 * */ class SyntaxStructureElement implements ISyntaxStructureElement { /** * この文節/句の番号(デバッグ用) */ final int index; /** * KNPの結果のテキスト * ex. *1 <文頭> ... */ final String source; /** * この文節/句を構成する形態素 */ final List<MorphemeElement> morphemes = new ArrayList<MorphemeElement>(); /** * * @param index * @param text * KNPの結果のテキスト ex. *1 <文頭> ... */ SyntaxStructureElement(int index, String text) { this.index = index; this.source = text; } @Override public int getIndex() { return index; } @Override public List<MorphemeElement> getMorphemes() { return morphemes; } @Override public ISyntaxStructureElement getDependDestination() { return getDependDestination(null); } @Override public List<ISyntaxStructureElement> getDependSources() { return getDependSources(null); } @Override public String getHyouki() { return getText(MorphemeElement.HYOUKI); } @Override public String getYomi() { return getText(MorphemeElement.YOMI); } /** * 文節/句を構成する形態素をつなげる * * @param index * @return 形態素 */ private String getText(int index) { StringBuilder s = new StringBuilder(2000); for (MorphemeElement morpheme : morphemes) { s.append(morpheme.get(index)); } return s.toString(); } @Override public ISyntaxStructureElement getDependDestination(String type) { Relation relation = destsMap.get(index); if (relation != null) { if (type == null || type.equals(relation.getType())) { return SyntaxStructure.this.get(relation.getDest()); } } return null; } @Override public List<ISyntaxStructureElement> getDependSources(String type) { List<Relation> relations = sourcesMap.get(index); List<ISyntaxStructureElement> srcs = new ArrayList<ISyntaxStructureElement>(); if (relations != null) { for (Relation relation : relations) { if (type == null || type.equals(relation.getType())) { srcs.add(SyntaxStructure.this.get(relation.getSrc())); } } } return srcs; } @Override public String toString() { // DB格納用のテキストを作る StringBuilder sb = new StringBuilder(10000); // 10KB if (!source.isEmpty()) { // KNPをやっていない場合 source="" // インデックスの差し替え // TODO:係り受けタイプがこれ以外にあるかも? String str = ""; //$NON-NLS-1$ int index = -1; if (this.getDependDestination("D") != null) { //$NON-NLS-1$ index = this.getDependDestination("D").getIndex(); //$NON-NLS-1$ str = "+ " + index + source.substring(source.indexOf("D"));// 未格 //$NON-NLS-1$ //$NON-NLS-2$ } else if (this.getDependDestination("P") != null) { //$NON-NLS-1$ index = this.getDependDestination("P").getIndex(); //$NON-NLS-1$ str = "+ " + index + source.substring(source.indexOf("P"));// 連体 //$NON-NLS-1$ //$NON-NLS-2$ } else if (this.getDependDestination("A") != null) { //$NON-NLS-1$ index = this.getDependDestination("A").getIndex(); //$NON-NLS-1$ str = "+ " + index + source.substring(source.indexOf("A"));// 同格連体 //$NON-NLS-1$ //$NON-NLS-2$ } else { str = source; } sb.append(str); sb.append("\n"); //$NON-NLS-1$ } for (MorphemeElement element : getMorphemes()) { sb.append(element.toString()); sb.append("\n"); //$NON-NLS-1$ } return sb.toString(); } @Override public Object getAdapter(@SuppressWarnings("rawtypes") Class adapter) { if (adapter.equals(IPropertySource.class)) { return source1; } return null; } private IPropertySource source1 = new IPropertySource() { @Override public IPropertyDescriptor[] getPropertyDescriptors() { IPropertyDescriptor[] descriptor = new IPropertyDescriptor[] { new TextPropertyDescriptor("id", "ID"), //$NON-NLS-1$ //$NON-NLS-2$ new TextPropertyDescriptor("text", Messages.SyntaxStructure_NOTATION), new TextPropertyDescriptor("src", Messages.MODIFICATION_DESTINATION), new TextPropertyDescriptor("dist", Messages.MODIFICATION_SOURCE), }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ return descriptor; } @Override public Object getPropertyValue(Object id) { if (id.equals("id")) { //$NON-NLS-1$ return String.valueOf(getIndex()); } if (id.equals("text")) { //$NON-NLS-1$ return getHyouki(); } if (id.equals("src")) { //$NON-NLS-1$ if (getDependSources() != null && !getDependSources().isEmpty()) { StringBuilder buf = new StringBuilder(); for (ISyntaxStructureElement src : getDependSources()) { buf.append(" , " + src.getIndex() + ":" + src.getHyouki()); //$NON-NLS-1$ //$NON-NLS-2$ } return buf.toString().substring(3); } return ""; //$NON-NLS-1$ } if (id.equals("dist")) { //$NON-NLS-1$ if (getDependDestination() != null) { // return getDst(); return String.valueOf(getDependDestination().getIndex()) + ":" + getDependDestination().getHyouki(); //$NON-NLS-1$ } return ""; //$NON-NLS-1$ } return null; } @Override public boolean isPropertySet(Object id) { return false; } @Override public void resetPropertyValue(Object id) { } @Override public void setPropertyValue(Object id, Object value) { } @Override public Object getEditableValue() { return null; } }; } }