package edu.fudan.nlp.cn.anaphora; import java.util.Iterator; import java.util.LinkedList; import java.util.TreeSet; import edu.fudan.data.reader.AR_Reader; import edu.fudan.ml.classifier.linear.Linear; import edu.fudan.ml.types.Instance; import edu.fudan.ml.types.InstanceSet; import edu.fudan.nlp.cn.tag.POSTagger; /** * 指代消解的程序接口 * @author jszhao * @version 1.0 * @since FudanNLP 1.5 */ public class Anaphora { private LinkedList<EntityGroup> arGroup; private Linear cl; private TreeSet<Entity> ts; private FormChanger fc; private LinkedList<Instance> llis; private InstanceSet test; public Anaphora(String segmodel, String posmodel, String armodel) throws Exception{ EntitiesGetter.initTagger(segmodel,posmodel); cl = Linear.loadFrom(armodel); } public Anaphora(POSTagger tag,String armodel) throws Exception{ EntitiesGetter.initTagger(tag); cl = Linear.loadFrom(armodel); } /** * 用了标注模型,得到指代对集合 * @param str * @return 指代对集合 * @throws Exception */ public LinkedList<EntityGroup> getArGroup(String str) throws Exception{ this.init(str); this.doIt(); return this.arGroup; } /** * 不用标注模型,得到指代对集合 * @param str * @return 指代对集合 * @throws Exception */ public LinkedList<EntityGroup> getArGroup(String[][][] stringTag,String str) throws Exception{ this.init2(stringTag,str); this.doIt(); return this.arGroup; } private void init(String str) throws Exception{ arGroup = new LinkedList<EntityGroup>(); ts = new TreeSet<Entity>(); llis = new LinkedList<Instance>(); fc = new FormChanger(); test = new InstanceSet(cl.getPipe()); test.loadThruPipes(new AR_Reader(str)); for(int i=0;i<test.size();i++){ String ss = cl.getStringLabel(test.getInstance(i)); if(ss.equals("1")){ llis.add(test.getInstance(i)); } } fc.groupToList(llis); fc.getLlsb(); ts = fc.getTs(); } private void init2(String[][][]stringTag,String str) throws Exception{ arGroup = new LinkedList<EntityGroup>(); ts = new TreeSet<Entity>(); llis = new LinkedList<Instance>(); fc = new FormChanger(); test = new InstanceSet(cl.getPipe()); test.loadThruPipes(new AR_Reader(stringTag,str)); for(int i=0;i<test.size();i++){ String ss = cl.getStringLabel(test.getInstance(i)); if(ss.equals("1")){ llis.add(test.getInstance(i)); } } fc.groupToList(llis); fc.getLlsb(); ts = fc.getTs(); } private void doIt(){ LinkedList<Entity> ll =null; int flag = 0;Entity re =null;Entity re1 =null; int i = this.ts.size();int j =0; WeightGetter wp = null; EntityGroup reg =null; EntityGroup reg1 =null; while(flag!=i-j){ flag =0; ll = new LinkedList<Entity>(); Iterator<Entity> it = this.ts.iterator(); while(it.hasNext()){ flag++; re = it.next(); if(!re.getIsResolution()){ ll.add(re); } else{ j++; it.remove(); break; } } if(flag==i-j&&!re.getIsResolution()) break; it = ll.iterator(); int ii = -1000; while(it.hasNext()){ re1 = it.next(); reg = new EntityGroup(re1,re); wp = new WeightGetter(reg); if(wp.getWeight()>=ii){ ii = wp.getWeight(); reg1 =reg; reg1.setWeight(ii); } } if(reg1!=null&®1.getWeight()<-100){ continue; } this.arGroup.add(reg1); } } /** * 用标注工具的最后结果 * @param str 句子 * @throws Exception */ public String resultToString(String str) throws Exception{ EntityGroup reg =null; StringBuffer strBuf = new StringBuffer(); getArGroup(str) ; Iterator it = arGroup.iterator(); while(it.hasNext()){ reg = (EntityGroup) it.next(); strBuf.append(reg.getAhead().getData()+"("+reg.getAhead().getStart()+")"+"<--"+reg.getBehind().getData()+"("+reg.getBehind().getStart()+")"+"\n"); } return strBuf.toString(); } /** * 不用标注工具的最后结果 * @param stringTag 词和词性数组 * @param str 句子 * @throws Exception */ public String resultToString(String[][][] stringTag,String str) throws Exception{ EntityGroup reg =null; StringBuffer strBuf = new StringBuffer(); getArGroup(stringTag,str); Iterator it = arGroup.iterator(); while(it.hasNext()){ reg = (EntityGroup) it.next(); strBuf.append(reg.getAhead().getData()+"("+reg.getAhead().getStart()+")"+"<--"+reg.getBehind().getData()+"("+reg.getBehind().getStart()+")"+"\n"); } return strBuf.toString(); } }