package edu.fudan.data.reader; import java.io.IOException; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import edu.fudan.ml.classifier.linear.Linear; import edu.fudan.ml.types.Instance; import edu.fudan.ml.types.InstanceSet; import edu.fudan.nlp.cn.anaphora.Entity; import edu.fudan.nlp.cn.anaphora.EntityGroup; import edu.fudan.nlp.cn.anaphora.EntitiesGetter; import edu.fudan.nlp.cn.anaphora.FeatureGeter; /** * 用于指代消解的读入 * @author jszhao * @version 1.0 * @since FudanNLP 1.5 */ public class AR_Reader extends Reader{ private String data; private LinkedList<Instance> list; private Iterator it; private LinkedList<Entity> ll; private EntitiesGetter elp; public AR_Reader (String data) throws Exception { this.data = data; elp= new EntitiesGetter(); ll = elp.getEntiyList(data); this.dothis(); it = list.iterator(); } public AR_Reader (String[][][] stringTag,String data) throws Exception { this.data = data; elp= new EntitiesGetter(); ll = elp.getEntiyList(stringTag,data); this.dothis(); it = list.iterator(); } private void dothis() throws Exception{ list = new LinkedList<Instance>(); Entity ss = null;Entity s2 =null; EntityGroup eg = null; FeatureGeter fp = null; Instance in = null; Iterator it =null; List<String> newdata = null; while(ll.size()>0){ ss=(Entity)ll.poll(); it= ll.iterator(); while(it.hasNext()){ s2 = (Entity)it.next(); eg = new EntityGroup(ss,s2); fp = new FeatureGeter(eg); String[] tokens = this.intArrayToString(fp.getFeatrue()).split("\\t+|\\s+"); newdata= Arrays.asList(tokens); in = new Instance(newdata,null); in.setSource(eg); list.add(in); } } } private String intArrayToString(int[] ia){ StringBuffer sb = new StringBuffer(); for(int i = 0;i<ia.length;i++){ sb.append(ia[i]); sb.append(" "); } return sb.toString(); } public Instance next () { return (Instance) it.next(); } public boolean hasNext () { return it.hasNext(); } public static void main(String args[]) throws Exception{ Linear cl=null;; cl = Linear.loadFrom("./models/ar_model.gz"); InstanceSet test = new InstanceSet(cl.getPipe()); test.loadThruPipes(new AR_Reader("随着中国经济融入世界经济进程的加快,和以高科技为主体的经济发展,众多跨国公司在中国不 在是单纯的建立生产基地,而是越来越多的将研发中心转移到了中国。目前已经有包括:微软、 摩托罗拉和贝尔实验室在内的几十家规模较大的跨国公司,将其研发中心在中国落户。 ")); for(int i=0;i<test.size();i++){ String ss = cl.getStringLabel(test.getInstance(i)); if(ss.equals("1")) System.out.print(ss+"\n"); } System.gc(); } }