package is2.parserR2;

import extractors.Extractor;
import extractors.ExtractorClusterStacked;
import extractors.ExtractorReranker;
import is2.data.*;
import is2.io.CONLLReader09;
import is2.io.CONLLWriter09;
import is2.tools.Tool;
import is2.util.DB;
import is2.util.OptionsSuper;
import is2.util.ParserEvaluator;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

public class Reranker implements Tool {

    public static int THREADS = 4;

    Long2IntInterface l2i;

    // the parser models
    ParametersFloat paramsParsers[];

    // the re-ranker model
    ParametersFloat parametersReranker;

    PipeReranker pipeReranker;
    Pipe pipe;
    Options options;

    HashMap<Integer, Integer> rank = new HashMap<>();
    int amongxbest = 0, amongxbest_ula = 0, nbest = 0, bestProj = 0, smallestErrorSum = 0, countAllNodes = 0;

    static int NBest = 1000;

    /**
     * Initialize the reranker
     *
     * @param options
     */
    public Reranker(Options options) {
        this.options = options;
    }

    /**
     * @param modelFileName The file name of the parsing model
     */
    public Reranker(String modelFileName) {
        this(new Options(new String[]{"-model", modelFileName}));
    }

    public Reranker() {
    }

    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        Options options = new Options(args);

        NBest = options.best;
        DB.println("n-best " + NBest);

        Runtime runtime = Runtime.getRuntime();
        THREADS = runtime.availableProcessors();
        if (options.cores < THREADS && options.cores > 0) {
            THREADS = options.cores;
        }
        DB.println("Found " + runtime.availableProcessors() + " cores, using " + THREADS);

        if (options.train) {
            Reranker p = new Reranker();
            p.options = options;
            p.l2i = new Long2Int(options.hsize);
            p.pipeReranker = new PipeReranker(options);
            p.pipeReranker.extractor = new ExtractorReranker(p.l2i);

            // initialize the parser
            p.pipe = new Pipe(options);

            // read the parsing models
            p.paramsParsers = new ParametersFloat[options.end + 1];
            for (int m = 0; m <= options.end; m++) {
                String name = options.prefix_model + m;
                p.paramsParsers[m] = new ParametersFloat(0);
                p.readModel(name, p.pipe, p.paramsParsers[m]);
            }

            // set up the reranker
            p.parametersReranker = new ParametersFloat(p.l2i.size());

            Instances[] iss = new Instances[options.end + 1];
            for (int m = 0; m <= options.end; m++) {
                String name = options.prefix_test + m;
                iss[m] = new Instances();
                DB.println("create instances of part " + name);
                p.pipe.getInstances(name, iss[m]);
            }

            ExtractorReranker.initFeatures();
            p.pipeReranker.extractor.init();
            ExtractorReranker.initStat();

            p.train(options, iss);
            p.writeModell(options, p.parametersReranker, null, p.pipe.cl);
        }
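        // Testing: the n-best lists are cached in a zip file whose name is derived
        // from the test file name; the base parsing models only need to be loaded
        // when that cache does not exist yet and the n-best lists must be recomputed.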
        if (options.test) {
            Reranker p = new Reranker();
            p.options = options;

            // set up the reranker
            p.l2i = new Long2Int(options.hsize);
            p.pipeReranker = new PipeReranker(options);
            p.pipeReranker.extractor = new ExtractorReranker(p.l2i);
            p.parametersReranker = new ParametersFloat(p.l2i.size());

            // initialize the parser
            p.pipe = new Pipe(options);

            // read parsing models
            p.paramsParsers = new ParametersFloat[options.end + 1];
            String nbestName = "n-best+" + options.testfile.substring(options.testfile.length() - 12, options.testfile.length() - 1);
            File fnbest = new File(nbestName);
            int read = fnbest.exists() ? 2 : 1;
            if (read != 2) {
                for (int m = 0; m <= options.end; m++) {
                    String name = options.prefix_model + m;
                    p.paramsParsers[m] = new ParametersFloat(0);
                    p.readModel(name, p.pipe, p.paramsParsers[m]);
                }
            }

            p.readModel(options.modelName, p.pipeReranker, p.parametersReranker);

            ExtractorReranker.initFeatures();
            ExtractorReranker.initStat();
            p.pipeReranker.extractor.init();

            p.rerankedParses(options, p.pipe, p.parametersReranker, false, nbestName);
        }

        is2.parser.Parser.out.println();
        if (options.eval) {
            is2.parser.Parser.out.println("\nEVALUATION PERFORMANCE:");
            ParserEvaluator.evaluate(options.goldfile, options.outfile);
        }

        long end = System.currentTimeMillis();
        is2.parser.Parser.out.println("used time " + ((float) ((end - start) / 100) / 10));

        Decoder.executerService.shutdown();
        Pipe.executerService.shutdown();
        is2.parser.Parser.out.println("end.");
    }

    /**
     * Read a parsing model and its mappings
     *
     * @param modelName the file name of the model
     * @param pipe
     * @param prm
     * @throws IOException
     */
    public void readModel(String modelName, Pipe pipe, Parameters prm) throws IOException {
        DB.println("Reading data started: " + modelName);

        // prepare the zipped reader
        ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(modelName)));
        zis.getNextEntry();
        try (DataInputStream dis = new DataInputStream(new BufferedInputStream(zis))) {
            pipe.mf.read(dis);
            pipe.cl = new Cluster(dis);
            prm.read(dis);

            Long2Int l2i = new Long2Int(prm.size());
            DB.println("l2i size " + l2i.size());

            pipe.extractor = new ExtractorClusterStacked[THREADS];
            for (int t = 0; t < THREADS; t++) {
                pipe.extractor[t] = new ExtractorClusterStacked(l2i);
            }
            ExtractorClusterStacked.initFeatures();
            for (int t = 0; t < THREADS; t++) {
                pipe.extractor[t].initStat();
                pipe.extractor[t].init();
            }

            Edges.read(dis);
            options.decodeProjective = dis.readBoolean();
            ExtractorClusterStacked.maxForm = dis.readInt();

            boolean foundInfo = false;
            try {
                String info;
                int icnt = dis.readInt();
                for (int i = 0; i < icnt; i++) {
                    info = dis.readUTF();
                    is2.parser.Parser.out.println(info);
                }
            } catch (Exception e) {
                if (!foundInfo) {
                    is2.parser.Parser.out.println("no info about training");
                }
            }
        }
        DB.println("Reading data finished");

        Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH;
        // ExtractorClusterStacked.initStat();
    }
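    /*
     * Model file layout (read by the two readModel methods and written by
     * writeModell): a zip stream with a single "data" entry that contains, in this
     * order, the feature mapping, the cluster, the parameter vector, the edge
     * filter (Edges), the decode-projective flag, the maxForm value, and a count
     * of info strings followed by the strings themselves.
     */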
    /**
     * Read the reranker model and its mappings
     *
     * @param modelName the file name of the model
     * @param pipe
     * @param params
     * @throws IOException
     */
    public void readModel(String modelName, PipeReranker pipe, Parameters params) throws IOException {
        DB.println("Reading data started: " + modelName);

        // prepare the zipped reader
        ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(modelName)));
        zis.getNextEntry();
        try (DataInputStream dis = new DataInputStream(new BufferedInputStream(zis))) {
            pipe.mf.read(dis);
            // DB.println("reranker model "+pipe.mf.toString());
            pipe.cl = new Cluster(dis);
            params.read(dis);

            this.l2i = new Long2Int(params.size());
            DB.println("l2i size " + l2i.size());

            pipe.extractor = new ExtractorReranker(l2i);
            ExtractorReranker.initFeatures();
            ExtractorReranker.initStat();
            pipe.extractor.init();

            Edges.read(dis);
            options.decodeProjective = dis.readBoolean();
            ExtractorClusterStacked.maxForm = dis.readInt();

            boolean foundInfo = false;
            try {
                String info;
                int icnt = dis.readInt();
                for (int i = 0; i < icnt; i++) {
                    info = dis.readUTF();
                    is2.parser.Parser.out.println(info);
                }
            } catch (Exception e) {
                if (!foundInfo) {
                    is2.parser.Parser.out.println("no info about training");
                }
            }
        }
        DB.println("Reading data finished");

        Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH;
        // ExtractorClusterStacked.initStat();
    }

    /**
     * Do the training
     *
     * @param options
     * @param iss the training instances of the parts
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     */
    public void train(OptionsSuper options, Instances[] iss) throws IOException, InterruptedException, ClassNotFoundException {
        int read = 0; // 0 compute -- 1 compute and write -- 2 read parses
        DB.println("Training Information ");
        DB.println("-------------------- ");

        ExtractorReranker.initStat();
        pipeReranker.extractor.init();
        for (Extractor e : this.pipe.extractor) {
            e.init();
        }

        int numInstances = 0;
        int maxLenInstances = 0;
        // int maxLenSentence=1;
        for (Instances is : iss) {
            numInstances += is.size();
            for (int i = 0; i < is.size(); i++) {
                if (maxLenInstances < is.length(i)) {
                    maxLenInstances = is.length(i);
                }
            }
        }

        DataF data = new DataF(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue());

        int iter = 0;
        float error;
        float f1;
        double upd = (double) (options.count * options.numIters) + options.numIters * 10;

        //float[][] = new float[this.NBest][3];
        FVR act = new FVR();
        FVR pred = new FVR();
        FVR f = new FVR();

        long[] vs = new long[ExtractorReranker._FC * maxLenInstances];

        for (; iter < options.numIters; iter++) {
            is2.parser.Parser.out.print("Iteration " + iter + ": ");
            error = 0;
            f1 = 0;
            float las = 0, cnt = 0, averageScore = 0;
            float firstBestTotalError = 0, totalError = 0;
            long start = System.currentTimeMillis();
            long last = System.currentTimeMillis();
            long rerankTime = 0;

            String nbest = "n-best";
            File fnbest = new File(nbest);
            read = fnbest.exists() ? 2 : 1;

            DataInputStream dis = null;
            DataOutputStream dos = null;
            if (read == 1) {
                DB.println("computing and writing n-best list to file: " + nbest);
                ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(nbest)));
                zos.putNextEntry(new ZipEntry("data"));
                dos = new DataOutputStream(new BufferedOutputStream(zos));
            }
            // start reading again
            if (read == 2) {
                // DB.println("reading nbest list from file: "+nbest);
                ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbest)));
                zis.getNextEntry();
                dis = new DataInputStream(new BufferedInputStream(zis));
            }

            HashMap<Integer, Integer> remapped = new HashMap<>();
            int i = 0, round = 0, instance = 0, length = 0, count = 0, changes = 0;

            for (Instances is : iss) {
                F2SF fparser = this.paramsParsers[instance].getFV();
                round++;

                // go over the sentences in the instance
                for (int n = 0; n < is.size(); n++) {
                    count += 1;
                    length += is.length(n);
                    upd--;

                    if (is.labels[n].length > options.maxLen) {
                        continue;
                    }

                    List<ParseNBest> parses = null;
                    short pos[] = is.pposs[n];

                    // read or write the n-best list
                    if (read == 0 || read == 1 && iter == 0) {
                        data = pipe.fillVector(fparser, is, n, data, pipe.cl, THREADS, l2i);
                        parses = Decoder.decode(pos, data, options.decodeProjective, pipe.extractor[0]);
                        if (parses.size() > NBest) {
                            parses = parses.subList(0, NBest);
                        }
                        if (read == 1) {
                            // write the forest
                            dos.writeInt(parses.size());
                            for (int k = 0; k < parses.size(); k++) {
                                dos.writeUTF(parses.get(k).signature());
                                dos.writeFloat((float) parses.get(k).f1);
                            }
                        }
                    } else if (read == 2) {
                        parses = new ArrayList<>();
                        int parseCount = dis.readInt();
                        for (int k = 0; k < parseCount; k++) {
                            ParseNBest p = new ParseNBest(dis.readUTF(), dis.readFloat());
                            if (parses.size() < NBest) {
                                parses.add(p);
                            }
                        }
                    }

                    int best = 0;
                    float bestScore = -100;
                    int goldBest = 0;
                    double goldError = Float.MAX_VALUE;
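                    // Build the reranker feature vector for each candidate: every feature
                    // code in vs is mapped through li.l2i() after adding the offset
                    // ExtractorReranker.s_type, and the parser score p.f1 is added as the
                    // value of one additional feature, so the reranker weights the base
                    // parser score against its own structural features.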
                    long startReranking = System.currentTimeMillis();

                    // score the n-best parses with the reranker model
                    for (int k = 0; k < parses.size(); k++) {
                        ParseNBest p = parses.get(k);
                        pipeReranker.extractor.extractFeatures(is, n, p, parses.indexOf(p), vs, pipe.cl);

                        int rank = 1 * ExtractorReranker.s_type;
                        f.clear();
                        for (int j = 0; j < vs.length; j++) {
                            if (vs[j] == Integer.MIN_VALUE) { break; }
                            if (vs[j] > 0) { f.add(pipeReranker.extractor.li.l2i(vs[j] + rank)); }
                        }
                        f.add(pipeReranker.extractor.li.l2i(1 + rank), (float) p.f1);

                        float score = (float) (parametersReranker.getScore(f));
                        if (score > bestScore) { //rankScore[k][2]>
                            bestScore = score;
                            best = k;
                        }
                    }

                    // find the oracle parse, i.e. the parse with the smallest error in the n-best list
                    for (int k = 0; k < parses.size(); k++) {
                        if (parses.get(k).heads.length != is.length(n)) {
                            DB.println("error " + n + " " + parses.get(k).heads.length + " " + is.length(n));
                            continue;
                        }
                        double errg = pipe.errors(is, n, parses.get(k));
                        if (goldError > errg) {
                            goldError = errg;
                            goldBest = k;
                        }
                    }

                    ParseNBest firstBest = parses.get(0);
                    ParseNBest predParse = parses.get(best);
                    ParseNBest goldBestParse = parses.get(goldBest);

                    double e = pipe.errors(is, n, predParse);

                    Integer ctb = remapped.get(best);
                    if (ctb == null) {
                        remapped.put(best, 1);
                    } else {
                        remapped.put(best, ctb + 1);
                    }

                    String info = " 1best-error " + ((length - firstBestTotalError) / length)
                            + " reranked " + ((length - totalError) / length)
                            + " chd " + changes + " "
                            + " ps las " + (las / cnt)
                            + " avs " + ((float) averageScore / (float) count) + " ";
                    if ((n + 1) % 500 == 0) {
                        PipeGen.outValueErr(count, Math.round(error * 1000) / 1000, f1 / count, last, upd, info);
                    }

                    firstBestTotalError += Decoder.getError(firstBest, is, n, Decoder.LAS);
                    totalError += Decoder.getError(predParse, is, n, Decoder.LAS);
                    rerankTime += System.currentTimeMillis() - startReranking;
                    if (best != 0) {
                        changes++;
                    }

                    las += is.length(n) - Decoder.getError(goldBestParse, is, n, Decoder.LAS);
                    cnt += is.length(n);
                    averageScore += predParse.f1;

                    if (options.count < count) {
                        break;
                    }

                    // update only if the oracle parse is strictly better than the predicted one
                    if (Decoder.getError(goldBestParse, is, n, Decoder.LAS) >= Decoder.getError(predParse, is, n, Decoder.LAS)) {
                        continue;
                    }

                    // feature vector of the predicted parse
                    pipeReranker.extractor.extractFeatures(is, n, predParse, parses.indexOf(predParse), vs, pipe.cl);
                    pred.clear();
                    int rank = 1 * ExtractorReranker.s_type;
                    for (int j = 0; j < vs.length; j++) {
                        if (vs[j] == Integer.MIN_VALUE) { break; }
                        if (vs[j] > 0) { pred.add(pipeReranker.extractor.li.l2i(vs[j] + rank)); }
                    }
                    pred.add(pipeReranker.extractor.li.l2i(1 + rank), (float) predParse.f1);

                    error += 1;

                    // feature vector of the oracle parse
                    pipeReranker.extractor.extractFeatures(is, n, goldBestParse, parses.indexOf(goldBestParse), vs, pipe.cl);
                    act.clear();
                    rank = 1 * ExtractorReranker.s_type;
                    for (int j = 0; j < vs.length; j++) {
                        if (vs[j] == Integer.MIN_VALUE) { break; }
                        if (vs[j] > 0) { act.add(pipeReranker.extractor.li.l2i(vs[j] + rank)); }
                    }
                    act.add(pipeReranker.extractor.li.l2i(1 + rank), (float) goldBestParse.f1);

                    // margin-based update towards the oracle parse
                    float lam_dist = (float) (parametersReranker.getScore(act) - (parametersReranker.getScore(pred)));
                    parametersReranker.update(act, pred, is, n, null, upd, e, lam_dist);
                }
                instance++;
            }
            String info = " td " + ((Decoder.timeDecotder) / 1000000F) + " tr " + ((Decoder.timeRearrange) / 1000000F)
                    + " te " + ((Pipe.timeExtract) / 1000000F) + " nz " + parametersReranker.countNZ()
                    + " 1best-error " + ((length - firstBestTotalError) / length)
                    + " reranked-best " + ((length - totalError) / length)
                    + " rds " + round + " "
                    + " rerank-t " + (rerankTime / count)
                    + " chd " + changes + " "
                    + " ps las " + (las / cnt)
                    + " avs " + ((float) averageScore / (float) count) + " ";
            // DB.println("remapped "+remapped);
            PipeGen.outValueErr(count, Math.round(error * 1000) / 1000, f1 / count, last, upd, info);

            long end = System.currentTimeMillis();
            is2.parser.Parser.out.println(" time:" + (end - start));
            i++;

            // ParametersFloat pf = params.average2((iter+1)*is.size());
            Decoder.timeDecotder = 0;
            Decoder.timeRearrange = 0;
            Pipe.timeExtract = 0;

            if (dos != null) {
                dos.close();
            }
            if (dis != null) {
                dis.close();
            }
        }

        DB.println("sb " + parametersReranker.parameters[this.pipeReranker.extractor.li.l2i(4090378920L + 1 * ExtractorReranker.s_type)]); //4090378266
        parametersReranker.average(iter * numInstances);
    }

    /**
     * Parse the test file, rerank the n-best parses and write the highest-scoring parses
     *
     * @param options
     * @param pipe
     * @param params
     * @param maxInfo
     * @param nbestName the name of the n-best cache file
     * @throws Exception
     */
    private void rerankedParses(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, String nbestName) throws Exception {
        long start = System.currentTimeMillis();
        ExtractorClusterStacked.initFeatures();

        DataInputStream dis = null;
        DataOutputStream dos = null;
        float olas = 0, olcnt = 0;

        File fnbest = new File(nbestName);
        int read = fnbest.exists() ? 2 : 1;
        if (read == 1) {
            DB.println("computing and writing n-best list to file: " + nbestName);
            ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(nbestName)));
            zos.putNextEntry(new ZipEntry("data"));
            dos = new DataOutputStream(new BufferedOutputStream(zos));
        }
        if (read == 2) {
            // DB.println("reading nbest list from file: "+nbestName);
            ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbestName)));
            zis.getNextEntry();
            dis = new DataInputStream(new BufferedInputStream(zis));
        }

        for (int m = 0; m < this.paramsParsers.length; m++) {
            CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
            CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask);

            float las = 0, lcnt = 0, averageScore = 0;
            int cnt = 0;
            int del = 0;
            long last = System.currentTimeMillis();

            String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
            for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) {
                types[e.getValue()] = e.getKey();
            }

            is2.parser.Parser.out.print("Processing Sentence: ");
            FVR f = new FVR();

            while (true) {
                Instances is = new Instances();
                is.init(1, new MFB(), options.formatTask);
                SentenceData09 instance = pipe.nextInstance(is, depReader);
                if (instance == null) {
                    break;
                }
                cnt++;
                String[] forms = instance.forms;

                List<ParseNBest> parses = null;

                // read or write the n-best list
                if (read == 0 || read == 1) {
                    parses = this.parseNBest(instance, this.paramsParsers[m]);
                    // data = pipe.fillVector(fparser, is, n, data, pipe.cl,THREADS);
                    // parses = Decoder.decode(pos, data, options.decodeProjective);
                    if (parses.size() > NBest) {
                        parses = parses.subList(0, NBest);
                    }
                    if (read == 1) {
                        // write the forest
                        dos.writeInt(parses.size());
                        for (int k = 0; k < parses.size(); k++) {
                            dos.writeUTF(parses.get(k).signature());
                            dos.writeFloat((float) parses.get(k).f1);
                        }
                    }
                } else if (read == 2) {
                    parses = new ArrayList<>();
                    int parseCount = dis.readInt();
                    for (int k = 0; k < parseCount; k++) {
                        ParseNBest p = new ParseNBest(dis.readUTF(), dis.readFloat());
                        if (parses.size() < NBest) {
                            parses.add(p);
                        }
                    }
                }
                nbest += parses.size();
                //List<ParseNBest> parses = this.parseNBest(instance, this.paramsParsers[m]);

                long vs[] = new long[ExtractorReranker._FC * is.length(0)];
                float bestScore = 0;
                int best = 0;
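                // rerank: score each candidate with the reranker model, using the same
                // feature representation as in training, and keep the highest-scoring parse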
                for (int k = 0; k < parses.size(); k++) {
                    ParseNBest p = parses.get(k);
                    pipeReranker.extractor.extractFeatures(is, 0, p, k, vs, pipeReranker.cl);

                    int rank = 1 * ExtractorReranker.s_type;
                    f.clear();
                    for (int j = 0; j < vs.length; j++) {
                        if (vs[j] == Integer.MIN_VALUE) { break; }
                        if (vs[j] > 0) { f.add(pipeReranker.extractor.li.l2i(vs[j] + rank)); }
                    }
                    f.add(pipeReranker.extractor.li.l2i(1 + rank), (float) p.f1);

                    float score = (float) (parametersReranker.getScore(f));
                    if (score > bestScore) { //rankScore[k][2]>
                        bestScore = score;
                        best = k;
                    }
                }

                // change to the best parse
                ParseNBest d = parses.get(best);

                las += (is.length(0) - 1) - Decoder.getError(d, is, 0, Decoder.LAS);
                lcnt += is.length(0) - 1;
                averageScore += d.f1;

                SentenceData09 i09 = new SentenceData09(instance);
                i09.createSemantic(instance);
                for (int j = 0; j < forms.length - 1; j++) {
                    i09.plabels[j] = types[d.labels[j + 1]];
                    i09.pheads[j] = d.heads[j + 1];
                }
                depWriter.write(i09);

                String info = "" + ((float) (averageScore / (float) cnt)) + " ";
                if (cnt % 10 == 0) {
                    PipeGen.outValueErr(cnt, lcnt - las, las / lcnt, last, 0, info); //outValue(cnt, del,last, info);
                }
            }
            //pipe.close();
            depWriter.finishWriting();

            long end = System.currentTimeMillis();
            DB.println("rank\n" + rank + "\n");
            DB.println("x-best-las " + amongxbest + " x-best-ula " + amongxbest_ula + " cnt " + cnt
                    + " x-best-las " + ((float) ((float) amongxbest / cnt))
                    + " x-best-ula " + ((float) ((float) amongxbest_ula / cnt))
                    + " nbest " + ((float) nbest / cnt)
                    + " 1best " + ((float) (rank.get(0) == null ? 0 : rank.get(0)) / cnt)
                    + " best-proj " + ((float) bestProj / cnt)
                    + " Sum LAS " + ((float) this.smallestErrorSum / countAllNodes) + " "
                    + "" + (las / lcnt));
            // DB.println("errors "+error);

            olas += las;
            olcnt += lcnt;

            rank.clear();
            amongxbest = 0;
            amongxbest_ula = 0;
            nbest = 0;
            bestProj = 0;

            if (maxInfo) {
                is2.parser.Parser.out.println("Used time " + (end - start));
            }
            if (maxInfo) {
                is2.parser.Parser.out.println("forms count " + Instances.m_count + " unknown " + Instances.m_unkown);
            }
        }

        if (dos != null) {
            dos.flush();
            dos.close();
        }
        if (dis != null) {
            dis.close();
        }
        DB.println("\n overall las " + (olas / olcnt));
    }

    /**
     * Compute the n-best parses for the sentences of the test file
     *
     * @param options
     * @param pipe
     * @param params
     * @param maxInfo
     * @throws Exception
     */
    private void getNBest(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception {
        CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
        ExtractorClusterStacked.initFeatures();

        int cnt = 0;

        String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
        for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) {
            types[e.getValue()] = e.getKey();
        }

        // Parser.out.print("Processing Sentence: ");
        while (true) {
            Instances is = new Instances();
            is.init(1, new MFB(), options.formatTask);
            SentenceData09 instance = pipe.nextInstance(is, depReader);
            if (instance == null) {
                break;
            }
            cnt++;
            this.parseNBest(instance, this.paramsParsers[0]);
        }
        //pipe.close();
        // depWriter.finishWriting();
        // long end = System.currentTimeMillis();
        // DB.println("rank\n"+rank+"\n");
        // DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las "
        //         +((float)((float)amongxbest/cnt))+
        //         " x-best-ula "+((float)((float)amongxbest_ula/cnt))+
        //         " nbest "+((float)nbest/cnt)+
        //         " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+
        //         " best-proj "+((float)bestProj/cnt));
        // DB.println("errors "+error);
    }
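    /**
     * Parse a single sentence with the given parameters and collect the n-best
     * oracle statistics (rank of the gold parse, best projective parse, smallest
     * error sum) that rerankedParses reports.
     *
     * @param instance the sentence
     * @param params the parameters of the parsing model
     * @return the sentence annotated with the first-best parse, or null if feature
     *         extraction fails
     * @throws IOException
     */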
    public SentenceData09 parse(SentenceData09 instance, ParametersFloat params) throws IOException {
        String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
        for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) {
            types[e.getValue()] = e.getKey();
        }

        Instances is = new Instances();
        is.init(1, new MFB(), options.formatTask);
        new CONLLReader09().insert(is, instance);

        String[] forms = instance.forms;

        // use for the training ppos
        DataF d2;
        try {
            d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl, THREADS, l2i); //cnt-1
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        short[] pos = is.pposs[0];

        List<ParseNBest> parses = null;
        Parse d = null;
        try {
            parses = Decoder.decode(pos, d2, options.decodeProjective, pipe.extractor[0]); //cnt-1
            d = parses.get(0);
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (parses.size() > NBest) {
            parses = parses.subList(0, NBest);
        }

        // collect oracle statistics over the n-best list
        int g_las = Decoder.getGoldRank(parses, is, 0, Decoder.LAS);
        int g_ula = Decoder.getGoldRank(parses, is, 0, !Decoder.LAS);

        int smallest = Decoder.getSmallestError(parses, is, 0, !Decoder.LAS);
        smallestErrorSum += is.length(0) - smallest;
        countAllNodes += is.length(0);

        if (g_las >= 0) {
            amongxbest++;
        }
        if (g_ula >= 0) {
            amongxbest_ula++;
        }
        nbest += parses.size();

        Integer r = rank.get(g_las);
        if (r == null) {
            rank.put(g_las, 1);
        } else {
            rank.put(g_las, r + 1);
        }

        float err = (float) this.pipe.errors(is, 0, d);
        float errBestProj = (float) this.pipe.errors(is, 0, Decoder.bestProj);
        if (errBestProj == 0) {
            bestProj++;
        }

        SentenceData09 i09 = new SentenceData09(instance);
        i09.createSemantic(instance);
        for (int j = 0; j < forms.length - 1; j++) {
            i09.plabels[j] = types[d.labels[j + 1]];
            i09.pheads[j] = d.heads[j + 1];
        }
        return i09;
    }

    /**
     * Compute the n-best parses of a sentence with one of the parsing models
     *
     * @param instance the sentence
     * @param params the parameters of the parsing model
     * @return the n-best parses, at most NBest
     * @throws IOException
     */
    public List<ParseNBest> parseNBest(SentenceData09 instance, ParametersFloat params) throws IOException {
        Instances is = new Instances();
        is.init(1, new MFB(), options.formatTask);
        new CONLLReader09().insert(is, instance);

        // use for the training ppos
        DataF d2;
        try {
            d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl, THREADS, l2i); //cnt-1
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        short[] pos = is.pposs[0];

        List<ParseNBest> parses = null;
        try {
            parses = Decoder.decode(pos, d2, options.decodeProjective, pipe.extractor[0]); //cnt-1
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (parses.size() > NBest) {
            parses = parses.subList(0, NBest);
        }
        return parses;
    }

    /*
     * (non-Javadoc) @see is2.tools.Tool#apply(is2.data.SentenceData09)
     */
    @Override
    public SentenceData09 apply(SentenceData09 snt09) {
        try {
            parse(snt09, this.parametersReranker);
        } catch (Exception e) {
            e.printStackTrace();
        }
        Decoder.executerService.shutdown();
        Pipe.executerService.shutdown();
        return snt09;
    }
    /**
     * Write the reranking model
     *
     * @param options
     * @param params
     * @param extension
     * @param cs
     * @throws FileNotFoundException
     * @throws IOException
     */
    private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException {
        String name = extension == null ? options.modelName : options.modelName + extension;
        DB.println("Writing model: " + name);

        ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name)));
        zos.putNextEntry(new ZipEntry("data"));
        DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));

        MFB.writeData(dos);
        cs.write(dos);
        params.write(dos);
        Edges.write(dos);

        dos.writeBoolean(options.decodeProjective);
        dos.writeInt(ExtractorClusterStacked.maxForm);

        dos.writeInt(5); // number of info strings
        dos.writeUTF("Used parser " + Reranker.class.toString());
        dos.writeUTF("Creation date " + (new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date()));
        dos.writeUTF("Training data " + options.trainfile);
        dos.writeUTF("Iterations " + options.numIters + " Used sentences " + options.count);
        dos.writeUTF("Cluster " + options.clusterFile);

        dos.flush();
        dos.close();
    }
}