package edu.stanford.nlp.dcoref;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.TestCase;

import edu.stanford.nlp.util.StringUtils;

/**
 * Slow integration test that runs the sieve coreference system over the
 * CoNLL 2011 dev annotations configured in {@link #makePropsFile} and checks
 * the averaged F1 reported by an external CoNLL scorer script against a
 * recorded reference value.
 */
public class DcorefSlowITest extends TestCase {

  /** Extracts the F1 percentage from each "Coreference: ... F1: xx.xx%" line of the scorer summary. */
  private static final Pattern F1_PATTERN = Pattern.compile("Coreference:.*F1: (.*)%");

  /**
   * Index of the last F1 value used by {@link #getFinalScore(String)}.
   * NOTE(review): presumably the scorer reports MUC, B^3, CEAF-M, CEAF-E, BLANC
   * in that order, making index 3 CEAF-E -- confirm against the scorer output.
   */
  private static final int LAST_USED_F1_INDEX = 3;

  /**
   * Writes the dcoref properties file used by a test run.
   *
   * @param path    where the properties file is written
   * @param workDir working directory; the dcoref log file is placed inside it
   * @param scorer  path of the CoNLL scorer script to configure
   * @throws IOException if the file cannot be opened or written
   */
  protected void makePropsFile(String path, String workDir, String scorer) throws IOException {
    // try-with-resources guarantees the file handle is released even if a write fails.
    try (PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(path)))) {
      pw.println("annotators = pos, lemma, ner, parse");
      // Original note: "WordNet is moved to more". The active sieve list below omits
      // AliasMatch and LexicalChainMatch; the previous, fuller list is kept for reference:
      // pw.println("dcoref.sievePasses = MarkRole, DiscourseMatch, ExactStringMatch, RelaxedExactStringMatch, PreciseConstructs, StrictHeadMatch1, StrictHeadMatch2, StrictHeadMatch3, StrictHeadMatch4, AliasMatch, RelaxedHeadMatch, LexicalChainMatch, PronounMatch");
      pw.println("dcoref.sievePasses = MarkRole, DiscourseMatch, ExactStringMatch, RelaxedExactStringMatch, PreciseConstructs, StrictHeadMatch1, StrictHeadMatch2, StrictHeadMatch3, StrictHeadMatch4, RelaxedHeadMatch, PronounMatch");
      pw.println("dcoref.score = true");
      pw.println("dcoref.postprocessing = true");
      pw.println("dcoref.maxdist = -1");
      pw.println("dcoref.replicate.conll = true");
      pw.println("dcoref.conll.scorer = " + scorer);
      pw.println("dcoref.conll2011 = /scr/nlp/data/conll-2011/v2/data/dev/data/english/annotations");
      pw.println("dcoref.logFile = "+workDir + File.separator + "log.txt");
      // PrintWriter never throws on write errors; surface them explicitly.
      if (pw.checkError()) {
        throw new IOException("Error writing props file " + path);
      }
    }
  }

  /** Runs dcoref with the v4 scorer and compares the averaged F1 with the recorded value. */
  public void testDcorefCoNLLResultV4() throws Exception {
    double finalScore = runDcoref("/scr/nlp/data/conll-2011/scorer/v4/scorer.pl");
    System.out.printf("Final Score (CoNLL 2011, scorer v4): (MUC+B^3+ceafe)/3 = %.2f%n", finalScore);
    assertEquals(59.3, finalScore, 0.3); // 2016-07: 59.45
  }

  /** Runs dcoref with the v8.01 scorer and compares the averaged F1 with the recorded value. */
  public void testDcorefCoNLLResultV801() throws Exception {
    double finalScore = runDcoref("/scr/nlp/data/conll-2012/scorer/v8.01/scorer.pl");
    System.out.printf("Final Score (CoNLL 2011, scorer v8): (MUC+B^3+ceafe)/3 = %.2f%n", finalScore);
    assertEquals(54.0, finalScore, 0.3); // 2016-07: 54.13
  }

  /**
   * Creates a fresh working directory, writes the properties file into it,
   * runs the coreference system and returns the averaged F1 score.
   *
   * @param scorer path of the CoNLL scorer script
   * @return the value computed by {@link #getFinalScore(String)} from the scorer summary
   * @throws Exception if the working directory cannot be created or the run fails
   */
  protected double runDcoref(String scorer) throws Exception {
    // Files.createTempDirectory creates the directory atomically, avoiding the
    // createTempFile -> delete -> mkdir window in which another process could claim the path.
    final File workDirFile = Files.createTempDirectory("DcorefITest").toFile();
    final String workDir = workDirFile.getPath();
    final String propsPath = workDir + File.separator + "coref.properties";

    System.err.println("Working in directory " + workDir);
    workDirFile.deleteOnExit();

    makePropsFile(propsPath, workDir, scorer);
    System.out.println("Made props file " + propsPath);

    Properties props = StringUtils.argsToProperties("-props", propsPath);
    SieveCoreferenceSystem corefSystem = new SieveCoreferenceSystem(props);

    String returnMsg = runCorefSystem(corefSystem, props, workDir);
    System.out.println(returnMsg);

    return getFinalScore(returnMsg);
  }

  /**
   * Runs coreference over every document produced by the CoNLL mention extractor,
   * writing gold and predicted CoNLL output files into {@code workDir}, then
   * invokes the scorer on the two files.
   *
   * @param corefSystem the configured coreference system
   * @param props       properties handed to the mention extractor
   * @param workDir     directory that receives the two CoNLL output files
   * @return the scorer summary returned by {@code SieveCoreferenceSystem.getConllEvalSummary}
   * @throws Exception if extraction, resolution, output or scoring fails
   */
  private static String runCorefSystem(SieveCoreferenceSystem corefSystem, Properties props, String workDir) throws Exception {
    String conllOutputMentionGoldFile = workDir + File.separator+"conlloutput.gold.txt";
    String conllOutputMentionCorefPredictedFile = workDir + File.separator+ "conlloutput.coref.predicted.txt";

    // Both writers are closed (and therefore flushed) before the scorer reads the
    // files, and are released even if processing a document throws.
    try (PrintWriter writerGold = new PrintWriter(new FileOutputStream(conllOutputMentionGoldFile));
         PrintWriter writerPredictedCoref = new PrintWriter(new FileOutputStream(conllOutputMentionCorefPredictedFile))) {
      MentionExtractor mentionExtractor = new CoNLLMentionExtractor(corefSystem.dictionaries(), props, corefSystem.semantics());

      for (Document document; (document = mentionExtractor.nextDoc()) != null; ) {
        document.extractGoldCorefClusters();
        SieveCoreferenceSystem.printConllOutput(document, writerGold, true);

        corefSystem.coref(document); // Do Coreference Resolution
        SieveCoreferenceSystem.printConllOutput(document, writerPredictedCoref, false, true);
      }

      // PrintWriter hides write failures; a truncated file would silently skew the score.
      if (writerGold.checkError()) {
        throw new IOException("Error writing " + conllOutputMentionGoldFile);
      }
      if (writerPredictedCoref.checkError()) {
        throw new IOException("Error writing " + conllOutputMentionCorefPredictedFile);
      }
    }

    return SieveCoreferenceSystem.getConllEvalSummary(corefSystem.conllMentionEvalScript,
        conllOutputMentionGoldFile, conllOutputMentionCorefPredictedFile);
  }

  /**
   * Get the average score: (MUC + B^3 + CEAF_E)/3.
   * Takes the first, second and fourth F1 values matched in the summary.
   *
   * @param summary scorer summary text containing "Coreference: ... F1: xx%" lines
   * @return the mean of the three selected F1 values
   * @throws IllegalStateException if the summary contains fewer than four F1 values
   * @throws NumberFormatException if a matched F1 value is not a valid number
   */
  private static double getFinalScore(String summary) {
    List<Double> f1Scores = new ArrayList<>();
    Matcher f1Matcher = F1_PATTERN.matcher(summary);
    while (f1Matcher.find()) {
      f1Scores.add(Double.parseDouble(f1Matcher.group(1)));
    }

    // Fail loudly instead of silently averaging in zeros for missing metrics.
    if (f1Scores.size() <= LAST_USED_F1_INDEX) {
      throw new IllegalStateException("Expected at least " + (LAST_USED_F1_INDEX + 1)
          + " F1 values in scorer summary but found " + f1Scores.size() + ": " + summary);
    }

    return (f1Scores.get(0) + f1Scores.get(1) + f1Scores.get(LAST_USED_F1_INDEX)) / 3;
  }

}