package edu.stanford.nlp.coref;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.stanford.nlp.io.StringOutputStream;
import edu.stanford.nlp.util.SystemUtils;
import edu.stanford.nlp.util.logging.Redwood;

/**
 * Utilities for running coref evaluation scripts and printing the results
 * @author Heeyoung Lee
 * @author Kevin Clark
 */
public class CorefScorer {

  /** Matches decimal numbers that carry three or more fractional digits. */
  private static final Pattern LONG_DECIMAL_PATTERN = Pattern.compile("\\d+\\.\\d\\d\\d+");

  /** Captures the F1 percentage on a "Coreference:" line of the scorer summary. */
  private static final Pattern F1_PATTERN = Pattern.compile("Coreference:.*F1: (.*)%");

  /** Maximum number of F1 values read from a summary; further matches are ignored. */
  private static final int MAX_F1_SCORES = 5;

  /**
   * Creates the two-decimal formatter used for scores.
   * The symbols are pinned to {@link Locale#ROOT} so the decimal separator is always
   * {@code '.'}; the formatted text is later re-parsed with {@link Double#parseDouble}.
   * A fresh instance is returned on every call because {@link DecimalFormat} is not thread-safe.
   */
  private static DecimalFormat newScoreFormat() {
    return new DecimalFormat("#.##", DecimalFormatSymbols.getInstance(Locale.ROOT));
  }

  /**
   * Runs the evaluation script and returns its textual output.
   * The script is invoked as {@code evalScript all goldFile predictFile none}.
   * Anything the script writes to its error stream is appended to the summary after an
   * {@code "\nERROR: "} marker. Every number with three or more fractional digits is then
   * rewritten with at most two fractional digits.
   *
   * @param evalScript path of the scorer executable
   * @param goldFile path of the gold annotation file handed to the scorer
   * @param predictFile path of the predicted annotation file handed to the scorer
   * @return the script output (plus error output, if any) with long decimals shortened
   * @throws IOException declared for callers; kept for backward compatibility
   */
  public static String getEvalSummary(String evalScript,
      String goldFile, String predictFile) throws IOException {
    ProcessBuilder process = new ProcessBuilder(evalScript, "all", goldFile, predictFile, "none");
    StringOutputStream errSos = new StringOutputStream();
    StringOutputStream outSos = new StringOutputStream();
    // try-with-resources flushes and closes both writers even if the run fails
    try (PrintWriter out = new PrintWriter(outSos);
         PrintWriter err = new PrintWriter(errSos)) {
      SystemUtils.run(process, out, err);
    }

    String summary = outSos.toString();
    String errStr = errSos.toString();
    if (!errStr.isEmpty()) {
      summary += "\nERROR: " + errStr;
    }

    // Rewrite every long decimal in one left-to-right pass. The matched text is never
    // reused as a regex, so '.' cannot act as a wildcard and hit unrelated text.
    DecimalFormat df = newScoreFormat();
    Matcher matcher = LONG_DECIMAL_PATTERN.matcher(summary);
    StringBuffer rounded = new StringBuffer(summary.length());
    while (matcher.find()) {
      String shortened = df.format(Double.parseDouble(matcher.group()));
      matcher.appendReplacement(rounded, Matcher.quoteReplacement(shortened));
    }
    matcher.appendTail(rounded);
    return rounded.toString();
  }

  /**
   * Logs selected lines of a scorer summary through {@link Redwood}.
   * When {@code afterPostProcessing} is false, only the first line starting with
   * "Identification of Mentions" is logged. Otherwise each line starting with "METRIC" is
   * emitted without a trailing newline (so it is joined with the next emitted line), followed
   * by every line that contains "Recall" and does not start with "Identification of Mentions".
   *
   * @param summary scorer output, lines separated by {@code '\n'}
   * @param logger unused; output goes to {@link Redwood} (parameter kept for compatibility)
   * @param afterPostProcessing selects which lines are logged, as described above
   */
  public static void printScoreSummary(String summary, Logger logger, boolean afterPostProcessing) {
    String[] lines = summary.split("\n");
    if (!afterPostProcessing) {
      for (String line : lines) {
        if (line.startsWith("Identification of Mentions")) {
          Redwood.log(line);
          return;
        }
      }
      return;
    }

    StringBuilder sb = new StringBuilder();
    for (String line : lines) {
      if (line.startsWith("METRIC")) {
        // no newline: the metric name prefixes the following score line
        sb.append(line);
      }
      if (!line.startsWith("Identification of Mentions") && line.contains("Recall")) {
        sb.append(line).append("\n");
      }
    }
    Redwood.log(sb.toString());
  }

  /**
   * Computes the final score as the mean of the first, second and fourth F1 values found
   * on "Coreference:" lines of the summary.
   * NOTE(review): this presumes the scorer reports metrics in the order
   * muc, bcub, ceafm, ceafe, blanc -- confirm against the scorer in use.
   * At most five F1 values are read and extra matches are ignored; F1 values that are absent
   * count as 0.
   *
   * @param summary scorer output as returned by {@code getEvalSummary}
   * @return the average of the selected F1 percentages
   * @throws NumberFormatException if a captured F1 value is not a parseable number
   */
  public static double getFinalConllScore(String summary) {
    Matcher f1Matcher = F1_PATTERN.matcher(summary);
    double[] f1Scores = new double[MAX_F1_SCORES];
    int count = 0;
    // bounded so an unexpectedly long summary cannot overflow the array
    while (count < f1Scores.length && f1Matcher.find()) {
      f1Scores[count++] = Double.parseDouble(f1Matcher.group(1));
    }
    return (f1Scores[0] + f1Scores[1] + f1Scores[3]) / 3;
  }

  /**
   * Logs the final score computed by {@link #getFinalConllScore(String)}, formatted with at
   * most two fractional digits.
   *
   * @param summary scorer output as returned by {@code getEvalSummary}
   */
  public static void printFinalConllScore(String summary) {
    double finalScore = getFinalConllScore(summary);
    Redwood.log(
        "Final conll score ((muc+bcub+ceafe)/3) = " + newScoreFormat().format(finalScore));
  }

  /**
   * Scores the coref output stored in a directory.
   * The base name is taken from the first entry returned by {@link File#listFiles()}: the part
   * of its file name before the first {@code '.'}. The files {@code <base>.gold.txt} and
   * {@code <base>.coref.predicted.txt} inside the directory are then passed to the scorer.
   *
   * @param corefOutputDir directory containing the gold and predicted output files
   * @param scorerPath path of the scorer executable
   * @return the final score, or -1 if the directory cannot be listed or the scorer fails
   *         with an {@link IOException}
   */
  public static double getFinalConllScoreFromOutputDir(String corefOutputDir, String scorerPath) {
    File baseFolder = new File(corefOutputDir);
    File[] filesInBaseFolder = baseFolder.listFiles();
    if (filesInBaseFolder == null) {
      // listFiles() yields null when the path is not a directory or cannot be read
      Redwood.log("Error: failed to get coref score from directory");
      return -1;
    }

    String baseName = corefOutputDir;
    if (filesInBaseFolder.length > 0) {
      String outputFileName = filesInBaseFolder[0].getName();
      baseName = baseName + "/" + outputFileName.split("\\.")[0];
    }
    String goldOutput = baseName + ".gold.txt";
    String afterCorefOutput = baseName + ".coref.predicted.txt";

    try {
      String summary = CorefScorer.getEvalSummary(scorerPath, goldOutput, afterCorefOutput);
      return getFinalConllScore(summary);
    } catch (IOException e) {
      Redwood.log("Error: failed to get coref score from directory");
      return -1;
    }
  }

}