/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.util;
import joshua.zmert.EvaluationMetric;
import java.util.TreeSet;
import java.text.DecimalFormat;
// BUG: try using joshua.util.io.LineReader instead
import java.io.IOException;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
public class JoshuaEval {
final static DecimalFormat f4 = new DecimalFormat("###0.0000");
// if true, evaluation is performed for each candidate translation as
// well as on the entire candidate set
static boolean verbose;
// number of candidate translations
static int numSentences;
// number of reference translations per sentence
static int refsPerSen;
// 0: no normalization, 1: "NIST-style" tokenization, and also rejoin 'm, 're, *'s, 've, 'll, 'd, and n't,
// 2: apply 1 and also rejoin dashes between letters, 3: apply 1 and also drop non-ASCII characters
// 4: apply 1+2+3
static private int textNormMethod;
// refSentences[i][r] is the rth reference translation of the ith sentence
static String[][] refSentences;
// name of evaluation metric
static String metricName;
// options for the evaluation metric (e.g. for BLEU, maxGramLength and effLengthMethod)
static String[] metricOptions;
// the scorer
static EvaluationMetric evalMetric;
// if true, the reference set(s) is (are) evaluated
static boolean evaluateRefs;
// file names for input files. When refsPerSen > 1, refFileName can be
// the name of a single file, or a file name prefix.
static String refFileName;
static String candFileName;
// format of the candidate file: "plain" if one candidate per sentence, and "nbest" if a decoder output
static String candFileFormat;
// if format is nbest, evaluate the r'th candidate of each sentence
static int candRank;
private static void evaluateCands_plain(String inFileName) {
evaluate(candFileName, "plain", 1, 1);
}
private static void evaluateCands_nbest(String inFileName, int testIndex) {
evaluate(candFileName, "nbest", -1, testIndex);
}
private static void evaluateRefSet(int r) {
evaluate(refFileName, "plain", refsPerSen, r);
}
private static void evaluate(String inFileName, String inFileFormat, int candPerSen, int testIndex) {
// candPerSen: how many candidates are provided per sentence?
// (if inFileFormat is nbest, then candPerSen is ignored, since it is variable)
// testIndex: which of the candidates (for each sentence) should be tested?
// e.g. testIndex=1 means first candidate should be evaluated
// testIndex=candPerSen means last candidate should be evaluated
if (inFileFormat.equals("plain") && candPerSen < 1) {
println("candPerSen must be positive for a file in plain format.");
System.exit(30);
}
if (inFileFormat.equals("plain") && (testIndex < 1 || testIndex > candPerSen)) {
println("For the plain format, testIndex must be in [1,candPerSen]");
System.exit(31);
}
String[] topCand_str = new String[numSentences];
// BUG: all of this needs to be replaced with the SegmentFileParser and related interfaces.
try {
// read the candidates
InputStream inStream = new FileInputStream(new File(inFileName));
BufferedReader inFile = new BufferedReader(new InputStreamReader(inStream, "utf8"));
String line, candidate_str;
if (inFileFormat.equals("plain")) {
for (int i = 0; i < numSentences; ++i) {
// skip candidates 1 through testIndex-1
for (int n = 1; n < testIndex; ++n) {
line = inFile.readLine();
}
// read testIndex'th candidate
candidate_str = inFile.readLine();
topCand_str[i] = normalize(candidate_str, textNormMethod);
for (int n = testIndex+1; n <= candPerSen; ++n){
// skip candidates testIndex+1 through candPerSen-1
// (this probably only applies when evaluating a combined reference file)
line = inFile.readLine();
}
} // for (i)
} else { // nbest format
int i = 0;
int n = 1;
line = inFile.readLine();
while (line != null && i < numSentences) {
/*
line format:
.* ||| words of candidate translation . ||| feat-1_val feat-2_val ... feat-numParams_val .*
*/
while (n < candRank) {
line = inFile.readLine();
++n;
}
// at the moment, line stores the candRank'th candidate (1-indexed) of the i'th sentence (0-indexed)
if (line == null) {
println("Not enough candidates in " + inFileName + " to extract the " + candRank + "'th candidate for each sentence.");
println("(Failed to extract one for the " + i + "'th sentence (0-indexed).)");
System.exit(32);
}
int read_i = Integer.parseInt(line.substring(0,line.indexOf(" |||")).trim());
if (read_i == i) {
line = line.substring(line.indexOf("||| ")+4); // get rid of initial text
candidate_str = line.substring(0,line.indexOf(" |||"));
topCand_str[i] = normalize(candidate_str, textNormMethod);
if (i < numSentences-1) {
while (read_i == i) {
line = inFile.readLine();
read_i = Integer.parseInt(line.substring(0,line.indexOf(" |||")).trim());
}
}
n = 1;
i += 1;
} else {
println("Not enough candidates in " + inFileName + " to extract the " + candRank + "'th candidate for each sentence.");
println("(Failed to extract one for the " + i + "'th sentence (0-indexed).)");
System.exit(32);
}
} // while (line != null)
if (i != numSentences) {
println("Not enough candidates were found (i = " + i + "; was expecting " + numSentences + ")");
System.exit(33);
}
} // nbest format
inFile.close();
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.initialize(int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.initialize(int): " + e.getMessage());
System.exit(99902);
}
int[] IA = new int[numSentences];
for (int i = 0; i < numSentences; ++i) { IA[i] = i; }
int[][] SS = evalMetric.suffStats(topCand_str,IA);
int suffStatsCount = evalMetric.get_suffStatsCount();
int[] totStats = new int[suffStatsCount];
for (int s = 0; s < suffStatsCount; ++s) {
totStats[s] = 0;
for (int i = 0; i < numSentences; ++i) {
totStats[s] += SS[i][s];
}
}
evalMetric.printDetailedScore_fromStats(totStats,false);
if (verbose) {
println("");
println("Printing detailed scores for individual sentences...");
for (int i = 0; i < numSentences; ++i) {
print("Sentence #" + i + ": ");
int[] stats = new int[suffStatsCount];
for (int s = 0; s < suffStatsCount; ++s) { stats[s] = SS[i][s]; }
evalMetric.printDetailedScore_fromStats(stats,true);
// already prints a \n
}
}
} // void evaluate(...)
private static void printUsage(int argsLen) {
println("Oops, you provided " + argsLen + " args!");
println("");
println("Usage:");
println(" JoshuaEval [-cand candFile] [-format candFileformat] [-rank r]\n [-ref refFile] [-rps refsPerSen] [-m metricName metric options]\n [-evr evalRefs] [-v verbose]");
println("");
println(" (*) -cand candFile: candidate translations\n [[default: candidates.txt]]");
println(" (*) -format candFileFormat: is the candidate file a plain file (one candidate\n per sentence) or does it contain multiple candidates per sentence as\n a decoder's output)? For the first, use \"plain\". For the second,\n use \"nbest\".\n [[default: plain]]");
println(" (*) -rank r: if format=nbest, evaluate the set of r'th candidates.\n [[default: 1]]");
println(" (*) -ref refFile: reference translations (or file name prefix)\n [[default: references.txt]]");
println(" (*) -rps refsPerSen: number of reference translations per sentence\n [[default: 1]]");
println(" (*) -txtNrm textNormMethod: how should text be normalized?\n (0) don't normalize text,\n or (1) \"NIST-style\", and also rejoin 're, *'s, n't, etc,\n or (2) apply 1 and also rejoin dashes between letters,\n or (3) apply 1 and also drop non-ASCII characters,\n or (4) apply 1+2+3\n [[default: 1]]");
println(" (*) -m metricName metric options: name of evaluation metric and its options\n [[default: BLEU 4 closest]]");
println(" (*) -evr evalRefs: evaluate references (1) or not (0) (sanity check)\n [[default: 0]]");
println(" (*) -v verbose: evaluate individual sentences (1) or not (0)\n [[default: 0]]");
println("");
println("Ex.: java JoshuaEval -cand nbest.out -ref ref.all -rps 4 -m BLEU 4 shortest");
}
private static void processArgsAndInitialize(String[] args) {
EvaluationMetric.set_knownMetrics();
// set default values
candFileName = "candidates.txt";
candFileFormat = "plain";
candRank = 1;
refFileName = "references.txt";
refsPerSen = 1;
textNormMethod = 1;
metricName = "BLEU";
metricOptions = new String[2];
metricOptions[0] = "4";
metricOptions[1] = "closest";
evaluateRefs = false;
verbose = false;
int i = 0;
while (i < args.length) {
String option = args[i];
if (option.equals("-cand")) {
candFileName = args[i+1];
} else if (option.equals("-format")) {
candFileFormat = args[i+1];
if (!candFileFormat.equals("plain") && !candFileFormat.equals("nbest")) {
println("candFileFormat must be either plain or nbest.");
System.exit(10);
}
} else if (option.equals("-rank")) {
candRank = Integer.parseInt(args[i+1]);
if (refsPerSen < 1) {
println("Argument for -rank must be positive.");
System.exit(10);
}
} else if (option.equals("-ref")) {
refFileName = args[i+1];
} else if (option.equals("-rps")) {
refsPerSen = Integer.parseInt(args[i+1]);
if (refsPerSen < 1) {
println("refsPerSen must be positive.");
System.exit(10);
}
} else if (option.equals("-txtNrm")) {
textNormMethod = Integer.parseInt(args[i+1]);
if (textNormMethod < 0 || textNormMethod > 4) {
println("textNormMethod should be between 0 and 4");
System.exit(10);
}
} else if (option.equals("-m")) {
metricName = args[i+1];
if (EvaluationMetric.knownMetricName(metricName)) {
int optionCount = EvaluationMetric.metricOptionCount(metricName);
metricOptions = new String[optionCount];
for (int opt = 0; opt < optionCount; ++opt) {
metricOptions[opt] = args[i+opt+2];
}
i += optionCount;
} else {
println("Unknown metric name " + metricName + ".");
System.exit(10);
}
} else if (option.equals("-evr")) {
int evr = Integer.parseInt(args[i+1]);
if (evr == 1) {
evaluateRefs = true;
} else if (evr == 0) {
evaluateRefs = false;
} else {
println("evalRefs must be either 0 or 1.");
System.exit(10);
}
} else if (option.equals("-v")) {
int v = Integer.parseInt(args[i+1]);
if (v == 1) {
verbose = true;
} else if (v == 0) {
verbose = false;
} else {
println("verbose must be either 0 or 1.");
System.exit(10);
}
} else {
println("Unknown option " + option); System.exit(10);
}
i += 2;
} // while (i)
if (refsPerSen > 1) {
// the provided refFileName might be a prefix
File dummy = new File(refFileName);
if (!dummy.exists()) {
refFileName = createUnifiedRefFile(refFileName,refsPerSen);
}
} else {
checkFile(refFileName);
}
// initialize
numSentences = countLines(refFileName) / refsPerSen;
// read in reference sentences
refSentences = new String[numSentences][refsPerSen];
try {
InputStream inStream_refs = new FileInputStream(new File(refFileName));
BufferedReader inFile_refs = new BufferedReader(new InputStreamReader(inStream_refs, "utf8"));
for (i = 0; i < numSentences; ++i) {
for (int r = 0; r < refsPerSen; ++r) {
// read the rth reference translation for the ith sentence
refSentences[i][r] = normalize(inFile_refs.readLine(), textNormMethod);
}
}
inFile_refs.close();
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.initialize(int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.initialize(int): " + e.getMessage());
System.exit(99902);
}
// set static data members for the EvaluationMetric class
EvaluationMetric.set_numSentences(numSentences);
EvaluationMetric.set_refsPerSen(refsPerSen);
EvaluationMetric.set_refSentences(refSentences);
// do necessary initialization for the evaluation metric
evalMetric = EvaluationMetric.getMetric(metricName,metricOptions);
println("Processing " + numSentences + " sentences...");
} // processArgsAndInitialize(String[] args)
private static void checkFile(String fileName) {
if (!fileExists(fileName)) {
println("The file " + fileName + " was not found!");
System.exit(40);
}
}
private static boolean fileExists(String fileName) {
File checker = new File(fileName);
return checker.exists();
}
private static String createUnifiedRefFile(String prefix, int numFiles) {
if (numFiles < 2) {
println("Warning: createUnifiedRefFile called with numFiles = " + numFiles + "; doing nothing.");
return prefix;
} else {
File checker;
checker = new File(prefix + "1");
if (!checker.exists()) {
checker = new File(prefix + ".1");
if (!checker.exists()) {
println("Can't find reference files.");
System.exit(50);
} else {
prefix = prefix + ".";
}
}
String outFileName;
if (prefix.endsWith(".")) {
outFileName = prefix + "all";
} else {
outFileName = prefix + ".all";
}
try {
PrintWriter outFile = new PrintWriter(outFileName);
BufferedReader[] inFile = new BufferedReader[numFiles];
int nextIndex;
checker = new File(prefix + "0");
if (checker.exists()) {
nextIndex = 0;
} else {
nextIndex = 1;
}
int lineCount = countLines(prefix + nextIndex);
for (int r = 0; r < numFiles; ++r) {
if (countLines(prefix + nextIndex) != lineCount) {
println("Line count mismatch in " + (prefix+nextIndex) + ".");
System.exit(60);
}
InputStream inStream = new FileInputStream(new File(prefix + nextIndex));
inFile[r] = new BufferedReader(new InputStreamReader(inStream, "utf8"));
++nextIndex;
}
String line;
for (int i = 0; i < lineCount; ++i) {
for (int r = 0; r < numFiles; ++r) {
line = inFile[r].readLine();
outFile.println(line);
}
}
outFile.close();
for (int r = 0; r < numFiles; ++r) {
inFile[r].close();
}
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.createUnifiedRefFile(String,int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.createUnifiedRefFile(String,int): " + e.getMessage());
System.exit(99902);
}
return outFileName;
}
} // createUnifiedRefFile(String prefix, int numFiles)
private static String normalize(String str, int normMethod)
{
if (normMethod == 0) return str;
// replace HTML/SGML
str = str.replaceAll(""","\"");
str = str.replaceAll("&","&");
str = str.replaceAll("<","<");
str = str.replaceAll(">",">");
str = str.replaceAll("'","'");
// split on these characters:
// ! " # $ % & ( ) * + / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
// i.e. ASCII 33-126, except alphanumeric, and except "," "-" "." "'"
// ! "# $%& ( ) * +/:;<=> ?@ [ \ ] ^_` { | }~
String split_on = "!\"#\\$%&\\(\\)\\*\\+/:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~";
// println("split_on: " + split_on);
for (int k = 0; k < split_on.length(); ++k) {
// for each split character, reprocess the string
String regex = "" + split_on.charAt(k);
if (regex.equals("\\")) {
++k;
regex += split_on.charAt(k);
}
str = str.replaceAll(regex," " + regex + " ");
}
// split on "." and "," and "-", conditioned on proper context
str = " " + str + " ";
str = str.replaceAll("\\s+"," ");
TreeSet<Integer> splitIndices = new TreeSet<Integer>();
for (int i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
if (ch == '.' || ch == ',') {
// split if either of the previous or next characters is a non-digit
char prev_ch = str.charAt(i-1);
char next_ch = str.charAt(i+1);
if (prev_ch < '0' || prev_ch > '9' || next_ch < '0' || next_ch > '9') {
splitIndices.add(i);
}
} else if (ch == '-') {
// split if preceded by a digit
char prev_ch = str.charAt(i-1);
if (prev_ch >= '0' && prev_ch <= '9') {
splitIndices.add(i);
}
}
}
String str0 = str;
str = "";
for (int i = 0; i < str0.length(); ++i) {
if (splitIndices.contains(i)) {
str += " " + str0.charAt(i) + " ";
} else {
str += str0.charAt(i);
}
}
// rejoin i'm, we're, *'s, won't, don't, etc
str = " " + str + " ";
str = str.replaceAll("\\s+"," ");
str = str.replaceAll(" i 'm "," i'm ");
str = str.replaceAll(" we 're "," we're ");
str = str.replaceAll(" 's ","'s ");
str = str.replaceAll(" 've ","'ve ");
str = str.replaceAll(" 'll ","'ll ");
str = str.replaceAll(" 'd ","'d ");
str = str.replaceAll(" n't ","n't ");
// remove spaces around dashes
if (normMethod == 2 || normMethod == 4) {
TreeSet<Integer> skipIndices = new TreeSet<Integer>();
str = " " + str + " ";
for (int i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
if (ch == '-') {
// rejoin if surrounded by spaces, and then letters
if (str.charAt(i-1) == ' ' && str.charAt(i+1) == ' ') {
if (Character.isLetter(str.charAt(i-2)) && Character.isLetter(str.charAt(i+2))) {
skipIndices.add(i-1);
skipIndices.add(i+1);
}
}
}
}
str0 = str;
str = "";
for (int i = 0; i < str0.length(); ++i) {
if (!skipIndices.contains(i)) {
str += str0.charAt(i);
}
}
}
// drop non-ASCII characters
if (normMethod == 3 || normMethod == 4) {
str0 = str;
str = "";
for (int i = 0; i < str0.length(); ++i) {
char ch = str0.charAt(i);
if (ch <= 127) { // i.e. if ASCII
str += ch;
}
}
}
str = str.replaceAll("\\s+"," ");
str = str.trim();
return str;
}
// TODO: we should handle errors properly for the three use sites of this function, and should remove the function.
// OK, but we don't want it to use LineReader, so it can function within the standalone release of Z-MERT. -- O.Z.
private static int countLines(String fileName) {
int count = 0;
try {
BufferedReader inFile = new BufferedReader(new FileReader(fileName));
String line;
do {
line = inFile.readLine();
if (line != null) ++count;
} while (line != null);
inFile.close();
} catch (IOException e) {
System.err.println("IOException in MertCore.countLines(String): " + e.getMessage());
System.exit(99902);
}
return count;
}
private static void println(Object obj) { System.out.println(obj); }
private static void print(Object obj) { System.out.print(obj); }
public static void main(String[] args) {
if (args.length == 0) {
printUsage(args.length);
System.exit(0);
} else {
processArgsAndInitialize(args);
}
// non-specified args will be set to default values in processArgsAndInitialize
if (candFileFormat.equals("plain")) {
println("Evaluating candidate translations in plain file " + candFileName + "...");
evaluateCands_plain(candFileName);
} else if (candFileFormat.equals("nbest")) {
println("Evaluating set of " + candRank + "'th candidate translations from " + candFileName + "...");
evaluateCands_nbest(candFileName,candRank);
}
println("");
if (evaluateRefs) {
// evaluate the references themselves; useful if developing a new evaluation metric
println("");
println("PERFORMING SANITY CHECK:");
println("------------------------");
println("");
println("This metric's scores range from "
+ evalMetric.worstPossibleScore() + " (worst) to "
+ evalMetric.bestPossibleScore() + " (best).");
for (int r = 1; r <= refsPerSen; ++r) {
println("");
println("(*) Evaluating reference set " + r + ":");
println("");
evaluateRefSet(r);
println("");
}
}
// System.exit(0);
} // main(String[] args)
}