/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.zmert;
import java.io.*;
public class METEOR extends EvaluationMetric
{
protected String targetLanguage;
protected boolean normalize;
protected boolean keepPunctuation;
private int maxComputations;
public METEOR(String[] Metric_options)
{
// M_o[0]: -l language, one of {en,cz,fr,de,es}
// M_o[1]: -normalize, one of {norm_yes,norm_no}
// M_o[2]: -keepPunctuation, one of {keepPunc,removePunc}
// M_o[3]: maxComputations, positive integer
// default in meteor v0.8: en, norm_no, removePunc
if (Metric_options[0].equals("en")) {
targetLanguage = "en";
} else if (Metric_options[0].equals("cz")) {
targetLanguage = "cz";
} else if (Metric_options[0].equals("fr")) {
targetLanguage = "fr";
} else if (Metric_options[0].equals("de")) {
targetLanguage = "de";
} else if (Metric_options[0].equals("es")) {
targetLanguage = "es";
} else {
System.out.println("Unknown language string " + Metric_options[0] + ".");
System.out.println("Should be one of {en,cz,fr,de,es}.");
System.exit(1);
}
if (Metric_options[1].equals("norm_yes")) {
normalize = true;
} else if (Metric_options[1].equals("norm_no")) {
normalize = false;
} else {
System.out.println("Unknown normalize string " + Metric_options[1] + ".");
System.out.println("Should be one of norm_yes or norm_no.");
System.exit(1);
}
if (Metric_options[2].equals("keepPunc")) {
keepPunctuation = true;
} else if (Metric_options[1].equals("removePunk")) {
keepPunctuation = false;
} else {
System.out.println("Unknown keepPunctuation string " + Metric_options[1] + ".");
System.out.println("Should be one of keepPunc or removePunk.");
System.exit(1);
}
maxComputations = Integer.parseInt(Metric_options[3]);
if (maxComputations < 1) {
System.out.println("Maximum computations must be positive");
System.exit(2);
}
initialize(); // set the data members of the metric
}
protected void initialize()
{
metricName = "METEOR";
toBeMinimized = false;
suffStatsCount = 5;
}
public double bestPossibleScore() { return 1.0; }
public double worstPossibleScore() { return 0.0; }
public int[] suffStats(String cand_str, int i)
{
// this method should never be used when the metric is METEOR,
// because METEOR.java overrides suffStats(String[],int[]) below,
// which is the only method that calls suffStats(Sting,int).
return null;
}
public int[][] suffStats(String[] cand_strings, int[] cand_indices)
{
// calculate sufficient statistics for each sentence in an arbitrary set of candidates
int candCount = cand_strings.length;
if (cand_indices.length != candCount) {
System.out.println("Array lengths mismatch in suffStats(String[],int[]); returning null.");
return null;
}
int[][] stats = new int[candCount][suffStatsCount];
try {
// 1) Create input files for meteor
// 1a) Create hypothesis file
FileOutputStream outStream = new FileOutputStream("hyp.txt.METEOR", false); // false: don't append
OutputStreamWriter outStreamWriter = new OutputStreamWriter(outStream, "utf8");
BufferedWriter outFile = new BufferedWriter(outStreamWriter);
for (int d = 0; d < candCount; ++d) {
writeLine(cand_strings[d],outFile);
}
outFile.close();
// 1b) Create reference file
outStream = new FileOutputStream("ref.txt.METEOR", false); // false: don't append
outStreamWriter = new OutputStreamWriter(outStream, "utf8");
outFile = new BufferedWriter(outStreamWriter);
for (int d = 0; d < candCount; ++d) {
for (int r = 0; r < refsPerSen; ++r) {
writeLine(refSentences[cand_indices[d]][r],outFile);
}
}
outFile.close();
// 2) Launch meteor as an external process
String cmd_str = "./meteor hyp.txt.METEOR ref.txt.METEOR";
cmd_str += " -l " + targetLanguage;
cmd_str += " -r " + refsPerSen;
if (normalize) { cmd_str += " -normalize"; }
if (keepPunctuation) { cmd_str += " -keepPunctuation"; }
cmd_str += " -ssOut";
Runtime rt = Runtime.getRuntime();
Process p = rt.exec(cmd_str);
StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), 0);
StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), 0);
errorGobbler.start();
outputGobbler.start();
@SuppressWarnings("unused")
int exitValue = p.waitFor();
// 3) Read SS from output file produced by meteor
BufferedReader inFile = new BufferedReader(new FileReader("TER_out.ter"));
String line = "";
line = inFile.readLine(); // skip hyp line
line = inFile.readLine(); // skip ref line
for (int d = 0; d < candCount; ++d) {
line = inFile.readLine(); // read info
String[] strA = line.split("\\s+");
stats[d][0] = (int)Double.parseDouble(strA[0]);
stats[d][1] = (int)Double.parseDouble(strA[1]);
stats[d][2] = (int)Double.parseDouble(strA[2]);
stats[d][3] = (int)Double.parseDouble(strA[3]);
stats[d][4] = (int)Double.parseDouble(strA[4]);
}
} catch (IOException e) {
System.err.println("IOException in METEOR.suffStats(String[],int[]): " + e.getMessage());
System.exit(99902);
} catch (InterruptedException e) {
System.err.println("InterruptedException in METEOR.suffStats(String[],int[]): " + e.getMessage());
System.exit(99903);
}
return stats;
}
public double score(int[] stats)
{
if (stats.length != suffStatsCount) {
System.out.println("Mismatch between stats.length and suffStatsCount (" + stats.length + " vs. " + suffStatsCount + ") in METEOR.score(int[])");
System.exit(1);
}
double sc = 0.0;
// sc = ???
return sc;
}
public void printDetailedScore_fromStats(int[] stats, boolean oneLiner)
{
if (oneLiner) {
System.out.println("METEOR = METEOR(" + stats[0] + "," + stats[1] + "," + stats[2] + "," + stats[3] + "," + stats[4] + " = " + score(stats));
} else {
System.out.println("# matches = " + stats[0]);
System.out.println("test length = " + stats[1]);
System.out.println("ref length = " + stats[2]);
System.out.println("# chunks = " + stats[3]);
System.out.println("length cost = " + stats[4]);
System.out.println("METEOR = " + score(stats));
}
}
private void writeLine(String line, BufferedWriter writer) throws IOException
{
writer.write(line, 0, line.length());
writer.newLine();
writer.flush();
}
}