/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.zmert;
import joshua.decoder.*;
import java.util.*;
import java.io.*;
import java.util.zip.*;
import java.text.DecimalFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
public class MertCore
{
private TreeSet<Integer>[] indicesOfInterest_all;
private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
private final Runtime myRuntime = Runtime.getRuntime();
private final static double NegInf = (-1.0 / 0.0);
private final static double PosInf = (+1.0 / 0.0);
private final static double epsilon = 1.0 / 1000000;
private int progress;
private int verbosity; // anything of priority <= verbosity will be printed
// (lower value for priority means more important)
private Random randGen;
private int generatedRands;
private int numSentences;
// number of sentences in the dev set
// (aka the "MERT training" set)
private int numDocuments;
// number of documents in the dev set
// this should be 1, unless doing doc-level optimization
private int[] docOfSentence;
// docOfSentence[i] stores which document contains the i'th sentence.
// docOfSentence is 0-indexed, as are the documents (i.e. first doc is indexed 0)
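// e.g. (hypothetical) a dev set made up of two documents with 3 and 2 sentences,
// respectively, would have docOfSentence = {0, 0, 0, 1, 1}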
private int[] docSubsetInfo;
// stores information regarding which subset of the documents are evaluated
// [0]: method (0-6)
// [1]: first (1-indexed)
// [2]: last (1-indexed)
// [3]: size
// [4]: center
// [5]: arg1
// [6]: arg2
// [1-6] are 0 for method 0, [6] is 0 for methods 1-4 as well
// only [1] and [2] are needed for optimization. The rest are only needed for an output message.
private int refsPerSen;
// number of reference translations per sentence
private int textNormMethod;
// 0: no normalization, 1: "NIST-style" tokenization, and also rejoin 'm, 're, *'s, 've, 'll, 'd, and n't,
// 2: apply 1 and also rejoin dashes between letters, 3: apply 1 and also drop non-ASCII characters
// 4: apply 1+2+3
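// e.g. (hypothetical) under method 1, a candidate containing the tokens "do n't"
// would presumably end up with "don't" after the rejoining step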
private int numParams;
// number of features for the log-linear model
private double[] normalizationOptions;
// How should a lambda[] vector be normalized (before decoding)?
// nO[0] = 0: no normalization
// nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
// nO[0] = 2: scale so that the maximum absolute value is nO[1]
// nO[0] = 3: scale so that the minimum absolute value is nO[1]
// nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
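// e.g. (hypothetical values) with nO[0] = 1, nO[1] = 1.0, and nO[2] = the index of the
// "lm" weight: if lambda[lm] is 2.0 before normalization, all weights are multiplied
// by 0.5 so that |lambda[lm]| becomes 1.0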
/* *********************************************************** */
/* NOTE: indexing starts at 1 in the following few arrays: */
/* *********************************************************** */
private String[] paramNames;
// feature names, needed to read/create config file
private double[] lambda;
// the current weight vector. NOTE: indexing starts at 1.
private boolean[] isOptimizable;
// isOptimizable[c] = true iff lambda[c] should be optimized
private double[] minThValue;
private double[] maxThValue;
// when investigating thresholds along the lambda[c] dimension, only values
// in the [minThValue[c],maxThValue[c]] range will be considered.
// (*) minThValue and maxThValue can be real values as well as -Infinity and +Infinity
// (coded as -Inf and +Inf, respectively, in an input file)
private double[] minRandValue;
private double[] maxRandValue;
// when choosing a random value for the lambda[c] parameter, it will be
// chosen from the [minRandValue[c],maxRandValue[c]] range.
// (*) minRandValue and maxRandValue must be real values, but not -Inf or +Inf
private int damianos_method;
private double damianos_param;
private double damianos_mult;
private double[] defaultLambda;
// "default" parameter values; simply the values read in the parameter file
/* *********************************************************** */
/* *********************************************************** */
private JoshuaDecoder myDecoder;
// COMMENT OUT if decoder is not Joshua
private String decoderCommand;
// the command that runs the decoder; read from decoderCommandFileName
private int decVerbosity;
// verbosity level for decoder output. If 0, decoder output is ignored.
// If 1, decoder output is printed.
private int validDecoderExitValue;
// return value from running the decoder command that indicates success
private int numOptThreads;
// number of threads to run things in parallel
private int saveInterFiles;
// 0: nothing, 1: only configs, 2: only n-bests, 3: both configs and n-bests
private int compressFiles;
// should Z-MERT gzip the large files? If 0, no compression takes place.
// If 1, compression is performed on: decoder output files, temp sents files,
// and temp feats files.
private int sizeOfNBest;
// size of N-best list generated by decoder at each iteration
// (aka simply N, but N is a bad variable name)
private long seed;
// seed used to create random number generators
private boolean randInit;
// if true, parameters are initialized randomly. If false, parameters
// are initialized using values from parameter file.
private int initsPerIt;
// number of intermediate initial points per iteration
private int maxMERTIterations, minMERTIterations, prevMERTIterations;
// max: maximum number of MERT iterations
// min: minimum number of MERT iterations before an early MERT exit
// prev: number of previous MERT iterations from which to consider candidates (in addition to
// the candidates from the current iteration)
private double stopSigValue;
// early MERT exit if no weight changes by more than stopSigValue
// (but see minMERTIterations above and stopMinIts below)
private int stopMinIts;
// some early stopping criterion must be satisfied in stopMinIts *consecutive* iterations
// before an early exit (but see minMERTIterations above)
private boolean oneModificationPerIteration;
// if true, each MERT iteration performs at most one parameter modification.
// If false, a new MERT iteration starts (i.e. a new N-best list is
// generated) only after the previous iteration reaches a local maximum.
private String metricName;
// name of evaluation metric optimized by MERT
private String metricName_display;
// name of evaluation metric optimized by MERT, possibly with "doc-level " prefixed
private String[] metricOptions;
// options for the evaluation metric (e.g. for BLEU, maxGramLength and effLengthMethod)
private EvaluationMetric evalMetric;
// the evaluation metric used by MERT
private int suffStatsCount;
// number of sufficient statistics for the evaluation metric
private String tmpDirPrefix;
// prefix for the ZMERT.temp.* files
private int passIterationToDecoder;
// should the iteration number be passed as an argument to decoderCommandFileName?
// If 1, iteration number is passed. If 0, launch with no arguments.
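// e.g. (hypothetical) with decoderCommandFileName = "run_decoder.sh" and
// passIterationToDecoder == 1, iteration 3 launches "run_decoder.sh 3"
// (see the external-decoder branch of run_decoder(int))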
private String dirPrefix; // where are all these files located?
private String paramsFileName, docInfoFileName, finalLambdaFileName;
private String sourceFileName, refFileName, decoderOutFileName;
private String decoderConfigFileName, decoderCommandFileName;
private String fakeFileNameTemplate, fakeFileNamePrefix, fakeFileNameSuffix;
// e.g. output.it[1-x].someOldRun would be specified as:
// output.it?.someOldRun
// and we'd have prefix = "output.it" and suffix = ".someOldRun"
// private int useDisk;
public MertCore()
{
}
public MertCore(String[] args)
{
EvaluationMetric.set_knownMetrics();
processArgsArray(args);
initialize(0);
}
public MertCore(String configFileName)
{
EvaluationMetric.set_knownMetrics();
processArgsArray(cfgFileToArgsArray(configFileName));
initialize(0);
}
private void initialize(int randsToSkip)
{
println("NegInf: " + NegInf + ", PosInf: " + PosInf + ", epsilon: " + epsilon,4);
randGen = new Random(seed);
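// skip the first randsToSkip draws, presumably so that a resumed run continues
// from the same point in the random sequence (cf. generatedRands below)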
for (int r = 1; r <= randsToSkip; ++r) {
randGen.nextDouble();
}
generatedRands = randsToSkip;
if (randsToSkip == 0) {
println("----------------------------------------------------",1);
println("Initializing...",1);
println("----------------------------------------------------",1);
println("",1);
println("Random number generator initialized using seed: " + seed,1);
println("",1);
}
numSentences = countLines(refFileName) / refsPerSen;
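// (the reference file is expected to contain refsPerSen consecutive lines per
// source sentence; see the reading loop further below)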
processDocInfo();
// sets numDocuments and docOfSentence[]
if (numDocuments > 1) metricName_display = "doc-level " + metricName;
set_docSubsetInfo(docSubsetInfo);
numParams = countNonEmptyLines(paramsFileName) - 1;
// the parameter file contains one line per parameter
// and one line for the normalization method
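// e.g. (hypothetical values) a parameter line, as parsed in processParamFile(), looks like:
//   lm ||| 1.0 Opt 0.1 +Inf 0.5 1.5
// i.e. name ||| default Opt/Fix minThValue maxThValue minRandValue maxRandValue,
// and the normalization line looks like, e.g., "normalization = absval 1 lm"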
paramNames = new String[1+numParams];
lambda = new double[1+numParams]; // indexing starts at 1 in these arrays
isOptimizable = new boolean[1+numParams];
minThValue = new double[1+numParams];
maxThValue = new double[1+numParams];
minRandValue = new double[1+numParams];
maxRandValue = new double[1+numParams];
// precision = new double[1+numParams];
defaultLambda = new double[1+numParams];
normalizationOptions = new double[3];
try {
// read parameter names
BufferedReader inFile_names = new BufferedReader(new FileReader(paramsFileName));
for (int c = 1; c <= numParams; ++c) {
String line = "";
while (line != null && line.length() == 0) { // skip empty lines
line = inFile_names.readLine();
}
paramNames[c] = (line.substring(0,line.indexOf("|||"))).trim();
}
inFile_names.close();
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.initialize(int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.initialize(int): " + e.getMessage());
System.exit(99902);
}
processParamFile();
// sets the arrays declared just above
// SentenceInfo.createV(); // uncomment ONLY IF using vocabulary implementation of SentenceInfo
String[][] refSentences = new String[numSentences][refsPerSen];
try {
// read in reference sentences
InputStream inStream_refs = new FileInputStream(new File(refFileName));
BufferedReader inFile_refs = new BufferedReader(new InputStreamReader(inStream_refs, "utf8"));
for (int i = 0; i < numSentences; ++i) {
for (int r = 0; r < refsPerSen; ++r) {
// read the rth reference translation for the ith sentence
refSentences[i][r] = inFile_refs.readLine();
}
}
inFile_refs.close();
// normalize reference sentences
for (int i = 0; i < numSentences; ++i) {
for (int r = 0; r < refsPerSen; ++r) {
// normalize the rth reference translation for the ith sentence
refSentences[i][r] = normalize(refSentences[i][r], textNormMethod);
}
}
// read in decoder command, if any
decoderCommand = null;
if (decoderCommandFileName != null) {
if (fileExists(decoderCommandFileName)) {
BufferedReader inFile_comm = new BufferedReader(new FileReader(decoderCommandFileName));
decoderCommand = inFile_comm.readLine();
inFile_comm.close();
}
}
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.initialize(int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.initialize(int): " + e.getMessage());
System.exit(99902);
}
// set static data members for the EvaluationMetric class
EvaluationMetric.set_numSentences(numSentences);
EvaluationMetric.set_numDocuments(numDocuments);
EvaluationMetric.set_refsPerSen(refsPerSen);
EvaluationMetric.set_refSentences(refSentences);
EvaluationMetric.set_tmpDirPrefix(tmpDirPrefix);
evalMetric = EvaluationMetric.getMetric(metricName,metricOptions);
suffStatsCount = evalMetric.get_suffStatsCount();
// set static data members for the IntermediateOptimizer class
IntermediateOptimizer.set_MERTparams(numSentences, numDocuments, docOfSentence, docSubsetInfo,
numParams, normalizationOptions,
isOptimizable, minThValue, maxThValue,
oneModificationPerIteration, evalMetric,
tmpDirPrefix, verbosity);
if (randsToSkip == 0) { // i.e. first iteration
println("Number of sentences: " + numSentences,1);
println("Number of documents: " + numDocuments,1);
println("Optimizing " + metricName_display,1);
print("docSubsetInfo: {",1);
for (int f = 0; f < 6; ++f) print(docSubsetInfo[f] + ", ",1);
println(docSubsetInfo[6] + "}",1);
println("Number of features: " + numParams,1);
print("Feature names: {",1);
for (int c = 1; c <= numParams; ++c) {
print("\"" + paramNames[c] + "\"",1);
if (c < numParams) print(",",1);
}
println("}",1);
println("",1);
println("c Default value\tOptimizable?\tCrit. val. range\tRand. val. range",1);
for (int c = 1; c <= numParams; ++c) {
print(c + " " + f4.format(lambda[c]) + "\t\t",1);
if (!isOptimizable[c]) {
println(" No",1);
} else {
print(" Yes\t\t",1);
// print("[" + minThValue[c] + "," + maxThValue[c] + "] @ " + precision[c] + " precision",1);
print(" [" + minThValue[c] + "," + maxThValue[c] + "]",1);
print("\t\t",1);
print(" [" + minRandValue[c] + "," + maxRandValue[c] + "]",1);
println("",1);
}
}
println("",1);
print("Weight vector normalization method: ",1);
if (normalizationOptions[0] == 0) {
println("none.",1);
} else if (normalizationOptions[0] == 1) {
println("weights will be scaled so that the \"" + paramNames[(int)normalizationOptions[1]]
+ "\" weight has an absolute value of " + normalizationOptions[2] + ".",1);
} else if (normalizationOptions[0] == 2) {
println("weights will be scaled so that the maximum absolute value is "
+ normalizationOptions[1] + ".",1);
} else if (normalizationOptions[0] == 3) {
println("weights will be scaled so that the minimum absolute value is "
+ normalizationOptions[1] + ".",1);
} else if (normalizationOptions[0] == 4) {
println("weights will be scaled so that the L-" + normalizationOptions[1]
+ " norm is " + normalizationOptions[2] + ".",1);
}
println("",1);
println("----------------------------------------------------",1);
println("",1);
// rename original config file so it doesn't get overwritten
// (original name will be restored in finish())
renameFile(decoderConfigFileName,decoderConfigFileName+".ZMERT.orig");
} // if (randsToSkip == 0)
if (decoderCommand == null && fakeFileNameTemplate == null) {
println("Loading Joshua decoder...",1);
myDecoder = new JoshuaDecoder(decoderConfigFileName+".ZMERT.orig");
println("...finished loading @ " + (new Date()),1);
println("");
} else {
myDecoder = null;
}
@SuppressWarnings("unchecked")
TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences];
indicesOfInterest_all = temp_TSA;
for (int i = 0; i < numSentences; ++i) {
indicesOfInterest_all[i] = new TreeSet<Integer>();
}
} // void initialize(...)
public void run_MERT()
{
run_MERT(minMERTIterations,maxMERTIterations,prevMERTIterations);
}
public void run_MERT(int minIts, int maxIts, int prevIts)
{
println("----------------------------------------------------",1);
println("Z-MERT run started @ " + (new Date()),1);
// printMemoryUsage();
println("----------------------------------------------------",1);
println("",1);
if (randInit) {
println("Initializing lambda[] randomly.",1);
// initialize optimizable parameters randomly (sampling uniformly from
// that parameter's random value range)
lambda = randomLambda();
}
println("Initial lambda[]: " + lambdaToString(lambda),1);
println("",1);
double FINAL_score = evalMetric.worstPossibleScore();
// int[] lastUsedIndex = new int[numSentences];
int[] maxIndex = new int[numSentences];
// used to grow featVal_array dynamically
// HashMap<Integer,int[]>[] suffStats_array = new HashMap[numSentences];
// suffStats_array[i] maps candidates of interest for sentence i to an array
// storing the sufficient statistics for that candidate
for (int i = 0; i < numSentences; ++i) {
// lastUsedIndex[i] = -1;
maxIndex[i] = sizeOfNBest - 1;
// suffStats_array[i] = new HashMap<Integer,int[]>();
}
/*
double[][][] featVal_array = new double[1+numParams][][];
// indexed by [param][sentence][candidate]
featVal_array[0] = null; // param indexing starts at 1
for (int c = 1; c <= numParams; ++c) {
featVal_array[c] = new double[numSentences][];
for (int i = 0; i < numSentences; ++i) {
featVal_array[c][i] = new double[maxIndex[i]];
// will grow dynamically as needed
}
}
*/
int earlyStop = 0;
// number of consecutive iterations in which an early stopping criterion was satisfied
for (int iteration = 1; ; ++iteration) {
double[] A = run_single_iteration(iteration, minIts, maxIts, prevIts, earlyStop, maxIndex);
if (A != null) {
FINAL_score = A[0];
earlyStop = (int)A[1];
if (A[2] == 1) break;
} else {
break;
}
} // for (iteration)
println("",1);
println("----------------------------------------------------",1);
println("Z-MERT run ended @ " + (new Date()),1);
// printMemoryUsage();
println("----------------------------------------------------",1);
println("",1);
println("FINAL lambda: " + lambdaToString(lambda)
+ " (" + metricName_display + ": " + FINAL_score + ")",1);
// check if a lambda is outside its threshold range
for (int c = 1; c <= numParams; ++c) {
if (lambda[c] < minThValue[c] || lambda[c] > maxThValue[c]) {
println("Warning: after normalization, lambda[" + c + "]=" + f4.format(lambda[c])
+ " is outside its critical value range.",1);
}
}
println("",1);
// delete intermediate .temp.*.it* decoder output files
for (int iteration = 1; iteration <= maxIts; ++iteration) {
if (compressFiles == 1) {
deleteFile(tmpDirPrefix+"temp.sents.it"+iteration+".gz");
deleteFile(tmpDirPrefix+"temp.feats.it"+iteration+".gz");
if (fileExists(tmpDirPrefix+"temp.stats.it"+iteration+".copy.gz")) {
deleteFile(tmpDirPrefix+"temp.stats.it"+iteration+".copy.gz");
} else {
deleteFile(tmpDirPrefix+"temp.stats.it"+iteration+".gz");
}
} else {
deleteFile(tmpDirPrefix+"temp.sents.it"+iteration);
deleteFile(tmpDirPrefix+"temp.feats.it"+iteration);
if (fileExists(tmpDirPrefix+"temp.stats.it"+iteration+".copy")) {
deleteFile(tmpDirPrefix+"temp.stats.it"+iteration+".copy");
} else {
deleteFile(tmpDirPrefix+"temp.stats.it"+iteration);
}
}
}
} // void run_MERT(int maxIts)
@SuppressWarnings("unchecked")
public double[] run_single_iteration(
int iteration, int minIts, int maxIts, int prevIts, int earlyStop, int[]maxIndex)
{
double FINAL_score = 0;
double[] retA = new double[3];
// retA[0]: FINAL_score
// retA[1]: earlyStop
// retA[2]: should this be the last iteration?
boolean done = false;
retA[2] = 1; // will only be made 0 if we don't break from the following loop
double[][][] featVal_array = new double[1+numParams][][];
// indexed by [param][sentence][candidate]
featVal_array[0] = null; // param indexing starts at 1
for (int c = 1; c <= numParams; ++c) {
featVal_array[c] = new double[numSentences][];
for (int i = 0; i < numSentences; ++i) {
featVal_array[c][i] = new double[maxIndex[i]+1];
// will grow dynamically as needed
}
}
while (!done) { // NOTE: this "loop" will only be carried out once
println("--- Starting Z-MERT iteration #" + iteration + " @ " + (new Date()) + " ---",1);
// printMemoryUsage();
// run the decoder on all the sentences, producing for each sentence a set of
// sizeOfNBest candidates, with numParams feature values for each candidate
/******************************/
// CREATE DECODER CONFIG FILE //
/******************************/
createConfigFile(lambda,decoderConfigFileName,decoderConfigFileName+".ZMERT.orig");
// i.e. use the original config file as a template
/***************/
// RUN DECODER //
/***************/
if (iteration == 1) {
println("Decoding using initial weight vector " + lambdaToString(lambda),1);
} else {
println("Redecoding using weight vector " + lambdaToString(lambda),1);
}
String[] decRunResult = run_decoder(iteration); // iteration passed in case fake decoder will be used
// [0] name of file to be processed
// [1] indicates how the output file was obtained:
// 1: external decoder
// 2: fake decoder
// 3: internal decoder
if (!decRunResult[1].equals("2")) {
println("...finished decoding @ " + (new Date()),1);
}
checkFile(decRunResult[0]);
println("Producing temp files for iteration "+iteration,3);
produceTempFiles(decRunResult[0], iteration);
if (saveInterFiles == 1 || saveInterFiles == 3) { // make copy of intermediate config file
if (!copyFile(decoderConfigFileName,decoderConfigFileName+".ZMERT.it"+iteration)) {
println("Warning: attempt to make copy of decoder config file (to create" + decoderConfigFileName+".ZMERT.it"+iteration + ") was unsuccessful!",1);
}
}
if (saveInterFiles == 2 || saveInterFiles == 3) { // make copy of intermediate decoder output file...
if (!decRunResult[1].equals("2")) { // ...but only if no fake decoder
if (!decRunResult[0].endsWith(".gz")) {
if (!copyFile(decRunResult[0],decRunResult[0]+".ZMERT.it"+iteration)) {
println("Warning: attempt to make copy of decoder output file (to create" + decRunResult[0]+".ZMERT.it"+iteration + ") was unsuccessful!",1);
}
} else {
String prefix = decRunResult[0].substring(0,decRunResult[0].length()-3);
if (!copyFile(prefix+".gz",prefix+".ZMERT.it"+iteration+".gz")) {
println("Warning: attempt to make copy of decoder output file (to create" + prefix+".ZMERT.it"+iteration+".gz" + ") was unsuccessful!",1);
}
}
if (compressFiles == 1 && !decRunResult[0].endsWith(".gz")) {
gzipFile(decRunResult[0]+".ZMERT.it"+iteration);
}
} // if (!fake)
}
int[] candCount = new int[numSentences];
int[] lastUsedIndex = new int[numSentences];
@SuppressWarnings("unchecked")
ConcurrentHashMap<Integer,int[]>[] suffStats_array = new ConcurrentHashMap[numSentences];
for (int i = 0; i < numSentences; ++i) {
candCount[i] = 0;
lastUsedIndex[i] = -1;
// suffStats_array[i].clear();
suffStats_array[i] = new ConcurrentHashMap<Integer,int[]>();
}
double[][] initialLambda = new double[1+initsPerIt][1+numParams];
// the intermediate "initial" lambdas
double[][] finalLambda = new double[1+initsPerIt][1+numParams];
// the intermediate "final" lambdas
// set initialLambda[][]
System.arraycopy(lambda,1,initialLambda[1],1,numParams);
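// initialLambda[1] is the current lambda; the remaining initsPerIt-1 initial points
// are generated randomly (or as perturbations of initialLambda[1]) just below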
for (int j = 2; j <= initsPerIt; ++j) {
if (damianos_method == 0) {
initialLambda[j] = randomLambda();
} else {
initialLambda[j] = randomPerturbation(initialLambda[1], iteration, damianos_method, damianos_param, damianos_mult);
}
}
double[] initialScore = new double[1+initsPerIt];
double[] finalScore = new double[1+initsPerIt];
int[][][] best1Cand_suffStats = new int[1+initsPerIt][numSentences][suffStatsCount];
double[][] best1Score = new double[1+initsPerIt][numSentences];
// Those two arrays are used to calculate initialScore[]
// (the "score" in best1Score refers to that assigned by the
// decoder; the "score" in initialScore refers to that
// assigned by the evaluation metric)
int firstIt = Math.max(1,iteration-prevIts);
// i.e. only process candidates from the current iteration and candidates
// from up to prevIts previous iterations.
println("Reading candidate translations from iterations " + firstIt + "-" + iteration,1);
println("(and computing " + metricName + " sufficient statistics for previously unseen candidates)",1);
print(" Progress: ");
int[] newCandidatesAdded = new int[1+iteration];
for (int it = 1; it <= iteration; ++it) { newCandidatesAdded[it] = 0; }
try {
// each inFile corresponds to the output of an iteration
// (index 0 is not used; no corresponding index for the current iteration)
BufferedReader[] inFile_sents = new BufferedReader[iteration];
BufferedReader[] inFile_feats = new BufferedReader[iteration];
BufferedReader[] inFile_stats = new BufferedReader[iteration];
for (int it = firstIt; it < iteration; ++it) {
InputStream inStream_sents, inStream_feats, inStream_stats;
if (compressFiles == 0) {
inStream_sents = new FileInputStream(tmpDirPrefix+"temp.sents.it"+it);
inStream_feats = new FileInputStream(tmpDirPrefix+"temp.feats.it"+it);
inStream_stats = new FileInputStream(tmpDirPrefix+"temp.stats.it"+it);
} else {
inStream_sents = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.sents.it"+it+".gz"));
inStream_feats = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.feats.it"+it+".gz"));
inStream_stats = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.stats.it"+it+".gz"));
}
inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
inFile_feats[it] = new BufferedReader(new InputStreamReader(inStream_feats, "utf8"));
inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
}
InputStream inStream_sentsCurrIt, inStream_featsCurrIt, inStream_statsCurrIt;
if (compressFiles == 0) {
inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix+"temp.sents.it"+iteration);
inStream_featsCurrIt = new FileInputStream(tmpDirPrefix+"temp.feats.it"+iteration);
} else {
inStream_sentsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.sents.it"+iteration+".gz"));
inStream_featsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.feats.it"+iteration+".gz"));
}
BufferedReader inFile_sentsCurrIt = new BufferedReader(new InputStreamReader(inStream_sentsCurrIt, "utf8"));
BufferedReader inFile_featsCurrIt = new BufferedReader(new InputStreamReader(inStream_featsCurrIt, "utf8"));
BufferedReader inFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below is set to true
PrintWriter outFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below is set to false
boolean statsCurrIt_exists = false;
if (fileExists(tmpDirPrefix+"temp.stats.it"+iteration)) {
inStream_statsCurrIt = new FileInputStream(tmpDirPrefix+"temp.stats.it"+iteration);
inFile_statsCurrIt = new BufferedReader(new InputStreamReader(inStream_statsCurrIt, "utf8"));
statsCurrIt_exists = true;
copyFile(tmpDirPrefix+"temp.stats.it"+iteration,tmpDirPrefix+"temp.stats.it"+iteration+".copy");
} else if (fileExists(tmpDirPrefix+"temp.stats.it"+iteration+".gz")) {
inStream_statsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.stats.it"+iteration+".gz"));
inFile_statsCurrIt = new BufferedReader(new InputStreamReader(inStream_statsCurrIt, "utf8"));
statsCurrIt_exists = true;
copyFile(tmpDirPrefix+"temp.stats.it"+iteration+".gz",tmpDirPrefix+"temp.stats.it"+iteration+".copy.gz");
} else {
outFile_statsCurrIt = new PrintWriter(tmpDirPrefix+"temp.stats.it"+iteration);
}
PrintWriter outFile_statsMerged = new PrintWriter(tmpDirPrefix+"temp.stats.merged");
// write sufficient statistics from all the sentences
// from the output files into a single file
PrintWriter outFile_statsMergedKnown = new PrintWriter(tmpDirPrefix+"temp.stats.mergedKnown");
// write sufficient statistics for candidates already seen in previous iterations
// ("known" candidates) into a single file (merged into temp.stats.merged below)
FileOutputStream outStream_unknownCands = new FileOutputStream(tmpDirPrefix+"temp.currIt.unknownCands", false);
OutputStreamWriter outStreamWriter_unknownCands = new OutputStreamWriter(outStream_unknownCands, "utf8");
BufferedWriter outFile_unknownCands = new BufferedWriter(outStreamWriter_unknownCands);
PrintWriter outFile_unknownIndices = new PrintWriter(tmpDirPrefix+"temp.currIt.unknownIndices");
String sents_str, feats_str, stats_str;
// BUG: this assumes a candidate string cannot be produced for two
// different source sentences, which is not necessarily true
// (It's not actually a bug, but only because existingCandStats gets
// cleared before moving to the next source sentence.)
// FIX: should be made an array, indexed by i
HashMap<String,String> existingCandStats = new HashMap<String,String>();
// Stores precalculated sufficient statistics for candidates, in case
// the same candidate is seen again. (SS stored as a String.)
// Q: Why do we care? If we see the same candidate again, aren't we going
// to ignore it? So, why do we care about the SS of this repeat candidate?
// A: A "repeat" candidate may not be a repeat candidate in later
// iterations if the user specifies a value for prevMERTIterations
// that causes MERT to skip candidates from early iterations.
double[] currFeatVal = new double[1+numParams];
String[] featVal_str;
int totalCandidateCount = 0;
int[] sizeUnknown_currIt = new int[numSentences];
for (int i = 0; i < numSentences; ++i) {
for (int j = 1; j <= initsPerIt; ++j) {
best1Score[j][i] = NegInf;
}
for (int it = firstIt; it < iteration; ++it) {
// Why up to but *excluding* iteration?
// Because the last iteration is handled a little differently, since
// the SS must be calculated (and the corresponding file created),
// which is not true for previous iterations.
for (int n = 0; n <= sizeOfNBest; ++n) {
// Why up to and *including* sizeOfNBest?
// So that it would read the "||||||" separator even if there is
// a complete list of sizeOfNBest candidates.
// for the nth candidate for the ith sentence, read the sentence, feature values,
// and sufficient statistics from the various temp files
sents_str = inFile_sents[it].readLine();
feats_str = inFile_feats[it].readLine();
stats_str = inFile_stats[it].readLine();
if (sents_str.equals("||||||")) {
n = sizeOfNBest+1;
} else if (!existingCandStats.containsKey(sents_str)) {
outFile_statsMergedKnown.println(stats_str);
featVal_str = feats_str.split("\\s+");
for (int c = 1; c <= numParams; ++c) {
currFeatVal[c] = Double.parseDouble(featVal_str[c-1]);
// print("fV[" + c + "]=" + currFeatVal[c] + " ",4);
}
// println("",4);
for (int j = 1; j <= initsPerIt; ++j) {
double score = 0; // i.e. score assigned by decoder
for (int c = 1; c <= numParams; ++c) {
score += initialLambda[j][c] * currFeatVal[c];
}
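// (for j > 1 this is the model score under the j'th intermediate initial point,
// not the score assigned by the decoder's actual weight vector)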
if (score > best1Score[j][i]) {
best1Score[j][i] = score;
String[] tempStats = stats_str.split("\\s+");
for (int s = 0; s < suffStatsCount; ++s)
best1Cand_suffStats[j][i][s] = Integer.parseInt(tempStats[s]);
}
} // for (j)
existingCandStats.put(sents_str,stats_str);
setFeats(featVal_array,i,lastUsedIndex,maxIndex,currFeatVal);
candCount[i] += 1;
newCandidatesAdded[it] += 1;
} // if unseen candidate
} // for (n)
} // for (it)
outFile_statsMergedKnown.println("||||||");
// now go through the candidates of the current iteration and determine which of
// them have not been seen before (their sufficient statistics are computed further below)
/* remember:
BufferedReader inFile_sentsCurrIt
BufferedReader inFile_featsCurrIt
PrintWriter outFile_statsCurrIt
*/
String[] sentsCurrIt_currSrcSent = new String[sizeOfNBest+1];
Vector<String> unknownCands_V = new Vector<String>();
// which candidates (of the i'th source sentence) have not been seen before
// this iteration?
for (int n = 0; n <= sizeOfNBest; ++n) {
// Why up to and *including* sizeOfNBest?
// So that it would read the "||||||" separator even if there is
// a complete list of sizeOfNBest candidates.
// for the nth candidate for the ith sentence, read the sentence,
// and store it in the sentsCurrIt_currSrcSent array
sents_str = inFile_sentsCurrIt.readLine();
sentsCurrIt_currSrcSent[n] = sents_str; // Note: possibly "||||||"
if (sents_str.equals("||||||")) {
n = sizeOfNBest+1;
} else if (!existingCandStats.containsKey(sents_str)) {
unknownCands_V.add(sents_str);
writeLine(sents_str,outFile_unknownCands);
outFile_unknownIndices.println(i);
newCandidatesAdded[iteration] += 1;
existingCandStats.put(sents_str,"U"); // i.e. unknown
// we add sents_str to avoid duplicate entries in unknownCands_V
}
} // for (n)
// now unknownCands_V has the candidates for which we need to calculate
// sufficient statistics (for the i'th source sentence)
int sizeUnknown = unknownCands_V.size();
sizeUnknown_currIt[i] = sizeUnknown;
/*********************************************/
/*
String[] unknownCands = new String[sizeUnknown];
unknownCands_V.toArray(unknownCands);
int[] indices = new int[sizeUnknown];
for (int d = 0; d < sizeUnknown; ++d) {
existingCandStats.remove(unknownCands[d]);
// remove the (unknownCands[d],"U") entry from existingCandStats
// (we had added it while constructing unknownCands_V to avoid duplicate entries)
indices[d] = i;
}
*/
/*********************************************/
existingCandStats.clear();
} // for (i)
/*
int[][] newSuffStats = null;
if (!statsCurrIt_exists && sizeUnknown > 0) {
newSuffStats = evalMetric.suffStats(unknownCands, indices);
}
*/
outFile_statsMergedKnown.close();
outFile_unknownCands.close();
outFile_unknownIndices.close();
for (int it = firstIt; it < iteration; ++it) {
inFile_sents[it].close();
inFile_stats[it].close();
InputStream inStream_sents, inStream_stats;
if (compressFiles == 0) {
inStream_sents = new FileInputStream(tmpDirPrefix+"temp.sents.it"+it);
inStream_stats = new FileInputStream(tmpDirPrefix+"temp.stats.it"+it);
} else {
inStream_sents = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.sents.it"+it+".gz"));
inStream_stats = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.stats.it"+it+".gz"));
}
inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
}
inFile_sentsCurrIt.close();
if (compressFiles == 0) {
inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix+"temp.sents.it"+iteration);
} else {
inStream_sentsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix+"temp.sents.it"+iteration+".gz"));
}
inFile_sentsCurrIt = new BufferedReader(new InputStreamReader(inStream_sentsCurrIt, "utf8"));
// calculate SS for unseen candidates and write them to file
FileInputStream inStream_statsCurrIt_unknown = null;
BufferedReader inFile_statsCurrIt_unknown = null;
if (!statsCurrIt_exists && newCandidatesAdded[iteration] > 0) {
// create the file...
evalMetric.createSuffStatsFile(tmpDirPrefix+"temp.currIt.unknownCands", tmpDirPrefix+"temp.currIt.unknownIndices", tmpDirPrefix+"temp.stats.unknown", sizeOfNBest);
// ...and open it
inStream_statsCurrIt_unknown = new FileInputStream(tmpDirPrefix+"temp.stats.unknown");
inFile_statsCurrIt_unknown = new BufferedReader(new InputStreamReader(inStream_statsCurrIt_unknown, "utf8"));
}
// OPEN mergedKnown file
FileInputStream instream_statsMergedKnown = new FileInputStream(tmpDirPrefix+"temp.stats.mergedKnown");
BufferedReader inFile_statsMergedKnown = new BufferedReader(new InputStreamReader(instream_statsMergedKnown, "utf8"));
for (int i = 0; i < numSentences; ++i) {
// reprocess candidates from previous iterations
for (int it = firstIt; it < iteration; ++it) {
for (int n = 0; n <= sizeOfNBest; ++n) {
sents_str = inFile_sents[it].readLine();
stats_str = inFile_stats[it].readLine();
if (sents_str.equals("||||||")) {
n = sizeOfNBest+1;
} else if (!existingCandStats.containsKey(sents_str)) {
existingCandStats.put(sents_str,stats_str);
} // if unseen candidate
} // for (n)
} // for (it)
// copy relevant portion from mergedKnown to the merged file
String line_mergedKnown = inFile_statsMergedKnown.readLine();
while (!line_mergedKnown.equals("||||||")) {
outFile_statsMerged.println(line_mergedKnown);
line_mergedKnown = inFile_statsMergedKnown.readLine();
}
int d = -1;
int[] stats = new int[suffStatsCount];
for (int n = 0; n <= sizeOfNBest; ++n) {
// Why up to and *including* sizeOfNBest?
// So that it would read the "||||||" separator even if there is
// a complete list of sizeOfNBest candidates.
// for the nth candidate for the ith sentence, read the sentence, feature values,
// and sufficient statistics from the various temp files
sents_str = inFile_sentsCurrIt.readLine();
feats_str = inFile_featsCurrIt.readLine();
if (sents_str.equals("||||||")) {
n = sizeOfNBest+1;
} else if (!existingCandStats.containsKey(sents_str)) {
++d;
if (!statsCurrIt_exists) {
stats_str = inFile_statsCurrIt_unknown.readLine();
String[] temp_stats = stats_str.split("\\s+");
for (int s = 0; s < suffStatsCount; ++s) {
stats[s] = Integer.parseInt(temp_stats[s]);
}
/*
stats_str = "";
for (int s = 0; s < suffStatsCount-1; ++s) {
stats[s] = newSuffStats[d][s];
stats_str += (stats[s] + " ");
}
stats[suffStatsCount-1] = newSuffStats[d][suffStatsCount-1];
stats_str += stats[suffStatsCount-1];
*/
outFile_statsCurrIt.println(stats_str);
} else {
stats_str = inFile_statsCurrIt.readLine();
String[] temp_stats = stats_str.split("\\s+");
for (int s = 0; s < suffStatsCount; ++s) {
stats[s] = Integer.parseInt(temp_stats[s]);
}
}
outFile_statsMerged.println(stats_str);
featVal_str = feats_str.split("\\s+");
for (int c = 1; c <= numParams; ++c) {
currFeatVal[c] = Double.parseDouble(featVal_str[c-1]);
// print("fV[" + c + "]=" + currFeatVal[c] + " ",4);
}
// println("",4);
for (int j = 1; j <= initsPerIt; ++j) {
double score = 0; // i.e. score assigned by decoder
for (int c = 1; c <= numParams; ++c) {
score += initialLambda[j][c] * currFeatVal[c];
}
if (score > best1Score[j][i]) {
best1Score[j][i] = score;
for (int s = 0; s < suffStatsCount; ++s)
best1Cand_suffStats[j][i][s] = stats[s];
}
} // for (j)
existingCandStats.put(sents_str,stats_str);
setFeats(featVal_array,i,lastUsedIndex,maxIndex,currFeatVal);
candCount[i] += 1;
// newCandidatesAdded[iteration] += 1;
// moved to the code above that detects new candidates
} else {
if (statsCurrIt_exists)
inFile_statsCurrIt.readLine();
else {
// write SS to outFile_statsCurrIt
stats_str = existingCandStats.get(sents_str);
outFile_statsCurrIt.println(stats_str);
}
}
} // for (n)
// now d = sizeUnknown_currIt[i] - 1
if (statsCurrIt_exists)
inFile_statsCurrIt.readLine();
else
outFile_statsCurrIt.println("||||||");
existingCandStats.clear();
totalCandidateCount += candCount[i];
if ((i+1) % 500 == 0) { print((i+1) + "\n" + " ",1); }
else if ((i+1) % 100 == 0) { print("+",1); }
else if ((i+1) % 25 == 0) { print(".",1); }
} // for (i)
outFile_statsMerged.close();
println("",1); // finish progress line
for (int it = firstIt; it < iteration; ++it) {
inFile_sents[it].close();
inFile_feats[it].close();
inFile_stats[it].close();
}
inFile_sentsCurrIt.close();
inFile_featsCurrIt.close();
if (statsCurrIt_exists)
inFile_statsCurrIt.close();
else
outFile_statsCurrIt.close();
if (compressFiles == 1 && !statsCurrIt_exists) {
gzipFile(tmpDirPrefix+"temp.stats.it"+iteration);
}
deleteFile(tmpDirPrefix+"temp.currIt.unknownCands");
deleteFile(tmpDirPrefix+"temp.currIt.unknownIndices");
deleteFile(tmpDirPrefix+"temp.stats.unknown");
deleteFile(tmpDirPrefix+"temp.stats.mergedKnown");
// cleanupMemory();
println("Processed " + totalCandidateCount + " distinct candidates "
+ "(about " + totalCandidateCount/numSentences + " per sentence):",1);
for (int it = firstIt; it <= iteration; ++it) {
println("newCandidatesAdded[it=" + it + "] = " + newCandidatesAdded[it]
+ " (about " + newCandidatesAdded[it]/numSentences + " per sentence)",1);
}
println("",1);
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.run_single_iteration(6): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.run_single_iteration(6): " + e.getMessage());
System.exit(99902);
}
if (newCandidatesAdded[iteration] == 0) {
if (!oneModificationPerIteration) {
println("No new candidates added in this iteration; exiting Z-MERT.",1);
println("",1);
println("--- Z-MERT iteration #" + iteration + " ending @ " + (new Date()) + " ---",1);
println("",1);
return null; // THIS MEANS THAT THE OLD VALUES SHOULD BE KEPT BY THE CALLER
} else {
println("Note: No new candidates added in this iteration.",1);
}
}
// run the initsPerIt optimizations, in parallel, across numOptThreads threads
ExecutorService pool = Executors.newFixedThreadPool(numOptThreads);
Semaphore blocker = new Semaphore(0);
Vector<String>[] threadOutput = new Vector[initsPerIt+1];
for (int j = 1; j <= initsPerIt; ++j) {
threadOutput[j] = new Vector<String>();
pool.execute(new IntermediateOptimizer(j, blocker, threadOutput[j],
initialLambda[j], finalLambda[j], best1Cand_suffStats[j],
finalScore, candCount, featVal_array, suffStats_array));
}
pool.shutdown();
try {
blocker.acquire(initsPerIt);
} catch(java.lang.InterruptedException e) {
System.err.println("InterruptedException in MertCore.run_single_iteration(): " + e.getMessage());
System.exit(99906);
}
// extract output from threadOutput[]
for (int j = 1; j <= initsPerIt; ++j) {
for (String str : threadOutput[j]) {
println(str); // no verbosity check needed; thread already checked
}
}
int best_j = 1;
double bestFinalScore = finalScore[1];
for (int j = 2; j <= initsPerIt; ++j) {
if (evalMetric.isBetter(finalScore[j],bestFinalScore)) {
best_j = j;
bestFinalScore = finalScore[j];
}
}
if (initsPerIt > 1) {
println("Best final lambda is lambda[j=" + best_j + "] "
+ "(" + metricName_display + ": " + f4.format(bestFinalScore) + ").",1);
println("",1);
}
FINAL_score = bestFinalScore;
boolean anyParamChanged = false;
boolean anyParamChangedSignificantly = false;
for (int c = 1; c <= numParams; ++c) {
if (finalLambda[best_j][c] != lambda[c]) {
anyParamChanged = true;
}
if (Math.abs(finalLambda[best_j][c] - lambda[c]) > stopSigValue) {
anyParamChangedSignificantly = true;
}
}
System.arraycopy(finalLambda[best_j],1,lambda,1,numParams);
println("--- Z-MERT iteration #" + iteration + " ending @ " + (new Date()) + " ---",1);
println("",1);
if (!anyParamChanged) {
println("No parameter value changed in this iteration; exiting Z-MERT.",1);
println("",1);
break; // exit for (iteration) loop preemptively
}
// check if a lambda is outside its threshold range
for (int c = 1; c <= numParams; ++c) {
if (lambda[c] < minThValue[c] || lambda[c] > maxThValue[c]) {
println("Warning: after normalization, lambda[" + c + "]="
+ f4.format(lambda[c]) + " is outside its critical value range.",1);
}
}
// was an early stopping criterion satisfied?
boolean critSatisfied = false;
if (!anyParamChangedSignificantly && stopSigValue >= 0) {
println("Note: No parameter value changed significantly "
+ "(i.e. by more than " + stopSigValue + ") in this iteration.",1);
critSatisfied = true;
}
if (critSatisfied) { ++earlyStop; println("",1); }
else { earlyStop = 0; }
// if min number of iterations executed, investigate if early exit should happen
if (iteration >= minIts && earlyStop >= stopMinIts) {
println("Some early stopping criteria has been observed "
+ "in " + stopMinIts + " consecutive iterations; exiting Z-MERT.",1);
println("",1);
break; // exit for (iteration) loop preemptively
}
// if max number of iterations executed, exit
if (iteration >= maxIts) {
println("Maximum number of MERT iterations reached; exiting Z-MERT.",1);
println("",1);
break; // exit for (iteration) loop
}
println("Next iteration will decode with lambda: " + lambdaToString(lambda),1);
println("",1);
// printMemoryUsage();
for (int i = 0; i < numSentences; ++i) {
suffStats_array[i].clear();
}
// cleanupMemory();
// println("",2);
retA[2] = 0; // i.e. this should NOT be the last iteration
done = true;
} // while (!done) // NOTE: this "loop" will only be carried out once
// delete .temp.stats.merged file, since it is not needed in the next
// iteration (it will be recreated from scratch)
deleteFile(tmpDirPrefix+"temp.stats.merged");
retA[0] = FINAL_score;
retA[1] = earlyStop;
return retA;
} // run_single_iteration
private String lambdaToString(double[] lambdaA)
{
String retStr = "{";
for (int c = 1; c <= numParams-1; ++c) {
retStr += "" + lambdaA[c] + ", ";
}
retStr += "" + lambdaA[numParams] + "}";
return retStr;
}
private String[] run_decoder(int iteration)
{
String[] retSA = new String[2];
// [0] name of file to be processed
// [1] indicates how the output file was obtained:
// 1: external decoder
// 2: fake decoder
// 3: internal decoder
if (fakeFileNameTemplate != null && fileExists(fakeFileNamePrefix+iteration+fakeFileNameSuffix)) {
String fakeFileName = fakeFileNamePrefix+iteration+fakeFileNameSuffix;
println("Not running decoder; using " + fakeFileName + " instead.",1);
/*
if (fakeFileName.endsWith(".gz")) {
copyFile(fakeFileName,decoderOutFileName+".gz");
gunzipFile(decoderOutFileName+".gz");
} else {
copyFile(fakeFileName,decoderOutFileName);
}
*/
retSA[0] = fakeFileName;
retSA[1] = "2";
} else if (decoderCommand == null) {
if (myDecoder == null) {
println("Loading Joshua decoder...",1);
myDecoder = new JoshuaDecoder(decoderConfigFileName+".ZMERT.orig");
println("...finished loading @ " + (new Date()),1);
println("");
}
println("Running Joshua decoder on source file " + sourceFileName + "...",1);
// myDecoder.initialize(decoderConfigFileName);
double[] zeroBased_lambda = new double[numParams];
System.arraycopy(lambda,1,zeroBased_lambda,0,numParams);
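// (lambda[] is 1-indexed; the decoder expects a 0-indexed weight vector)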
myDecoder.changeBaselineFeatureWeights(zeroBased_lambda);
myDecoder.decodeTestSet(sourceFileName, decoderOutFileName);
retSA[0] = decoderOutFileName;
retSA[1] = "3";
} else {
println("Running external decoder...",1);
try {
Runtime rt = Runtime.getRuntime();
String cmd = decoderCommandFileName;
if (passIterationToDecoder == 1) {
cmd = cmd + " " + iteration;
}
Process p = rt.exec(cmd);
StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), decVerbosity);
StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), decVerbosity);
errorGobbler.start();
outputGobbler.start();
int decStatus = p.waitFor();
if (decStatus != validDecoderExitValue) {
println("Call to decoder returned " + decStatus
+ "; was expecting " + validDecoderExitValue + ".");
System.exit(30);
}
} catch (IOException e) {
System.err.println("IOException in MertCore.run_decoder(int): " + e.getMessage());
System.exit(99902);
} catch (InterruptedException e) {
System.err.println("InterruptedException in MertCore.run_decoder(int): " + e.getMessage());
System.exit(99903);
}
retSA[0] = decoderOutFileName;
retSA[1] = "1";
}
return retSA;
}
private void produceTempFiles(String nbestFileName, int iteration)
{
try {
String sentsFileName = tmpDirPrefix+"temp.sents.it"+iteration;
String featsFileName = tmpDirPrefix+"temp.feats.it"+iteration;
FileOutputStream outStream_sents = new FileOutputStream(sentsFileName, false);
OutputStreamWriter outStreamWriter_sents = new OutputStreamWriter(outStream_sents, "utf8");
BufferedWriter outFile_sents = new BufferedWriter(outStreamWriter_sents);
PrintWriter outFile_feats = new PrintWriter(featsFileName);
InputStream inStream_nbest = null;
if (nbestFileName.endsWith(".gz")) {
inStream_nbest = new GZIPInputStream(new FileInputStream(nbestFileName));
} else {
inStream_nbest = new FileInputStream(nbestFileName);
}
BufferedReader inFile_nbest = new BufferedReader(new InputStreamReader(inStream_nbest, "utf8"));
String line; //, prevLine;
String candidate_str = "";
String feats_str = "";
int i = 0; int n = 0;
line = inFile_nbest.readLine();
while (line != null) {
/*
line format:
i ||| words of candidate translation . ||| feat-1_val feat-2_val ... feat-numParams_val .*
*/
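// e.g. (hypothetical) a line might look like:
//   0 ||| the cat sat on the mat . ||| 0.5 -2.3 1.7 ||| -12.4
// anything after a further "|||" (such as the total score above) is discarded below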
// in a well formed file, we'd find the nth candidate for the ith sentence
int read_i = Integer.parseInt((line.substring(0,line.indexOf("|||"))).trim());
if (read_i != i) {
writeLine("||||||",outFile_sents);
outFile_feats.println("||||||");
n = 0; ++i;
}
line = (line.substring(line.indexOf("|||")+3)).trim(); // get rid of initial text
candidate_str = (line.substring(0,line.indexOf("|||"))).trim();
feats_str = (line.substring(line.indexOf("|||")+3)).trim();
// get rid of candidate string
int junk_i = feats_str.indexOf("|||");
if (junk_i >= 0) {
feats_str = (feats_str.substring(0,junk_i)).trim();
}
writeLine(normalize(candidate_str,textNormMethod), outFile_sents);
outFile_feats.println(feats_str);
++n;
if (n == sizeOfNBest) {
writeLine("||||||",outFile_sents);
outFile_feats.println("||||||");
n = 0; ++i;
}
line = inFile_nbest.readLine();
}
if (i != numSentences) { // last sentence had too few candidates
writeLine("||||||",outFile_sents);
outFile_feats.println("||||||");
}
inFile_nbest.close();
outFile_sents.close();
outFile_feats.close();
if (compressFiles == 1) {
gzipFile(sentsFileName);
gzipFile(featsFileName);
}
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.produceTempFiles(int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.produceTempFiles(int): " + e.getMessage());
System.exit(99902);
}
}
private void createConfigFile(double[] params, String cfgFileName, String templateFileName)
{
try {
// i.e. create cfgFileName, which is similar to templateFileName, but with
// params[] as parameter values
BufferedReader inFile = new BufferedReader(new FileReader(templateFileName));
PrintWriter outFile = new PrintWriter(cfgFileName);
String line = inFile.readLine();
while (line != null) {
int c_match = -1;
for (int c = 1; c <= numParams; ++c) {
if (line.startsWith(paramNames[c] + " ")) { c_match = c; break; }
}
if (c_match == -1) {
outFile.println(line);
} else {
outFile.println(paramNames[c_match] + " " + params[c_match]);
}
line = inFile.readLine();
}
inFile.close();
outFile.close();
} catch (IOException e) {
System.err.println("IOException in MertCore.createConfigFile(double[],String,String): " + e.getMessage());
System.exit(99902);
}
}
private void processParamFile()
{
// process parameter file
Scanner inFile_init = null;
try {
inFile_init = new Scanner(new FileReader(paramsFileName));
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.processParamFile(): " + e.getMessage());
System.exit(99901);
}
String dummy = "";
// initialize lambda[] and other related arrays
for (int c = 1; c <= numParams; ++c) {
// skip parameter name
while (!dummy.equals("|||")) { dummy = inFile_init.next(); }
// read default value
lambda[c] = inFile_init.nextDouble();
defaultLambda[c] = lambda[c];
// read isOptimizable
dummy = inFile_init.next();
if (dummy.equals("Opt")) { isOptimizable[c] = true; }
else if (dummy.equals("Fix")) { isOptimizable[c] = false; }
else {
println("Unknown isOptimizable string " + dummy + " (must be either Opt or Fix)");
System.exit(21);
}
if (!isOptimizable[c]) { // skip next four values
dummy = inFile_init.next();
dummy = inFile_init.next();
dummy = inFile_init.next();
dummy = inFile_init.next();
} else {
// set minThValue[c] and maxThValue[c] (range for thresholds to investigate)
dummy = inFile_init.next();
if (dummy.equals("-Inf")) { minThValue[c] = NegInf; }
else if (dummy.equals("+Inf")) {
println("minThValue[" + c + "] cannot be +Inf!");
System.exit(21);
} else { minThValue[c] = Double.parseDouble(dummy); }
dummy = inFile_init.next();
if (dummy.equals("-Inf")) {
println("maxThValue[" + c + "] cannot be -Inf!");
System.exit(21);
} else if (dummy.equals("+Inf")) { maxThValue[c] = PosInf; }
else { maxThValue[c] = Double.parseDouble(dummy); }
// set minRandValue[c] and maxRandValue[c] (range for random values)
dummy = inFile_init.next();
if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
println("minRandValue[" + c + "] cannot be -Inf or +Inf!");
System.exit(21);
} else { minRandValue[c] = Double.parseDouble(dummy); }
dummy = inFile_init.next();
if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
println("maxRandValue[" + c + "] cannot be -Inf or +Inf!");
System.exit(21);
} else { maxRandValue[c] = Double.parseDouble(dummy); }
// check for illogical values
if (minThValue[c] > maxThValue[c]) {
println("minThValue[" + c + "]=" + minThValue[c]
+ " > " + maxThValue[c] + "=maxThValue[" + c + "]!");
System.exit(21);
}
if (minRandValue[c] > maxRandValue[c]) {
println("minRandValue[" + c + "]=" + minRandValue[c]
+ " > " + maxRandValue[c] + "=maxRandValue[" + c + "]!");
System.exit(21);
}
// check for odd values
if (!(minThValue[c] <= lambda[c] && lambda[c] <= maxThValue[c])) {
println("Warning: lambda[" + c + "] has initial value (" + lambda[c] + ")",1);
println(" that is outside its critical value range "
+ "[" + minThValue[c] + "," + maxThValue[c] + "]",1);
}
if (minThValue[c] == maxThValue[c]) {
println("Warning: lambda[" + c + "] has "
+ "minThValue = maxThValue = " + minThValue[c] + ".",1);
}
if (minRandValue[c] == maxRandValue[c]) {
println("Warning: lambda[" + c + "] has "
+ "minRandValue = maxRandValue = " + minRandValue[c] + ".",1);
}
if (minRandValue[c] < minThValue[c] || minRandValue[c] > maxThValue[c]
|| maxRandValue[c] < minThValue[c] || maxRandValue[c] > maxThValue[c]) {
println("Warning: The random value range for lambda[" + c + "] is not contained",1);
println(" within its critical value range.",1);
}
} // if (!isOptimizable[c])
/*
precision[c] = inFile_init.nextDouble();
if (precision[c] < 0) {
println("precision[" + c + "]=" + precision[c] + " < 0! Must be non-negative.");
System.exit(21);
}
*/
}
// set normalizationOptions[]
String origLine = "";
while (origLine != null && origLine.length() == 0) { origLine = inFile_init.nextLine(); }
// How should a lambda[] vector be normalized (before decoding)?
// nO[0] = 0: no normalization
// nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
// nO[0] = 2: scale so that the maximum absolute value is nO[1]
// nO[0] = 3: scale so that the minimum absolute value is nO[1]
// nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
// normalization = none
// normalization = absval 1 lm
// normalization = maxabsval 1
// normalization = minabsval 1
// normalization = LNorm 2 1
dummy = (origLine.substring(origLine.indexOf("=")+1)).trim();
String[] dummyA = dummy.split("\\s+");
if (dummyA[0].equals("none")) {
normalizationOptions[0] = 0;
} else if (dummyA[0].equals("absval")) {
normalizationOptions[0] = 1;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
String pName = dummyA[2];
for (int i = 3; i < dummyA.length; ++i) { // in case parameter name has multiple words
pName = pName + " " + dummyA[i];
}
normalizationOptions[2] = c_fromParamName(pName);
if (normalizationOptions[1] <= 0) {
println("Value for the absval normalization method must be positive.");
System.exit(21);
}
if (normalizationOptions[2] == 0) {
println("Unrecognized feature name " + normalizationOptions[2]
+ " for absval normalization method.",1);
System.exit(21);
}
} else if (dummyA[0].equals("maxabsval")) {
normalizationOptions[0] = 2;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
if (normalizationOptions[1] <= 0) {
println("Value for the maxabsval normalization method must be positive.");
System.exit(21);
}
} else if (dummyA[0].equals("minabsval")) {
normalizationOptions[0] = 3;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
if (normalizationOptions[1] <= 0) {
println("Value for the minabsval normalization method must be positive.");
System.exit(21);
}
} else if (dummyA[0].equals("LNorm")) {
normalizationOptions[0] = 4;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
normalizationOptions[2] = Double.parseDouble(dummyA[2]);
if (normalizationOptions[1] <= 0 || normalizationOptions[2] <= 0) {
println("Both values for the LNorm normalization method must be positive.");
System.exit(21);
}
} else {
println("Unrecognized normalization method " + dummyA[0] + "; "
+ "must be one of none, absval, maxabsval, and LNorm.");
System.exit(21);
} // if (dummyA[0])
inFile_init.close();
}
private void processDocInfo()
{
// sets numDocuments and docOfSentence[]
docOfSentence = new int[numSentences];
if (docInfoFileName == null) {
for (int i = 0; i < numSentences; ++i) docOfSentence[i] = 0;
numDocuments = 1;
} else {
try {
// 4 possible formats (see the examples below):
// 1) List of numbers, one per document, indicating # sentences in each document.
// 2) List of "docName size" pairs, one per document, indicating name of document and # sentences.
// 3) List of docName's, one per sentence, indicating which document each sentence belongs to.
// 4) List of docName_number's, one per sentence, indicating which document each sentence belongs to,
// and its order in that document. (can also use '-' instead of '_')
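// e.g. (hypothetical) for a 5-sentence dev set split into documents "news1" (3 sentences)
// and "news2" (2 sentences), the file would contain, one entry per line:
//   format 1: 3, 2
//   format 2: "news1 3", "news2 2"
//   format 3: news1, news1, news1, news2, news2
//   format 4: news1_1, news1_2, news1_3, news2_1, news2_2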
int docInfoSize = countNonEmptyLines(docInfoFileName);
if (docInfoSize < numSentences) { // format #1 or #2
numDocuments = docInfoSize;
int i = 0;
BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
String line = inFile.readLine();
boolean format1 = (!(line.contains(" ")));
for (int doc = 0; doc < numDocuments; ++doc) {
if (doc != 0) line = inFile.readLine();
int docSize = 0;
if (format1) {
docSize = Integer.parseInt(line);
} else {
docSize = Integer.parseInt(line.split("\\s+")[1]);
}
for (int i2 = 1; i2 <= docSize; ++i2) {
docOfSentence[i] = doc;
++i;
}
}
// now i == numSentences
inFile.close();
} else if (docInfoSize == numSentences) { // format #3 or #4
boolean format3 = false;
HashSet<String> seenStrings = new HashSet<String>();
BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
for (int i = 0; i < numSentences; ++i) {
// set format3 = true if a duplicate is found
String line = inFile.readLine();
if (seenStrings.contains(line)) format3 = true;
seenStrings.add(line);
}
inFile.close();
HashSet<String> seenDocNames = new HashSet<String>();
HashMap<String,Integer> docOrder = new HashMap<String,Integer>();
// maps a document name to the order (0-indexed) in which it was seen
inFile = new BufferedReader(new FileReader(docInfoFileName));
for (int i = 0; i < numSentences; ++i) {
String line = inFile.readLine();
String docName = "";
if (format3) {
docName = line;
} else {
int sep_i = Math.max(line.lastIndexOf('_'),line.lastIndexOf('-'));
docName = line.substring(0,sep_i);
}
if (!seenDocNames.contains(docName)) {
seenDocNames.add(docName);
docOrder.put(docName,seenDocNames.size()-1);
}
int docOrder_i = docOrder.get(docName);
docOfSentence[i] = docOrder_i;
}
inFile.close();
numDocuments = seenDocNames.size();
} else { // badly formatted: more non-empty lines than the dev set has sentences
println("The docInfo file " + docInfoFileName + " has " + docInfoSize
+ " non-empty lines, but the dev set only has " + numSentences + " sentences.");
System.exit(60);
}
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.processDocInfo(): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.processDocInfo(): " + e.getMessage());
System.exit(99902);
}
}
}
private boolean copyFile(String origFileName, String newFileName)
{
try {
File inputFile = new File(origFileName);
File outputFile = new File(newFileName);
InputStream in = new FileInputStream(inputFile);
OutputStream out = new FileOutputStream(outputFile);
byte[] buffer = new byte[1024];
int len;
while ((len = in.read(buffer)) > 0){
out.write(buffer, 0, len);
}
in.close();
out.close();
/*
InputStream inStream = new FileInputStream(new File(origFileName));
BufferedReader inFile = new BufferedReader(new InputStreamReader(inStream, "utf8"));
FileOutputStream outStream = new FileOutputStream(newFileName, false);
OutputStreamWriter outStreamWriter = new OutputStreamWriter(outStream, "utf8");
BufferedWriter outFile = new BufferedWriter(outStreamWriter);
String line;
while(inFile.ready()) {
line = inFile.readLine();
writeLine(line, outFile);
}
inFile.close();
outFile.close();
*/
return true;
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.copyFile(String,String): " + e.getMessage());
return false;
} catch (IOException e) {
System.err.println("IOException in MertCore.copyFile(String,String): " + e.getMessage());
return false;
}
}
private void renameFile(String origFileName, String newFileName)
{
if (fileExists(origFileName)) {
deleteFile(newFileName);
File oldFile = new File(origFileName);
File newFile = new File(newFileName);
if (!oldFile.renameTo(newFile)) {
println("Warning: attempt to rename " + origFileName + " to " + newFileName + " was unsuccessful!",1);
}
} else {
println("Warning: file " + origFileName + " does not exist! (in MertCore.renameFile)",1);
}
}
private void deleteFile(String fileName)
{
if (fileExists(fileName)) {
File fd = new File(fileName);
if (!fd.delete()) {
println("Warning: attempt to delete " + fileName + " was unsuccessful!",1);
}
}
}
private void writeLine(String line, BufferedWriter writer) throws IOException
{
writer.write(line, 0, line.length());
writer.newLine();
writer.flush();
}
public void finish()
{
if (myDecoder != null) {
myDecoder.cleanUp();
}
// create config file with final values
createConfigFile(lambda, decoderConfigFileName+".ZMERT.final",decoderConfigFileName+".ZMERT.orig");
// delete current decoder config file and decoder output
deleteFile(decoderConfigFileName);
deleteFile(decoderOutFileName);
// restore original name for config file (name was changed
// in initialize() so it doesn't get overwritten)
renameFile(decoderConfigFileName+".ZMERT.orig",decoderConfigFileName);
if (finalLambdaFileName != null) {
try {
PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
for (int c = 1; c <= numParams; ++c) {
outFile_lambdas.println(paramNames[c] + " ||| " + lambda[c]);
}
outFile_lambdas.close();
} catch (IOException e) {
System.err.println("IOException in MertCore.finish(): " + e.getMessage());
System.exit(99902);
}
}
}
private String[] cfgFileToArgsArray(String fileName)
{
checkFile(fileName);
Vector<String> argsVector = new Vector<String>();
BufferedReader inFile = null;
try {
inFile = new BufferedReader(new FileReader(fileName));
String line, origLine;
do {
line = inFile.readLine();
origLine = line; // for error reporting purposes
if (line != null && line.length() > 0 && line.charAt(0) != '#') {
if (line.indexOf("#") != -1) { // discard comment
line = line.substring(0,line.indexOf("#"));
}
line = line.trim();
// now line should look like "-xxx XXX"
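// Illustrative (hypothetical) config lines this parser accepts:
//   -dir  MERT_example     # anything after '#' was discarded above
//   -N    300
//   -m    BLEU 4 closest   (-m/-docSet/-damianos may carry extra options)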
String[] paramA = line.split("\\s+");
if (paramA.length == 2 && paramA[0].charAt(0) == '-') {
argsVector.add(paramA[0]);
argsVector.add(paramA[1]);
} else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet") || paramA[0].equals("-damianos"))) {
// -m (metricName), -docSet, and -damianos are allowed to have extra options
for (int opt = 0; opt < paramA.length; ++opt) { argsVector.add(paramA[opt]); }
} else {
println("Malformed line in config file:");
println(origLine);
System.exit(70);
}
}
} while (line != null);
inFile.close();
} catch (FileNotFoundException e) {
println("Z-MERT configuration file " + fileName + " was not found!");
System.err.println("FileNotFoundException in MertCore.cfgFileToArgsArray(String): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.cfgFileToArgsArray(String): " + e.getMessage());
System.exit(99902);
}
String[] argsArray = new String[argsVector.size()];
for (int i = 0; i < argsVector.size(); ++i) {
argsArray[i] = argsVector.elementAt(i);
}
return argsArray;
}
private void processArgsArray(String[] args)
{
processArgsArray(args,true);
}
private void processArgsArray(String[] args, boolean firstTime) {
/* set default values */
// Relevant files
dirPrefix = null;
sourceFileName = null;
refFileName = "reference.txt";
refsPerSen = 1;
textNormMethod = 1;
paramsFileName = "params.txt";
docInfoFileName = null;
finalLambdaFileName = null;
// MERT specs
metricName = "BLEU";
metricName_display = metricName;
metricOptions = new String[2];
metricOptions[0] = "4";
metricOptions[1] = "closest";
docSubsetInfo = new int[7];
docSubsetInfo[0] = 0;
maxMERTIterations = 20;
prevMERTIterations = 20;
minMERTIterations = 5;
stopMinIts = 3;
stopSigValue = -1;
//
// /* possibly other early stopping criteria here */
//
numOptThreads = 1;
saveInterFiles = 3;
compressFiles = 0;
initsPerIt = 20;
oneModificationPerIteration = false;
randInit = false;
seed = System.currentTimeMillis();
// useDisk = 2;
// Decoder specs
decoderCommandFileName = null;
passIterationToDecoder = 0;
decoderOutFileName = "output.nbest";
validDecoderExitValue = 0;
decoderConfigFileName = "dec_cfg.txt";
sizeOfNBest = 100;
fakeFileNameTemplate = null;
fakeFileNamePrefix = null;
fakeFileNameSuffix = null;
// Output specs
verbosity = 1;
decVerbosity = 0;
damianos_method = 0;
damianos_param = 0.0;
damianos_mult = 0.0;
int i = 0;
while (i < args.length) {
String option = args[i];
// Relevant files
if (option.equals("-dir")) { dirPrefix = args[i+1];
} else if (option.equals("-s")) { sourceFileName = args[i+1];
} else if (option.equals("-r")) { refFileName = args[i+1];
} else if (option.equals("-rps")) {
refsPerSen = Integer.parseInt(args[i+1]);
if (refsPerSen < 1) {
println("refsPerSen must be positive.");
System.exit(10);
}
} else if (option.equals("-txtNrm")) {
textNormMethod = Integer.parseInt(args[i+1]);
if (textNormMethod < 0 || textNormMethod > 4) {
println("textNormMethod should be between 0 and 4");
System.exit(10);
}
} else if (option.equals("-p")) {
paramsFileName = args[i+1];
} else if (option.equals("-docInfo")) {
docInfoFileName = args[i+1];
} else if (option.equals("-fin")) { finalLambdaFileName = args[i+1];
// MERT specs
} else if (option.equals("-m")) {
metricName = args[i+1];
metricName_display = metricName;
if (EvaluationMetric.knownMetricName(metricName)) {
int optionCount = EvaluationMetric.metricOptionCount(metricName);
metricOptions = new String[optionCount];
for (int opt = 0; opt < optionCount; ++opt) {
metricOptions[opt] = args[i+opt+2];
}
i += optionCount;
} else {
println("Unknown metric name " + metricName + ".");
System.exit(10);
}
} else if (option.equals("-docSet")) {
String method = args[i+1];
if (method.equals("all")) {
docSubsetInfo[0] = 0;
i += 0;
} else if (method.equals("bottom")) {
String a = args[i+2];
if (a.endsWith("d")) {
docSubsetInfo[0] = 1;
a = a.substring(0,a.indexOf("d"));
} else {
docSubsetInfo[0] = 2;
a = a.substring(0,a.indexOf("%"));
}
docSubsetInfo[5] = Integer.parseInt(a);
i += 1;
} else if (method.equals("top")) {
String a = args[i+2];
if (a.endsWith("d")) {
docSubsetInfo[0] = 3;
a = a.substring(0,a.indexOf("d"));
} else {
docSubsetInfo[0] = 4;
a = a.substring(0,a.indexOf("%"));
}
docSubsetInfo[5] = Integer.parseInt(a);
i += 1;
} else if (method.equals("window")) {
String a1 = args[i+2];
a1 = a1.substring(0,a1.indexOf("d")); // size of window
String a2 = args[i+4];
if (a2.indexOf("p") > 0) {
docSubsetInfo[0] = 5;
a2 = a2.substring(0,a2.indexOf("p"));
} else {
docSubsetInfo[0] = 6;
a2 = a2.substring(0,a2.indexOf("r"));
}
docSubsetInfo[5] = Integer.parseInt(a1);
docSubsetInfo[6] = Integer.parseInt(a2);
i += 3;
} else {
println("Unknown docSet method " + method + ".");
System.exit(10);
}
} else if (option.equals("-maxIt")) {
maxMERTIterations = Integer.parseInt(args[i+1]);
if (maxMERTIterations < 1) {
println("maxMERTIts must be positive.");
System.exit(10);
}
} else if (option.equals("-minIt")) {
minMERTIterations = Integer.parseInt(args[i+1]);
if (minMERTIterations < 1) {
println("minMERTIts must be positive.");
System.exit(10);
}
} else if (option.equals("-prevIt")) {
prevMERTIterations = Integer.parseInt(args[i+1]);
if (prevMERTIterations < 0) {
println("prevMERTIts must be non-negative.");
System.exit(10);
}
} else if (option.equals("-stopIt")) {
stopMinIts = Integer.parseInt(args[i+1]);
if (stopMinIts < 1) {
println("stopMinIts must be positive.");
System.exit(10);
}
} else if (option.equals("-stopSig")) {
stopSigValue = Double.parseDouble(args[i+1]);
}
//
// /* possibly other early stopping criteria here */
//
else if (option.equals("-thrCnt")) {
numOptThreads = Integer.parseInt(args[i+1]);
if (numOptThreads < 1) {
println("threadCount must be positive.");
System.exit(10);
}
} else if (option.equals("-save")) {
saveInterFiles = Integer.parseInt(args[i+1]);
if (saveInterFiles < 0 || saveInterFiles > 3) {
println("save should be between 0 and 3");
System.exit(10);
}
} else if (option.equals("-compress")) {
compressFiles = Integer.parseInt(args[i+1]);
if (compressFiles < 0 || compressFiles > 1) {
println("compressFiles should be either 0 or 1");
System.exit(10);
}
} else if (option.equals("-ipi")) {
initsPerIt = Integer.parseInt(args[i+1]);
if (initsPerIt < 1) {
println("initsPerIt must be positive.");
System.exit(10);
}
} else if (option.equals("-opi")) {
int opi = Integer.parseInt(args[i+1]);
if (opi == 1) {
oneModificationPerIteration = true;
} else if (opi == 0) {
oneModificationPerIteration = false;
} else {
println("oncePerIt must be either 0 or 1.");
System.exit(10);
}
} else if (option.equals("-rand")) {
int rand = Integer.parseInt(args[i+1]);
if (rand == 1) {
randInit = true;
} else if (rand == 0) {
randInit = false;
} else {
println("randInit must be either 0 or 1.");
System.exit(10);
}
} else if (option.equals("-seed")) {
if (args[i+1].equals("time")) {
seed = System.currentTimeMillis();
} else {
seed = Long.parseLong(args[i+1]);
}
}
/*
else if (option.equals("-ud")) {
useDisk = Integer.parseInt(args[i+1]);
if (useDisk < 0 || useDisk > 2) {
println("useDisk should be between 0 and 2");
System.exit(10);
}
}
*/
// Decoder specs
else if (option.equals("-cmd")) {
decoderCommandFileName = args[i+1];
} else if (option.equals("-passIt")) {
passIterationToDecoder = Integer.parseInt(args[i+1]);
if (passIterationToDecoder < 0 || passIterationToDecoder > 1) {
println("passIterationToDecoder should be either 0 or 1");
System.exit(10);
}
} else if (option.equals("-decOut")) {
decoderOutFileName = args[i+1];
} else if (option.equals("-decExit")) {
validDecoderExitValue = Integer.parseInt(args[i+1]);
} else if (option.equals("-dcfg")) {
decoderConfigFileName = args[i+1];
} else if (option.equals("-N")) {
sizeOfNBest = Integer.parseInt(args[i+1]);
if (sizeOfNBest < 1) {
println("N must be positive.");
System.exit(10);
}
}
// Output specs
else if (option.equals("-v")) {
verbosity = Integer.parseInt(args[i+1]);
if (verbosity < 0 || verbosity > 4) {
println("verbosity should be between 0 and 4");
System.exit(10);
}
} else if (option.equals("-decV")) {
decVerbosity = Integer.parseInt(args[i+1]);
if (decVerbosity < 0 || decVerbosity > 1) {
println("decVerbosity should be either 0 or 1");
System.exit(10);
}
} else if (option.equals("-fake")) {
fakeFileNameTemplate = args[i+1];
int QM_i = fakeFileNameTemplate.indexOf("?");
if (QM_i <= 0) {
println("fakeFileNameTemplate must contain '?' to indicate position of iteration number");
System.exit(10);
}
fakeFileNamePrefix = fakeFileNameTemplate.substring(0,QM_i);
fakeFileNameSuffix = fakeFileNameTemplate.substring(QM_i+1);
} else if (option.equals("-damianos")) {
damianos_method = Integer.parseInt(args[i+1]);
if (damianos_method < 0 || damianos_method > 3) {
println("damianos_method should be between 0 and 3");
System.exit(10);
}
damianos_param = Double.parseDouble(args[i+2]);
damianos_mult = Double.parseDouble(args[i+3]);
i += 2;
} else {
println("Unknown option " + option);
System.exit(10);
}
i += 2;
} // while (i)
if (maxMERTIterations < minMERTIterations) {
if (firstTime)
println("Warning: maxMERTIts is smaller than minMERTIts; "
+ "decreasing minMERTIts from " + minMERTIterations + " to maxMERTIts "
+ "(i.e. " + maxMERTIterations + ").",1);
minMERTIterations = maxMERTIterations;
}
if (dirPrefix != null) { // append dirPrefix to file names
refFileName = fullPath(dirPrefix,refFileName);
decoderOutFileName = fullPath(dirPrefix,decoderOutFileName);
paramsFileName = fullPath(dirPrefix,paramsFileName);
decoderConfigFileName = fullPath(dirPrefix,decoderConfigFileName);
if (sourceFileName != null) { sourceFileName = fullPath(dirPrefix,sourceFileName); }
if (docInfoFileName != null) { docInfoFileName = fullPath(dirPrefix,docInfoFileName); }
if (finalLambdaFileName != null) { finalLambdaFileName = fullPath(dirPrefix,finalLambdaFileName); }
if (decoderCommandFileName != null) { decoderCommandFileName = fullPath(dirPrefix,decoderCommandFileName); }
if (fakeFileNamePrefix != null) { fakeFileNamePrefix = fullPath(dirPrefix,fakeFileNamePrefix); }
}
// TODO: make this an argument
// TODO: also use this for the state file? could be tricky, since that file is created by ZMERT.java
// TODO: change name from tmpDirPrefix to tmpFilePrefix?
int k = decoderOutFileName.lastIndexOf("/");
if (k >= 0) {
tmpDirPrefix = decoderOutFileName.substring(0,k+1) + "ZMERT.";
} else {
tmpDirPrefix = "ZMERT.";
}
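// e.g. (hypothetical paths) a decoderOutFileName of "MERT_example/nbest.out" yields
// tmpDirPrefix "MERT_example/ZMERT.", while a bare "nbest.out" yields just "ZMERT."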
println("tmpDirPrefix: " + tmpDirPrefix);
checkFile(paramsFileName);
checkFile(decoderConfigFileName);
boolean canRunCommand = fileExists(decoderCommandFileName);
if (decoderCommandFileName != null && !canRunCommand) {
// i.e. a decoder command file was specified, but it was not found
if (firstTime)
println("Warning: specified decoder command file "
+ decoderCommandFileName + " was not found.",1);
}
boolean canRunJoshua = fileExists(sourceFileName);
if (sourceFileName != null && !canRunJoshua) {
// i.e. a source file was specified, but it was not found
if (firstTime)
println("Warning: specified source file "
+ sourceFileName + " was not found.",1);
}
boolean canRunFake = (fakeFileNameTemplate != null);
if (!canRunCommand && !canRunJoshua) { // can only run fake decoder
if (!canRunFake) {
println("Z-MERT cannot decode; must provide one of: command file (for external decoder),");
println(" source file (for Joshua decoder),");
println(" or prefix for existing output files (for fake decoder).");
System.exit(12);
}
int lastGoodIt = 0;
for (int it = 1; it <= maxMERTIterations; ++it) {
if (fileExists(fakeFileNamePrefix+it+fakeFileNameSuffix)) {
lastGoodIt = it;
} else {
break; // from for (it) loop
}
}
if (lastGoodIt == 0) {
println("Fake decoder cannot find first output file " + (fakeFileNamePrefix+1+fakeFileNameSuffix));
System.exit(13);
} else if (lastGoodIt < maxMERTIterations) {
if (firstTime)
println("Warning: can only run fake decoder; existing output files "
+ "are only available for the first " + lastGoodIt + " iteration(s).",1);
}
}
if (refsPerSen > 1) {
// the provided refFileName might be a prefix
File dummy = new File(refFileName);
if (!dummy.exists()) {
refFileName = createUnifiedRefFile(refFileName,refsPerSen);
}
} else {
checkFile(refFileName);
}
if (firstTime) {
println("Processed the following args array:",1);
print(" ",1);
for (i = 0; i < args.length; ++i) {
print(args[i] + " ",1);
}
println("",1);
println("",1);
}
} // processArgs(String[] args)
private void set_docSubsetInfo(int[] info)
{
/*
1: -docSet bottom 8d                          the bottom 8 documents
2: -docSet bottom 25%                         the bottom ceil(0.25*numDocs) documents
3: -docSet top 8d                             the top 8 documents
4: -docSet top 25%                            the top ceil(0.25*numDocs) documents
5: -docSet window 11d around 90percentile     11 docs centered around the 90th percentile
(complain if not enough docs; don't adjust)
6: -docSet window 11d around 40rank           11 docs centered around the doc ranked 40
(complain if not enough docs; don't adjust)
[0]: method (0-6)
[1]: first (1-indexed)
[2]: last (1-indexed)
[3]: size
[4]: center
[5]: arg1 (0 for method 0)
[6]: arg2 (0 for methods 0-4)
*/
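// Worked (hypothetical) example: "-docSet window 11d around 90percentile" with
// numDocuments = 100 arrives here as info[0]=5, info[5]=11, info[6]=90, and becomes:
//   info[3] = 11                        (size)
//   info[4] = floor(0.90 * 100) = 90    (center)
//   info[1] = 90 - 5 = 85               (first)
//   info[2] = 90 + 5 = 95               (last)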
if (info[0] == 0) { // all
info[1] = 1;
info[2] = numDocuments;
info[3] = numDocuments;
info[4] = (info[1] + info[2]) / 2;
} else if (info[0] == 1) { // bottom d
info[3] = info[5];
info[2] = numDocuments;
info[1] = numDocuments - info[3] + 1;
info[4] = (info[1] + info[2]) / 2;
} else if (info[0] == 2) { // bottom p
info[3] = (int)(Math.ceil((info[5]/100.0) * numDocuments));
info[2] = numDocuments;
info[1] = numDocuments - info[3] + 1;
info[4] = (info[1] + info[2]) / 2;
} else if (info[0] == 3) { // top d
info[3] = info[5];
info[1] = 1;
info[2] = info[3];
info[4] = (info[1] + info[2]) / 2;
} else if (info[0] == 4) { // top p
info[3] = (int)(Math.ceil((info[5]/100.0) * numDocuments));
info[1] = 1;
info[2] = info[3];
info[4] = (info[1] + info[2]) / 2;
} else if (info[0] == 5) { // window around percentile
info[3] = info[5];
info[4] = (int)(Math.floor((info[6]/100.0) * numDocuments));
info[1] = info[4] - ((info[3]-1) / 2);
info[2] = info[4] + ((info[3]-1) / 2);
} else if (info[0] == 6) { // window around rank
info[3] = info[5];
info[4] = info[6];
info[1] = info[4] - ((info[3]-1) / 2);
info[2] = info[4] + ((info[3]-1) / 2);
}
}
private void checkFile(String fileName)
{
if (!fileExists(fileName)) {
println("The file " + fileName + " was not found!");
System.exit(40);
}
}
private boolean fileExists(String fileName)
{
if (fileName == null) return false;
File checker = new File(fileName);
return checker.exists();
}
private void gzipFile(String inputFileName)
{
gzipFile(inputFileName, inputFileName + ".gz");
}
private void gzipFile(String inputFileName, String gzippedFileName)
{
// NOTE: this will delete the original file
try {
FileInputStream in = new FileInputStream(inputFileName);
GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(gzippedFileName));
byte[] buffer = new byte[4096];
int len;
while ((len = in.read(buffer)) > 0) {
out.write(buffer, 0, len);
}
in.close();
out.finish();
out.close();
deleteFile(inputFileName);
} catch (IOException e) {
System.err.println("IOException in MertCore.gzipFile(String,String): " + e.getMessage());
System.exit(99902);
}
}
private void gunzipFile(String gzippedFileName)
{
if (gzippedFileName.endsWith(".gz")) {
gunzipFile(gzippedFileName, gzippedFileName.substring(0,gzippedFileName.length()-3));
} else {
gunzipFile(gzippedFileName, gzippedFileName + ".dec");
}
}
private void gunzipFile(String gzippedFileName, String outputFileName)
{
// NOTE: this will delete the original file
try {
GZIPInputStream in = new GZIPInputStream(new FileInputStream(gzippedFileName));
FileOutputStream out = new FileOutputStream(outputFileName);
byte[] buffer = new byte[4096];
int len;
while ((len = in.read(buffer)) > 0) {
out.write(buffer, 0, len);
}
in.close();
out.close();
deleteFile(gzippedFileName);
} catch (IOException e) {
System.err.println("IOException in MertCore.gunzipFile(String,String): " + e.getMessage());
System.exit(99902);
}
}
private String createUnifiedRefFile(String prefix, int numFiles)
{
if (numFiles < 2) {
println("Warning: createUnifiedRefFile called with numFiles = " + numFiles + "; "
+ "doing nothing.",1);
return prefix;
} else {
File checker;
checker = new File(prefix+"1");
if (!checker.exists()) {
checker = new File(prefix+".1");
if (!checker.exists()) {
println("Can't find reference files.");
System.exit(50);
} else {
prefix = prefix + ".";
}
}
String outFileName;
if (prefix.endsWith(".")) { outFileName = prefix+"all"; }
else { outFileName = prefix+".all"; }
try {
PrintWriter outFile = new PrintWriter(outFileName);
BufferedReader[] inFile = new BufferedReader[numFiles];
int nextIndex;
checker = new File(prefix+"0");
if (checker.exists()) { nextIndex = 0; }
else { nextIndex = 1; }
int lineCount = countLines(prefix+nextIndex);
for (int r = 0; r < numFiles; ++r) {
if (countLines(prefix+nextIndex) != lineCount) {
println("Line count mismatch in " + (prefix+nextIndex) + ".");
System.exit(60);
}
InputStream inStream = new FileInputStream(new File(prefix+nextIndex));
inFile[r] = new BufferedReader(new InputStreamReader(inStream, "utf8"));
++nextIndex;
}
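// interleave: for each source sentence i, write its numFiles reference
// translations on consecutive lines of the unified file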
String line;
for (int i = 0; i < lineCount; ++i) {
for (int r = 0; r < numFiles; ++r) {
line = inFile[r].readLine();
outFile.println(line);
}
}
outFile.close();
for (int r = 0; r < numFiles; ++r) { inFile[r].close(); }
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.createUnifiedRefFile(String,int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.createUnifiedRefFile(String,int): " + e.getMessage());
System.exit(99902);
}
return outFileName;
}
} // createUnifiedRefFile(String prefix, int numFiles)
private String normalize(String str, int normMethod)
{
if (normMethod == 0) return str;
// replace HTML/SGML
str = str.replaceAll("&quot;","\"");
str = str.replaceAll("&amp;","&");
str = str.replaceAll("&lt;","<");
str = str.replaceAll("&gt;",">");
str = str.replaceAll("&apos;","'");
// split on these characters:
// ! " # $ % & ( ) * + / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
// i.e. ASCII 33-126, except alphanumeric, and except "," "-" "." "'"
// ! "# $%& ( ) * +/:;<=> ?@ [ \ ] ^_` { | }~
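// Illustrative (hypothetical) behavior of method 1 on a short string:
//   normalize("back-to-back (2,500 plays)", 1)  ->  "back-to-back ( 2,500 plays )"
// i.e. '(' and ')' are padded with spaces, while the ',' between digits and the '-'
// between letters are left alone by the context-sensitive rules below.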
String split_on = "!\"#\\$%&\\(\\)\\*\\+/:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~";
// println("split_on: " + split_on);
for (int k = 0; k < split_on.length(); ++k) {
// for each split character, reprocess the string
String regex = "" + split_on.charAt(k);
if (regex.equals("\\")) {
++k;
regex += split_on.charAt(k);
}
str = str.replaceAll(regex," " + regex + " ");
}
// split on "." and "," and "-", conditioned on proper context
str = " " + str + " ";
str = str.replaceAll("\\s+"," ");
TreeSet<Integer> splitIndices = new TreeSet<Integer>();
for (int i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
if (ch == '.' || ch == ',') {
// split if either of the previous or next characters is a non-digit
char prev_ch = str.charAt(i-1);
char next_ch = str.charAt(i+1);
if (prev_ch < '0' || prev_ch > '9' || next_ch < '0' || next_ch > '9') {
splitIndices.add(i);
}
} else if (ch == '-') {
// split if preceded by a digit
char prev_ch = str.charAt(i-1);
if (prev_ch >= '0' && prev_ch <= '9') {
splitIndices.add(i);
}
}
}
String str0 = str;
str = "";
for (int i = 0; i < str0.length(); ++i) {
if (splitIndices.contains(i)) {
str += " " + str0.charAt(i) + " ";
} else {
str += str0.charAt(i);
}
}
// rejoin i'm, we're, *'s, won't, don't, etc
str = " " + str + " ";
str = str.replaceAll("\\s+"," ");
str = str.replaceAll(" i 'm "," i'm ");
str = str.replaceAll(" we 're "," we're ");
str = str.replaceAll(" 's ","'s ");
str = str.replaceAll(" 've ","'ve ");
str = str.replaceAll(" 'll ","'ll ");
str = str.replaceAll(" 'd ","'d ");
str = str.replaceAll(" n't ","n't ");
// remove spaces around dashes
if (normMethod == 2 || normMethod == 4) {
TreeSet<Integer> skipIndices = new TreeSet<Integer>();
str = " " + str + " ";
for (int i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
if (ch == '-') {
// rejoin if surrounded by spaces, and then letters
if (str.charAt(i-1) == ' ' && str.charAt(i+1) == ' ') {
if (Character.isLetter(str.charAt(i-2)) && Character.isLetter(str.charAt(i+2))) {
skipIndices.add(i-1);
skipIndices.add(i+1);
}
}
}
}
str0 = str;
str = "";
for (int i = 0; i < str0.length(); ++i) {
if (!skipIndices.contains(i)) {
str += str0.charAt(i);
}
}
}
// drop non-ASCII characters
if (normMethod == 3 || normMethod == 4) {
str0 = str;
str = "";
for (int i = 0; i < str0.length(); ++i) {
char ch = str0.charAt(i);
if (ch <= 127) { // i.e. if ASCII
str += ch;
}
}
}
str = str.replaceAll("\\s+"," ");
str = str.trim();
return str;
}
private int countLines(String fileName)
{
int count = 0;
try {
BufferedReader inFile = new BufferedReader(new FileReader(fileName));
String line;
do {
line = inFile.readLine();
if (line != null) ++count;
} while (line != null);
inFile.close();
} catch (IOException e) {
System.err.println("IOException in MertCore.countLines(String): " + e.getMessage());
System.exit(99902);
}
return count;
}
private int countNonEmptyLines(String fileName)
{
int count = 0;
try {
BufferedReader inFile = new BufferedReader(new FileReader(fileName));
String line;
do {
line = inFile.readLine();
if (line != null && line.length() > 0) ++count;
} while (line != null);
inFile.close();
} catch (IOException e) {
System.err.println("IOException in MertCore.countNonEmptyLines(String): " + e.getMessage());
System.exit(99902);
}
return count;
}
private String fullPath(String dir, String fileName)
{
File dummyFile = new File(dir,fileName);
return dummyFile.getAbsolutePath();
}
@SuppressWarnings("unused")
private void cleanupMemory()
{
cleanupMemory(100,false);
}
@SuppressWarnings("unused")
private void cleanupMemorySilently()
{
cleanupMemory(100,true);
}
@SuppressWarnings("static-access")
private void cleanupMemory(int reps, boolean silent)
{
int bytesPerMB = 1024 * 1024;
long totalMemBefore = myRuntime.totalMemory();
long freeMemBefore = myRuntime.freeMemory();
long usedMemBefore = totalMemBefore - freeMemBefore;
long usedCurr = usedMemBefore; long usedPrev = usedCurr;
// perform garbage collection repeatedly, until there is no decrease in
// the amount of used memory
for (int i = 1; i <= reps; ++i) {
myRuntime.runFinalization();
myRuntime.gc();
(Thread.currentThread()).yield();
usedPrev = usedCurr;
usedCurr = myRuntime.totalMemory() - myRuntime.freeMemory();
if (usedCurr == usedPrev) break;
}
if (!silent) {
long totalMemAfter = myRuntime.totalMemory();
long freeMemAfter = myRuntime.freeMemory();
long usedMemAfter = totalMemAfter - freeMemAfter;
println("GC: d_used = " + ((usedMemAfter - usedMemBefore) / bytesPerMB) + " MB "
+ "(d_tot = " + ((totalMemAfter - totalMemBefore) / bytesPerMB) + " MB).",2);
}
}
@SuppressWarnings("unused")
private void printMemoryUsage()
{
int bytesPerMB = 1024 * 1024;
long totalMem = myRuntime.totalMemory();
long freeMem = myRuntime.freeMemory();
long usedMem = totalMem - freeMem;
println("Allocated memory: " + (totalMem / bytesPerMB) + " MB "
+ "(of which " + (usedMem / bytesPerMB) + " MB is being used).",2);
}
private void println(Object obj, int priority) { if (priority <= verbosity) println(obj); }
private void print(Object obj, int priority) { if (priority <= verbosity) print(obj); }
private void println(Object obj) { System.out.println(obj); }
private void print(Object obj) { System.out.print(obj); }
private void showProgress()
{
++progress;
if (progress % 100000 == 0) print(".",2);
}
private double[] randomLambda()
{
double[] retLambda = new double[1+numParams];
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
double randVal = randGen.nextDouble(); // number in [0.0,1.0)
++generatedRands;
randVal = randVal * (maxRandValue[c] - minRandValue[c]); // number in [0.0,max-min]
randVal = minRandValue[c] + randVal; // number in [min,max]
retLambda[c] = randVal;
} else {
retLambda[c] = defaultLambda[c];
}
}
return retLambda;
}
private double[] randomPerturbation(double[] origLambda, int i, double method, double param, double mult)
{
double sigma = 0.0;
if (method == 1) {
sigma = 1.0/Math.pow(i,param);
} else if (method == 2) {
sigma = Math.exp(-param*i);
} else if (method == 3) {
sigma = Math.max(0.0 , 1.0 - (i/param));
}
sigma = mult*sigma;
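// Worked (hypothetical) example: method 1, param 0.5, mult 2.0 at iteration i = 4:
//   sigma = 2.0 * (1 / 4^0.5) = 1.0,
// so below, each optimizable lambda[c] is moved to a random value in
// [origLambda[c]*(1-sigma) , origLambda[c]*(1+sigma)] = [0 , 2*origLambda[c]].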
double[] retLambda = new double[1+numParams];
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
double randVal = 2*randGen.nextDouble() - 1.0; // number in [-1.0,1.0]
++generatedRands;
randVal = randVal * sigma; // number in [-sigma,sigma]
randVal = randVal * origLambda[c]; // number in [-sigma*orig[c],sigma*orig[c]]
randVal = randVal + origLambda[c]; // number in [orig[c]-sigma*orig[c],orig[c]+sigma*orig[c]]
// = [orig[c]*(1-sigma),orig[c]*(1+sigma)]
retLambda[c] = randVal;
} else {
retLambda[c] = origLambda[c];
}
}
return retLambda;
}
private int c_fromParamName (String pName)
{
for (int c = 1; c <= numParams; ++c) {
if (paramNames[c].equals(pName)) return c;
}
return 0; // no parameter with that name!
}
private void setFeats(
double[][][] featVal_array, int i, int[] lastUsedIndex,
int[] maxIndex, double[] featVal)
{
int k = lastUsedIndex[i] + 1;
if (k > maxIndex[i]) {
for (int c = 1; c <= numParams; ++c) {
double[] temp = featVal_array[c][i];
featVal_array[c][i] = new double[1+maxIndex[i]+sizeOfNBest];
for (int k2 = 0; k2 <= maxIndex[i]; ++k2) {
featVal_array[c][i][k2] = temp[k2];
}
}
maxIndex[i] += sizeOfNBest;
// cleanupMemorySilently(); // UNCOMMENT THIS if cleaning up memory
}
for (int c = 1; c <= numParams; ++c) {
featVal_array[c][i][k] = featVal[c];
}
lastUsedIndex[i] += 1;
}
@SuppressWarnings("unused")
private HashSet<Integer> indicesToDiscard(double[] slope, double[] offset)
{
// some lines can be eliminated: the ones that have a lower offset
// than some other line with the same slope.
// That is, for any k1 and k2:
// if slope[k1] = slope[k2] and offset[k1] > offset[k2],
// then k2 can be eliminated.
// (This is actually important to do as it eliminates a bug.)
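// Worked (hypothetical) example: slope = {2.0, 2.0, 3.0}, offset = {5.0, 7.0, 1.0}:
// candidates 0 and 1 share slope 2.0 and candidate 0 has the smaller offset, so the
// returned set is {0} and only candidates 1 and 2 survive.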
// print("discarding: ",4);
int numCandidates = slope.length;
HashSet<Integer> discardedIndices = new HashSet<Integer>();
HashMap<Double,Integer> indicesOfSlopes = new HashMap<Double,Integer>();
// maps slope to index of best candidate that has that slope.
// ("best" as in the one with the highest offset)
for (int k1 = 0; k1 < numCandidates; ++k1) {
double currSlope = slope[k1];
if (!indicesOfSlopes.containsKey(currSlope)) {
indicesOfSlopes.put(currSlope,k1);
} else {
int existingIndex = indicesOfSlopes.get(currSlope);
if (offset[existingIndex] > offset[k1]) {
discardedIndices.add(k1);
// print(k1 + " ",4);
} else if (offset[k1] > offset[existingIndex]) {
indicesOfSlopes.put(currSlope,k1);
discardedIndices.add(existingIndex);
// print(existingIndex + " ",4);
}
}
}
// old way of doing it; takes quadratic time (vs. linear time above)
/*
for (int k1 = 0; k1 < numCandidates; ++k1) {
for (int k2 = 0; k2 < numCandidates; ++k2) {
if (k1 != k2 && slope[k1] == slope[k2] && offset[k1] > offset[k2]) {
discardedIndices.add(k2);
// print(k2 + " ",4);
}
}
}
*/
// println("",4);
return discardedIndices;
} // indicesToDiscard(double[] slope, double[] offset)
public static void main(String[] args)
{
MertCore DMC = new MertCore(); // dummy MertCore object
// if bad args[], System.exit(80)
String configFileName = args[0];
String stateFileName = args[1];
int currIteration = Integer.parseInt(args[2]);
int randsToSkip = 0;
int earlyStop = 0;
double FINAL_score = 0.0;
int[] maxIndex = null;
if (currIteration == 1) {
EvaluationMetric.set_knownMetrics();
DMC.processArgsArray(DMC.cfgFileToArgsArray(configFileName),true);
randsToSkip = 0;
DMC.initialize(randsToSkip);
DMC.println("----------------------------------------------------",1);
DMC.println("Z-MERT run started @ " + (new Date()),1);
// DMC.printMemoryUsage();
DMC.println("----------------------------------------------------",1);
DMC.println("",1);
if (DMC.randInit) {
DMC.println("Initializing lambda[] randomly.",1);
// initialize optimizable parameters randomly (sampling uniformly from
// that parameter's random value range)
DMC.lambda = DMC.randomLambda();
}
DMC.println("Initial lambda[]: " + DMC.lambdaToString(DMC.lambda),1);
DMC.println("",1);
FINAL_score = DMC.evalMetric.worstPossibleScore();
maxIndex = new int[DMC.numSentences];
for (int i = 0; i < DMC.numSentences; ++i) { maxIndex[i] = DMC.sizeOfNBest - 1; }
earlyStop = 0;
} else {
EvaluationMetric.set_knownMetrics();
DMC.processArgsArray(DMC.cfgFileToArgsArray(configFileName),false);
double[] serA = null;
try {
ObjectInputStream in = new ObjectInputStream(new FileInputStream(stateFileName));
serA = (double[])in.readObject();
in.close();
// contents of serA[]:
// (*) last iteration
// (*) number of random numbers generated already
// (*) earlyStop
// (*) FINAL_score
// (*) lambda[]
// (*) maxIndex[]
// => length should be 4+numParams+numSentences
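// Illustrative (hypothetical) layout for numParams = 3 and numSentences = 2
// (length 4 + 3 + 2 = 9):
//   serA[0]    last iteration
//   serA[1]    number of random numbers generated so far
//   serA[2]    earlyStop
//   serA[3]    FINAL_score
//   serA[4..6] lambda[1..3]
//   serA[7..8] maxIndex[0..1]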
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.main(String[]): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.main(String[]): " + e.getMessage());
System.exit(99902);
} catch (ClassNotFoundException e) {
System.err.println("ClassNotFoundException in MertCore.main(String[]): " + e.getMessage());
System.exit(99904);
}
if (serA.length < 2) {
DMC.println("State file contains an array of length " + serA.length + "; "
+ "was expecting at least 2");
System.exit(81);
}
if ((int)serA[0] != currIteration-1) {
DMC.println("Iteration in state file is " + (int)serA[0] + "; "
+ "was expecting " + (currIteration-1));
System.exit(82);
}
randsToSkip = (int)serA[1];
DMC.initialize(randsToSkip); // declares lambda[], sets numParams and numSentences
if (serA.length != 4+DMC.numParams+DMC.numSentences) {
DMC.println("State file contains an array of length " + serA.length + "; "
+ "was expecting " + (4+DMC.numParams+DMC.numSentences));
System.exit(83);
}
earlyStop = (int)serA[2];
FINAL_score = serA[3];
for (int c = 1; c <= DMC.numParams; ++c) { DMC.lambda[c] = serA[3+c]; }
maxIndex = new int[DMC.numSentences];
for (int i = 0; i < DMC.numSentences; ++i) { maxIndex[i] = (int)serA[3+DMC.numParams+1+i]; }
}
double[] A = DMC.run_single_iteration(currIteration, DMC.minMERTIterations,
DMC.maxMERTIterations, DMC.prevMERTIterations, earlyStop, maxIndex);
if (A != null) {
FINAL_score = A[0];
earlyStop = (int)A[1];
randsToSkip = DMC.generatedRands;
}
if (A != null && A[2] != 1) {
double[] serA = new double[4+DMC.numParams+DMC.numSentences];
serA[0] = currIteration;
serA[1] = randsToSkip;
serA[2] = earlyStop;
serA[3] = FINAL_score;
for (int c = 1; c <= DMC.numParams; ++c) { serA[3+c] = DMC.lambda[c]; }
for (int i = 0; i < DMC.numSentences; ++i) { serA[3+DMC.numParams+1+i] = maxIndex[i]; }
try {
ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(stateFileName));
out.writeObject(serA);
out.flush();
out.close();
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.main(String[]): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.main(String[]): " + e.getMessage());
System.exit(99902);
}
System.exit(91);
} else {
// done
DMC.println("",1);
DMC.println("----------------------------------------------------",1);
DMC.println("Z-MERT run ended @ " + (new Date()),1);
// DMC.printMemoryUsage();
DMC.println("----------------------------------------------------",1);
DMC.println("",1);
DMC.println("FINAL lambda: " + DMC.lambdaToString(DMC.lambda)
+ " (" + DMC.metricName_display + ": " + FINAL_score + ")",1);
// check if a lambda is outside its threshold range
for (int c = 1; c <= DMC.numParams; ++c) {
if (DMC.lambda[c] < DMC.minThValue[c] || DMC.lambda[c] > DMC.maxThValue[c]) {
DMC.println("Warning: after normalization, lambda[" + c + "]=" + f4.format(DMC.lambda[c])
+ " is outside its critical value range.",1);
}
}
DMC.println("",1);
// delete intermediate .temp.*.it* decoder output files
for (int iteration = 1; iteration <= DMC.maxMERTIterations; ++iteration) {
if (DMC.compressFiles == 1) {
DMC.deleteFile(DMC.tmpDirPrefix+"temp.sents.it"+iteration+".gz");
DMC.deleteFile(DMC.tmpDirPrefix+"temp.feats.it"+iteration+".gz");
if (DMC.fileExists(DMC.tmpDirPrefix+"temp.stats.it"+iteration+".copy.gz")) {
DMC.deleteFile(DMC.tmpDirPrefix+"temp.stats.it"+iteration+".copy.gz");
} else {
DMC.deleteFile(DMC.tmpDirPrefix+"temp.stats.it"+iteration+".gz");
}
} else {
DMC.deleteFile(DMC.tmpDirPrefix+"temp.sents.it"+iteration);
DMC.deleteFile(DMC.tmpDirPrefix+"temp.feats.it"+iteration);
if (DMC.fileExists(DMC.tmpDirPrefix+"temp.stats.it"+iteration+".copy")) {
DMC.deleteFile(DMC.tmpDirPrefix+"temp.stats.it"+iteration+".copy");
} else {
DMC.deleteFile(DMC.tmpDirPrefix+"temp.stats.it"+iteration);
}
}
}
DMC.finish();
DMC.deleteFile(stateFileName);
System.exit(90);
}
}
}
// based on:
// http://www.javaworld.com/javaworld/jw-12-2000/jw-1229-traps.html?page=4
class StreamGobbler extends Thread {
InputStream istream;
boolean verbose;
StreamGobbler(InputStream is, int p) {
istream = is;
verbose = (p != 0);
}
public void run() {
try {
InputStreamReader isreader = new InputStreamReader(istream);
BufferedReader br = new BufferedReader(isreader);
String line = null;
while ((line = br.readLine()) != null) {
if (verbose) System.out.println(line);
}
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
}
/*
fake:
-----
ex2_N300:
java -javaagent:shiftone-jrat.jar -Xmx300m -cp bin joshua.ZMERT.ZMERT -dir MERT_example -s src.txt -r ref.all -rps 4 -cmd decoder_command_ex2.txt -dcfg config_ex2.txt -decOut nbest_ex2.out -N 300 -p params.txt -maxIt 25 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 -fake nbest_ex2.out.N300.it > ex2_N300ipi20opi0_300max+defratios.it10.noMemRep.bugFixes.monitored.txt
ex2_N500:
java -javaagent:shiftone-jrat.jar -Xmx300m -cp bin joshua.ZMERT.ZMERT -dir MERT_example -s src.txt -r ref.all -rps 4 -cmd decoder_command_ex2.txt -dcfg config_ex2.txt -decOut nbest_ex2.out -N 500 -p params.txt -maxIt 25 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 -fake nbest_ex2.out.N500.it > ex2_N500ipi20opi0_300max+defratios.it05.noMemRep.bugFixes.monitored.txt
exL_N300__600max:
java -javaagent:shiftone-jrat.jar -Xmx600m -cp bin joshua.ZMERT.ZMERT -dir MERT_example -s mt06_source.txt -r mt06_ref.all -rps 4 -cmd decoder_command_ex2.txt -dcfg config_ex2.txt -decOut nbest_exL.out -N 300 -p params.txt -maxIt 5 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 -fake nbest_exL.out.it > exL_N300ipi20opi0_600max+defratios.it05.noMemRep.bugFixes.monitored.txt
exL_N300__300max:
java -javaagent:shiftone-jrat.jar -Xmx300m -cp bin joshua.ZMERT.ZMERT -dir MERT_example -s mt06_source.txt -r mt06_ref.all -rps 4 -cmd decoder_command_ex2.txt -dcfg config_ex2.txt -decOut nbest_exL.out -N 300 -p params.txt -maxIt 5 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 -fake nbest_exL.out.it > exL_N300ipi20opi0_300max+defratios.it05.noMemRep.bugFixes.monitored.txt
gen:
----
ex2_N300:
make sure top_n=300 in MERT_example\config_ex2.txt
java -javaagent:shiftone-jrat.jar -Xmx300m -cp bin joshua.ZMERT.ZMERT -dir MERT_example -s src.txt -r ref.all -rps 4 -cmd decoder_command_ex2.txt -dcfg config_ex2.txt -decOut nbest_ex2.out -N 300 -p params.txt -maxIt 25 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 > ex2_N300ipi20opi0_300max+defratios.itxx.monitored.txt.gen
ex2_N500:
make sure top_n=500 in MERT_example\config_ex2.txt
java -javaagent:shiftone-jrat.jar -Xmx300m -cp bin joshua.ZMERT.ZMERT -dir MERT_example -s src.txt -r ref.all -rps 4 -cmd decoder_command_ex2.txt -dcfg config_ex2.txt -decOut nbest_ex2.out -N 500 -p params.txt -maxIt 25 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 > ex2_N500ipi20opi0_300max+defratios.itxx.monitored.txt.gen
exL_N300__600max:
run on CLSP machines only! (e.g. z12)
$JAVA_bin/java -javaagent:shiftone-jrat.jar -Xmx600m -cp bin joshua.ZMERT.ZMERT -dir YOURDIR -s mt06_source.txt -r mt06_ref.all -rps 4 -cmd decoder_command.txt -dcfg config_exL.txt -decOut nbest_exL.out -N 300 -p params.txt -maxIt 25 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 > exL_N300ipi20opi0_600max+defratios.itxx.monitored.txt.gen
exL_N300__300max:
run on CLSP machines only! (e.g. z12)
$JAVA_bin/java -javaagent:shiftone-jrat.jar -Xmx300m -cp bin joshua.ZMERT.ZMERT -dir YOURDIR -s mt06_source.txt -r mt06_ref.all -rps 4 -cmd decoder_command.txt -dcfg config_exL.txt -decOut nbest_exL.out -N 300 -p params.txt -maxIt 25 -opi 0 -ipi 20 -v 2 -rand 0 -seed 1226091488390 -save 1 > exL_N300ipi20opi0_600max+defratios.itxx.monitored.txt.gen
*/