/* This file is part of the Joshua Machine Translation System.
 *
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.zmert;

import java.util.*;
import java.io.*;
import java.text.DecimalFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;

/**
 * One worker of Z-MERT's multi-threaded coordinate-ascent line search.
 *
 * <p>Each instance optimizes one initial weight vector (index {@code j}) over the
 * current candidate pool: for each optimizable parameter it extracts the threshold
 * points at which the 1-best candidate of some sentence changes, performs Och-style
 * line optimization between consecutive thresholds, and repeatedly applies the single
 * best parameter change until no change improves the metric score.
 *
 * <p>Shared read-only configuration is installed once via {@link #set_MERTparams};
 * per-thread inputs/outputs are passed to the constructor. The {@code blocker}
 * semaphore is released when {@link #run()} finishes. Note: all weight/feature arrays
 * are 1-based (index 0 is unused), following Z-MERT convention.
 */
public class IntermediateOptimizer implements Runnable {
  /* non-static data members */
  private int j;                         // which initial point this thread optimizes
  private Semaphore blocker;             // released on completion so the master can join
  private Vector<String> threadOutput;   // collected log lines (thread-safe Vector)
  private String strToPrint;             // buffer for print() fragments until println()

  private double[] initialLambda;        // starting weights (read-only here)
  private double[] finalLambda;          // optimized weights (output)
  private int[][] best1Cand_suffStats;   // per-sentence SS of the current 1-best candidates
  private double[] finalScore;           // finalScore[j] is this thread's output score
  private int[] candCount;               // candCount[i] = #candidates for sentence i
  private double[][][] featVal_array;    // [featureIndex][sentence][candidate] feature values
  private ConcurrentHashMap<Integer,int[]>[] suffStats_array;
                                         // per-sentence cache: candidate index -> suff. stats

  /* static data members (shared, set once via set_MERTparams) */
  private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
  private final static double NegInf = (-1.0 / 0.0);
  private final static double PosInf = (+1.0 / 0.0);

  private static int numSentences;
  private static int numDocuments;
  private static int[] docOfSentence;          // sentence -> document index
  private static int docSubset_firstRank;
  private static int docSubset_lastRank;
  private static boolean optimizeSubset;       // true iff scoring only a document subset
  private static int numParams;
  private static double[] normalizationOptions;
  private static boolean[] isOptimizable;
  private static double[] minThValue;          // per-parameter lower range bound
  private static double[] maxThValue;          // per-parameter upper range bound
  private static boolean oneModificationPerIteration;
  private static EvaluationMetric evalMetric;
  private static String metricName;
  private static String metricName_display;
  private static int suffStatsCount;
  private static String tmpDirPrefix;
  private static int verbosity;

  /**
   * Installs the shared MERT configuration used by all optimizer threads.
   * Must be called before any thread is started.
   */
  public static void set_MERTparams(
      int in_numSentences, int in_numDocuments, int[] in_docOfSentence,
      int[] in_docSubsetInfo, int in_numParams, double[] in_normalizationOptions,
      boolean[] in_isOptimizable, double[] in_minThValue, double[] in_maxThValue,
      boolean in_oneModificationPerIteration, EvaluationMetric in_evalMetric,
      String in_tmpDirPrefix, int in_verbosity) {
    numSentences = in_numSentences;
    numDocuments = in_numDocuments;
    docOfSentence = in_docOfSentence;
    docSubset_firstRank = in_docSubsetInfo[1];
    docSubset_lastRank = in_docSubsetInfo[2];
    // in_docSubsetInfo[3] is the subset's document count; a strict subset
    // means document-range scoring must be used.
    if (in_docSubsetInfo[3] != numDocuments) optimizeSubset = true;
    else optimizeSubset = false;
    numParams = in_numParams;
    normalizationOptions = in_normalizationOptions;
    isOptimizable = in_isOptimizable;
    minThValue = in_minThValue;
    maxThValue = in_maxThValue;
    oneModificationPerIteration = in_oneModificationPerIteration;
    evalMetric = in_evalMetric;
    metricName = evalMetric.get_metricName();
    metricName_display = metricName;
    if (numDocuments > 1) metricName_display = "doc-level " + metricName;
    suffStatsCount = evalMetric.get_suffStatsCount();
    tmpDirPrefix = in_tmpDirPrefix;
    verbosity = in_verbosity;
  }

  /**
   * @param in_j index of the initial point this thread optimizes
   * @param in_blocker released when this thread's run() completes
   * @param in_threadOutput sink for this thread's log output
   */
  public IntermediateOptimizer(
      int in_j, Semaphore in_blocker, Vector<String> in_threadOutput,
      double[] in_initialLambda, double[] in_finalLambda,
      int[][] in_best1Cand_suffStats, double[] in_finalScore, int[] in_candCount,
      double[][][] in_featVal_array,
      ConcurrentHashMap<Integer,int[]>[] in_suffStats_array) {
    j = in_j;
    blocker = in_blocker;
    threadOutput = in_threadOutput;
    strToPrint = "";
    initialLambda = in_initialLambda;
    finalLambda = in_finalLambda;
    best1Cand_suffStats = in_best1Cand_suffStats;
    finalScore = in_finalScore;
    candCount = in_candCount;
    featVal_array = in_featVal_array;
    suffStats_array = in_suffStats_array;
  }

  /**
   * Extracts, for parameter c, every lambda_c value at which the 1-best candidate
   * of some sentence changes, into {@code thresholdsAll} (cleared first).
   *
   * <p>Each candidate k of sentence i defines the line
   * score(lambda_c) = slope[k]*lambda_c + offset[k], with slope = h_c(k) and
   * offset = SUM_{c2 != c} lambda_{c2} * h_{c2}(k). Walking the upper envelope of
   * these lines from the left-most winner to the right-most winner yields the
   * intersection points (thresholds). Each in-range threshold maps to a per-sentence
   * {old_k, new_k} pair describing the 1-best switch there. Candidates involved in
   * any switch are recorded in {@code indicesOfInterest[i]} so their sufficient
   * statistics can later be read from the merged stats file.
   */
  private void set_thresholdsForParam(
      TreeMap<Double,TreeMap<Integer,int[]>> thresholdsAll, int c,
      double[] currLambda, TreeSet<Integer>[] indicesOfInterest) {
    thresholdsAll.clear();
    int ipCount = 0; // diagnostic count of intersection points examined

    for (int i = 0; i < numSentences; ++i) {
      // find threshold points contributed by the ith sentence
      int numCandidates = candCount[i]; // aka simply K

      double[] slope = new double[numCandidates];  // h_c per candidate
      double[] offset = new double[numCandidates]; // SUM_{j!=c} currLambda_j*h_j(x)

      int minSlopeIndex = -1;            // index of line with steepest descent...
      double minSlope = PosInf;          // ...and its slope...
      double offset_minSlope = NegInf;   // ...and its offset (needed to break ties)

      int maxSlopeIndex = -1;            // index of line with steepest ascent...
      double maxSlope = NegInf;          // ...and its slope...
      double offset_maxSlope = NegInf;   // ...and its offset (needed to break ties)

      double bestScore_left = NegInf;    // used instead when minThValue[c] is finite
      double bestScore_right = NegInf;   // used instead when maxThValue[c] is finite

      for (int k = 0; k < numCandidates; ++k) {
        slope[k] = featVal_array[c][i][k];
        offset[k] = 0.0;
        for (int c2 = 1; c2 <= numParams; ++c2) {
          if (c2 != c) { offset[k] += currLambda[c2]*featVal_array[c2][i][k]; }
        }

        // Left-most winner: with an unbounded range it is the steepest-descent line;
        // with a bounded range it is the line scoring highest just left of the range.
        if (minThValue[c] == NegInf) {
          if (slope[k] < minSlope || (slope[k] == minSlope && offset[k] > offset_minSlope)) {
            minSlopeIndex = k;
            minSlope = slope[k];
            offset_minSlope = offset[k];
          }
        } else {
          double score = offset[k] + ((minThValue[c]-0.1)*slope[k]);
          if (score > bestScore_left || (score == bestScore_left && slope[k] > minSlope)) {
            minSlopeIndex = k;
            minSlope = slope[k];
            bestScore_left = score;
          }
        }

        // Right-most winner, symmetrically.
        if (maxThValue[c] == PosInf) {
          if (slope[k] > maxSlope || (slope[k] == maxSlope && offset[k] > offset_maxSlope)) {
            maxSlopeIndex = k;
            maxSlope = slope[k];
            offset_maxSlope = offset[k];
          }
        } else {
          double score = offset[k] + ((maxThValue[c]+0.1)*slope[k]);
          if (score > bestScore_right || (score == bestScore_right && slope[k] < maxSlope)) {
            maxSlopeIndex = k;
            maxSlope = slope[k];
            bestScore_right = score;
          }
        }
      } // for (k)

      // Traverse the lambda_c axis left to right: currIndex is the current winner,
      // starting at the left-most winner and stopping at the right-most one.
      int currIndex = minSlopeIndex;

      int last_new_k = -1;
      while (currIndex != maxSlopeIndex) {

        // Due to rounding errors, the index identified as maxSlopeIndex above
        // might differ from the one this loop converges to, in which case the
        // intersection search below can fail and leave currIndex at -1. A
        // rounding error happened, which is fine: just stop.
        if (currIndex < 0) break;

        // Find the candidate whose line is the first to intersect the current
        // line ("first" = lowest lambda_c intersection point).
        double nearestIntersectionPoint = PosInf;
        int nearestIntersectingLineIndex = -1;

        for (int k = 0; k < numCandidates; ++k) {
          // only higher-sloped lines can overtake the current winner
          if (slope[k] > slope[currIndex]) {
            double ip_k = (offset[k] - offset[currIndex])/(slope[currIndex] - slope[k]);
            if (ip_k < nearestIntersectionPoint) {
              nearestIntersectionPoint = ip_k;
              nearestIntersectingLineIndex = k;
            }
          }
        }

        ++ipCount;

        if (nearestIntersectionPoint > minThValue[c] && nearestIntersectionPoint < maxThValue[c]) {
          // at lambda_c = nIP, sentence i's 1-best switches old_k -> new_k
          int[] th_info = {currIndex,nearestIntersectingLineIndex};
          last_new_k = nearestIntersectingLineIndex;
          indicesOfInterest[i].add(currIndex); // old_k

          if (!thresholdsAll.containsKey(nearestIntersectionPoint)) {
            TreeMap<Integer,int[]> A = new TreeMap<Integer,int[]>();
            A.put(i,th_info);
            thresholdsAll.put(nearestIntersectionPoint,A);
          } else {
            TreeMap<Integer,int[]> A = thresholdsAll.get(nearestIntersectionPoint);
            if (!A.containsKey(i)) {
              A.put(i,th_info);
            } else {
              // Extremely rare: two consecutive intersection points for the same
              // sentence collapse to the same double value (ip1 ~ ip2 for switches
              // k_a->k_b and k_b->k_c). We then pretend k_b never happened and keep
              // a single switch k_a->k_c at that point.
              int[] old_th_info = A.get(i);
              old_th_info[1] = th_info[1]; // replace the existing new_k
              // NOTE(review): the original stores th_info (whose old_k is the
              // *current* winner) rather than old_th_info here; kept as-is to
              // preserve behavior — confirm intent against the k_a->k_c comment.
              A.put(i,th_info);
            }
            // A was obtained via get(); the map already holds it, no re-put needed.
          }
        } // if (in-range)

        currIndex = nearestIntersectingLineIndex;

      } // end while (currIndex != maxSlopeIndex)

      if (last_new_k != -1) {
        indicesOfInterest[i].add(last_new_k); // last new_k
      }
    } // for (i)

    // thresholdsAll now has the lambda_c values at which the corpus-level
    // assignment of 1-best candidates changes (within range constraints).
    // indicesOfInterest[i] lists which candidates of sentence i must later be
    // read from the merged decoder output file.

    if (thresholdsAll.size() != 0) {
      double smallest_th = thresholdsAll.firstKey();
      double largest_th = thresholdsAll.lastKey();
      println("# extracted thresholds: " + thresholdsAll.size(),2);
      println("Smallest extracted threshold: " + smallest_th,2);
      println("Largest extracted threshold: " + largest_th,2);

      // Append a sentinel key past the last real threshold so line_opt's
      // midpoint iteration also evaluates the final interval.
      if (maxThValue[c] != PosInf) {
        thresholdsAll.put(maxThValue[c],null);
      } else {
        thresholdsAll.put((thresholdsAll.lastKey() + 0.1),null);
      }
    }
  } // set_thresholdsForParam(int c)

  /**
   * Line-optimizes parameter c over the extracted thresholds.
   *
   * <p>Evaluates the metric at the midpoint of every interval between consecutive
   * thresholds, incrementally updating per-sentence and per-document sufficient
   * statistics as 1-best candidates switch at each threshold.
   *
   * @return {bestLambdaValue, bestScore} for parameter c
   */
  private double[] line_opt(
      TreeMap<Double,TreeMap<Integer,int[]>> thresholdsAll,
      int[] indexOfCurrBest, int c, double[] lambda) {
    println("Line-optimizing lambda[" + c + "]...",3);

    double[] bestScoreInfo = new double[2];
    // to be returned: [0] will store the best lambda, and [1] will store its score

    if (thresholdsAll.size() == 0) {
      // no thresholds extracted! Possible in theory...
      // simply return current value for this parameter
      println("No thresholds extracted!  Returning this parameter's current value...",2);
      bestScoreInfo[0] = lambda[c];
      bestScoreInfo[1] = evalMetric.worstPossibleScore();
      return bestScoreInfo;
    }

    double smallest_th = thresholdsAll.firstKey();
    double largest_th = thresholdsAll.lastKey();
    println("Minimum threshold: " + smallest_th,3);
    println("Maximum threshold: " + largest_th,3);

    double[] temp_lambda = new double[1+numParams];
    System.arraycopy(lambda,1,temp_lambda,1,numParams);

    double ip_prev = 0.0, ip_curr = 0.0;

    // Starting evaluation point: just left of the smallest threshold
    // (bounded ranges use the midpoint of [minThValue, smallest_th]).
    if (minThValue[c] != NegInf) {
      temp_lambda[c] = (minThValue[c] + smallest_th) / 2.0;
      ip_curr = minThValue[c];
    } else {
      temp_lambda[c] = smallest_th - 0.05;
      ip_curr = smallest_th - 0.1;
    }

    // suffStats[i][s]: contribution of sentence i's current best candidate
    // (indexOfCurrBest[i]) to the sth sufficient statistic.
    int[][] suffStats = new int[numSentences][suffStatsCount];
    // suffStats_doc[doc][s] := SUM_i suffStats[i][s] over sentences in doc,
    // i.e. each document is treated as a mini corpus. (Without document-level
    // optimization, all sentences belong to document 0.)
    int[][] suffStats_doc = new int[numDocuments][suffStatsCount];

    // initialize document SS
    for (int doc = 0; doc < numDocuments; ++doc) {
      for (int s = 0; s < suffStatsCount; ++s) { suffStats_doc[doc][s] = 0; }
    }

    // Now, set suffStats[][], and increment suffStats_doc[][]
    for (int i = 0; i < numSentences; ++i) {
      suffStats[i] = suffStats_array[i].get(indexOfCurrBest[i]);
      for (int s = 0; s < suffStatsCount; ++s) {
        suffStats_doc[docOfSentence[i]][s] += suffStats[i][s];
      }
    }

    double bestScore = 0.0;
    if (optimizeSubset)
      bestScore = evalMetric.score(suffStats_doc,docSubset_firstRank,docSubset_lastRank);
    else
      bestScore = evalMetric.score(suffStats_doc);
    double bestLambdaVal = temp_lambda[c];
    double nextLambdaVal = bestLambdaVal;
    println("At lambda[" + c + "] = " + bestLambdaVal + ","
          + "\t" + metricName_display + " = " + bestScore + " (*)",3);

    Iterator<Double> It = (thresholdsAll.keySet()).iterator();
    if (It.hasNext()) { ip_curr = It.next(); }

    while (It.hasNext()) {
      ip_prev = ip_curr;
      ip_curr = It.next();
      nextLambdaVal = (ip_prev + ip_curr)/2.0; // midpoint of the next interval

      // apply the 1-best switches that occur at threshold ip_prev
      TreeMap<Integer,int[]> th_info_M = thresholdsAll.get(ip_prev);
      Iterator<Integer> It2 = (th_info_M.keySet()).iterator();
      while (It2.hasNext()) {
        int i = It2.next(); // sentence i's 1-best changes at this threshold value
        int docOf_i = docOfSentence[i];

        int[] th_info = th_info_M.get(i);
        @SuppressWarnings("unused")
        int old_k = th_info[0]; // should equal indexOfCurrBest[i]
        int new_k = th_info[1];

        for (int s = 0; s < suffStatsCount; ++s) {
          suffStats_doc[docOf_i][s] -= suffStats[i][s]; // subtract stats for old_k
        }

        indexOfCurrBest[i] = new_k;
        suffStats[i] = suffStats_array[i].get(indexOfCurrBest[i]); // update sentence SS

        for (int s = 0; s < suffStatsCount; ++s) {
          suffStats_doc[docOf_i][s] += suffStats[i][s]; // add stats for new_k
        }
      }

      double nextTestScore = 0.0;
      if (optimizeSubset)
        nextTestScore = evalMetric.score(suffStats_doc,docSubset_firstRank,docSubset_lastRank);
      else
        nextTestScore = evalMetric.score(suffStats_doc);
      print("At lambda[" + c + "] = " + nextLambdaVal + ","
          + "\t" + metricName_display + " = " + nextTestScore,3);

      if (evalMetric.isBetter(nextTestScore,bestScore)) {
        bestScore = nextTestScore;
        bestLambdaVal = nextLambdaVal;
        print(" (*)",3);
      }

      println("",3);

    } // while (It.hasNext())

    println("",3);

    bestScoreInfo[0] = bestLambdaVal;
    bestScoreInfo[1] = bestScore;
    return bestScoreInfo;
  } // double[] line_opt(int c)

  /**
   * Reads the merged sufficient-statistics file and caches, in
   * {@code suffStats_array[i]}, the stats of every candidate listed in
   * {@code indicesOfInterest[i]}. The file holds one whitespace-separated stats
   * line per candidate, sentences in order; uninteresting lines are skipped.
   * Exits the JVM on I/O failure (matching Z-MERT's fail-fast convention).
   */
  private void set_suffStats_array(TreeSet<Integer>[] indicesOfInterest) {
    int candsOfInterestCount = 0;
    for (int i = 0; i < numSentences; ++i) {
      candsOfInterestCount += indicesOfInterest[i].size();
    }
    println("Processing merged stats file; extracting SS "
          + "for " + candsOfInterestCount + " candidates of interest.",2);

    // try-with-resources guarantees the reader is closed on every path
    try (BufferedReader inFile =
           new BufferedReader(new FileReader(tmpDirPrefix+"temp.stats.merged"))) {

      String candidate_suffStats;

      for (int i = 0; i < numSentences; ++i) {
        int numCandidates = candCount[i];

        int currCand = 0;
        Iterator<Integer> It = indicesOfInterest[i].iterator();

        while (It.hasNext()) {
          int nextIndex = It.next();

          // skip candidates until you get to the nextIndex'th candidate
          while (currCand < nextIndex) {
            inFile.readLine();
            ++currCand;
          }

          // now currCand == nextIndex, and the next line in inFile
          // contains the sufficient statistics we want
          candidate_suffStats = inFile.readLine();
          ++currCand;

          String[] suffStats_str = candidate_suffStats.split("\\s+");

          int[] suffStats = new int[suffStatsCount];
          for (int s = 0; s < suffStatsCount; ++s) {
            suffStats[s] = Integer.parseInt(suffStats_str[s]);
          }

          suffStats_array[i].put(nextIndex,suffStats);
        }

        // skip the rest of ith sentence's candidates
        while (currCand < numCandidates) {
          inFile.readLine();
          ++currCand;
        }
      } // for (i)

    } catch (FileNotFoundException e) {
      System.err.println("FileNotFoundException in "
        + "IntermediateOptimizer.set_suffStats_array(): " + e.getMessage());
      System.exit(99901);
    } catch (IOException e) {
      System.err.println("IOException in "
        + "IntermediateOptimizer.set_suffStats_array(): " + e.getMessage());
      System.exit(99902);
    }
  } // set_suffStats_array(TreeSet[] indicesOfInterest)

  /**
   * Returns the L-pow norm of A[1..], ignoring A[0] (Z-MERT's unused slot).
   */
  private double L_norm(double[] A, double pow) {
    double sum = 0.0;
    for (int i = 1; i < A.length; ++i) {
      sum += Math.pow(Math.abs(A[i]),pow);
    }
    return Math.pow(sum,1/pow);
  }

  /**
   * Computes, under temp_lambda, the highest-scoring candidate of each sentence,
   * adding each winner's index to {@code indicesOfInterest[i]}.
   *
   * @return per-sentence index of the current best candidate
   */
  private int[] initial_indexOfCurrBest(double[] temp_lambda,
                                        TreeSet<Integer>[] indicesOfInterest) {
    int[] indexOfCurrBest = new int[numSentences];
    // As we traverse lambda_c, indexOfCurrBest indicates the current best candidate.

    for (int i = 0; i < numSentences; ++i) {
      int numCandidates = candCount[i];

      double max = NegInf;
      int indexOfMax = -1;
      for (int k = 0; k < numCandidates; ++k) {
        double score = 0;
        for (int c2 = 1; c2 <= numParams; ++c2) {
          score += temp_lambda[c2] * featVal_array[c2][i][k];
        }
        if (score > max) {
          max = score;
          indexOfMax = k;
        }
      }
      indexOfCurrBest[i] = indexOfMax;

      // the initial winner's stats will be needed too
      indicesOfInterest[i].add(indexOfMax);
    }

    return indexOfCurrBest;
  } // int[] initial_indexOfCurrBest(int c)

  /**
   * Performs one sweep of coordinate ascent: line-optimizes every optimizable
   * parameter (reusing cached thresholds for {@code lastChanged_c}) and reports
   * the single most beneficial change.
   *
   * @return {c_best, bestLambdaValue, bestScore}
   */
  private double[] bestParamToChange(
      TreeMap<Double,TreeMap<Integer,int[]>>[] thresholdsAll,
      int lastChanged_c, double[] currLambda) {
    int c_best = 0; // which parameter to change?
    double bestLambdaVal = 0.0;
    double bestScore;
    // seed with a sentinel strictly worse than any achievable score
    if (evalMetric.getToBeMinimized()) {
      bestScore = evalMetric.worstPossibleScore() + 1.0;
    } else {
      bestScore = evalMetric.worstPossibleScore() - 1.0;
    }

    // indicesOfInterest[i]: which candidates of sentence i need their stats
    // read from the merged decoder output file.
    TreeSet<Integer>[] indicesOfInterest = null;
    @SuppressWarnings("unchecked")
    TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences];
    indicesOfInterest = temp_TSA;
    for (int i = 0; i < numSentences; ++i) {
      indicesOfInterest[i] = new TreeSet<Integer>();
    }

    int[][] indexOfCurrBest = new int[1+numParams][numSentences];

    for (int c = 1; c <= numParams; ++c) {
      if (!isOptimizable[c]) {
        println("Not investigating lambda[j=" + j + "][" + c + "].",2);
      } else {
        if (c != lastChanged_c) {
          println("Investigating lambda[j=" + j + "][" + c + "]...",2);
          set_thresholdsForParam(thresholdsAll[c],c,currLambda,indicesOfInterest);
        } else {
          // its thresholds cannot have changed since the previous sweep
          println("Keeping thresholds for lambda[j=" + j + "][" + c + "] from previous step.",2);
        }

        if (thresholdsAll[c].size() != 0) {
          // evaluate the winners just left of the smallest threshold
          double[] temp_lambda = new double[1+numParams];
          System.arraycopy(currLambda,1,temp_lambda,1,numParams);

          double smallest_th = thresholdsAll[c].firstKey();

          if (minThValue[c] != NegInf) {
            temp_lambda[c] = (minThValue[c] + smallest_th) / 2.0;
          } else {
            temp_lambda[c] = smallest_th - 0.05;
          }

          indexOfCurrBest[c] = initial_indexOfCurrBest(temp_lambda,indicesOfInterest);
        }
      }

      println("",2);
    }

    // load the sufficient statistics for every candidate any line search will touch
    set_suffStats_array(indicesOfInterest);

    for (int c = 1; c <= numParams; ++c) { // investigate currLambda[j][c]
      if (isOptimizable[c]) {
        double[] bestScoreInfo_c = line_opt(thresholdsAll[c],indexOfCurrBest[c],c,currLambda);
        // get best score and its lambda value

        double bestLambdaVal_c = bestScoreInfo_c[0];
        double bestScore_c = bestScoreInfo_c[1];

        if (evalMetric.isBetter(bestScore_c,bestScore)) {
          c_best = c;
          bestLambdaVal = bestLambdaVal_c;
          bestScore = bestScore_c;
        }
      }
    }

    // release per-sweep bookkeeping
    for (int i = 0; i < numSentences; ++i) { indicesOfInterest[i].clear(); }

    double[] c_best_info = {c_best,bestLambdaVal,bestScore};
    return c_best_info;
  } // double[] bestParamToChange(int j, double[] currLambda)

  /**
   * Normalizes origLambda[1..numParams] in place, per normalizationOptions:
   * nO[0] = 0: no normalization
   * nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
   * nO[0] = 2: scale so that the maximum absolute value is nO[1]
   * nO[0] = 3: scale so that the minimum absolute value is nO[1]
   * nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
   */
  private void normalizeLambda(double[] origLambda) {
    int normalizationMethod = (int)normalizationOptions[0];
    double scalingFactor = 1.0;
    if (normalizationMethod == 0) {
      scalingFactor = 1.0;
    } else if (normalizationMethod == 1) {
      int c = (int)normalizationOptions[2];
      scalingFactor = normalizationOptions[1]/Math.abs(origLambda[c]);
    } else if (normalizationMethod == 2) {
      double maxAbsVal = -1;
      int maxAbsVal_c = 0;
      for (int c = 1; c <= numParams; ++c) {
        if (Math.abs(origLambda[c]) > maxAbsVal) {
          maxAbsVal = Math.abs(origLambda[c]);
          maxAbsVal_c = c;
        }
      }
      scalingFactor = normalizationOptions[1]/Math.abs(origLambda[maxAbsVal_c]);
    } else if (normalizationMethod == 3) {
      double minAbsVal = PosInf;
      int minAbsVal_c = 0;
      for (int c = 1; c <= numParams; ++c) {
        if (Math.abs(origLambda[c]) < minAbsVal) {
          minAbsVal = Math.abs(origLambda[c]);
          minAbsVal_c = c;
        }
      }
      scalingFactor = normalizationOptions[1]/Math.abs(origLambda[minAbsVal_c]);
    } else if (normalizationMethod == 4) {
      double pow = normalizationOptions[1];
      double norm = L_norm(origLambda,pow);
      scalingFactor = normalizationOptions[2]/norm;
    }

    for (int c = 1; c <= numParams; ++c) { origLambda[c] *= scalingFactor; }
  }

  /**
   * Main optimization loop for this thread's initial point: repeatedly applies
   * the single best parameter change until no change improves the metric (or,
   * with oneModificationPerIteration, after one change), then normalizes and
   * range-checks the result. Releases {@code blocker} when done.
   */
  private void real_run() {
    @SuppressWarnings("unchecked")
    TreeMap<Double,TreeMap<Integer,int[]>>[] thresholdsAll = new TreeMap[1+numParams];
    thresholdsAll[0] = null;
    for (int c = 1; c <= numParams; ++c) {
      if (isOptimizable[c]) {
        thresholdsAll[c] = new TreeMap<Double,TreeMap<Integer,int[]>>();
      } else {
        thresholdsAll[c] = null;
      }
    }

    println("+++ Optimization of lambda[j=" + j + "] starting @ " + (new Date()) + " +++",1);

    double[] currLambda = new double[1+numParams];
    System.arraycopy(initialLambda,1,currLambda,1,numParams);

    // score the incoming 1-best selection to establish the baseline
    int[][] best1Cand_suffStats_doc = new int[numDocuments][suffStatsCount];
    for (int doc = 0; doc < numDocuments; ++doc) {
      for (int s = 0; s < suffStatsCount; ++s) { best1Cand_suffStats_doc[doc][s] = 0; }
    }
    for (int i = 0; i < numSentences; ++i) {
      for (int s = 0; s < suffStatsCount; ++s) {
        best1Cand_suffStats_doc[docOfSentence[i]][s] += best1Cand_suffStats[i][s];
      }
    }

    double initialScore = 0.0;
    if (optimizeSubset)
      initialScore = evalMetric.score(best1Cand_suffStats_doc,docSubset_firstRank,docSubset_lastRank);
    else
      initialScore = evalMetric.score(best1Cand_suffStats_doc);

    println("Initial lambda[j=" + j + "]: " + lambdaToString(initialLambda),1);
    println("(Initial " + metricName_display + "[j=" + j + "]: " + initialScore + ")",1);
    println("",1);
    finalScore[j] = initialScore;

    int c_best = 0;           // which param to change?
    double bestLambdaVal = 0; // what value to change to?
    double bestScore = 0;     // what score would be achieved?

    while (true) {
      double[] c_best_info = bestParamToChange(thresholdsAll,c_best,currLambda);
      // we pass in c_best because we don't need to recalculate thresholds for it
      c_best = (int)c_best_info[0];    // which param to change?
      bestLambdaVal = c_best_info[1];  // what value to change to?
      bestScore = c_best_info[2];      // what score would be achieved?

      // now c_best is the parameter giving the most gain
      if (evalMetric.isBetter(bestScore,finalScore[j])) {
        println("*** Changing lambda[j=" + j + "][" + c_best + "] from "
              + f4.format(currLambda[c_best])
              + " (" + metricName_display + ": " + f4.format(finalScore[j]) + ") to "
              + f4.format(bestLambdaVal)
              + " (" + metricName_display + ": " + f4.format(bestScore) + ") ***",2);
        println("*** Old lambda[j=" + j + "]: " + lambdaToString(currLambda) + " ***",2);
        currLambda[c_best] = bestLambdaVal;
        finalScore[j] = bestScore;
        println("*** New lambda[j=" + j + "]: " + lambdaToString(currLambda) + " ***",2);
        println("",2);
      } else {
        println("*** Not changing any weight in lambda[j=" + j + "] ***",2);
        println("*** lambda[j=" + j + "]: " + lambdaToString(currLambda) + " ***",2);
        println("",2);
        break; // exit while (true) loop
      }

      if (oneModificationPerIteration) { break; } // exit while (true) loop
    } // while (true)

    // currLambda is now the optimized weight vector on the current candidate list
    System.arraycopy(currLambda,1,finalLambda,1,numParams);

    normalizeLambda(finalLambda);

    // check if normalization pushed any lambda outside its threshold range
    for (int c = 1; c <= numParams; ++c) {
      if (finalLambda[c] < minThValue[c] || finalLambda[c] > maxThValue[c]) {
        println("Warning: after normalization, final lambda[j=" + j + "][" + c + "]="
              + f4.format(finalLambda[c]) + " is outside its critical value range.",2);
      }
    }

    println("Final lambda[j=" + j + "]: " + lambdaToString(finalLambda),1);
    println("(Final " + metricName_display + "[j=" + j + "]: " + finalScore[j] + ")",1);
    println("",1);

    blocker.release();
  }

  /** Runnable entry point: runs the optimization, then flushes buffered output. */
  public void run() {
    try {
      real_run();
    } catch (Exception e) {
      System.err.println("Exception in IntermediateOptimizer.run(): " + e.getMessage());
      e.printStackTrace(); // full trace: getMessage() alone is often null/uninformative
      System.exit(99905);
    }
    if (!strToPrint.equals("")) { threadOutput.add(strToPrint); }
  }

  // verbosity-gated logging helpers; output goes to threadOutput, not stdout,
  // so the master thread can serialize it.
  private void println(String str, int priority) { if (priority <= verbosity) println(str); }
  private void print(String str, int priority) { if (priority <= verbosity) print(str); }

  private void println(String str) {
    threadOutput.add(strToPrint + str);
    strToPrint = "";
  }

  private void print(String str) {
    strToPrint += str;
  }

  /** Formats lambdaA[1..numParams] as "{v1, v2, ..., vn}". */
  private String lambdaToString(double[] lambdaA) {
    StringBuilder retStr = new StringBuilder("{");
    for (int c = 1; c <= numParams; ++c) {
      retStr.append(lambdaA[c]);
      if (c < numParams) retStr.append(", ");
    }
    return retStr.append("}").toString();
  }
}