/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.zmert;
import java.util.*;
import java.io.*;
import java.text.DecimalFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;
public class IntermediateOptimizer implements Runnable
{
/* non-static data members */
private int j;
private Semaphore blocker;
private Vector<String> threadOutput;
private String strToPrint;
private double[] initialLambda;
private double[] finalLambda;
private int[][] best1Cand_suffStats;
private double[] finalScore;
private int[] candCount;
private double[][][] featVal_array;
private ConcurrentHashMap<Integer,int[]>[] suffStats_array;
/* static data members */
private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
private final static double NegInf = (-1.0 / 0.0);
private final static double PosInf = (+1.0 / 0.0);
private static int numSentences;
private static int numDocuments;
private static int[] docOfSentence;
private static int docSubset_firstRank;
private static int docSubset_lastRank;
private static boolean optimizeSubset;
private static int numParams;
private static double[] normalizationOptions;
private static boolean[] isOptimizable;
private static double[] minThValue;
private static double[] maxThValue;
private static boolean oneModificationPerIteration;
private static EvaluationMetric evalMetric;
private static String metricName;
private static String metricName_display;
private static int suffStatsCount;
private static String tmpDirPrefix;
private static int verbosity;
public static void set_MERTparams(
int in_numSentences, int in_numDocuments, int[] in_docOfSentence, int[] in_docSubsetInfo,
int in_numParams, double[] in_normalizationOptions,
boolean[] in_isOptimizable, double[] in_minThValue, double[] in_maxThValue,
boolean in_oneModificationPerIteration, EvaluationMetric in_evalMetric,
String in_tmpDirPrefix, int in_verbosity)
{
numSentences = in_numSentences;
numDocuments = in_numDocuments;
docOfSentence = in_docOfSentence;
docSubset_firstRank = in_docSubsetInfo[1];
docSubset_lastRank = in_docSubsetInfo[2];
if (in_docSubsetInfo[3] != numDocuments) optimizeSubset = true;
else optimizeSubset = false;
numParams = in_numParams;
normalizationOptions = in_normalizationOptions;
isOptimizable = in_isOptimizable;
minThValue = in_minThValue;
maxThValue = in_maxThValue;
oneModificationPerIteration = in_oneModificationPerIteration;
evalMetric = in_evalMetric;
metricName = evalMetric.get_metricName();
metricName_display = metricName;
if (numDocuments > 1) metricName_display = "doc-level " + metricName;
suffStatsCount = evalMetric.get_suffStatsCount();
tmpDirPrefix = in_tmpDirPrefix;
verbosity = in_verbosity;
}
public IntermediateOptimizer(
int in_j, Semaphore in_blocker, Vector<String> in_threadOutput,
double[] in_initialLambda, double[] in_finalLambda, int[][] in_best1Cand_suffStats,
double[] in_finalScore, int[] in_candCount, double[][][] in_featVal_array,
ConcurrentHashMap<Integer,int[]>[] in_suffStats_array)
{
j = in_j;
blocker = in_blocker;
threadOutput = in_threadOutput;
strToPrint = "";
initialLambda = in_initialLambda;
finalLambda = in_finalLambda;
best1Cand_suffStats = in_best1Cand_suffStats;
finalScore = in_finalScore;
candCount = in_candCount;
featVal_array = in_featVal_array;
suffStats_array = in_suffStats_array;
}
// private TreeMap<Double,TreeMap> thresholdsForParam(int c, int[] candCount, double[][][] featVal_array, double[] currLambda, TreeSet<Integer>[] indicesOfInterest)
private void set_thresholdsForParam(
TreeMap<Double,TreeMap<Integer,int[]>> thresholdsAll, int c,
double[] currLambda, TreeSet<Integer>[] indicesOfInterest)
{
/*
TreeMap[] thresholds = new TreeMap[numSentences];
// thresholds[i] stores thresholds for the cth parameter obtained by
// processing the candidates of sentence i. It not only stores the
// thresholds themselves, but also a triple of {i,from,to}, where from/to
// are indices that characterize the 1-best switch at this threshold.
for (int i = 0; i < numSentences; ++i) {
thresholds[i] = new TreeMap<Double,int[]>();
}
*/
// TreeMap<Double,int[]> thresholds = new TreeMap<Double,int[]>();
// Find threshold points
// TreeMap<Double,TreeMap> thresholdsAll = new TreeMap<Double,TreeMap>();
thresholdsAll.clear();
int ipCount = 0;
for (int i = 0; i < numSentences; ++i) {
// find threshold points contributed by ith sentence
// println("Processing sentence #" + i,4);
int numCandidates = candCount[i];
// aka simply K
double[] slope = new double[numCandidates];
// will be h_c from candidatesInfo
// repeated here for easy access
double[] offset = new double[numCandidates];
// SUM_j!=c currLambda_j*h_j(x)
int minSlopeIndex = -1; // index of line with steepest descent...
double minSlope = PosInf; // ...and its slope...
double offset_minSlope = NegInf; // ...and its offset (needed to break ties)
int maxSlopeIndex = -1; // index of line with steepest ascent...
double maxSlope = NegInf; // ...and its slope...
double offset_maxSlope = NegInf; // ...and its offset (needed to break ties)
double bestScore_left = NegInf; // these are used if the min/max values are
double bestScore_right = NegInf; // not neg/pos infinity
for (int k = 0; k < numCandidates; ++k) {
slope[k] = featVal_array[c][i][k];
offset[k] = 0.0;
for (int c2 = 1; c2 <= numParams; ++c2) {
if (c2 != c) { offset[k] += currLambda[c2]*featVal_array[c2][i][k]; }
}
// debugging
// println("@ (i,k)=(" + i + "," + k + "), "
// + "slope = " + slope[k] + "; offset = " + offset[k],4);
if (minThValue[c] == NegInf) {
if (slope[k] < minSlope || (slope[k] == minSlope && offset[k] > offset_minSlope)) {
minSlopeIndex = k;
minSlope = slope[k];
offset_minSlope = offset[k];
}
} else {
double score = offset[k] + ((minThValue[c]-0.1)*slope[k]);
if (score > bestScore_left || (score == bestScore_left && slope[k] > minSlope)) {
minSlopeIndex = k;
minSlope = slope[k];
bestScore_left = score;
}
}
if (maxThValue[c] == PosInf) {
if (slope[k] > maxSlope || (slope[k] == maxSlope && offset[k] > offset_maxSlope)) {
maxSlopeIndex = k;
maxSlope = slope[k];
offset_maxSlope = offset[k];
}
} else {
double score = offset[k] + ((maxThValue[c]+0.1)*slope[k]);
if (score > bestScore_right || (score == bestScore_right && slope[k] < maxSlope)) {
maxSlopeIndex = k;
maxSlope = slope[k];
bestScore_right = score;
}
}
}
// debugging
// println("minSlope is @ k = " + minSlopeIndex + ": slope " + minSlope
// + " (offset " + offset_minSlope + ")",4);
// println("maxSlope is @ k = " + maxSlopeIndex + ": slope " + maxSlope
// + " (offset " + offset_maxSlope + ")",4);
// some lines can be eliminated: the ones that have a lower offset
// than some other line with the same slope.
// That is, for any k1 and k2:
// if slope[k1] = slope[k2] and offset[k1] > offset[k2],
// then k2 can be eliminated.
// (This is actually important to do as it eliminates a bug.)
// HashSet<Integer> discardedIndices = indicesToDiscard(slope,offset);
// println("Extracting thresholds[(i,c)=(" + i + "," + c + ")]",4);
int currIndex = minSlopeIndex;
// As we traverse the currLambda_c dimension, the "winner" candidate will
// change at intersection points. currIndex tells us which candidate is
// the winner in the interval currently under investigation.
// We traverse the lambda_c dimension starting at -Inf. The line with
// steepest descent is the winner as lambda_c -> -Inf, so we initialize
// currIndex to minSlopeIndex to reflect that fact.
// Similarly, the winner as lambda_c -> +Inf is the line with the
// steepest *ascent* (i.e. max slope), and so we continue finding
// intersection points until we hit that line.
// Notice that we didn't have to investigate the entire space (-Inf,+Inf)
// if the parameter's range is more restricted than that. That is why, in
// the loop above, the "left-most" winner is not necessarily the one with
// the steepest descent (though it will be if minThValue[c] is -Inf).
// And similarly, the "right-most" winner is not necessarily the one with
// the steepest ascent (though it will be if minThValue[c] is +Inf). The
// point of doing this is to avoid extracting thresholds that will end up
// being discarded anyway due to range constraints, thus saving us a little
// bit of time.
int last_new_k = -1;
while (currIndex != maxSlopeIndex) {
if (currIndex < 0) break;
// Due to rounding errors, the index identified as maxSlopeIndex above
// might be different from the one this loop expects, in which case
// it won't be found and currIndex remains -1. So if currIndex is -1
// a rounding error happened, which is cool since we can just break.
// print("cI=" + currIndex + " ",4);
// find the candidate whose line is the first to intersect the current
// line. ("first" meaning with an intersection point that has the
// lowest possible lambda_c value.)
double nearestIntersectionPoint = PosInf;
int nearestIntersectingLineIndex = -1;
for (int k = 0; k < numCandidates; ++k) {
// if (slope[k] > slope[currIndex] && !discardedIndices.contains(k)) {
if (slope[k] > slope[currIndex]) {
// only higher-sloped lines will intersect the current line
// (If we didn't have discardedIndices a bug would creep up here.)
// find intersection point ip_k
double ip_k = (offset[k] - offset[currIndex])/(slope[currIndex] - slope[k]);
if (ip_k < nearestIntersectionPoint) {
nearestIntersectionPoint = ip_k;
nearestIntersectingLineIndex = k;
}
}
}
// print("ip=" + f4.format(nearestIntersectionPoint) + " ",4);
++ipCount;
if (nearestIntersectionPoint > minThValue[c] && nearestIntersectionPoint < maxThValue[c]) {
int[] th_info = {currIndex,nearestIntersectingLineIndex};
last_new_k = nearestIntersectingLineIndex;
indicesOfInterest[i].add(currIndex); // old_k
// indicesOfInterest_all[i].add(currIndex); // old_k ***/
if (!thresholdsAll.containsKey(nearestIntersectionPoint)) {
TreeMap<Integer,int[]> A = new TreeMap<Integer,int[]>();
A.put(i,th_info);
thresholdsAll.put(nearestIntersectionPoint,A);
} else {
TreeMap<Integer,int[]> A = thresholdsAll.get(nearestIntersectionPoint);
if (!A.containsKey(i)) {
A.put(i,th_info);
} else {
int[] old_th_info = A.get(i);
old_th_info[1] = th_info[1]; // replace the existing new_k
A.put(i,th_info);
}
thresholdsAll.put(nearestIntersectionPoint,A);
}
/*
if (!thresholds.containsKey(nearestIntersectionPoint)) {
thresholds.put(nearestIntersectionPoint,th_info);
// i.e., at lambda_c = nIP, the (index of the) 1-best changes
// from currIndex to nearestIntersectingLineIndex (which is
// indicated in th_info)
} else { // extremely rare, but causes problem if it does occur
// in essence, just replace the new_k of the existing th_info
int[] old_th_info = (int[])thresholds.get(nearestIntersectionPoint);
old_th_info[1] = th_info[1];
thresholds.put(nearestIntersectionPoint,old_th_info);
// When does this happen? If two consecutive intersection points are so close
// to each other so as to appear as having the same value. For instance, assume
// we have two intersection points ip1 and ip2 corresponding to two transitions,
// one from k_a to k_b, and the other from k_b to k_c. It might be the case
// that ip2-ip1 is extremeley small, so that the ip2 entry would actually REPLACE
// the ip1 entry. This would be bad.
// Instead, we pretend that k_b never happened, and just assume there is a single
// intersection point, ip (which equals whatever value Java calculates for ip1
// and ip2), with a corresponding transition of k_a to k_c.
}
*/
} // if (in-range)
currIndex = nearestIntersectingLineIndex;
} // end while (currIndex != maxSlopeIndex)
if (last_new_k != -1) {
indicesOfInterest[i].add(last_new_k); // last new_k
// indicesOfInterest_all[i].add(last_new_k); // last new_k ***/
}
// println("cI=" + currIndex + "(=? " + maxSlopeIndex + " = mxSI)",4);
// now thresholds has the values for lambda_c at which score changes
// based on the candidates for the ith sentence
// println("",4);
/*
Iterator<Double> It = (thresholds.keySet()).iterator();
int[] th_info = null;
while (It.hasNext()) { // process intersection points contributed by this sentence
double ip = It.next();
if (ip > minThValue[c] && ip < maxThValue[c]) {
th_info = thresholds.get(ip);
if (!thresholdsAll.containsKey(ip)) {
TreeMap A = new TreeMap();
A.put(i,th_info);
thresholdsAll.put(ip,A);
} else {
// not frequent, but does happen (when same intersection point
// corresponds to a candidate switch for more than one i)
TreeMap A = thresholdsAll.get(ip);
A.put(i,th_info);
thresholdsAll.put(ip,A);
}
// if (useDisk == 2) {
// th_info[0] = old_k, th_info[1] = new_k
indicesOfInterest[i].add(th_info[0]);
// }
} // if (in-range)
} // while (It.hasNext())
*/
/*
// if (useDisk == 2 && th_info != null) {
if (th_info != null) {
// new_k from the last th_info (previous new_k already appear as the next old_k)
indicesOfInterest[i].add(th_info[1]);
}
*/
// thresholds.clear();
} // for (i)
// now thresholdsAll has the values for lambda_c at which score changes
// based on the candidates for *all* the sentences (that satisfy
// range constraints).
// Each lambda_c value maps to a Vector of th_info. An overwhelming majority
// of these Vectors are of size 1.
// indicesOfInterest[i] tells us which candidates for the ith sentence need
// to be read from the merged decoder output file.
if (thresholdsAll.size() != 0) {
double smallest_th = thresholdsAll.firstKey();
double largest_th = thresholdsAll.lastKey();
println("# extracted thresholds: " + thresholdsAll.size(),2);
println("Smallest extracted threshold: " + smallest_th,2);
println("Largest extracted threshold: " + largest_th,2);
if (maxThValue[c] != PosInf) {
thresholdsAll.put(maxThValue[c],null);
} else {
thresholdsAll.put((thresholdsAll.lastKey() + 0.1),null);
}
}
// return thresholdsAll;
} // TreeMap<Double,TreeMap> thresholdsForParam (int c)
private double[] line_opt(
TreeMap<Double,TreeMap<Integer,int[]>> thresholdsAll, int[] indexOfCurrBest,
int c, double[] lambda)
{
println("Line-optimizing lambda[" + c + "]...",3);
double[] bestScoreInfo = new double[2];
// to be returned: [0] will store the best lambda, and [1] will store its score
if (thresholdsAll.size() == 0) {
// no thresholds extracted! Possible in theory...
// simply return current value for this parameter
println("No thresholds extracted! Returning this parameter's current value...",2);
bestScoreInfo[0] = lambda[c];
bestScoreInfo[1] = evalMetric.worstPossibleScore();
return bestScoreInfo;
}
double smallest_th = thresholdsAll.firstKey();
double largest_th = thresholdsAll.lastKey();
println("Minimum threshold: " + smallest_th,3);
println("Maximum threshold: " + largest_th,3);
double[] temp_lambda = new double[1+numParams];
System.arraycopy(lambda,1,temp_lambda,1,numParams);
double ip_prev = 0.0, ip_curr = 0.0;
if (minThValue[c] != NegInf) {
temp_lambda[c] = (minThValue[c] + smallest_th) / 2.0;
ip_curr = minThValue[c];
} else {
temp_lambda[c] = smallest_th - 0.05;
ip_curr = smallest_th - 0.1;
}
int[][] suffStats = new int[numSentences][suffStatsCount];
// suffStats[i][s] stores the contribution to the sth sufficient
// statistic from the candidate for the ith sentence (the candidate
// indicated by indexOfCurrBest[i]).
int[][] suffStats_doc = new int[numDocuments][suffStatsCount];
// suffStats_doc[doc][s] := SUM_i suffStats[i][s], over sentences in the doc'th document
// i.e. treat each document as a mini corpus
// (if not doing document-level optimization, all sentences will belong in a single
// document: the 1st one, indexed 0)
// initialize document SS
for (int doc = 0; doc < numDocuments; ++doc) {
for (int s = 0; s < suffStatsCount; ++s) {
suffStats_doc[doc][s] = 0;
}
}
// Now, set suffStats[][], and increment suffStats_doc[][]
for (int i = 0; i < numSentences; ++i) {
suffStats[i] = suffStats_array[i].get(indexOfCurrBest[i]);
for (int s = 0; s < suffStatsCount; ++s) {
suffStats_doc[docOfSentence[i]][s] += suffStats[i][s];
}
}
double bestScore = 0.0;
if (optimizeSubset) bestScore = evalMetric.score(suffStats_doc,docSubset_firstRank,docSubset_lastRank);
else bestScore = evalMetric.score(suffStats_doc);
double bestLambdaVal = temp_lambda[c];
double nextLambdaVal = bestLambdaVal;
println("At lambda[" + c + "] = " + bestLambdaVal + ","
+ "\t" + metricName_display + " = " + bestScore + " (*)",3);
Iterator<Double> It = (thresholdsAll.keySet()).iterator();
if (It.hasNext()) { ip_curr = It.next(); }
while (It.hasNext()) {
ip_prev = ip_curr;
ip_curr = It.next();
nextLambdaVal = (ip_prev + ip_curr)/2.0;
TreeMap<Integer,int[]> th_info_M = thresholdsAll.get(ip_prev);
Iterator<Integer> It2 = (th_info_M.keySet()).iterator();
while (It2.hasNext()) {
int i = It2.next();
// i.e. the 1-best for the i'th sentence changes at this threshold value
int docOf_i = docOfSentence[i];
int[] th_info = th_info_M.get(i);
@SuppressWarnings("unused")
int old_k = th_info[0]; // should be equal to indexOfCurrBest[i]
int new_k = th_info[1];
for (int s = 0; s < suffStatsCount; ++s) {
suffStats_doc[docOf_i][s] -= suffStats[i][s]; // subtract stats for candidate old_k
}
indexOfCurrBest[i] = new_k;
suffStats[i] = suffStats_array[i].get(indexOfCurrBest[i]); // update the SS for the i'th sentence
for (int s = 0; s < suffStatsCount; ++s) {
suffStats_doc[docOf_i][s] += suffStats[i][s]; // add stats for candidate new_k
}
}
double nextTestScore = 0.0;
if (optimizeSubset) nextTestScore = evalMetric.score(suffStats_doc,docSubset_firstRank,docSubset_lastRank);
else nextTestScore = evalMetric.score(suffStats_doc);
print("At lambda[" + c + "] = " + nextLambdaVal + ","
+ "\t" + metricName_display + " = " + nextTestScore,3);
if (evalMetric.isBetter(nextTestScore,bestScore)) {
bestScore = nextTestScore;
bestLambdaVal = nextLambdaVal;
print(" (*)",3);
}
println("",3);
} // while (It.hasNext())
println("",3);
// what is the purpose of this block of code ?????????????????????
/*
if (maxThValue[c] != PosInf) {
nextLambdaVal = (largest_th + maxThValue[c]) / 2.0;
} else {
nextLambdaVal = largest_th + 0.05;
}
*/
// ???????????????????????????????????????????????????????????????
/*************************************************/
/*************************************************/
bestScoreInfo[0] = bestLambdaVal;
bestScoreInfo[1] = bestScore;
return bestScoreInfo;
} // double[] line_opt(int c)
private void set_suffStats_array(TreeSet<Integer>[] indicesOfInterest)
{
int candsOfInterestCount = 0;
int candsOfInterestCount_all = 0;
for (int i = 0; i < numSentences; ++i) {
candsOfInterestCount += indicesOfInterest[i].size();
// candsOfInterestCount_all += indicesOfInterest_all[i].size(); ****/
}
println("Processing merged stats file; extracting SS "
+ "for " + candsOfInterestCount + " candidates of interest.",2);
// println("(*_all: " + candsOfInterestCount_all + ")",2); *****/
try {
// process the merged sufficient statistics file, and read (and store) the
// stats for candidates of interest
BufferedReader inFile = new BufferedReader(new FileReader(tmpDirPrefix+"temp.stats.merged"));
String candidate_suffStats;
for (int i = 0; i < numSentences; ++i) {
int numCandidates = candCount[i];
int currCand = 0;
Iterator<Integer> It = indicesOfInterest[i].iterator();
while (It.hasNext()) {
int nextIndex = It.next();
// skip candidates until you get to the nextIndex'th candidate
while (currCand < nextIndex) {
inFile.readLine();
++currCand;
}
// now currCand == nextIndex, and the next line in inFile
// contains the sufficient statistics we want
candidate_suffStats = inFile.readLine();
++currCand;
String[] suffStats_str = candidate_suffStats.split("\\s+");
int[] suffStats = new int[suffStatsCount];
for (int s = 0; s < suffStatsCount; ++s) {
suffStats[s] = Integer.parseInt(suffStats_str[s]);
}
suffStats_array[i].put(nextIndex,suffStats);
}
// skip the rest of ith sentence's candidates
while (currCand < numCandidates) {
inFile.readLine();
++currCand;
}
} // for (i)
inFile.close();
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MertCore.initialize(int): " + e.getMessage());
System.exit(99901);
} catch (IOException e) {
System.err.println("IOException in MertCore.initialize(int): " + e.getMessage());
System.exit(99902);
}
} // set_suffStats_array(HashMap[] suffStats_array, TreeSet[] indicesOfInterest, Vector[] candidates)
private double L_norm(double[] A, double pow)
{
// calculates the L-pow norm of A[]
// NOTE: this calculation ignores A[0]
double sum = 0.0;
for (int i = 1; i < A.length; ++i) {
sum += Math.pow(Math.abs(A[i]),pow);
}
return Math.pow(sum,1/pow);
}
private int[] initial_indexOfCurrBest(double[] temp_lambda, TreeSet<Integer>[] indicesOfInterest)
{
int[] indexOfCurrBest = new int[numSentences];
// As we traverse lambda_c, indexOfCurrBest indicates which is the
// current best candidate.
// initialize indexOfCurrBest[]
for (int i = 0; i < numSentences; ++i) {
int numCandidates = candCount[i];
double max = NegInf;
int indexOfMax = -1;
for (int k = 0; k < numCandidates; ++k) {
double score = 0;
for (int c2 = 1; c2 <= numParams; ++c2) {
score += temp_lambda[c2] * featVal_array[c2][i][k];
}
if (score > max) {
max = score;
indexOfMax = k;
}
}
indexOfCurrBest[i] = indexOfMax;
// if (useDisk == 2) {
// add indexOfCurrBest[i] to indicesOfInterest
indicesOfInterest[i].add(indexOfMax);
// indicesOfInterest_all[i].add(indexOfMax);
// }
}
return indexOfCurrBest;
} // int[] initial_indexOfCurrBest (int c)
private double[] bestParamToChange(TreeMap<Double,TreeMap<Integer,int[]>>[] thresholdsAll, int lastChanged_c, double[] currLambda)
{
int c_best = 0; // which parameter to change?
double bestLambdaVal = 0.0;
double bestScore;
if (evalMetric.getToBeMinimized()) {
bestScore = evalMetric.worstPossibleScore() + 1.0;
} else {
bestScore = evalMetric.worstPossibleScore() - 1.0;
}
// prep for line_opt
TreeSet<Integer>[] indicesOfInterest = null;
// indicesOfInterest[i] tells us which candidates for the ith sentence need
// to be read from the merged decoder output file.
// if (useDisk == 2) {
@SuppressWarnings("unchecked")
TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences];
indicesOfInterest = temp_TSA;
for (int i = 0; i < numSentences; ++i) {
indicesOfInterest[i] = new TreeSet<Integer>();
}
// }
int[][] indexOfCurrBest = new int[1+numParams][numSentences];
for (int c = 1; c <= numParams; ++c) {
if (!isOptimizable[c]) {
println("Not investigating lambda[j=" + j + "][" + c + "].",2);
} else {
if (c != lastChanged_c) {
println("Investigating lambda[j=" + j + "][" + c + "]...",2);
// thresholdsAll[c] = thresholdsForParam(c,candCount,featVal_array,currLambda,indicesOfInterest);
set_thresholdsForParam(
thresholdsAll[c],c,currLambda,indicesOfInterest);
} else {
println("Keeping thresholds for lambda[j=" + j + "][" + c + "] from previous step.",2);
}
// now thresholdsAll has the values for lambda_c at which score changes
// based on the candidates for *all* the sentences (that satisfy
// range constraints).
// Each lambda_c value maps to a Vector of th_info. An overwhelming majority
// of these Vectors are of size 1.
if (thresholdsAll[c].size() != 0) {
double[] temp_lambda = new double[1+numParams];
System.arraycopy(currLambda,1,temp_lambda,1,numParams);
double smallest_th = thresholdsAll[c].firstKey();
if (minThValue[c] != NegInf) {
temp_lambda[c] = (minThValue[c] + smallest_th) / 2.0;
} else {
temp_lambda[c] = smallest_th - 0.05;
}
indexOfCurrBest[c] =
initial_indexOfCurrBest(temp_lambda,indicesOfInterest);
}
}
println("",2);
}
// if (useDisk == 2) {
set_suffStats_array(indicesOfInterest);
// } // if (useDisk == 2)
for (int c = 1; c <= numParams; ++c) {
// investigate currLambda[j][c]
if (isOptimizable[c]) {
double[] bestScoreInfo_c =
line_opt(thresholdsAll[c],indexOfCurrBest[c],c,currLambda);
// get best score and its lambda value
double bestLambdaVal_c = bestScoreInfo_c[0];
double bestScore_c = bestScoreInfo_c[1];
if (evalMetric.isBetter(bestScore_c,bestScore)) {
c_best = c;
bestLambdaVal = bestLambdaVal_c;
bestScore = bestScore_c;
}
} // if (!isOptimizable[c])
}
// delete according to indicesOfInterest
// printMemoryUsage();
// if (useDisk == 2) {
for (int i = 0; i < numSentences; ++i) {
indicesOfInterest[i].clear();
}
// }
// cleanupMemory();
// printMemoryUsage();
// println("",2);
double[] c_best_info = {c_best,bestLambdaVal,bestScore};
return c_best_info;
} // double[] bestParamToChange(int j, double[] currLambda)
private void normalizeLambda(double[] origLambda)
{
// private String[] normalizationOptions;
// How should a lambda[] vector be normalized (before decoding)?
// nO[0] = 0: no normalization
// nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
// nO[0] = 2: scale so that the maximum absolute value is nO[1]
// nO[0] = 3: scale so that the minimum absolute value is nO[1]
// nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
int normalizationMethod = (int)normalizationOptions[0];
double scalingFactor = 1.0;
if (normalizationMethod == 0) {
scalingFactor = 1.0;
} else if (normalizationMethod == 1) {
int c = (int)normalizationOptions[2];
scalingFactor = normalizationOptions[1]/Math.abs(origLambda[c]);
} else if (normalizationMethod == 2) {
double maxAbsVal = -1;
int maxAbsVal_c = 0;
for (int c = 1; c <= numParams; ++c) {
if (Math.abs(origLambda[c]) > maxAbsVal) {
maxAbsVal = Math.abs(origLambda[c]);
maxAbsVal_c = c;
}
}
scalingFactor = normalizationOptions[1]/Math.abs(origLambda[maxAbsVal_c]);
} else if (normalizationMethod == 3) {
double minAbsVal = PosInf;
int minAbsVal_c = 0;
for (int c = 1; c <= numParams; ++c) {
if (Math.abs(origLambda[c]) < minAbsVal) {
minAbsVal = Math.abs(origLambda[c]);
minAbsVal_c = c;
}
}
scalingFactor = normalizationOptions[1]/Math.abs(origLambda[minAbsVal_c]);
} else if (normalizationMethod == 4) {
double pow = normalizationOptions[1];
double norm = L_norm(origLambda,pow);
scalingFactor = normalizationOptions[2]/norm;
}
for (int c = 1; c <= numParams; ++c) {
origLambda[c] *= scalingFactor;
}
}
private void real_run() {
@SuppressWarnings("unchecked")
TreeMap<Double,TreeMap<Integer,int[]>>[] thresholdsAll = new TreeMap[1+numParams];
thresholdsAll[0] = null;
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
thresholdsAll[c] = new TreeMap<Double,TreeMap<Integer,int[]>>();
} else {
thresholdsAll[c] = null;
}
}
// cleanupMemory();
println("+++ Optimization of lambda[j=" + j + "] starting @ " + (new Date()) + " +++",1);
double[] currLambda = new double[1+numParams];
System.arraycopy(initialLambda,1,currLambda,1,numParams);
int[][] best1Cand_suffStats_doc = new int[numDocuments][suffStatsCount];
for (int doc = 0; doc < numDocuments; ++doc) {
for (int s = 0; s < suffStatsCount; ++s) {
best1Cand_suffStats_doc[doc][s] = 0;
}
}
for (int i = 0; i < numSentences; ++i) {
for (int s = 0; s < suffStatsCount; ++s) {
best1Cand_suffStats_doc[docOfSentence[i]][s] += best1Cand_suffStats[i][s];
}
}
double initialScore = 0.0;
if (optimizeSubset) initialScore = evalMetric.score(best1Cand_suffStats_doc,docSubset_firstRank,docSubset_lastRank);
else initialScore = evalMetric.score(best1Cand_suffStats_doc);
println("Initial lambda[j=" + j + "]: " + lambdaToString(initialLambda),1);
println("(Initial " + metricName_display + "[j=" + j + "]: " + initialScore + ")",1);
println("",1);
finalScore[j] = initialScore;
int c_best = 0; // which param to change?
double bestLambdaVal = 0; // what value to change to?
double bestScore = 0; // what score would be achieved?
while (true) {
double[] c_best_info = bestParamToChange(thresholdsAll,c_best,currLambda);
// we pass in c_best because we don't need
// to recalculate thresholds for it
c_best = (int)c_best_info[0]; // which param to change?
bestLambdaVal = c_best_info[1]; // what value to change to?
bestScore = c_best_info[2]; // what score would be achieved?
// now c_best is the parameter giving the most gain
if (evalMetric.isBetter(bestScore,finalScore[j])) {
println("*** Changing lambda[j=" + j + "][" + c_best + "] from "
+ f4.format(currLambda[c_best])
+ " (" + metricName_display + ": " + f4.format(finalScore[j]) + ") to "
+ f4.format(bestLambdaVal)
+ " (" + metricName_display + ": " + f4.format(bestScore) + ") ***",2);
println("*** Old lambda[j=" + j + "]: " + lambdaToString(currLambda) + " ***",2);
currLambda[c_best] = bestLambdaVal;
finalScore[j] = bestScore;
println("*** New lambda[j=" + j + "]: " + lambdaToString(currLambda) + " ***",2);
println("",2);
} else {
println("*** Not changing any weight in lambda[j=" + j + "] ***",2);
println("*** lambda[j=" + j + "]: " + lambdaToString(currLambda) + " ***",2);
println("",2);
break; // exit while (true) loop
}
if (oneModificationPerIteration) { break; } // exit while (true) loop
} // while (true)
// now currLambda is the optimized weight vector on the current candidate list
// (corresponding to initialLambda)
System.arraycopy(currLambda,1,finalLambda,1,numParams);
normalizeLambda(finalLambda);
// check if a lambda is outside its threshold range
for (int c = 1; c <= numParams; ++c) {
if (finalLambda[c] < minThValue[c] || finalLambda[c] > maxThValue[c]) {
println("Warning: after normalization, final lambda[j=" + j + "][" + c + "]="
+ f4.format(finalLambda[c]) + " is outside its critical value range.",2);
}
}
println("Final lambda[j=" + j + "]: " + lambdaToString(finalLambda),1);
println("(Final " + metricName_display + "[j=" + j + "]: " + finalScore[j] + ")",1);
println("",1);
blocker.release();
}
public void run() {
try {
real_run();
} catch (Exception e) {
System.err.println("Exception in IntermediateOptimizer.run(): " + e.getMessage());
System.exit(99905);
}
if (!strToPrint.equals("")) {
threadOutput.add(strToPrint);
}
}
private void println(String str, int priority) { if (priority <= verbosity) println(str); }
private void print(String str, int priority) { if (priority <= verbosity) print(str); }
private void println(String str) { threadOutput.add(strToPrint + str); strToPrint = ""; }
private void print(String str) { strToPrint += str; }
private String lambdaToString(double[] lambdaA)
{
String retStr = "{";
for (int c = 1; c <= numParams-1; ++c) {
retStr += "" + lambdaA[c] + ", ";
}
retStr += "" + lambdaA[numParams] + "}";
return retStr;
}
}