package edu.berkeley.nlp.PCFGLA;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import edu.berkeley.nlp.PCFGLA.Corpus.TreeBankType;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees;
import edu.berkeley.nlp.math.DoubleArrays;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.CommandLineUtils;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.PriorityQueue;
public class GrammarMerger {
/**
 * Command-line entry point. Loads a previously trained split grammar, merges
 * back the least useful fraction of substate splits (chosen by likelihood
 * loss), re-trains with EM until the validation likelihood stops improving
 * (or the iteration limits are reached), and saves the merged grammar.
 *
 * @param args command-line options; run with no arguments to print usage
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.out.println("usage: java GrammarMerger \n" +
                "\t\t -i Input File for Grammar (Required)\n" +
                "\t\t -o Output File for Merged Grammar (Required)\n"+
                "\t\t -p Merging percentage (Default: 0.5)\n" +
                "\t\t -2p Merging percentage for non-siblings (Default: 0.0)\n" +
                "\t\t -top Keep top N substates, overrides -p!" +
                " -path Path to Corpus (Default: null)\n" +
                // " -lang Language: 1-ENG, 2-CHN, 3-GER, 4-ARB (Default: 1)\n" +
                "\t\t -chsh If this is enabled, then we train on a short segment of\n" +
                "\t\t the Chinese treebank (Default: false)" +
                "\t\t -trfr The fraction of the training corpus to keep (Default: 1.0)\n" +
                "\t\t -maxIt Maximum number of EM iterations (Default: 100)"+
                "\t\t -minIt Minimum number of EM iterations (Default: 5)"+
                "\t\t -f Filter rules with prob under f (Default: -1)"+
                "\t\t -dL Delete labels? (true/false) (Default: false)"+
                "\t\t -ent Use Entropic prior (Default: false)"+
                "\t\t -maxL Maximum sentence length (Default: 10000)"+
                "\t\t -sep Set merging threshold for grammar and lexicon separately (Default: false)"
        );
        System.exit(2);
    }
    // provide feedback on command-line arguments
    System.out.print("Running with arguments: ");
    for (String arg : args) {
        System.out.print(" '"+arg+"'");
    }
    System.out.println("");
    // parse the input arguments
    Map<String, String> input = CommandLineUtils.simpleCommandLineParser(args);
    double mergingPercentage = Double.parseDouble(CommandLineUtils.getValueOrUseDefault(input, "-p", "0.5"));
    // NOTE(review): -2p is parsed but never used below (non-sibling merging is disabled) — confirm.
    double mergingPercentage2 = Double.parseDouble(CommandLineUtils.getValueOrUseDefault(input, "-2p", "0.0"));
    String outFileName = CommandLineUtils.getValueOrUseDefault(input, "-o", null);
    String inFileName = CommandLineUtils.getValueOrUseDefault(input, "-i", null);
    System.out.println("Loading grammar from " + inFileName + ".");
    ParserData pData = ParserData.Load(inFileName);
    if (pData == null) {
        System.out.println("Failed to load grammar from file" + inFileName + ".");
        System.exit(1);
    }
    int minIterations = Integer.parseInt(CommandLineUtils.getValueOrUseDefault(input,"-minIt","0"));
    if (minIterations>0)
        System.out.println("I will do at least "+minIterations+" iterations.");
    boolean separateMerge = CommandLineUtils.getValueOrUseDefault(input, "-sep", "").equals("true");
    int maxIterations = Integer.parseInt(CommandLineUtils.getValueOrUseDefault(input,"-maxIt","100"));
    if (maxIterations>0)
        System.out.println("But at most "+maxIterations+" iterations.");
    // NOTE(review): -dL is parsed but 'deleteLabels' is never used below — confirm.
    boolean deleteLabels = CommandLineUtils.getValueOrUseDefault(input, "-dL", "").equals("true");
    boolean useEntropicPrior = CommandLineUtils.getValueOrUseDefault(input, "-ent", "").equals("true");
    int maxSentenceLength = Integer.parseInt(CommandLineUtils.getValueOrUseDefault(input, "-maxL", "10000"));
    System.out.println("Will remove sentences with more than "+maxSentenceLength+" words.");
    String path = CommandLineUtils.getValueOrUseDefault(input, "-path", null);
    // int lang = Integer.parseInt(CommandLineUtils.getValueOrUseDefault(input, "-lang", "1"));
    // System.out.println("Loading trees from "+path+" and using language "+lang);
    // NOTE(review): -chsh is parsed but 'chineseShort' is never used below — confirm.
    boolean chineseShort = Boolean.parseBoolean(CommandLineUtils
            .getValueOrUseDefault(input, "-chsh", "false"));
    double trainingFractionToKeep = Double.parseDouble(CommandLineUtils
            .getValueOrUseDefault(input, "-trfr", "1.0"));
    // Unpack the serialized parser state: grammar, lexicon, numberers and
    // the markovization/binarization settings the grammar was trained with.
    Grammar grammar = pData.getGrammar();
    Lexicon lexicon = pData.getLexicon();
    Numberer.setNumberers(pData.getNumbs());
    int h_markov = pData.h_markov;
    int v_markov = pData.v_markov;
    Binarization bin = pData.bin;
    short[] numSubStatesArray = pData.numSubStatesArray;
    Numberer tagNumberer = Numberer.getGlobalNumberer("tags");
    double filter = Double.parseDouble(CommandLineUtils.getValueOrUseDefault(input, "-f","-1"));
    if(filter>0) System.out.println("Will remove rules with prob under "+filter);
    Corpus corpus = new Corpus(path,TreeBankType.WSJ,trainingFractionToKeep,false);
    //int nTrees = corpus.getTrainTrees().size();
    // binarize trees with the same settings as in training, so the state
    // inventory matches the loaded grammar
    List<Tree<String>> trainTrees = Corpus.binarizeAndFilterTrees(corpus
            .getTrainTrees(), v_markov, h_markov, maxSentenceLength, bin, false, false);
    List<Tree<String>> validationTrees = Corpus.binarizeAndFilterTrees(corpus
            .getValidationTrees(), v_markov, h_markov, maxSentenceLength, bin, false, false);
    int nTrees = trainTrees.size();
    System.out.println("There are "+nTrees+" trees in the training set.");
    StateSetTreeList trainStateSetTrees = new StateSetTreeList(trainTrees, numSubStatesArray, false, tagNumberer);
    StateSetTreeList validationStateSetTrees = new StateSetTreeList(validationTrees, numSubStatesArray, false, tagNumberer);
    // get rid of the old trees
    // trainTrees = null;
    // validationTrees = null;
    // corpus = null;
    // System.gc();
    // System.out.println("before merging, we have split trees:");
    // for (int i=0; i<grammar.numStates; i++) {
    // System.out.println(grammar.splitTrees[i]);
    // }
    //
    // The merge pipeline: (1) posterior substate weights, (2) likelihood-loss
    // deltas per substate pair, (3) pick the cheapest pairs, (4) merge them.
    double[][] mergeWeights = computeMergeWeights(grammar,lexicon,trainStateSetTrees);
    double[][][] deltas = computeDeltas(grammar, lexicon, mergeWeights, trainStateSetTrees);
    boolean[][][] mergeThesePairs = determineMergePairs(deltas,separateMerge,mergingPercentage,grammar);
    Grammar newGrammar = doTheMerges(grammar, lexicon, mergeThesePairs, mergeWeights);
    printMergingStatistics(grammar, newGrammar);
    // rebuild the state-set trees with the reduced substate counts
    short[] newNumSubStatesArray = newGrammar.numSubStates;
    trainStateSetTrees = new StateSetTreeList(trainTrees, newNumSubStatesArray, false, tagNumberer);
    validationStateSetTrees = new StateSetTreeList(validationTrees, newNumSubStatesArray, false, tagNumberer);
    // retrain lexicon to finish the lexicon merge (updates the unknown words model)...
    System.out.println("completing lexicon merge");
    // NOTE(review): this ArrayParser is never used before being reassigned below — confirm it can be removed.
    ArrayParser newParser = new ArrayParser(newGrammar, lexicon);
    SophisticatedLexicon newLexicon = new SophisticatedLexicon(newNumSubStatesArray,SophisticatedLexicon.DEFAULT_SMOOTHING_CUTOFF, lexicon.getSmoothingParams(), lexicon.getSmoother(),filter);
    boolean updateOnlyLexicon = true;
    // one lexicon-only E step with the merged grammar and the old (merged-in-place) lexicon
    double trainingLikelihood = GrammarTrainer.doOneEStep(newGrammar, lexicon, null, newLexicon, trainStateSetTrees, updateOnlyLexicon, 4 /*opts.rare*/);
    // int n = 0;
    // for (Tree<StateSet> stateSetTree : trainStateSetTrees) {
    // boolean secondHalf = (n++>nTrees/2.0);
    // newParser.doInsideOutsideScores(stateSetTree,noSmoothing,debugOutput); // E Step
    // double ll = stateSetTree.getLabel().getIScore(0);
    // ll = Math.log(ll) + (100*stateSetTree.getLabel().getIScale());//System.out.println(stateSetTree);
    // if (Double.isInfinite(ll)||Double.isNaN(ll)) {
    // System.out.println("Training sentence "+n+" is given "+ll+" log likelihood!");
    // GrammarTrainer.printBadLLReason(stateSetTree,lexicon);
    // }
    // else {
    // //System.out.println("Training sentence "+n+" is good.");
    // trainingLikelihood += ll;
    // newLexicon.trainTree(stateSetTree, -1, lexicon, secondHalf,true);
    // }
    // }
    System.out.println("The training LL is "+trainingLikelihood);
    newLexicon.optimize();//Grammar.RandomInitializationType.INITIALIZE_WITH_SMALL_RANDOMIZATION); // M Step
    // do 5 iterations of EM to clean things up
    SophisticatedLexicon previousLexicon = null;
    Grammar previousGrammar = null;
    System.out.println("Doing some iterations of EM to clean things up...");
    double maxLikelihood = Double.NEGATIVE_INFINITY;
    int droppingIter = 0;
    int iter = 0;
    // EM loop: stop after the validation likelihood drops twice in a row
    // (past minIterations) or after maxIterations.
    while ((droppingIter < 2)&& (iter<maxIterations)){
        iter++;
        previousLexicon = newLexicon;
        previousGrammar = newGrammar;
        boolean noSmoothing = false, debugOutput = false;
        newParser = new ArrayParser(previousGrammar, previousLexicon);
        newLexicon = new SophisticatedLexicon(newNumSubStatesArray,
                SophisticatedLexicon.DEFAULT_SMOOTHING_CUTOFF, lexicon.getSmoothingParams(), lexicon.getSmoother(),filter);
        // NOTE(review): the new Grammar is seeded from 'grammar' (the pre-merge
        // grammar on the first iteration), not 'previousGrammar' — confirm intended.
        newGrammar = new Grammar(newNumSubStatesArray, grammar.findClosedPaths,
                grammar.smoother, grammar,filter);
        // NOTE(review): the flag is set on 'grammar' rather than 'newGrammar' — confirm intended.
        if (useEntropicPrior) grammar.useEntropicPrior = true;
        int n = 0;
        trainingLikelihood=0;
        for (Tree<StateSet> stateSetTree : trainStateSetTrees) {
            boolean secondHalf = (n++>nTrees/2.0);
            newParser.doInsideOutsideScores(stateSetTree,noSmoothing,debugOutput); // E Step
            double ll = stateSetTree.getLabel().getIScore(0);
            // inside scores are scaled; recover the log likelihood from score + scale
            ll = Math.log(ll) + (100*stateSetTree.getLabel().getIScale());//System.out.println(stateSetTree);
            if (Double.isInfinite(ll)||Double.isNaN(ll)) {
                System.out.println("Training sentence "+n+" is given "+ll+" log likelihood!");
                GrammarTrainer.printBadLLReason(stateSetTree,previousLexicon);
            }
            else {
                trainingLikelihood += ll;
                newGrammar.tallyStateSetTree(stateSetTree, previousGrammar); // E Step
                newLexicon.trainTree(stateSetTree, -1, previousLexicon, secondHalf,false,4 /*opts.rare*/);
            }
        }
        System.out.println("The training LL is "+trainingLikelihood);
        //SSIE
        // NOTE(review): this casts and mutates the ORIGINAL 'lexicon' (or the last
        // accepted one), not 'newLexicon' — confirm this is the intended target.
        ((SophisticatedLexicon) lexicon).overwriteWithMaxent();
        newLexicon.optimize();//Grammar.RandomInitializationType.INITIALIZE_WITH_SMALL_RANDOMIZATION); // M Step
        newGrammar.optimize(0);// Grammar.RandomInitializationType.INITIALIZE_WITH_SMALL_RANDOMIZATION); // M Step
        newParser = new ArrayParser(newGrammar, newLexicon);
        //System.out.println("Evaluating new grammar");
        // Score the re-estimated grammar on the validation set (inside pass only).
        double validationLikelihood = 0;
        n = 0;
        for (Tree<StateSet> stateSetTree : validationStateSetTrees) {
            n++;
            newParser.doInsideScores(stateSetTree,false,false, null); // E Step
            double ll = stateSetTree.getLabel().getIScore(0);
            ll = Math.log(ll) + (100*stateSetTree.getLabel().getIScale());//System.out.println(stateSetTree);
            if (Double.isInfinite(ll)||Double.isNaN(ll)) {
                System.out.println("Validation sentence "+n+" is given -inf log likelihood!");
            }
            else validationLikelihood += ll; // there are for some reason some sentences that are unparsable
        }
        // NOTE(review): 'iter' was already incremented at the top of the loop, so
        // "(iter+1)" over-reports the iteration count by one — confirm.
        System.out.println("The validation LL after merging and "+(iter+1)+" iterations is "+validationLikelihood);
        if (iter<minIterations){
            // within the minimum-iteration window, always accept the new model
            maxLikelihood = Math.max(validationLikelihood,maxLikelihood);
            grammar = newGrammar;
            lexicon = newLexicon;
            droppingIter = 0;
        }
        else if(validationLikelihood > maxLikelihood) {
            // improvement: accept the new model and reset the dropping counter
            maxLikelihood = validationLikelihood;
            grammar = newGrammar;
            lexicon = newLexicon;
            droppingIter = 0;
        }
        else { droppingIter++; }
        // checkpoint the current model every 5 iterations
        if (iter>0 && iter%5==0){
            pData = new ParserData(newLexicon, newGrammar, null, Numberer.getNumberers(), newNumSubStatesArray, v_markov, h_markov, bin);
            System.out.println("Saving grammar to "+outFileName+"-it-"+iter+".");
            System.out.println("It gives a validation data log likelihood of: "+maxLikelihood);
            if (pData.Save(outFileName+"-it-"+iter)) System.out.println("Saving successful");
            else System.out.println("Saving failed!");
            pData = null;
        }
    }
    // save the best model seen (grammar/lexicon hold the last accepted pair)
    System.out.println("Saving grammar to "+outFileName+".");
    System.out.println("It gives a validation data log likelihood of: "+maxLikelihood);
    // for (int i=0; i<grammar.numStates; i++){
    // if (grammar.numSubStates[i]!=lexicon.numSubStates[i])
    // System.out.println("DISAGREEMENT: The grammar thinks that state "+i+" is split into "+grammar.numSubStates[i]+" substates, while the lexicon thinks "+lexicon.numSubStates[i]);
    // }
    ParserData newPData = new ParserData(lexicon, grammar, null, Numberer.getNumberers(), newNumSubStatesArray, v_markov, h_markov, bin);
    if (newPData.Save(outFileName)) System.out.println("Saving successful.");
    else System.out.println("Saving failed!");
    System.exit(0);
}
/**
 * Reports, for every state, how many substates it had before and after the
 * merge, then prints the lexicon and grammar tags with their new substate
 * counts as priority queues.
 *
 * @param grammar    the grammar before merging (supplies the old counts and the tag numberer)
 * @param newGrammar the grammar after merging (supplies the new counts)
 */
public static void printMergingStatistics(Grammar grammar, Grammar newGrammar) {
    PriorityQueue<String> lexiconQueue = new PriorityQueue<String>();
    PriorityQueue<String> grammarQueue = new PriorityQueue<String>();
    short[] oldCounts = grammar.numSubStates;
    short[] newCounts = newGrammar.numSubStates;
    Numberer numberer = grammar.tagNumberer;
    for (short state = 0; state < oldCounts.length; state++) {
        String tag = (String) numberer.object(state);
        System.out.print("\nState " + tag + " had " + oldCounts[state]
                + " substates and now has " + newCounts[state] + ".");
        // phrasal (grammar) tags and preterminal (lexicon) tags are summarized separately
        PriorityQueue<String> bucket = grammar.isGrammarTag(state) ? grammarQueue : lexiconQueue;
        bucket.add(tag, newCounts[state]);
    }
    System.out.print("\n");
    System.out.println("Lexicon: " + lexiconQueue.toString());
    System.out.println("Grammar: " + grammarQueue.toString());
}
/**
 * This function was written to have the ability to also merge non-sibling pairs,
 * however this functionality is not used anymore since it seemed tricky to determine
 * an appropriate threshold for merging non-siblings.
 * The function returns a new grammar object and changes the lexicon in place!
 *
 * Merging proceeds in passes: in each pass a substate participates in at most
 * one merge; conflicting merges are deferred to a later pass. The passes
 * continue until no requested pair remains. Note that mergeThesePairs is
 * consumed (mutated) by this method.
 *
 * @param grammar the grammar whose substates are merged (a merged copy is returned)
 * @param lexicon merged in place to stay consistent with the returned grammar
 * @param mergeThesePairs [tag][i][j] true if substates i and j of tag should be merged; cleared as merges are performed
 * @param mergeWeights per-substate posterior weights used to combine merged parameters
 * @return the grammar after all requested merges (with compact rule arrays rebuilt)
 */
public static Grammar doTheMerges(Grammar grammar, Lexicon lexicon, boolean[][][] mergeThesePairs, double[][] mergeWeights) {
    short[] numSubStatesArray = grammar.numSubStates;
    // NOTE(review): initially aliases grammar.numSubStates; re-pointed to the
    // merged grammar's counts after each pass.
    short[] newNumSubStatesArray = grammar.numSubStates;
    Grammar newGrammar = null;
    while (true) {
        // we want to continue as long as there's something to merge
        boolean somethingToMerge = false;
        for (int tag=0; tag<numSubStatesArray.length; tag++) {
            for (int i=0; i<newNumSubStatesArray[tag]; i++) {
                for (int j=0; j<newNumSubStatesArray[tag]; j++) {
                    somethingToMerge = somethingToMerge || mergeThesePairs[tag][i][j];
                }
            }
        }
        if (!somethingToMerge)
            break;
        /** mergeThisIteration is which states to merge on this iteration through the loop */
        boolean[][][] mergeThisIteration = new boolean[newNumSubStatesArray.length][][];
        // make mergeThisIteration a copy of mergeTheseStates
        for (int tag=0; tag<numSubStatesArray.length; tag++) {
            mergeThisIteration[tag] = new boolean[mergeThesePairs[tag].length][mergeThesePairs[tag].length];
            for (int i=0; i<mergeThesePairs[tag].length; i++) {
                for (int j=0; j<mergeThesePairs[tag].length; j++) {
                    mergeThisIteration[tag][i][j] = mergeThesePairs[tag][i][j];
                }
            }
        }
        // delete all complicated merges from mergeThisIteration: once a substate
        // is committed to one merge in this pass, any further merge involving it
        // is postponed to a later pass
        for (int tag=0; tag<numSubStatesArray.length; tag++) {
            boolean[] alreadyDecidedToMerge = new boolean[mergeThesePairs[tag].length];
            for (int i=0; i<mergeThesePairs[tag].length; i++) {
                for (int j=0; j<mergeThesePairs[tag].length; j++) {
                    if (alreadyDecidedToMerge[i] || alreadyDecidedToMerge[j])
                        mergeThisIteration[tag][i][j] = false;
                    alreadyDecidedToMerge[i] = alreadyDecidedToMerge[i] || mergeThesePairs[tag][i][j];
                    alreadyDecidedToMerge[j] = alreadyDecidedToMerge[j] || mergeThesePairs[tag][i][j];
                }
            }
        }
        // remove merges in mergeThisIteration from mergeThesePairs
        for (int tag=0; tag<numSubStatesArray.length; tag++) {
            for (int i=0; i<mergeThesePairs[tag].length; i++) {
                for (int j=0; j<mergeThesePairs[tag].length; j++) {
                    mergeThesePairs[tag][i][j] = mergeThesePairs[tag][i][j] && !mergeThisIteration[tag][i][j];
                }
            }
        }
        // System.out.println("\nDoing one merge iteration.");
        // for (short state=0; state<numSubStatesArray.length; state++) {
        // System.out.print("\n State "+grammar.tagNumberer.object(state));
        // for (int i=0; i<mergeThisIteration[state].length; i++){
        // for (int j=i+1; j<mergeThisIteration[state][i].length; j++){
        // if (mergeThisIteration[state][i][j]) System.out.print(". Merging pair ("+i+","+j+")");
        // }
        // }
        // }
        // perform this pass's merges on grammar and lexicon, then remap the
        // still-pending pairs and weights to the new substate numbering
        newGrammar = grammar.mergeStates(mergeThisIteration, mergeWeights);
        lexicon.mergeStates(mergeThisIteration,mergeWeights);
        //fix merge weights
        grammar.fixMergeWeightsEtc(mergeThesePairs,mergeWeights,mergeThisIteration);
        grammar = newGrammar;
        newNumSubStatesArray = grammar.numSubStates;
    }
    grammar.makeCRArrays();
    return grammar;
}
/**
 * Computes the merge "deltas": for every pair of substates of every state, the
 * (approximate) loss in training-data likelihood that merging the pair would
 * incur, tallied over the inside/outside scores of all training trees.
 *
 * @param grammar            grammar whose substate pairs are scored
 * @param lexicon            lexicon used for the inside/outside pass
 * @param mergeWeights       per-substate posterior weights from computeMergeWeights
 * @param trainStateSetTrees training trees annotated with state sets
 * @return deltas[state][i][j] = estimated likelihood loss of merging substates i and j of state
 */
public static double[][][] computeDeltas(Grammar grammar, Lexicon lexicon, double[][] mergeWeights, StateSetTreeList trainStateSetTrees) {
    ArrayParser parser = new ArrayParser(grammar, lexicon);
    double[][][] deltas = new double[grammar.numSubStates.length][mergeWeights[0].length][mergeWeights[0].length];
    boolean noSmoothing = false, debugOutput = false;
    for (Tree<StateSet> stateSetTree : trainStateSetTrees) {
        parser.doInsideOutsideScores(stateSetTree, noSmoothing, debugOutput); // E Step
        double ll = stateSetTree.getLabel().getIScore(0);
        // inside scores are scaled; recover the log likelihood from score + scale
        ll = Math.log(ll) + (100 * stateSetTree.getLabel().getIScale());
        // Skip unparsable trees. The NaN check matches the guards used in main()
        // and computeMergeWeights(); previously a NaN likelihood slipped through
        // and could corrupt the merge-score tallies.
        if (!Double.isInfinite(ll) && !Double.isNaN(ll))
            grammar.tallyMergeScores(stateSetTree, deltas, mergeWeights);
    }
    return deltas;
}
/**
 * Computes the merge weights: for every state, the posterior probability mass
 * each substate receives over the training set. These weights determine how
 * the parameters of two substates are combined when they are merged.
 *
 * @param grammar            grammar providing the substate inventory and tally routines
 * @param lexicon            lexicon used for the inside/outside pass
 * @param trainStateSetTrees training trees annotated with state sets
 * @return mergeWeights[state][substate], normalized per state
 */
public static double[][] computeMergeWeights(Grammar grammar, Lexicon lexicon, StateSetTreeList trainStateSetTrees) {
    double[][] mergeWeights = new double[grammar.numSubStates.length][(int)ArrayUtil.max(grammar.numSubStates)];
    double trainingLikelihood = 0;
    ArrayParser parser = new ArrayParser(grammar, lexicon);
    boolean noSmoothing = false, debugOutput = false;
    int n = 0;
    for (Tree<StateSet> stateSetTree : trainStateSetTrees) {
        parser.doInsideOutsideScores(stateSetTree, noSmoothing, debugOutput); // E Step
        double ll = stateSetTree.getLabel().getIScore(0);
        // inside scores are scaled; recover the log likelihood from score + scale
        ll = Math.log(ll) + (100 * stateSetTree.getLabel().getIScale());
        // Guard against NaN as well as +/-inf (matching the checks in main());
        // previously a NaN likelihood slipped through, poisoning the total
        // likelihood and the weight tallies. The message now reports the actual
        // value instead of always claiming "-inf".
        if (Double.isInfinite(ll) || Double.isNaN(ll)) {
            System.out.println("Training sentence "+n+" is given "+ll+" log likelihood!");
        }
        else {
            trainingLikelihood += ll; // there are for some reason some sentences that are unparsable
            grammar.tallyMergeWeights(stateSetTree, mergeWeights);
        }
        n++;
    }
    System.out.println("The trainings LL before merging is "+trainingLikelihood);
    // normalize the weights
    grammar.normalizeMergeWeights(mergeWeights);
    return mergeWeights;
}
/**
 * Decides which substate pairs to merge: the deltas (estimated likelihood
 * losses) of all sibling pairs — substates 2k and 2k+1, which resulted from
 * the same split — are sorted, and the cheapest mergingPercentage fraction is
 * marked for merging. With separateMerge the threshold is chosen separately
 * for grammar (phrasal) and lexicon (preterminal) tags, with the lexicon quota
 * scaled by 1.5. Non-sibling merging is collected but disabled.
 *
 * Fix: the percentile indices into the sorted delta lists are now clamped to
 * the list bounds. Previously deltaLexicon.get((int)(n*percentage*1.5)) threw
 * IndexOutOfBoundsException for mergingPercentage >= ~0.67, as did the plain
 * sibling lookup for mergingPercentage == 1.0 or an empty delta list.
 *
 * @param deltas            deltas[state][i][j] = likelihood loss of merging substates i and j
 * @param separateMerge     choose thresholds separately for grammar and lexicon tags
 * @param mergingPercentage fraction of sibling pairs to merge (0..1)
 * @param grammar           supplies substate counts, tag numberer and isGrammarTag
 * @return mergeThesePairs[state][i][j] true if substates i,j of state should be merged
 */
public static boolean[][][] determineMergePairs(double[][][] deltas, boolean separateMerge, double mergingPercentage, Grammar grammar) {
    boolean[][][] mergeThesePairs = new boolean[grammar.numSubStates.length][][];
    short[] numSubStatesArray = grammar.numSubStates;
    // set the threshold so that p percent of the splits are merged again.
    ArrayList<Double> deltaSiblings = new ArrayList<Double>();
    ArrayList<Double> deltaPairs = new ArrayList<Double>();
    ArrayList<Double> deltaLexicon = new ArrayList<Double>();
    ArrayList<Double> deltaGrammar = new ArrayList<Double>();
    int nSiblings = 0, nPairs = 0, nSiblingsGr = 0, nSiblingsLex = 0;
    for (int state = 0; state < mergeThesePairs.length; state++) {
        for (int sub1 = 0; sub1 < numSubStatesArray[state]-1; sub1++) {
            // even-indexed substate and its immediate successor form a sibling
            // pair; a delta of exactly 0 marks an unsplit/unused entry
            if (sub1 % 2 == 0 && deltas[state][sub1][sub1+1] != 0) {
                deltaSiblings.add(deltas[state][sub1][sub1+1]);
                if (separateMerge) {
                    if (grammar.isGrammarTag(state)) {
                        deltaGrammar.add(deltas[state][sub1][sub1+1]);
                        nSiblingsGr++;
                    }
                    else {
                        deltaLexicon.add(deltas[state][sub1][sub1+1]);
                        nSiblingsLex++;
                    }
                }
                nSiblings++;
            }
            // non-sibling pairs are tallied here but never merged (the
            // threshold2 path below is disabled)
            for (int sub2 = sub1+1; sub2 < numSubStatesArray[state]; sub2++) {
                if (!(sub2 != sub1+1 && sub1 % 2 != 0) && deltas[state][sub1][sub2] != 0) {
                    deltaPairs.add(deltas[state][sub1][sub2]);
                    nPairs++;
                }
            }
        }
    }
    double threshold = -1, threshold2 = -1, thresholdGr = -1, thresholdLex = -1;
    if (separateMerge) {
        System.out.println("Going to merge "+(int)(mergingPercentage*100)+"% of the substates siblings.");
        System.out.println("Setting the merging threshold for lexicon and grammar separately.");
        Collections.sort(deltaGrammar);
        Collections.sort(deltaLexicon);
        // clamp the percentile index into the valid range and leave the
        // threshold at -1 (merge nothing) when there are no candidates
        if (!deltaGrammar.isEmpty())
            thresholdGr = deltaGrammar.get(Math.min((int)(nSiblingsGr*mergingPercentage), deltaGrammar.size()-1));
        if (!deltaLexicon.isEmpty())
            thresholdLex = deltaLexicon.get(Math.min((int)(nSiblingsLex*mergingPercentage*1.5), deltaLexicon.size()-1));
        System.out.println("Setting the threshold for lexical siblings to "+thresholdLex);
        System.out.println("Setting the threshold for grammatical siblings to "+thresholdGr);
    }
    else {
        Collections.sort(deltaSiblings);
        System.out.println("Going to merge "+(int)(mergingPercentage*100)+"% of the substates siblings.");
        if (!deltaSiblings.isEmpty())
            threshold = deltaSiblings.get(Math.min((int)(nSiblings*mergingPercentage), deltaSiblings.size()-1));
        System.out.println("Setting the threshold for siblings to "+threshold+".");
    }
    // mark every sibling pair whose delta is at or below its threshold
    int mergePair = 0, mergeSiblings = 0;
    for (int state = 0; state < mergeThesePairs.length; state++) {
        mergeThesePairs[state] = new boolean[numSubStatesArray[state]][numSubStatesArray[state]];
        for (int i = 0; i < numSubStatesArray[state]-1; i++) {
            if (i % 2 == 0 && deltas[state][i][i+1] != 0) {
                if (separateMerge) {
                    if (grammar.isGrammarTag(state)) mergeThesePairs[state][i][i+1] = deltas[state][i][i+1] <= thresholdGr;
                    else mergeThesePairs[state][i][i+1] = deltas[state][i][i+1] <= thresholdLex;
                } else mergeThesePairs[state][i][i+1] = deltas[state][i][i+1] <= threshold;
                if (mergeThesePairs[state][i][i+1]) {
                    mergeSiblings++;
                }
            }
        }
    }
    System.out.println("Merging "+mergeSiblings+" siblings and "+mergePair+" other pairs.");
    for (short state = 0; state < deltas.length; state++) {
        System.out.print("State "+grammar.tagNumberer.object(state));
        for (int i = 0; i < numSubStatesArray[state]; i++) {
            for (int j = i+1; j < numSubStatesArray[state]; j++) {
                if (mergeThesePairs[state][i][j]) System.out.print(". Merging pair ("+i+","+j+") at cost "+deltas[state][i][j]);
            }
        }
        System.out.print(".\n");
    }
    return mergeThesePairs;
}
}