package joshua.decoder.ff.state_maintenance;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.chart_parser.SourcePath;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;

/**
 * Builds the {@link NgramDPState} of a node from the target side of a rule and
 * the n-gram states already stored on its antecedent nodes.
 *
 * <p>The resulting state is a pair of word-ID sequences of equal length, each
 * holding at most {@code ngramOrder - 1} entries: the "left" sequence collects
 * the first words encountered while walking the rule's target side, and the
 * "right" sequence is the trailing window of words at the end of that walk.
 * No language-model cost is computed here; only the state is maintained.
 */
public class NgramStateComputer implements StateComputer<NgramDPState> {

    private static final Logger logger =
        Logger.getLogger(NgramStateComputer.class.getName());

    private final SymbolTable symbolTable;
    private final int ngramOrder;
    private int stateID;

    /**
     * Creates a state computer and logs its configuration at INFO level.
     *
     * @param symbolTable table used to tell nonterminals from terminals and to
     *                    map a nonterminal ID to its antecedent index
     * @param nGramOrder  n-gram order; states keep at most {@code nGramOrder - 1} words
     * @param stateID     identifier under which antecedent nodes store this state;
     *                    should be any integer except -1
     */
    public NgramStateComputer(SymbolTable symbolTable, int nGramOrder, int stateID) {
        this.symbolTable = symbolTable;
        this.ngramOrder = nGramOrder;
        this.stateID = stateID;
        logger.info("NgramStateComputer: stateID=" + stateID + "; ngramOrder=" + this.ngramOrder);
    }

    /** @return the identifier used to look this state up on antecedent nodes */
    public int getStateID() {
        return stateID;
    }

    /** @param stateID the identifier used to look this state up on antecedent nodes */
    public void setStateID(int stateID) {
        this.stateID = stateID;
    }

    /**
     * Computes the state for a final (goal) transition.
     *
     * @return always {@code null}: no state is required for the final transition
     */
    public NgramDPState computeFinalState(HGNode antNode, int spanStart, int spanEnd, SourcePath srcPath) {
        return null;
    }

    /**
     * Computes the n-gram state that results from applying {@code rule} on top
     * of the given antecedent nodes.
     *
     * <p>The rule's target side is walked left to right. A terminal contributes
     * itself; a nonterminal contributes the left-context words of its antecedent
     * state, after which the tail of the running window is overwritten with the
     * antecedent's right-context words.
     *
     * @param rule      rule whose target side ({@code rule.getEnglish()}) is walked
     * @param antNodes  antecedent nodes, indexed by the target nonterminal index
     * @param spanStart not used by this implementation
     * @param spanEnd   not used by this implementation
     * @param srcPath   not used by this implementation
     * @return a new state holding the left and right word sequences
     * @throws RuntimeException if an antecedent state has left and right contexts
     *         of different lengths, or if the computed left sequence ends up
     *         longer than the right one
     */
    public NgramDPState computeState(Rule rule, List<HGNode> antNodes, int spanStart, int spanEnd, SourcePath srcPath) {
        List<Integer> leftStateSequence = new ArrayList<Integer>();
        // Sliding window over the most recent words; never grows to ngramOrder entries.
        List<Integer> currentNgram = new ArrayList<Integer>();

        int[] enWords = rule.getEnglish();
        for (int c = 0; c < enWords.length; c++) {
            int curID = enWords[c];

            if (!symbolTable.isNonterminal(curID)) {
                // Terminal word: it enters both the window and (while there is room) the left state.
                appendWord(curID, currentNgram, leftStateSequence);
                continue;
            }

            // Nonterminal: pull the left and right contexts from the antecedent's stored state.
            int index = symbolTable.getTargetNonterminalIndex(curID);
            // TODO carried over from the original code (the intended follow-up was not specified).
            NgramDPState antState = (NgramDPState) antNodes.get(index).getDPState(this.getStateID());
            List<Integer> leftContext = antState.getLeftLMStateWords();
            List<Integer> rightContext = antState.getRightLMStateWords();
            if (leftContext.size() != rightContext.size()) {
                throw new RuntimeException("NgramStateComputer.computeState: left and right contexts have unequal lengths");
            }

            // Left context: handled word by word exactly like terminals.
            for (int word : leftContext) {
                appendWord(word, currentNgram, leftStateSequence);
            }

            // Right context: overwrite the tail of the window in place. The left state
            // never takes words from the right context (they are either duplicates or
            // out of range), and the right context is never scored on its own because
            // it is either a duplicate or only a partial n-gram.
            int tailStart = currentNgram.size() - rightContext.size();
            for (int i = 0; i < rightContext.size(); i++) {
                currentNgram.set(tailStart + i, rightContext.get(i));
            }
        }

        // The remaining window is the right state; left and right must end up the same size.
        List<Integer> rightStateSequence = currentNgram;
        int excess = rightStateSequence.size() - leftStateSequence.size();
        if (excess < 0) {
            throw new RuntimeException("left has a bigger size than right; left="
                + leftStateSequence.size() + "; right=" + rightStateSequence.size());
        }
        if (excess > 0) {
            // Drop the oldest surplus words in one bulk operation instead of repeated remove(0).
            rightStateSequence.subList(0, excess).clear();
        }

        return new NgramDPState(leftStateSequence, rightStateSequence);
    }

    /**
     * Feeds one word into the running n-gram window and, while the left state
     * still has fewer than {@code ngramOrder - 1} words, into the left state.
     */
    private void appendWord(int wordID, List<Integer> currentNgram, List<Integer> leftStateSequence) {
        currentNgram.add(wordID);
        if (currentNgram.size() == this.ngramOrder) {
            // A complete n-gram has been assembled (the point at which it could be
            // scored); drop its oldest word so the window keeps sliding.
            currentNgram.remove(0);
        }
        if (leftStateSequence.size() < this.ngramOrder - 1) {
            leftStateSequence.add(wordID);
        }
    }
}