/* This file is part of the Joshua Machine Translation System.
 *
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.decoder.ff.tm;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.ff.FeatureFunction;

/**
 * A {@link Rule} implementation that stores only a left-hand side, a
 * source-side right-hand side and a vector of feature scores. No
 * target-side ("english") words are stored: {@link #setEnglish(int[])}
 * ignores its argument and {@link #getEnglish()} returns
 * <code>null</code>.
 *
 * @author Zhifei Li, <zhifei.work@gmail.com>
 * @version $LastChangedDate: 2010-02-10 09:59:38 -0600 (Wed, 10 Feb 2010) $
 */
public class MonolingualRule implements Rule {

	private static final Logger logger =
		Logger.getLogger(MonolingualRule.class.getName());

//===============================================================
// Instance Fields
//===============================================================

	/* The string format of Rule is:
	 * [Phrase] ||| french ||| english ||| feature scores
	 */
	private int ruleID;
	private int lhs; // tag of this rule

	// pointer to the RuleCollection, as all the rules under it share
	// the same Source side
	private int[] pFrench;
	private int arity;
	private float[] featScores; // the feature scores for this rule

	/* a feature function will be fired for this rule
	 * only if the owner of the rule matches the owner of the feature function
	 */
	private int owner;

	// TODO: consider remove this from the general class, and
	// create a new specific Rule class
	private float latticeCost;

	/**
	 * estimate_cost depends on rule itself: statelesscost +
	 * transition_cost(non-stateless/non-contexual* models),
	 * we need this variable in order to provide sorting for
	 * cube-pruning
	 */
	private float est_cost = 0;

//===============================================================
// Static Fields
//===============================================================

	// TODO: Ideally, we shouldn't have to have dummy rule IDs
	// and dummy owners. How can this need be eliminated?
	public static final int DUMMY_RULE_ID = 1;
	public static final int DUMMY_OWNER = 1;

//===============================================================
// Constructors
//===============================================================

	/**
	 * Constructs a new rule using the provided parameters. The
	 * arrays are stored by reference, not copied.
	 *
	 * @param lhs Left-hand side of the rule.
	 * @param sourceRhs Source language right-hand side of the rule.
	 * @param featureScores Feature value scores for the rule.
	 * @param arity Number of nonterminals in the source language
	 *              right-hand side.
	 * @param owner Owner of the rule; see the comment on the
	 *              <code>owner</code> field.
	 * @param latticeCost Lattice cost stored with the rule.
	 * @param ruleID Identifier stored with the rule.
	 */
	public MonolingualRule(int lhs, int[] sourceRhs, float[] featureScores, int arity, int owner, float latticeCost, int ruleID) {
		this.lhs = lhs;
		this.pFrench = sourceRhs;
		this.featScores = featureScores;
		this.arity = arity;
		this.latticeCost = latticeCost;
		this.ruleID = ruleID;
		this.owner = owner;
	}

	/**
	 * Constructs a new rule for callers that do not care about
	 * lattice cost, rule id, and owner. The lattice cost is set to
	 * zero, the rule id to {@link #DUMMY_RULE_ID} and the owner to
	 * {@link #DUMMY_OWNER}. The arrays are stored by reference, not
	 * copied.
	 *
	 * @param lhs_ Left-hand side of the rule.
	 * @param source_rhs Source language right-hand side of the rule.
	 * @param feature_scores Feature value scores for the rule.
	 * @param arity_ Number of nonterminals in the source language
	 *               right-hand side.
	 */
	public MonolingualRule(int lhs_, int[] source_rhs, float[] feature_scores, int arity_) {
		this.lhs = lhs_;
		this.pFrench = source_rhs;
		this.featScores = feature_scores;
		this.arity = arity_;

		//==== dummy values
		this.latticeCost = 0;
		this.ruleID = DUMMY_RULE_ID;
		this.owner = DUMMY_OWNER;
	}

//===============================================================
// Attributes
//===============================================================

	public final void setRuleID(int id) { this.ruleID = id; }

	public final int getRuleID() { return this.ruleID; }

	public final void setArity(int arity) { this.arity = arity; }

	public final int getArity() { return this.arity; }

	public final void setOwner(int owner) { this.owner = owner; }

	public final int getOwner() { return this.owner; }

	/** Sets the left-hand side and drops any cached string form. */
	public final void setLHS(int lhs) {
		this.lhs = lhs;
		invalidateCachedStrings();
	}

	public final int getLHS() { return this.lhs; }

	/** Does nothing: this class keeps no target-side words. */
	public void setEnglish(int[] eng) {
		//TODO: do nothing
	}

	/** @return always <code>null</code>; this class keeps no target-side words. */
	public int[] getEnglish() {
		//TODO
		return null;
	}

	/** Sets the source side (by reference) and drops any cached string form. */
	public final void setFrench(int[] french) {
		this.pFrench = french;
		invalidateCachedStrings();
	}

	public final int[] getFrench() { return this.pFrench; }

	/** Sets the feature scores (by reference) and drops any cached string form. */
	public final void setFeatureScores(float[] scores) {
		this.featScores = scores;
		invalidateCachedStrings();
	}

	/**
	 * Returns the internal feature score array itself, not a copy.
	 * Writes made directly to the returned array are not detected
	 * by the string caches used by the <code>toString</code> methods.
	 */
	public final float[] getFeatureScores() { return this.featScores; }

	public final void setLatticeCost(float cost) { this.latticeCost = cost; }

	public final float getLatticeCost() { return this.latticeCost; }

	/**
	 * Returns the estimated cost last stored by
	 * {@link #estimateRuleCost(List)} (initially zero). A warning
	 * is logged when the stored value is negative infinity.
	 */
	public final float getEstCost() {
		if (est_cost <= Float.NEGATIVE_INFINITY) {
			logger.warning("The est cost is neg infinity; must be bad rule; rule is:\n" + toString());
		}
		return est_cost;
	}

	/**
	 * Computes the estimated cost of this rule as the sum, over the
	 * given feature functions, of
	 * <code>-estimateLogP(this, -1) * getWeight()</code>, stores
	 * that sum inside the rule and returns it.
	 *
	 * @param featureFunctions feature functions to consult; may be
	 *        <code>null</code>
	 * @return the estimate, or zero when
	 *         <code>featureFunctions</code> is <code>null</code>, in
	 *         which case the stored estimate is left unchanged
	 */
	public final float estimateRuleCost(List<FeatureFunction> featureFunctions) {
		if (null == featureFunctions) {
			return 0;
		}

		float estcost = 0.0f;
		for (FeatureFunction ff : featureFunctions) {
			double mdcost = - ff.estimateLogP(this, -1) * ff.getWeight();
			// compound assignment narrows the double sum back to float
			estcost += mdcost;
		}

		this.est_cost = estcost;
		return estcost;
	}

//===============================================================
// Methods
//===============================================================

	/**
	 * Adds <code>score</code> to the feature score in the given
	 * column while holding this rule's monitor.
	 *
	 * @return the updated value of that column
	 */
	public float incrementFeatureScore(int column, double score) {
		synchronized (this) {
			featScores[column] += score;
			invalidateCachedStrings();
			return featScores[column];
		}
	}

	/** Overwrites the feature score in the given column while holding this rule's monitor. */
	public void setFeatureCost(int column, float score) {
		synchronized (this) {
			featScores[column] = score;
			invalidateCachedStrings();
		}
	}

	/** Reads the feature score in the given column while holding this rule's monitor. */
	public float getFeatureCost(int column) {
		synchronized (this) {
			return featScores[column];
		}
	}

//===============================================================
// Serialization Methods
//===============================================================
	// BUG: These are all far too redundant. Should be refactored to share.

	// Caching this method significantly improves performance
	// We mark it transient because it is, though cf
	// java.io.Serializable
	//
	// Cache for the integer rendering produced by toString().
	private transient String cachedToString = null;

	// Cache for the rendering produced by
	// toString(Map, SymbolTable, SymbolTable). It is kept apart from
	// cachedToString because the two renderings differ; sharing one
	// field made each method return whatever the other had cached.
	private transient String cachedVocabToString = null;

	/** Drops both cached renderings; called whenever a rendered field changes. */
	private void invalidateCachedStrings() {
		this.cachedToString = null;
		this.cachedVocabToString = null;
	}

	/** Appends every feature score, each formatted as <code>" %.4f"</code>. */
	private void appendFormattedFeatureScores(StringBuilder sb) {
		for (int i = 0; i < this.featScores.length; i++) {
			sb.append(String.format(" %.4f", this.featScores[i]));
		}
	}

	/**
	 * Renders the rule using the given vocabularies. The result is
	 * cached on first use; the cache is not keyed on the arguments,
	 * so later calls return the first rendering until the rule is
	 * modified through one of its mutators.
	 *
	 * @param ntVocab maps the left-hand side id to its printed form
	 * @param sourceVocab used to print the source side
	 * @param targetVocab not used by this implementation
	 */
	@Deprecated
	public String toString(Map<Integer,String> ntVocab, SymbolTable sourceVocab, SymbolTable targetVocab) {
		if (null == this.cachedVocabToString) {
			StringBuilder sb = new StringBuilder();
			sb.append(ntVocab.get(this.lhs));
			sb.append(" ||| ");
			sb.append(sourceVocab.getWords(this.pFrench,true));
			sb.append(" |||");
			for (int i = 0; i < this.featScores.length; i++) {
				//sb.append(String.format(" %.4f", this.feat_scores[i]));
				sb.append(' ').append(Float.toString(this.featScores[i]));
			}
			this.cachedVocabToString = sb.toString();
		}
		return this.cachedVocabToString;
	}

	/**
	 * Prints the rule in terms of integers. The result is cached
	 * until the rule is modified through one of its mutators.
	 */
	@Deprecated
	@Override
	public String toString() {
		if (null == this.cachedToString) {
			StringBuilder sb = new StringBuilder();
			sb.append(this.lhs);
			sb.append(" ||| ");
			sb.append(Arrays.toString(this.pFrench));
			sb.append(" |||");
			appendFormattedFeatureScores(sb);
			this.cachedToString = sb.toString();
		}
		return this.cachedToString;
	}

	/**
	 * Renders the rule using a single symbol table for both the
	 * left-hand side and the source side. Does not use the cache.
	 */
	@Deprecated
	public String toString(SymbolTable symbolTable) {
		StringBuilder sb = new StringBuilder();
		sb.append(symbolTable.getWord(this.lhs));
		sb.append(" ||| ");
		sb.append(symbolTable.getWords(this.pFrench));
		sb.append(" |||");
		appendFormattedFeatureScores(sb);
		return sb.toString();
	}

	/**
	 * Renders the left-hand side and source side only. When
	 * <code>symbolTable</code> is <code>null</code> the raw integer
	 * ids are printed instead of words.
	 */
	@Deprecated
	public String toStringWithoutFeatScores(SymbolTable symbolTable) {
		StringBuilder sb = new StringBuilder();
		if (symbolTable == null) {
			sb.append(this.getLHS());
		} else {
			sb.append(symbolTable.getWord(this.getLHS()));
		}

		return sb.append(" ||| ")
			.append(convertToString(this.getFrench(), symbolTable))
			.toString();
	}

	/**
	 * Joins the given ids with single spaces, printing each id
	 * through <code>symbolTable.getWord</code> when a table is
	 * supplied and as a plain integer otherwise.
	 *
	 * @param words ids to print; must not be <code>null</code>
	 * @param symbolTable table used to look up words, or
	 *        <code>null</code> to print the ids themselves
	 */
	public String convertToString(int[] words, SymbolTable symbolTable) {
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < words.length; i++) {
			if (symbolTable != null) {
				sb.append(symbolTable.getWord(words[i]));
			} else {
				sb.append(words[i]);
			}

			if (i < words.length - 1) {
				sb.append(" ");
			}
		}
		return sb.toString();
	}
}