package joshua.decoder.chart_parser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.ff.tm.Grammar;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.segment_file.ConstraintRule;
import joshua.decoder.segment_file.ConstraintSpan;

/**
 * Applies manually specified constraints (RULE, LHS and RHS) to chart parsing.
 *
 * <p>At construction time every RULE constraint is turned into a flat manual
 * rule and added to the chart as an axiom. Spans carrying any other constraint
 * type are remembered so that {@link #filterRules(int, int, List)} can later
 * filter grammar rules for exactly that span, and spans carrying a hard RULE
 * constraint are remembered so that
 * {@link #containHardRuleConstraint(int, int)} can report them.
 *
 * @author Zhifei Li, <zhifei.work@gmail.com>
 * @version $LastChangedDate: 2009-12-31 11:37:41 -0500 (Thursday, 31 December 2009) $
 */
public class ManualConstraintsHandler {

  // TODO: each span only has one ConstraintSpan
  // Spans that have LHS or RHS constraints (they are always hard), keyed by
  // the signature produced by getSpanSignature(start, end). Stays null until
  // the first such span is seen, so a null check is the "nothing to filter"
  // fast path.
  private HashMap<String, ConstraintSpan> constraintSpansForFiltering;

  // Spans that carry a hard RULE constraint, one entry per constraint span.
  // Stays null until the first such span is seen.
  private List<Span> spansWithHardRuleConstraint;

  private final SymbolTable symbolTable;
  private final Chart chart;
  private final Grammar grammarForConstructManualRule;

  private static final Logger logger =
      Logger.getLogger(ManualConstraintsHandler.class.getName());

  /**
   * Creates the handler and immediately processes the given constraints.
   *
   * @param symbolTable used to map constraint symbols to integer ids
   * @param chart chart that receives one axiom per RULE constraint
   * @param grammarForConstructManualRule grammar used to build the manual rules
   * @param constraintSpans constraints to process; may be {@code null}, in
   *        which case nothing is registered
   */
  public ManualConstraintsHandler(SymbolTable symbolTable, Chart chart,
      Grammar grammarForConstructManualRule, List<ConstraintSpan> constraintSpans) {
    this.symbolTable = symbolTable;
    this.chart = chart;
    this.grammarForConstructManualRule = grammarForConstructManualRule;
    initialize(constraintSpans);
  }

  /**
   * Processes all constraint spans. Note that manual constraints or OOV
   * handling is not part of seeding.
   *
   * <ol>
   * <li>adds each manual rule (only flat rules are allowed) into the chart as
   *     a constraint;</li>
   * <li>adds spans with a non-RULE (LHS or RHS) constraint into
   *     {@code constraintSpansForFiltering};</li>
   * <li>adds the span into {@code spansWithHardRuleConstraint} if it contains
   *     a hard RULE constraint.</li>
   * </ol>
   *
   * @param constraintSpans constraints to process; {@code null} is a no-op
   */
  private void initialize(List<ConstraintSpan> constraintSpans) {
    if (null == constraintSpans) {
      return;
    }

    for (ConstraintSpan cSpan : constraintSpans) {
      if (null == cSpan.rules()) {
        continue;
      }

      boolean hasFilteringConstraint = false; // contains LHS or RHS constraints?
      boolean hardSpanRecorded = false;       // span already in spansWithHardRuleConstraint?

      for (ConstraintRule cRule : cSpan.rules()) {
        // Note that LHS and RHS constraints are always hard,
        // while a RULE constraint can be soft or hard.
        switch (cRule.type()) {
          case RULE:
            float[] featureScores = buildFeatureScores(cSpan, cRule);

            // If the RULE constraint is hard, then all constituents within
            // this span that are constructed from the regular grammar should
            // be filtered out. Record the span once, even when it carries
            // several RULE entries, so later lookups do not scan duplicates.
            if (cSpan.isHard() && !hardSpanRecorded) {
              recordHardRuleSpan(cSpan);
              hardSpanRecorded = true;
            }

            addManualRule(cSpan, cRule, featureScores);
            break;

          default:
            hasFilteringConstraint = true;
        }
      }

      if (hasFilteringConstraint) {
        registerFilteringSpan(cSpan);
      }
    }
  }

  /**
   * Prepares the feature scores for a manual rule: all zeros when the span is
   * hard (forcing the feature cost to zero), otherwise a copy of the scores
   * given in the constraint.
   */
  private float[] buildFeatureScores(ConstraintSpan cSpan, ConstraintRule cRule) {
    // TODO: this requires the input to always specify the right number of features
    float[] featureScores = new float[cRule.features().length];
    if (!cSpan.isHard()) {
      for (int i = 0; i < featureScores.length; i++) {
        featureScores[i] = cRule.features()[i];
      }
    }
    // For a hard span the freshly allocated array is already all zeros.
    return featureScores;
  }

  /** Remembers the extent of a span that carries a hard RULE constraint. */
  private void recordHardRuleSpan(ConstraintSpan cSpan) {
    if (null == this.spansWithHardRuleConstraint) {
      this.spansWithHardRuleConstraint = new ArrayList<Span>();
    }
    this.spansWithHardRuleConstraint.add(new Span(cSpan.start(), cSpan.end()));
  }

  /** Builds a flat manual rule from a RULE constraint and adds it to the chart as an axiom. */
  private void addManualRule(ConstraintSpan cSpan, ConstraintRule cRule, float[] featureScores) {
    int arity = 0; // only allow flat rule (i.e. arity=0)
    Rule rule = this.grammarForConstructManualRule.constructManualRule(
        symbolTable.addNonterminal(cRule.lhs()),
        symbolTable.addTerminals(cRule.foreignRhs()),
        symbolTable.addTerminals(cRule.nativeRhs()),
        featureScores,
        arity);

    // add to the chart
    chart.addAxiom(cSpan.start(), cSpan.end(), rule, new SourcePath());

    if (logger.isLoggable(Level.INFO)) {
      logger.info("Adding RULE constraint for span "
          + cSpan.start() + ", " + cSpan.end()
          + "; isHard=" + cSpan.isHard()
          + "; LHS=" + rule.getLHS());
    }
  }

  /** Stores a span that has at least one non-RULE constraint for later rule filtering. */
  private void registerFilteringSpan(ConstraintSpan cSpan) {
    if (logger.isLoggable(Level.INFO)) {
      logger.info("Adding LHS or RHS constraint for span "
          + cSpan.start() + ", " + cSpan.end());
    }
    if (null == this.constraintSpansForFiltering) {
      this.constraintSpansForFiltering = new HashMap<String, ConstraintSpan>();
    }
    // A later ConstraintSpan with the same start/end replaces an earlier one.
    this.constraintSpansForFiltering.put(
        getSpanSignature(cSpan.start(), cSpan.end()), cSpan);
  }

  // ===============================================================
  // Manual constraint annotation methods and classes
  // ===============================================================

  /**
   * If there are any LHS or RHS constraints for a span, then all the
   * applicable grammar rules in that span will have to pass the filter.
   *
   * @param i start of the span
   * @param j end of the span
   * @param rulesIn candidate grammar rules for the span
   * @return {@code rulesIn} itself when no filtering constraint is registered
   *         for exactly this span; otherwise a new list holding, in their
   *         original order, the rules accepted by at least one constraint
   */
  public List<Rule> filterRules(int i, int j, List<Rule> rulesIn) {
    if (null == this.constraintSpansForFiltering) {
      return rulesIn;
    }

    ConstraintSpan cSpan = this.constraintSpansForFiltering.get(getSpanSignature(i, j));
    if (null == cSpan) {
      return rulesIn; // no filtering
    }

    List<Rule> rulesOut = new ArrayList<Rule>();
    for (Rule gRule : rulesIn) {
      // gRule will survive, if any constraint (LHS or RHS) lets it survive
      for (ConstraintRule cRule : cSpan.rules()) {
        if (shouldSurvive(cRule, gRule)) {
          rulesOut.add(gRule);
          break;
        }
      }
    }
    return rulesOut;
  }

  /**
   * Decides whether the grammar rule passes the manually provided constraint.
   *
   * @param cRule the manual constraint
   * @param gRule the grammar rule being tested
   * @return for an LHS constraint, whether the rule's LHS equals the
   *         constraint's LHS symbol; for an RHS constraint, whether the rule's
   *         English side equals the constraint's native RHS word for word;
   *         {@code false} for any other constraint type
   */
  public boolean shouldSurvive(ConstraintRule cRule, Rule gRule) {
    switch (cRule.type()) {
      case LHS:
        return (gRule.getLHS() == this.symbolTable.addNonterminal(cRule.lhs()));

      case RHS:
        int[] targetWords = this.symbolTable.addTerminals(cRule.nativeRhs());
        if (targetWords.length != gRule.getEnglish().length) {
          return false;
        }
        for (int t = 0; t < targetWords.length; t++) {
          if (targetWords[t] != gRule.getEnglish()[t]) {
            return false;
          }
        }
        return true;

      default: // not surviving
        return false;
    }
  }

  /**
   * If a span is *within* the coverage of a *hard* rule constraint, then this
   * span will be only allowed to use the manual rules.
   *
   * @param startSpan start of the span to test
   * @param endSpan end of the span to test
   * @return {@code true} if some recorded hard RULE span has a start not
   *         greater than {@code startSpan} and an end not less than
   *         {@code endSpan}
   */
  public boolean containHardRuleConstraint(int startSpan, int endSpan) {
    if (null == this.spansWithHardRuleConstraint) {
      return false;
    }
    for (Span span : this.spansWithHardRuleConstraint) {
      if (startSpan >= span.startPos && endSpan <= span.endPos) {
        return true;
      }
    }
    return false;
  }

  /** Builds the map key for a span: start and end separated by a single space. */
  private String getSpanSignature(int i, int j) {
    return i + " " + j;
  }

  /** Immutable pair of start and end positions of a span. */
  private static class Span {
    final int startPos;
    final int endPos;

    public Span(int startPos, int endPos) {
      this.startPos = startPos;
      this.endPos = endPos;
    }
  }
}