/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder.ff.tm;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.ff.FeatureFunction;
/**
* Grammar is an interface for wrapping a <code>Trie</code> of
* grammar rules in order to store holistic metadata about the
* grammar as a whole (for example whether its rules are sorted,
* how many rules it holds, and the identifier of its
* out-of-vocabulary rule).
*
* @author wren ng thornton <wren@users.sourceforge.net>
* @author Zhifei Li, <zhifei.work@gmail.com>
* @version $LastChangedDate: 2010-01-23 11:15:03 -0600 (Sat, 23 Jan 2010) $
*/
public interface Grammar {
/**
* Gets the root of the <code>Trie</code> backing this
* grammar.
* <p>
* <em>Note</em>: This method should run as a small
* constant-time function.
*
* @return the root of the <code>Trie</code> backing this
* grammar
*/
Trie getTrieRoot();
/**
* Sorts the rules in this grammar. After calling this method,
* the rules are guaranteed to be sorted based on the latest
* feature function values.
* <p>
* Cube-pruning requires that the grammar be sorted based
* on the latest feature functions.
*
* @param models List of feature functions
* @see #isSorted()
*/
void sortGrammar(List<FeatureFunction> models);
/**
* Determines whether the rules in this grammar have been
* sorted based on the latest feature function values.
* <p>
* This method is needed for the cube-pruning algorithm.
*
* @return <code>true</code> if the rules in this grammar
* have been sorted based on the latest feature
* function values, <code>false</code> otherwise
* @see #sortGrammar(List)
*/
boolean isSorted();
/**
* Returns whether this grammar has any valid rules for
* covering a particular span of a sentence. Hiero's "glue"
* grammar will only say true if the span is longer than
* our span limit, and is anchored at startIndex==0. Hiero's
* "regular" grammar will only say true if the span is less
* than the span limit. Other grammars, e.g. for rule-based
* systems, may have different behaviors.
*
* @param startIndex Indicates the starting index
* of a phrase in a source input phrase,
* or a starting node identifier
* in a source input lattice
* @param endIndex Indicates the ending index
* of a phrase in a source input phrase,
* or an ending node identifier
* in a source input lattice
* @param pathLength Length of the input path in a source input lattice.
* If a source input phrase is used instead of a lattice,
* this value will likely be ignored by the underlying implementation,
* but would normally be defined as <code>endIndex-startIndex</code>
* @return <code>true</code> if this grammar has any valid rules
* for covering the given span, <code>false</code> otherwise
*/
boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength);
/**
* Gets the number of rules stored in the grammar.
*
* @return the number of rules stored in the grammar
*/
int getNumRules();
/**
* Constructs an out-of-vocabulary (OOV) rule for the given
* source word. This is only called when creating an OOV rule
* in Chart or DiskHypergraph. The transition costs for the
* phrase model, arity penalty, and word penalty are all zero;
* the only exception is the LM cost, or the first feature if
* no LM feature is used.
* <p>
* TODO: will try to get rid of owner, have_lm_model, and num_feats
* <p>
* NOTE(review): the TODO above mentions <code>owner</code>, which
* is not a parameter of this method as currently declared.
*
* @param num_feats presumably the number of features (scores)
* the constructed rule carries -- TODO confirm
* @param sourceWord presumably the integer identifier of the
* OOV source word -- TODO confirm
* @param targetWord presumably the integer identifier of the
* word to emit on the target side -- TODO confirm
* @param have_lm_model presumably whether an LM feature is in
* use, which per the description above decides
* whether the LM cost or the first feature is
* the non-zero one -- TODO confirm
* @return the constructed OOV rule
*/
Rule constructOOVRule(int num_feats, int sourceWord, int targetWord, boolean have_lm_model);
/**
* Gets the integer identifier of this grammar's out-of-vocabulary
* (OOV) rule.
*
* @return the integer identifier of this grammar's
* out-of-vocabulary (OOV) rule
*/
int getOOVRuleID();
/**
* Constructs a manual rule that is supplied from outside the
* grammar, but whose owner should be the same as the grammar.
* The rule ID will be the same as the OOV rule ID (see
* {@link #getOOVRuleID()}), and the rule has no lattice cost.
*
* @param lhs presumably the integer identifier of the
* rule's left-hand side -- TODO confirm
* @param sourceWords presumably the integer identifiers of the
* source-side symbols -- TODO confirm
* @param targetWords presumably the integer identifiers of the
* target-side symbols -- TODO confirm
* @param scores presumably the feature scores to attach to
* the rule -- TODO confirm
* @param aritity (sic) presumably the arity of the rule;
* the misspelt name is kept so that the
* declaration stays unchanged -- TODO confirm
* @return the constructed manual rule
*/
Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores, int aritity);
/**
* NOTE(review): undocumented in the original. Judging by the
* name, this writes the grammar out to disk; the output format
* is not specified by this interface.
*
* @param file presumably the path of the file to write
* -- TODO confirm
* @param symbolTable symbol table; presumably used to map integer
* symbol identifiers to strings when writing
* -- TODO confirm
*/
void writeGrammarOnDisk(String file, SymbolTable symbolTable);
/**
* NOTE(review): undocumented in the original. Judging by the
* name, this changes the costs stored in the grammar; how each
* argument is used is not specified by this interface and must
* be confirmed against the implementations.
*
* @param weightTbl map from <code>String</code> keys to
* <code>Double</code> values; presumably
* feature names to weights -- TODO confirm
* @param featureMap map from <code>String</code> keys to
* <code>Integer</code> values; presumably
* feature names to indices -- TODO confirm
* @param scores array of scores; whether it is read or
* written is not specified here -- TODO confirm
* @param prefix meaning not specified here -- TODO confirm
* @param column meaning not specified here -- TODO confirm
* @param negate meaning not specified here -- TODO confirm
*/
void changeGrammarCosts(Map<String, Double> weightTbl, HashMap<String, Integer> featureMap, double[] scores, String prefix, int column, boolean negate);
/**
* NOTE(review): undocumented in the original. Judging by the
* name and the <code>void</code> return type, this presumably
* fills the supplied table with identifiers for the rules of
* this grammar -- TODO confirm against the implementations.
*
* @param rulesIDTable map from <code>String</code> keys to
* <code>Integer</code> values; presumably
* populated by this call -- TODO confirm
* @param symbolTable symbol table; presumably used to render
* rules as strings -- TODO confirm
*/
void obtainRulesIDTable(Map<String, Integer> rulesIDTable, SymbolTable symbolTable);
}