JoshuaDecoder.java example

Explorer
relax-decode-master
- third-party
/* This file is part of the Joshua Machine Translation System.
 *
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.decoder;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import joshua.corpus.Corpus;
import joshua.corpus.alignment.Alignments;
import joshua.corpus.alignment.mm.MemoryMappedAlignmentGrids;
import joshua.corpus.mm.MemoryMappedCorpusArray;
import joshua.corpus.suffix_array.ParallelCorpusGrammarFactory;
import joshua.corpus.suffix_array.Suffixes;
import joshua.corpus.suffix_array.mm.MemoryMappedSuffixArray;
import joshua.corpus.vocab.BuildinSymbol;
import joshua.corpus.vocab.SrilmSymbol;
import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;
import joshua.decoder.ff.ArityPhrasePenaltyFF;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.PhraseModelFF;
import joshua.decoder.ff.SourcePathFF;
import joshua.decoder.ff.WordPenaltyFF;
import joshua.decoder.ff.lm.LanguageModelFF;
import joshua.decoder.ff.lm.NGramLanguageModel;
import joshua.decoder.ff.lm.bloomfilter_lm.BloomFilterLanguageModel;
import joshua.decoder.ff.lm.buildin_lm.LMGrammarJAVA;
import joshua.decoder.ff.lm.buildin_lm.TrieLM;
import joshua.decoder.ff.lm.distributed_lm.LMGrammarRemote;
import joshua.decoder.ff.lm.srilm.LMGrammarSRILM;
import joshua.decoder.ff.state_maintenance.NgramStateComputer;
import joshua.decoder.ff.state_maintenance.StateComputer;
import joshua.decoder.ff.tm.Grammar;
import joshua.decoder.ff.tm.GrammarFactory;
import joshua.decoder.ff.tm.hiero.MemoryBasedBatchGrammar;
import joshua.discriminative.DiscriminativeSupport;
import joshua.discriminative.feature_related.feature_function.BLEUOracleModel;
import joshua.discriminative.feature_related.feature_function.FeatureTemplateBasedFF;
import joshua.ui.hypergraph_visualizer.HyperGraphViewer;
import joshua.util.FileUtility;
import joshua.util.Regex;
import joshua.util.io.BinaryIn;
import joshua.util.io.LineReader;

/**
 * Implements decoder initialization,
 * including interaction with <code>JoshuaConfiguration</code>
 * and <code>DecoderThread</code>.
 *
 * @author Zhifei Li, <zhifei.work@gmail.com>
 * @author wren ng thornton <wren@users.sourceforge.net>
 * @author Lane Schwartz <dowobeha@users.sourceforge.net>
 * @version $LastChangedDate: 2010-02-03 09:20:31 -0600 (Wed, 03 Feb 2010) $
 */
public class JoshuaDecoder {
	/*
	 * Many of these objects themselves are global objects. We
	 * pass them in when constructing other objects, so that
	 * they all share pointers to the same object. This is good
	 * because it reduces overhead, but it can be problematic
	 * because of unseen dependencies (for example, in the
	 * SymbolTable shared by language model, translation grammar,
	 * etc).
	 */
	/** The DecoderFactory is the main thread of decoding */
	private DecoderFactory             decoderFactory;
	private List<GrammarFactory>  grammarFactories;
	private ArrayList<FeatureFunction> featureFunctions;
	private NGramLanguageModel         languageModel;
	
	private List<StateComputer> stateComputers;
	
	private Map<String,Integer> ruleStringToIDTable;
	
	/**
	 * Shared symbol table for source language terminals, target
	 * language terminals, and shared nonterminals.
	 */
	private SymbolTable                symbolTable;
	
	/** Logger for this class. */
	private static final Logger logger =
		Logger.getLogger(JoshuaDecoder.class.getName());
	
//===============================================================
// Constructors
//===============================================================

	/**
	 * Constructs a new decoder using the specified configuration
	 * file.
	 *
	 * @param configFile Name of configuration file.
	 */
	public JoshuaDecoder(String configFile) {
		this();
		this.initialize(configFile);
	}
	
	/** 
	 * Constructs an uninitialized decoder for use in testing.
	 * <p>
	 * This method is private because it should only ever be
	 * called by the {@link #getUninitalizedDecoder()} method
	 * to provide an uninitialized decoder for use in testing.
	 */
	private JoshuaDecoder() {
		this.grammarFactories = new ArrayList<GrammarFactory>();
	}
	
	/** 
	 * Gets an uninitialized decoder for use in testing.
	 * <p>
	 * This method is called by unit tests or any outside packages (e.g., MERT) 
	 * relying on the decoder.
	 */
	static public JoshuaDecoder getUninitalizedDecoder() {
		return new JoshuaDecoder();
	}
	
//===============================================================
// Public Methods
//===============================================================
	
	public void changeBaselineFeatureWeights(double[] weights){
		changeFeatureWeightVector(weights, null);
	} 
	
	public void changeDiscrminativeModelOnly(String discrminativeModelFile) {
		changeFeatureWeightVector(null, discrminativeModelFile);
	}
	
	/** 
	 * Sets the feature weight values used by the decoder.
	 * <p>
	 * This method assumes that the order of the provided weights
	 * is the same as their order in the decoder's configuration
	 * file.
	 * 
	 * @param weights Feature weight values
	 */
	public void changeFeatureWeightVector(double[] weights, String discrminativeModelFile) {
		if(weights!=null){
			if (this.featureFunctions.size() != weights.length) {
				throw new IllegalArgumentException("number of weights does not match number of feature functions");
			}
			
			int i = 0; 
			for (FeatureFunction ff : this.featureFunctions) {
				double oldWeight = ff.getWeight();
				ff.setWeight(weights[i]);
				logger.info("Feature function : " +	ff.getClass().getSimpleName() 
						+	"; weight changed from " + oldWeight + " to " + ff.getWeight());
				i++; 
			}
		}
		
		if(discrminativeModelFile!=null)
			changeDiscrminativeModelWeights(discrminativeModelFile);
		
		//FIXME: this works for Batch grammar only; not for sentence-specific grammars
		for (GrammarFactory grammarFactory : this.grammarFactories) {
//			if (grammarFactory instanceof Grammar) {
			grammarFactory.getGrammarForSentence(null)
				.sortGrammar(this.featureFunctions);
//			}
		}
	}
	
	

	private void changeDiscrminativeModelWeights(String discrminativeModelFile){
		for (FeatureFunction ff : this.featureFunctions) {					
			//== set the discriminative model
			if(discrminativeModelFile!=null && ff instanceof FeatureTemplateBasedFF){
				HashMap<String, Double> modelTable = new HashMap<String, Double>(); 
				DiscriminativeSupport.loadModel(discrminativeModelFile, modelTable, this.ruleStringToIDTable);
				((FeatureTemplateBasedFF) ff).setModel(modelTable);
			}
		}
	}
	
	
	/** 
	 * Decode a whole test set. This may be parallel.
	 *
	 * @param testFile
	 * @param nbestFile
	 * @param oracleFile
	 */
	public void decodeTestSet(String testFile, String nbestFile, String oracleFile) throws IOException {
		this.decoderFactory.decodeTestSet(testFile, nbestFile, oracleFile);
	}
	
	public void decodeTestSet(String testFile, String nbestFile) {
		this.decoderFactory.decodeTestSet(testFile, nbestFile, null);
	}
	
	
	/** Decode a sentence. This must be non-parallel. */
	public void decodeSentence(String testSentence, String[] nbests) {
		//TODO
	}
	
	
	public void cleanUp() {
		//TODO
		//this.languageModel.end_lm_grammar(); //end the threads
	}
	
	public void visualizeHyperGraphForSentence(String sentence)
	{
		HyperGraphViewer.visualizeHypergraphInFrame(this.decoderFactory.getHyperGraphForSentence(sentence), this.symbolTable);
	}
	
	
	public static void writeConfigFile(double[] newWeights, String template, String outputFile, String newDiscriminativeModel) {
		try {
			int columnID = 0;
			
			BufferedWriter writer = FileUtility.getWriteFileStream(outputFile);
			LineReader     reader = new LineReader(template);
			try { for (String line : reader) {
				line = line.trim();
				if (Regex.commentOrEmptyLine.matches(line)
				|| line.indexOf("=") != -1) {
					//comment, empty line, or parameter lines: just copy
					writer.write(line);
					writer.newLine();
					
				} else { //models: replace the weight
					String[] fds = Regex.spaces.split(line);
					StringBuffer newSent = new StringBuffer();
					if (! Regex.floatingNumber.matches(fds[fds.length-1])) {
						throw new IllegalArgumentException("last field is not a number; the field is: " + fds[fds.length-1]);
					}
					
					if(newDiscriminativeModel!=null && "discriminative".equals(fds[0])){
						newSent.append(fds[0]).append(' ');
						newSent.append(newDiscriminativeModel).append(' ');//change the file name
						for (int i = 2; i < fds.length-1; i++) {
							newSent.append(fds[i]).append(' ');
						}
					}else{//regular
						for (int i = 0; i < fds.length-1; i++) {
							newSent.append(fds[i]).append(' ');
						}
					}
					if(newWeights!=null)
						newSent.append(newWeights[columnID++]);//change the weight
					else
						newSent.append(fds[fds.length-1]);//do not change
					
					writer.write(newSent.toString());
					writer.newLine();
				}
			} } finally {
				reader.close();
				writer.close();
			}
			
			if (newWeights!=null && columnID != newWeights.length) {
				throw new IllegalArgumentException("number of models does not match number of weights");
			}
			
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	
	
//===============================================================
// Initialization Methods
//===============================================================
	
	/** 
	 * Initialize all parts of the JoshuaDecoder.
	 *
	 * @param configFile File containing configuration options
	 * @return An initialized decoder
	 */
	public JoshuaDecoder initialize(String configFile) {
		try {
			JoshuaConfiguration.readConfigFile(configFile);

			if (JoshuaConfiguration.tm_file != null) {

				//TODO: should not use file suffix to decide which kind of grammar we are using
				if (JoshuaConfiguration.tm_file.endsWith(".josh")) {
					
					try {
						// Use corpus-based grammar
						//inside getParallelCorpus, we will initialize symboltable, lm, and feature functions
						ParallelCorpusGrammarFactory parallelCorpus = getParallelCorpus(configFile);							
						grammarFactories.add(parallelCorpus);
						
					} catch (Exception e) {
						IOException ioe = new IOException("Error reading suffix array grammar.");
						ioe.initCause(e);
						throw ioe;
					}

				} else {

					// Sets: symbolTable, defaultNonterminals
					//symbol table may grow on the fly during decoding
					this.initializeSymbolTable(null);

					// Needs: symbolTable; Sets: languageModel
					if (JoshuaConfiguration.have_lm_model)
						initializeLanguageModel();


					// initialize and load grammar
					this.initializeGlueGrammar();					
					this.initializeMainTranslationGrammar();
					
//					 Initialize the features: requires that
					// LM model has been initialized. If an LM
					// feature is used, need to read config file
					// again
					this.initializeFeatureFunctions(configFile);
					
					this.initializeStateComputers(symbolTable, JoshuaConfiguration.lmOrder, JoshuaConfiguration.ngramStateID);

				}
			} else {
				throw new RuntimeException("No translation grammar or suffix array grammar was specified.");
			}
			
						
			// Sort the TM grammars (needed to do cube pruning)
			for (GrammarFactory grammarFactory : this.grammarFactories) {
				if (grammarFactory instanceof Grammar) {
					Grammar batchGrammar = (Grammar) grammarFactory;
					batchGrammar.sortGrammar(this.featureFunctions);
				}
			}
			
			
			this.decoderFactory = new DecoderFactory(
				this.grammarFactories,
				JoshuaConfiguration.have_lm_model,
				this.featureFunctions,
				this.stateComputers,
				this.symbolTable);
			
		} catch (IOException e) {
			e.printStackTrace();
		}
		
		return this;
	}
	
	// TODO: maybe move to JoshuaConfiguration to enable moving the featureFunction parsing there (Sets: symbolTable, defaultNonterminals)
	private void initializeSymbolTable(SymbolTable existingSymbols) {
		if (JoshuaConfiguration.use_remote_lm_server) {
			if (null == existingSymbols) {
				// Within the decoder, we assume BuildinSymbol when using the remote LM
				this.symbolTable = new BuildinSymbol(JoshuaConfiguration.remote_symbol_tbl);
			} else {
				this.symbolTable = existingSymbols;
			}
		} else if (JoshuaConfiguration.use_srilm) {
			logger.finest("Using SRILM symbol table");
			if (null == existingSymbols) {
				this.symbolTable = new SrilmSymbol(JoshuaConfiguration.lmOrder);
			} else {
				logger.finest("Populating SRILM symbol table with symbols from existing symbol table");
				this.symbolTable = new SrilmSymbol(existingSymbols, JoshuaConfiguration.lmOrder);
			}
		} else {
			if (null == existingSymbols) {
				//this.symbolTable = new Vocabulary();//new BuildinSymbol(null);
				this.symbolTable = new BuildinSymbol();
			} else {
				this.symbolTable = existingSymbols;
			}
		}
		
		// Add the default nonterminal
		this.symbolTable.addNonterminal(JoshuaConfiguration.default_non_terminal);
	}
	
	
	// TODO: maybe move to JoshuaConfiguration to enable moving the featureFunction parsing there (Needs: symbolTable; Sets: languageModel)
	// TODO: check we actually have a feature that requires a language model
	private void initializeLanguageModel() throws IOException {
		// BUG: All these different boolean LM fields should just be an enum.
		// FIXME: And we should check only once for the default (which supports left/right equivalent state) vs everything else (which doesn't)
		// TODO: maybe have a special exception type for BadConfigfileException instead of using IllegalArgumentException?
		
		if (JoshuaConfiguration.use_remote_lm_server) {
			if (JoshuaConfiguration.use_left_equivalent_state
			|| JoshuaConfiguration.use_right_equivalent_state) {
				throw new IllegalArgumentException("using remote LM, we cannot use suffix/prefix stuff");
			}
			this.languageModel = new LMGrammarRemote(
				this.symbolTable,
				JoshuaConfiguration.lmOrder,
				JoshuaConfiguration.f_remote_server_list,
				JoshuaConfiguration.num_remote_lm_servers);
			
		} else if (JoshuaConfiguration.use_srilm) {
			if (JoshuaConfiguration.use_left_equivalent_state
			|| JoshuaConfiguration.use_right_equivalent_state) {
				throw new IllegalArgumentException("using SRILM, we cannot use suffix/prefix stuff");
			}
			this.languageModel = new LMGrammarSRILM(
				(SrilmSymbol)this.symbolTable,
				JoshuaConfiguration.lmOrder,
				JoshuaConfiguration.lm_file);
			
		} else if (JoshuaConfiguration.use_bloomfilter_lm) {
			if (JoshuaConfiguration.use_left_equivalent_state
			|| JoshuaConfiguration.use_right_equivalent_state) {
				throw new IllegalArgumentException("using Bloomfilter LM, we cannot use suffix/prefix stuff");
			}
			this.languageModel = new BloomFilterLanguageModel(
					this.symbolTable,
					JoshuaConfiguration.lmOrder,
					JoshuaConfiguration.lm_file);
		} else if (JoshuaConfiguration.use_trie_lm) {
			if (JoshuaConfiguration.use_left_equivalent_state
					|| JoshuaConfiguration.use_right_equivalent_state) {
						throw new IllegalArgumentException("using Trie LM, we cannot use suffix/prefix stuff");
					}
					this.languageModel = new TrieLM(
							this.symbolTable,
							JoshuaConfiguration.lm_file);
				} else {
			
//			logger.info("Reading language model from " + JoshuaConfiguration.lm_file + " into internal trie");
//			this.languageModel = new TrieLM(
//					new ArpaFile(
//							JoshuaConfiguration.lm_file,
//							this.symbolTable
//					));
			
			// using the built-in JAVA implementation of LM, may not be as scalable as SRILM
			this.languageModel = new LMGrammarJAVA(
				this.symbolTable,
				JoshuaConfiguration.lmOrder,
				JoshuaConfiguration.lm_file,
				JoshuaConfiguration.use_left_equivalent_state,
				JoshuaConfiguration.use_right_equivalent_state);
		}
	}
	
	
	private void initializeGlueGrammar() throws IOException {
		logger.info("Constructing glue grammar...");
		
		MemoryBasedBatchGrammar gr = new MemoryBasedBatchGrammar(
				JoshuaConfiguration.glue_format,
				JoshuaConfiguration.glue_file,
				this.symbolTable,
				JoshuaConfiguration.glue_owner,
				JoshuaConfiguration.default_non_terminal,
				-1,
				JoshuaConfiguration.oovFeatureCost);
		
		this.grammarFactories.add(gr);
		
		if(JoshuaConfiguration.useRuleIDName){
			if(this.ruleStringToIDTable==null)
				this.ruleStringToIDTable = new HashMap<String,Integer>();
			gr.obtainRulesIDTable(this.ruleStringToIDTable, this.symbolTable);			
		}
		
	}
	
	
	private void initializeMainTranslationGrammar() throws IOException {
				
		if (logger.isLoggable(Level.INFO))
			logger.info("Using grammar read from file " + JoshuaConfiguration.tm_file);
		
		MemoryBasedBatchGrammar gr = new MemoryBasedBatchGrammar(
				JoshuaConfiguration.tm_format,
				JoshuaConfiguration.tm_file,
				this.symbolTable,
				JoshuaConfiguration.phrase_owner,
				JoshuaConfiguration.default_non_terminal,
				JoshuaConfiguration.span_limit,
				JoshuaConfiguration.oovFeatureCost);
		this.grammarFactories.add(gr);
		
		if(JoshuaConfiguration.useRuleIDName){
			if(this.ruleStringToIDTable==null)
				this.ruleStringToIDTable = new HashMap<String,Integer>();
			gr.obtainRulesIDTable(this.ruleStringToIDTable, this.symbolTable);			
		}
	}
	
	
	private ParallelCorpusGrammarFactory getParallelCorpus(String configFile)
	throws IOException, ClassNotFoundException {
		
		int maxCacheSize = JoshuaConfiguration.sa_rule_cache_size;
		
		String binaryVocabFileName =
			JoshuaConfiguration.tm_file + 
			File.separator + "common.vocab";
		
		String binarySourceCorpusFileName =
			JoshuaConfiguration.tm_file + 
			File.separator + "source.corpus";
		
		String binarySourceSuffixesFileName =
			JoshuaConfiguration.tm_file + 
			File.separator + "source.suffixes";
		
		String binaryTargetCorpusFileName =
			JoshuaConfiguration.tm_file + 
			File.separator + "target.corpus";
		
		String binaryTargetSuffixesFileName =
			JoshuaConfiguration.tm_file + 
			File.separator + "target.suffixes";
		
		{	// Load the symbol table from disk
			// Keep this code in its own block
			//      to ensure that this symbol table is not
			//      accidentally used anywhere.
			
			if (logger.isLoggable(Level.INFO))
				logger.info("Reading common vocabulary from " + 
						binaryVocabFileName);
			Vocabulary commonVocab = new Vocabulary();
			commonVocab.readExternal(
					BinaryIn.vocabulary(binaryVocabFileName));

			// Initialize symbol table using suffix array's vocab
			this.initializeSymbolTable(commonVocab);
		}

		initializeGlueGrammar();
		
		// Needs: symbolTable; Sets: languageModel
		if (JoshuaConfiguration.have_lm_model) 
			initializeLanguageModel();

		// Initialize the features: requires that
		// LM model has been initialized. If an LM
		// feature is used, need to read config file
		// again
		this.initializeFeatureFunctions(configFile);
		
		this.initializeStateComputers(symbolTable, JoshuaConfiguration.lmOrder, JoshuaConfiguration.ngramStateID);
		
		if (logger.isLoggable(Level.INFO))
			logger.info("Reading source language corpus from " +
				binarySourceCorpusFileName);
		Corpus sourceCorpusArray =
			new MemoryMappedCorpusArray(
				this.symbolTable, binarySourceCorpusFileName);
		
		
		if (logger.isLoggable(Level.INFO))
			logger.info("Reading source language suffix array from " +
				binarySourceSuffixesFileName);
		Suffixes sourceSuffixArray =
			new MemoryMappedSuffixArray(
					binarySourceSuffixesFileName,
					sourceCorpusArray,
					maxCacheSize);

		
		if (logger.isLoggable(Level.INFO))
			logger.info("Reading target language corpus from " +
				binaryTargetCorpusFileName);
		Corpus targetCorpusArray =
			new MemoryMappedCorpusArray(
				this.symbolTable, binaryTargetCorpusFileName);
		
		if (logger.isLoggable(Level.INFO))
			logger.info("Reading target language suffix array from " +
				binaryTargetSuffixesFileName);
		Suffixes targetSuffixArray =
			new MemoryMappedSuffixArray(
					binaryTargetSuffixesFileName,
					targetCorpusArray,
					maxCacheSize);
		
		String binaryAlignmentFileName = 
			JoshuaConfiguration.tm_file + 
			File.separator + "alignment.grids";
		if (logger.isLoggable(Level.INFO))
			logger.info("Reading alignment grid data from " +
				binaryAlignmentFileName);
		Alignments alignments =
			new MemoryMappedAlignmentGrids(
					binaryAlignmentFileName,
					sourceCorpusArray,
					targetCorpusArray);
				
		// Finally, add the parallel corpus that will serve as a grammar
		ParallelCorpusGrammarFactory parallelCorpus = new ParallelCorpusGrammarFactory(
				sourceSuffixArray,
				targetSuffixArray,
				alignments,
				this.featureFunctions,
				JoshuaConfiguration.sa_rule_sample_size,
				JoshuaConfiguration.sa_max_phrase_span,
				JoshuaConfiguration.sa_max_phrase_length,
				JoshuaConfiguration.sa_max_nonterminals,
				JoshuaConfiguration.sa_min_nonterminal_span,
				JoshuaConfiguration.sa_lex_floor_prob, 
				JoshuaConfiguration.phrase_owner, JoshuaConfiguration.default_non_terminal, JoshuaConfiguration.oovFeatureCost);
		
		return parallelCorpus;
	}
	
	private void initializeStateComputers(SymbolTable symbolTable, int nGramOrder, int ngramStateID){
		stateComputers = new ArrayList<StateComputer>();
		StateComputer ngramStateComputer = new NgramStateComputer(symbolTable, nGramOrder, ngramStateID);
		stateComputers.add(ngramStateComputer);
	}
	
	// BUG: why are we re-reading the configFile? JoshuaConfiguration should do this. (Needs: languageModel, symbolTable, (logger?); Sets: featureFunctions)
	private void initializeFeatureFunctions(String configFile)
	throws IOException {
		this.featureFunctions = new ArrayList<FeatureFunction>();
		
		LineReader reader = new LineReader(configFile);
		try { for (String line : reader) {
			line = line.trim();
			if (Regex.commentOrEmptyLine.matches(line)) 
				continue;
			
			if (line.indexOf("=") == -1) { // ignore lines with "="
				String[] fds = Regex.spaces.split(line);
				
				if ("lm".equals(fds[0]) && fds.length == 2) { // lm weight
					if (null == this.languageModel) {
						throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
					}
					double weight = Double.parseDouble(fds[1].trim());
					this.featureFunctions.add(
						new LanguageModelFF(
							JoshuaConfiguration.ngramStateID,	
							this.featureFunctions.size(),
							JoshuaConfiguration.lmOrder,
							this.symbolTable, this.languageModel, weight));
					if (logger.isLoggable(Level.FINEST))
						logger.finest(String.format(
							"Line: %s\nAdd LM, order: %d; weight: %.3f;",
							line, JoshuaConfiguration.lmOrder, weight));
					
				} else if ("oracle".equals(fds[0]) && fds.length >= 3) { //oracle files weight
					if (null == this.languageModel) {
						throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
					}					
					String[] referenceFiles = new String[fds.length-2];
					for(int i=0; i< referenceFiles.length; i++)
						referenceFiles[i] =  fds[i+1].trim();			
					double weight = Double.parseDouble(fds[fds.length-1].trim());
					
					this.featureFunctions.add(
						new BLEUOracleModel(JoshuaConfiguration.ngramStateID, JoshuaConfiguration.lmOrder, 
								this.featureFunctions.size(), this.symbolTable, weight, referenceFiles, JoshuaConfiguration.linearCorpusGainThetas));
					if (logger.isLoggable(Level.FINEST))
						logger.finest(String.format(
							"Line: %s\nAdd BLEUOracleModel, order: %d; weight: %.3f;",
							line, JoshuaConfiguration.lmOrder, weight));
					
				} else if ("discriminative".equals(fds[0]) && fds.length == 3) { //discriminative weight modelFile
					if (null == this.languageModel) {
						throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
					}
					
					String featureFile = null;//TODO???????					
					String modelFile = fds[1].trim();
					
					double weight = Double.parseDouble(fds[2].trim());
					
					this.featureFunctions.add (DiscriminativeSupport.setupRerankingFeature(this.featureFunctions.size(), weight, symbolTable, 
							JoshuaConfiguration.useTMFeat, JoshuaConfiguration.useLMFeat, JoshuaConfiguration.useEdgeNgramOnly, JoshuaConfiguration.useTMTargetFeat,
							JoshuaConfiguration.useMicroTMFeat, JoshuaConfiguration.wordMapFile,
							JoshuaConfiguration.ngramStateID, 
							JoshuaConfiguration.lmOrder, JoshuaConfiguration.startNgramOrder, JoshuaConfiguration.endNgramOrder, featureFile, modelFile, this.ruleStringToIDTable) );
					
					if (logger.isLoggable(Level.FINEST))
						logger.finest(String.format(
							"Line: %s\nAdd FeatureTemplateBasedFF, order: %d; weight: %.3f;",
							line, JoshuaConfiguration.lmOrder, weight));
					
				} else if ("latticecost".equals(fds[0]) && fds.length == 2) {
					double weight = Double.parseDouble(fds[1].trim());
					this.featureFunctions.add(
						new SourcePathFF(
							this.featureFunctions.size(), weight));
					if (logger.isLoggable(Level.FINEST))
						logger.finest(String.format(
							"Line: %s\nAdd Source lattice cost, weight: %.3f",
							line, weight));
					
				} else if ("phrasemodel".equals(fds[0]) && fds.length == 4) { // phrasemodel owner column(0-indexed) weight
					int    owner  = this.symbolTable.addTerminal(fds[1]);
					int    column = Integer.parseInt(fds[2].trim());
					double weight = Double.parseDouble(fds[3].trim());
					this.featureFunctions.add(
						new PhraseModelFF(
							this.featureFunctions.size(),
							weight, owner, column));
					if (logger.isLoggable(Level.FINEST))
						logger.finest(String.format(
							"Process Line: %s\nAdd PhraseModel, owner: %s; column: %d; weight: %.3f",
							line, owner, column, weight));
					
				} else if ("arityphrasepenalty".equals(fds[0]) && fds.length == 5) { // arityphrasepenalty owner start_arity end_arity weight
					int owner      = this.symbolTable.addTerminal(fds[1]);
					int startArity = Integer.parseInt(fds[2].trim());
					int endArity   = Integer.parseInt(fds[3].trim());
					double weight  = Double.parseDouble(fds[4].trim());
					this.featureFunctions.add(
						new ArityPhrasePenaltyFF(
							this.featureFunctions.size(),
							weight, owner, startArity, endArity));
					
					if (logger.isLoggable(Level.INFO))
						logger.finest(String.format(
							"Process Line: %s\nAdd ArityPhrasePenalty, owner: %s; startArity: %d; endArity: %d; weight: %.3f",
							line, owner, startArity, endArity, weight));
					
				} else if ("wordpenalty".equals(fds[0]) && fds.length == 2) { // wordpenalty weight
					double weight = Double.parseDouble(fds[1].trim());
					this.featureFunctions.add(
						new WordPenaltyFF(
							this.featureFunctions.size(), weight));
					if (logger.isLoggable(Level.FINEST))
						logger.finest(String.format(
							"Process Line: %s\nAdd WordPenalty, weight: %.3f",
							line, weight));
					
				} else {
					throw new IllegalArgumentException("Wrong config line: " + line);
				}
			}
		} } finally {
			reader.close();
		}
	}
	
	
//===============================================================
// Main
//===============================================================
	public static void main(String[] args) throws IOException {
		logger.finest("Starting decoder");
		
		long startTime = 0;
		if (logger.isLoggable(Level.INFO)) {
			startTime = System.currentTimeMillis();
		}
		
		if (args.length != 3 && args.length != 4) {
			System.out.println("Usage: java " +
				JoshuaDecoder.class.getName() +
				" configFile testFile outputFile (oracleFile)");
			
			System.out.println("num of args is " + args.length);
			for (int i = 0; i < args.length; i++) {
				System.out.println("arg is: " + args[i]);
			}
			System.exit(1);
		}
		String configFile = args[0].trim();
		String testFile   = args[1].trim();
		String nbestFile  = args[2].trim();
		String oracleFile = (4 == args.length ? args[3].trim() : null);
		
		
		/* Step-1: initialize the decoder, test-set independent */
		JoshuaDecoder decoder = new JoshuaDecoder(configFile);
		if (logger.isLoggable(Level.INFO)) {
			logger.info("Before translation, loading time is "
				+ ((double)(System.currentTimeMillis() - startTime) / 1000.0)
				+ " seconds");
		}
		
		
		/* Step-2: Decoding */
		decoder.decodeTestSet(testFile, nbestFile, oracleFile);
		
		
		/* Step-3: clean up */
		decoder.cleanUp();
		if (logger.isLoggable(Level.INFO)) {
			logger.info("Total running time is "
				+ ((double)(System.currentTimeMillis() - startTime) / 1000.0)
				+ " seconds");
		}
	}
}