GrammarReader.java example

Explorer
relax-decode-master
- third-party
package joshua.decoder.ff.tm;

import java.io.IOException;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;

import joshua.corpus.vocab.SymbolTable;
import joshua.util.io.LineReader;

/**
 * This is a base class for simple, ASCII line-based grammars that
 * are stored on disk.
 * <p>
 * A reader is its own iterator: {@link #iterator()} returns
 * <code>this</code>, so a given instance can be traversed only once.
 * {@link #initialize()} must be called before iterating; it opens the
 * file and pre-fetches the first line. Each call to {@link #next()}
 * hands the pre-fetched line to {@link #parseLine(String)} after
 * fetching the following one. The underlying stream is closed
 * automatically once the end of the file is reached, but callers
 * should still invoke {@link #close()} when they stop early.
 * 
 * @author Juri Ganitkevitch
 * 
 * @param <R> the type of rule produced from each line of the grammar
 */
public abstract class GrammarReader<R extends Rule>
implements Iterable<R>, Iterator<R> {

	// NOTE(review): the four fields below are static and mutable, so they
	// are shared by every GrammarReader subclass loaded in the JVM; they
	// are presumably assigned by concrete subclasses -- confirm.

	/** Not used in this class; presumably the field separator of a grammar line. */
	protected static String fieldDelimiter;
	/** Regular expression a whole word must match to count as a nonterminal. */
	protected static String nonTerminalRegEx;
	/** Regular expression whose matches are deleted from a nonterminal to clean it. */
	protected static String nonTerminalCleanRegEx;

	/** Not used in this class; presumably a human-readable format description. */
	protected static String description;

	/** Symbol table used to translate between token ids and words. */
	protected SymbolTable symbolTable;

	/** Name of the grammar file that is read. */
	protected String fileName;
	/** Open line reader, or <code>null</code> before initialization and after closing. */
	protected LineReader reader;
	/** The next line to be parsed, or <code>null</code> if there is none. */
	protected String lookAhead;

	private static final Logger logger = Logger
			.getLogger(GrammarReader.class.getName());

	/**
	 * Creates a reader that has neither a file name nor a symbol table.
	 * NOTE(review): presumably meant for subclasses that only need the
	 * formatting helpers -- confirm.
	 */
	public GrammarReader() {
		this.symbolTable = null;
		this.fileName = null;
	}

	/**
	 * Creates a reader for the given grammar file. The file is not
	 * opened until {@link #initialize()} is called.
	 *
	 * @param fileName name of the grammar file to read
	 * @param symbolTable symbol table used by the nonterminal helpers
	 */
	public GrammarReader(String fileName, SymbolTable symbolTable) {
		this.fileName = fileName;
		this.symbolTable = symbolTable;
	}

	/**
	 * Opens the grammar file and pre-fetches its first line.
	 *
	 * @throws RuntimeException if the file cannot be opened; the
	 *         causing <code>IOException</code> is attached
	 */
	public void initialize() {
		try {
			this.reader = new LineReader(fileName);
		} catch (IOException e) {
			// separate the file name from the detail text
			throw new RuntimeException(
				"Error opening translation model file: " + fileName + ": "
				+ (null != e.getMessage()
					? e.getMessage()
					: "No details available. Sorry."), e);
		}

		advanceReader();
	}

	/**
	 * Returns this reader; the reader is the iterator itself.
	 *
	 * @return <code>this</code>
	 */
	public Iterator<R> iterator() {
		return this;
	}

	/** Unsupported Iterator method. */
	public void remove() throws UnsupportedOperationException {
		throw new UnsupportedOperationException();
	}


	/**
	 * Closes the underlying stream if it is open. A failure to close
	 * is logged as a warning and not propagated. Calling this method
	 * on an already closed (or never initialized) reader does nothing.
	 */
	public void close() {
		if (null != this.reader) {
			try {
				this.reader.close();
			} catch (IOException e) {
				// log at the level that is actually being reported,
				// and keep the cause for diagnosis
				logger.log(Level.WARNING,
						"Error closing grammar file stream: "
						+ this.fileName, e);
			} finally {
				// never retry closing a stream we already tried to close
				this.reader = null;
			}
		}
	}


	/**
	 * For correct behavior <code>close</code> must be called
	 * on every GrammarReader, however this code attempts to
	 * avoid resource leaks.
	 * <p>
	 * The error is only reported when the stream is still open;
	 * a reader that was closed explicitly, or that closed itself
	 * after reaching the end of its input, is finalized silently.
	 *
	 * @see joshua.util.io.LineReader
	 */
	protected void finalize() throws Throwable {
		try {
			if (null != this.reader) {
				logger.severe("Grammar file stream was not closed, this indicates a coding error: " + this.fileName);
				this.close();
			}
		} finally {
			super.finalize();
		}
	}


	/**
	 * Tells whether another line is available.
	 *
	 * @return <code>true</code> if a pre-fetched line is waiting
	 */
	public boolean hasNext() {
		return lookAhead != null;
	}


	/**
	 * Pre-fetches the next line into <code>lookAhead</code>. When no
	 * further line can be obtained (end of file, read error, or no
	 * open reader) <code>lookAhead</code> becomes <code>null</code>
	 * and the reader is closed.
	 */
	private void advanceReader() {
		if (null == reader) {
			lookAhead = null;
			return;
		}

		try {
			lookAhead = reader.readLine();
		} catch (IOException e) {
			logger.log(Level.SEVERE,
					"Error reading grammar from file: " + fileName, e);
			// do not keep the previous line around: it would be
			// handed out again by the following call to next()
			lookAhead = null;
		}

		if (lookAhead == null) {
			this.close();
		}
	}

	/**
	 * Parses and returns the pre-fetched line, then pre-fetches the
	 * following one.
	 *
	 * @return the rule parsed from the current line
	 * @throws java.util.NoSuchElementException if no line is left, or
	 *         if the reader has not been initialized
	 */
	public R next() {
		if (null == lookAhead) {
			throw new java.util.NoSuchElementException(
					"No more lines in grammar file: " + fileName);
		}

		String line = lookAhead;
		advanceReader();
		return parseLine(line);
	}

	/**
	 * Converts one line of the grammar file into a rule.
	 *
	 * @param line a line as read from the grammar file
	 * @return the rule for that line
	 */
	protected abstract R parseLine(String line);

	// TODO: keep these around or not?
	// NOTE(review): the four methods below are string renderings of a
	// rule defined entirely by subclasses; judging by their names they
	// use words or token ids, with or without feature scores -- confirm.
	public abstract String toWords(R rule);
	public abstract String toWordsWithoutFeatureScores(R rule);

	public abstract String toTokenIds(R rule);
	public abstract String toTokenIdsWithoutFeatureScores(R rule);

	/**
	 * Cleans a nonterminal, given by token id, of any markup, e.g.,
	 * [X,1] may become [X], depending on nonTerminalCleanRegEx. The
	 * cleaned word is added to the symbol table as a nonterminal.
	 *
	 * @param tokenID id of the nonterminal to clean
	 * @return the id the symbol table returns for the cleaned nonterminal
	 */
	public int cleanNonTerminal(int tokenID) {
		return symbolTable.addNonterminal(
				cleanNonTerminal(symbolTable.getWord(tokenID)));
	}

	/**
	 * Cleans a nonterminal of any markup, e.g., [X,1] may become [X],
	 * depending on nonTerminalCleanRegEx.
	 *
	 * @param word the nonterminal to clean
	 * @return the word with every match of nonTerminalCleanRegEx removed
	 */
	public String cleanNonTerminal(String word) {
		return word.replaceAll(nonTerminalCleanRegEx, "");
	}

	/**
	 * Checks whether the entire word matches nonTerminalRegEx.
	 *
	 * @param word the word to test
	 * @return <code>true</code> if the word is a nonterminal
	 */
	public static boolean isNonTerminal(final String word) {
		return word.matches(nonTerminalRegEx);
	}

	/**
	 * @return the regular expression used to recognize nonterminals
	 */
	public String getNonTerminalRegEx() {
		return nonTerminalRegEx;
	}

	/**
	 * @return the regular expression used to clean nonterminals
	 */
	public String getNonTerminalCleanRegEx() {
		return nonTerminalCleanRegEx;
	}

}