AbstractPhrase.java example

Explorer
relax-decode-master
- third-party
/* This file is part of the Joshua Machine Translation System.
 * 
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.corpus;

import joshua.corpus.vocab.SymbolTable;


/**
 * This class provides a skeletal implementation of the base methods
 * likely to be common to most or all implementations of the
 * <code>Phrase</code> interface.
 * 
 * @author Lane Schwartz
 * @author Chris Callison-Burch
 */
public abstract class AbstractPhrase implements Phrase {

	//===============================================================
	// Constants
	//===============================================================

	/** Seed used in hash code generation. */
	public static final int HASH_SEED = 17;

	/** Multiplier applied to the running hash in hash code generation. */
	public static final int HASH_OFFSET = 37;
	
	/**
	 * Splits a sentence (on white space), then looks up the
	 * integer representations of each word using the supplied
	 * symbol table.
	 * <p>
	 * Each token is passed to
	 * <code>vocabulary.addTerminal</code>, and the value it
	 * returns is stored as that token's ID. Leading white space
	 * is ignored, so an empty or all-white-space sentence yields
	 * an empty array rather than an entry for the empty string.
	 * 
	 * @param sentence   White-space separated String of words.
	 * @param vocabulary Symbol table for mapping tokens to
	 *                   integers.
	 * @return Array of integers corresponding to the words in
	 *         the sentence.
	 */
	protected int[] splitSentence(String sentence, SymbolTable vocabulary) {
		String[] tokens = sentence.split("\\s+");
		
		// String.split keeps a single empty token at index 0 when
		// the input is empty or starts with white space (trailing
		// empty tokens are already dropped). Skip it so that the
		// empty string is never registered as a word.
		int start = (tokens.length > 0 && tokens[0].length() == 0) ? 1 : 0;
		
		int[] words = new int[tokens.length - start];
		for (int i = 0; i < words.length; i++) {
			words[i] = vocabulary.addTerminal(tokens[start + i]);
		}
		return words;
	}
	
	/**
	 * Uses the standard java approach of calculating hashCode.
	 * Start with a seed, then for every word ID multiply the
	 * existing hash by an offset and add the ID.
	 * <p>
	 * Only the word IDs contribute, so phrases that are equal
	 * according to {@link #equals(Object)} have equal hash codes.
	 *
	 * @return int hashCode for the list
	 */
	@Override
	public int hashCode() {
		int result = HASH_SEED;
		for (int i = 0; i < size(); i++) {
			// int overflow simply wraps around, which is fine for hashing
			result = HASH_OFFSET * result + getWordID(i);
		}
		return result;
	}
	

	/**
	 * Two phrases are equal if they have the same length and
	 * their word IDs are the same at every position. Note that
	 * this could give a false positive if their Vocabularies
	 * were different but their IDs were somehow the same.
	 *
	 * @param o the object to compare to
	 * @return <code>true</code> if <code>o</code> is a
	 *         <code>Phrase</code> with the same sequence of
	 *         word IDs, <code>false</code> otherwise
	 */
	@Override
	public boolean equals(Object o) {
		
		if (o == this) return true;
		if (!(o instanceof Phrase)) return false;
		
		Phrase other = (Phrase) o;
		
		if (this.size() != other.size()) return false;
		for (int i = 0; i < size(); i++) {
			if (this.getWordID(i) != other.getWordID(i)) return false;
		}
		return true;
	}
	
	
	/**
	 * Compares the two phrases position by position using the
	 * numeric order of their word IDs. If one phrase is a
	 * prefix of the other, the shorter phrase sorts first.
	 * <p>
	 * The IDs are compared directly rather than subtracted, so
	 * the result cannot be corrupted by integer overflow.
	 *
	 * @param other the phrase to compare to
	 * @return -1 if this object is less than the parameter, 0
	 *         if equals, 1 if greater
	 * @exception NullPointerException if <code>other</code> is
	 *                                 <code>null</code>
	 */
	public int compareTo(Phrase other) {
		int length = size();
		int otherLength = other.size();
		int shared = (length < otherLength) ? length : otherLength;
		
		for (int i = 0; i < shared; i++) {
			int mine = getWordID(i);
			int theirs = other.getWordID(i);
			if (mine != theirs) {
				return (mine < theirs) ? -1 : 1;
			}
		}
		
		// All shared positions match, so the shorter phrase comes first.
		if (length == otherLength) return 0;
		return (length < otherLength) ? -1 : 1;
	}
	
	
	
	/**
	 * Returns a string representation of the phrase, obtained
	 * by looking up each word ID in the symbol table returned
	 * by <code>getVocab()</code>.
	 *
	 * @return a space-delimited string of the words in the
	 *         phrase.
	 */
	@Override
	public String toString() {
		SymbolTable vocab = getVocab();
		// The buffer never leaves this method, so the unsynchronized
		// StringBuilder is sufficient.
		StringBuilder buf = new StringBuilder();
		for (int i = 0; i < size(); i++) {
			if (i != 0) buf.append(' ');
			buf.append(vocab.getWord(getWordID(i)));
		}
		return buf.toString();
	}
	
}