/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.corpus; import joshua.corpus.vocab.SymbolTable; /** * This class provides a skeletal implementation of the base methods * likely to be common to most or all implementations of the * <code>Phrase</code> interface. * * @author Lane Schwartz * @author Chris Callison-Burch */ public abstract class AbstractPhrase implements Phrase { //=============================================================== // Constants //=============================================================== /** seed used in hash code generation */ public static final int HASH_SEED = 17; /** offset used in has code generation */ public static final int HASH_OFFSET = 37; /** * Splits a sentence (on white space), then looks up the * integer representations of each word using the supplied * symbol table. * * @param sentence White-space separated String of words. * @param vocabulary Symbol table for mapping tokens to * integers. * @return Array of integers corresponding to the words in * the sentence. */ protected int[] splitSentence(String sentence, SymbolTable vocabulary) { String[] w = sentence.split("\\s+"); int[] words = new int[w.length]; for (int i = 0; i < w.length; i++) words[i] = vocabulary.addTerminal(w[i]); return words; } /** * Uses the standard java approach of calculating hashCode. * Start with a seed, add in every value multiplying the * exsiting hash times an offset. * * @return int hashCode for the list */ public int hashCode() { int result = HASH_SEED; for (int i=0; i < size(); i++) { result = HASH_OFFSET*result + getWordID(i); } return result; } /** * Two phrases are their word IDs are the same. Note that * this could give a false positive if their Vocabularies * were different but their IDs were somehow the same. */ public boolean equals(Object o) { if (o instanceof Phrase) { Phrase other = (Phrase) o; if(this.size() != other.size()) return false; for (int i=0; i < size(); i++) { if(this.getWordID(i) != other.getWordID(i)) return false; } return true; } else { return false; } } /** * Compares the two strings based on the lexicographic order * of words defined in the Vocabulary. * * @param other the object to compare to * @return -1 if this object is less than the parameter, 0 * if equals, 1 if greater * @exception ClassCastException if the passed object is * not of type Phrase */ public int compareTo(Phrase other) { int length = size(); int otherLength = other.size(); for (int i = 0; i < length; i++) { if (i < otherLength) { int difference = getWordID(i) - other.getWordID(i); if (difference != 0) return difference; } else { //same but other is shorter, so we are after return 1; } } if (length < otherLength) { return -1; } else { return 0; } } /** * Returns a string representation of the phrase. * * @return a space-delimited string of the words in the * phrase. */ public String toString() { SymbolTable vocab = getVocab(); StringBuffer buf = new StringBuffer(); for (int i=0; i<size(); i++) { String word = vocab.getWord(getWordID(i)); if (i != 0) buf.append(' '); buf.append(word); } return buf.toString(); } }