/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.linguist;
import static java.lang.Math.min;
import java.util.*;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Word;
/**
 * This class can be used to keep track of a word sequence. This class is an
 * immutable class. It can never be modified once it is created (except,
 * perhaps for transient, cached things such as a precalculated hashcode).
 *
 * <p>Words are stored oldest first: index 0 holds the oldest word and the
 * last index holds the most recently added word.
 */
public final class WordSequence implements Comparable<WordSequence> {

    /**
     * Comparator that compares two sequences by their oldest part, that is,
     * by the result of {@link #getOldest()} (every word but the newest one).
     */
    public final static Comparator<WordSequence> OLDEST_COMPARATOR =
            new Comparator<WordSequence>() {
                public int compare(WordSequence o1, WordSequence o2) {
                    return o1.getOldest().compareTo(o2.getOldest());
                }
            };

    /** an empty word sequence, that is, it has no words. */
    public final static WordSequence EMPTY = new WordSequence(0);

    /**
     * Builds a word sequence by looking up each spelling in the dictionary.
     *
     * @param dictionary the dictionary used to resolve each spelling
     * @param words the spellings, oldest first
     * @return a word sequence holding the looked-up words in the same order
     * @throws Error if the dictionary lookup yields <code>null</code> for
     *         any of the spellings
     */
    public static WordSequence asWordSequence(final Dictionary dictionary,
            String... words) {
        Word[] dictWords = new Word[words.length];
        for (int i = 0; i < words.length; i++) {
            dictWords[i] = dictionary.getWord(words[i]);
        }
        return new WordSequence(dictWords);
    }

    /** The words of this sequence, oldest first; never exposed directly. */
    private final Word[] words;

    /**
     * Cached hash code; -1 means "not computed yet". A sequence whose hash
     * really is -1 is simply recomputed on every call.
     */
    private transient int hashCode = -1;

    /**
     * Constructs a word sequence with the given depth. The slots are left
     * <code>null</code>; the caller is expected to fill every one of them
     * before the instance is handed out.
     *
     * @param size the maximum depth of the word history
     */
    private WordSequence(int size) {
        words = new Word[size];
    }

    /**
     * Constructs a word sequence from the given words. The array is copied.
     *
     * @param words the words of the word sequence, oldest first
     * @throws Error if any of the words is <code>null</code>
     */
    public WordSequence(Word... words) {
        this(Arrays.asList(words));
    }

    /**
     * Constructs a word sequence from the list of words. The list content is
     * copied.
     *
     * @param list the list of words, oldest first
     * @throws Error if any of the words is <code>null</code>
     */
    public WordSequence(List<Word> list) {
        this.words = list.toArray(new Word[list.size()]);
        check();
    }

    /**
     * Verifies that no slot of this sequence is <code>null</code>.
     *
     * @throws Error if a <code>null</code> word is found
     */
    private void check() {
        for (Word word : words) {
            if (word == null) {
                throw new Error("WordSequence should not have null Words.");
            }
        }
    }

    /**
     * Returns a new word sequence with the given word added to the sequence
     *
     * @param word the word to add to the sequence
     * @param maxSize the maximum size of the generated sequence
     * @return a new word sequence with the word added (but trimmed to
     *         maxSize, dropping the oldest words first), or {@link #EMPTY}
     *         if maxSize is not positive
     * @throws Error if word is <code>null</code> and maxSize is positive
     */
    public WordSequence addWord(Word word, int maxSize) {
        if (maxSize <= 0) {
            return EMPTY;
        }
        int nextSize = min(size() + 1, maxSize);
        WordSequence next = new WordSequence(nextSize);

        // Carry over the newest (nextSize - 1) words of this sequence, then
        // put the new word in the last (newest) slot.
        int kept = nextSize - 1;
        System.arraycopy(this.words, size() - kept, next.words, 0, kept);
        next.words[kept] = word;

        next.check();
        return next;
    }

    /**
     * Returns the oldest words in the sequence (the newest word is omitted)
     *
     * @return the oldest words in the sequence, with the newest word
     *         omitted; {@link #EMPTY} if this sequence has no words
     */
    public WordSequence getOldest() {
        if (size() == 0) {
            return EMPTY;
        }
        WordSequence next = new WordSequence(words.length - 1);
        System.arraycopy(this.words, 0, next.words, 0, next.words.length);
        return next;
    }

    /**
     * Returns the newest words in the sequence (the old word is omitted)
     *
     * @return the newest words in the sequence with the oldest word
     *         omitted; {@link #EMPTY} if this sequence has no words
     */
    public WordSequence getNewest() {
        if (size() == 0) {
            return EMPTY;
        }
        WordSequence next = new WordSequence(words.length - 1);
        System.arraycopy(this.words, 1, next.words, 0, next.words.length);
        return next;
    }

    /**
     * Returns a word sequence that is no longer than the given size, that is
     * filled in with the newest words from this sequence
     *
     * @param maxSize the maximum size of the sequence
     * @return a word sequence trimmed to maxSize: {@link #EMPTY} if maxSize
     *         is not positive or this sequence is empty, this very instance
     *         if it already fits, otherwise a new sequence holding the
     *         newest maxSize words
     */
    public WordSequence trim(int maxSize) {
        if (maxSize <= 0 || size() == 0) {
            return EMPTY;
        }
        if (maxSize >= size()) {
            // Nothing to cut off; instances are immutable, so sharing is safe.
            return this;
        }
        WordSequence next = new WordSequence(maxSize);
        System.arraycopy(this.words, size() - maxSize, next.words, 0, maxSize);
        return next;
    }

    /**
     * Returns the n-th word in this sequence
     *
     * @param n which word to return (0 is the oldest word)
     * @return the n-th word in this sequence
     */
    public Word getWord(int n) {
        assert n < words.length;
        return words[n];
    }

    /**
     * Returns the number of words in this sequence
     *
     * @return the number of words
     */
    public int size() {
        return words.length;
    }

    /**
     * Returns a string representation of this word sequence. Each word is
     * rendered through its own string conversion and wrapped in square
     * brackets, oldest first: [word_0][word_1][word_2].
     *
     * @return the string
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Word word : words) {
            sb.append('[').append(word).append(']');
        }
        return sb.toString();
    }

    /**
     * Calculates the hashcode for this object. The value is computed on
     * first use and cached afterwards.
     *
     * @return a hashcode for this object
     */
    @Override
    public int hashCode() {
        if (hashCode == -1) {
            int code = 123;
            for (int i = 0; i < words.length; i++) {
                // Weight by position so that reordered sequences differ.
                code += words[i].hashCode() * (2 * i + 1);
            }
            hashCode = code;
        }
        return hashCode;
    }

    /**
     * compares the given object to see if it is identical to this
     * WordSequence
     *
     * @param object the object to compare this to
     * @return true if the given object is a WordSequence holding equal
     *         words in the same order
     */
    @Override
    public boolean equals(Object object) {
        if (this == object) {
            return true;
        }
        if (!(object instanceof WordSequence)) {
            return false;
        }
        return Arrays.equals(words, ((WordSequence) object).words);
    }

    /**
     * Returns the words from <code>startIndex</code> up to, but not
     * including, <code>stopIndex</code> as a new sequence.
     *
     * @param startIndex start index (inclusive)
     * @param stopIndex stop index (exclusive)
     * @return a subsequence starting at <code>startIndex</code> (inclusive)
     *         and ending at <code>stopIndex</code> (exclusive); it is empty
     *         when <code>startIndex</code> is not below
     *         <code>stopIndex</code>
     */
    public WordSequence getSubSequence(int startIndex, int stopIndex) {
        List<Word> subseqWords = new ArrayList<Word>();
        for (int i = startIndex; i < stopIndex; i++) {
            subseqWords.add(getWord(i));
        }
        return new WordSequence(subseqWords);
    }

    /**
     * @return a fresh copy of the words of the <code>WordSequence</code>,
     *         oldest first. Changing the returned array does not affect
     *         this sequence.
     */
    public Word[] getWords() {
        // Hand out a copy to keep the class immutable.
        return words.clone();
    }

    /**
     * Compares two sequences word by word, starting with the oldest word.
     * The first pair of words that are not equal decides the result; if one
     * sequence is a prefix of the other, the shorter one sorts first.
     *
     * @param other the sequence to compare this one to
     * @return a negative value, zero, or a positive value as this sequence
     *         sorts before, the same as, or after the other one
     */
    public int compareTo(WordSequence other) {
        int n = min(words.length, other.words.length);
        for (int i = 0; i < n; ++i) {
            if (!words[i].equals(other.words[i])) {
                return words[i].compareTo(other.words[i]);
            }
        }
        // Array lengths are never negative, so this cannot overflow.
        return words.length - other.words.length;
    }
}