/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.corpus;
import java.util.*;
import joshua.corpus.vocab.SymbolTable;
/**
* ContiguousPhrase implements the Phrase interface by linking into
* indices within a corpus. This is intended to be a very low-memory
* implementation of the class.
*
* @author Chris Callison-Burch
* @since 29 May 2008
* @version $LastChangedDate:2008-09-18 12:47:23 -0500 (Thu, 18 Sep 2008) $
*/
public class ContiguousPhrase extends AbstractPhrase {
//===============================================================
// Constants
//===============================================================
//===============================================================
// Member variables
//===============================================================
protected int startIndex;
protected int endIndex;
protected Corpus corpusArray;
//===============================================================
// Constructor(s)
//===============================================================
public ContiguousPhrase(int startIndex, int endIndex, Corpus corpusArray) {
this.startIndex = startIndex;
this.endIndex = endIndex;
this.corpusArray = corpusArray;
}
//===============================================================
// Public
//===============================================================
//===========================================================
// Accessor methods (set/get)
//===========================================================
/**
* @return the vocabulary that the words in this phrase are
* drawn from.
*/
public SymbolTable getVocab() {
return corpusArray.getVocabulary();
}
/**
* This method copies the phrase into an array of ints.
* This method should be avoided if possible.
*
* @return an int[] corresponding to the ID of each word
* in the phrase
*/
public int[] getWordIDs() {
int[] words = new int[endIndex-startIndex];
for (int i = startIndex; i < endIndex; i++) {
words[i-startIndex] = corpusArray.getWordID(i); //corpusArray.corpus[i];
}
return words;
}
public int getWordID(int position) {
return corpusArray.getWordID(startIndex+position);
// return corpusArray.corpus[startIndex+position];
}
public int size() {
return endIndex-startIndex;
}
//===========================================================
// Methods
//===========================================================
/**
* Gets all possible subphrases of this phrase, up to and
* including the phrase itself. For example, the phrase "I
* like cheese ." would return the following:
* <ul>
* <li>I
* <li>like
* <li>cheese
* <li>.
* <li>I like
* <li>like cheese
* <li>cheese .
* <li>I like cheese
* <li>like cheese .
* <li>I like cheese .
* </ul>
*
* @return ArrayList of all possible subphrases.
*/
public List<Phrase> getSubPhrases() {
return getSubPhrases(size());
}
/**
* Returns a list of subphrases only of length
* <code>maxLength</code> or smaller.
*
* @param maxLength the maximum length phrase to return.
* @return ArrayList of all possible subphrases of length
* maxLength or less
* @see #getSubPhrases()
*/
public List<Phrase> getSubPhrases(int maxLength) {
if (maxLength > size()) return getSubPhrases(size());
List<Phrase> phrases=new ArrayList<Phrase>();
for (int i = 0; i < size(); i++) {
for (int j=i+1; (j <= size()) && (j-i <= maxLength); j++) {
Phrase subPhrase = subPhrase(i,j);
phrases.add(subPhrase);
}
}
return phrases;
}
/**
* creates a new phrase object from the indexes provided.
* <P>
* NOTE: subList merely creates a "view" of the existing
* Phrase object. Memory taken up by other Words in the
* Phrase is not freed since the underlying subList object
* still points to the complete Phrase List.
*
* @see ArrayList#subList(int, int)
*/
public Phrase subPhrase(int start, int end) {
return new ContiguousPhrase(startIndex+start, startIndex+end, corpusArray);
}
//===============================================================
// Protected
//===============================================================
//===============================================================
// Methods
//===============================================================
//===============================================================
// Private
//===============================================================
//===============================================================
// Methods
//===============================================================
//===============================================================
// Static
//===============================================================
//===============================================================
// Main
//===============================================================
/**
* Main contains test code
*/
public static void main(String[] args) {
}
}