package edu.berkeley.nlp.lm;
import edu.berkeley.nlp.lm.collections.Indexer;
/**
* Implementation of a WordIndexer in which words are represented as strings.
*
* @author adampauls
*
*/
public class StringWordIndexer implements WordIndexer<String>
{
/**
*
*/
private static final long serialVersionUID = 1L;
private final Indexer<String> sparseIndexer;
private String startSymbol;
private String endSymbol;
private String unkSymbol;
private int unkIndex = -1;
public StringWordIndexer() {
sparseIndexer = new Indexer<String>();
}
@Override
public int getOrAddIndex(final String word) {
return sparseIndexer.getIndex(word);
}
@Override
public String getWord(final int index) {
return sparseIndexer.getObject(index);
}
@Override
public int numWords() {
return sparseIndexer.size();
}
@Override
public String getStartSymbol() {
return startSymbol;
}
@Override
public String getEndSymbol() {
return endSymbol;
}
@Override
public String getUnkSymbol() {
return unkSymbol;
}
@Override
public int getOrAddIndexFromString(final String word) {
return getOrAddIndex(word);
}
@Override
public void setStartSymbol(final String sym) {
startSymbol = sym;
sparseIndexer.add(sym);
}
@Override
public void setEndSymbol(final String sym) {
endSymbol = sym;
sparseIndexer.add(sym);
}
@Override
public void setUnkSymbol(final String sym) {
unkSymbol = sym;
unkIndex = sparseIndexer.getIndex(sym);
}
@Override
public void trimAndLock() {
sparseIndexer.trim();
sparseIndexer.lock();
}
@Override
public int getIndexPossiblyUnk(final String word) {
final int id = sparseIndexer.indexOf(word);
return id < 0 ? unkIndex : id;
}
}