package com.transmem.nlp;
import java.util.Hashtable;
import java.util.Locale;
/**
* A stemmer for English words.
*
* @author Ted Wen
* @version 0.1
* @update 15/Jan/2007
*/
public class EnglishStemmer implements IStemmer
{
    /** When true, {@link #stem(String)} consults the irregular-word table first. Off by default. */
    private boolean useIrregulars_;

    /** Maps an irregular word form (lower case) to its stem; filled once at construction. */
    private final Hashtable<String,String> irregulars;

    /**
     * Creates a stemmer with the irregular-word table loaded but not enabled;
     * call {@link #useIrregularTable(boolean)} to turn it on.
     */
    public EnglishStemmer()
    {
        this.irregulars = loadIrregulars();
    }

    /**
     * Always returns true for EnglishStemmer.
     *
     * @return {@code true}
     */
    public boolean applies()
    {
        return true;
    }

    /**
     * Sets or resets the switch that enables the irregular-word table.
     *
     * @param set {@code true} to look words up in the irregular table before
     *            stemming, {@code false} to skip the table
     */
    public void useIrregularTable(boolean set)
    {
        this.useIrregulars_ = set;
    }

    /**
     * Makes the stem of a word. The word is lower-cased first. If the irregular
     * table is enabled and contains the word, the stem stored in the table is
     * returned; otherwise the result of {@code PorterStemmer.stem} is returned.
     *
     * @param word the word to stem; may be null
     * @return the stem, or {@code ""} if {@code word} is null or empty
     */
    public String stem(String word)
    {
        if (word == null || word.length() == 0)
        {
            return "";
        }
        // Locale.ROOT keeps case folding independent of the default locale
        // (e.g. under a Turkish locale "I" would otherwise become a dotless i).
        String lower = word.toLowerCase(Locale.ROOT);
        if (this.useIrregulars_)
        {
            String irregularStem = this.irregulars.get(lower);
            if (irregularStem != null)
            {
                return irregularStem;
            }
        }
        return PorterStemmer.stem(lower);
    }

    /**
     * Builds the irregular-word table for fast lookup.
     *
     * @return a table mapping each irregular form to its stem
     */
    private static Hashtable<String,String> loadIrregulars()
    {
        // Flat list of (irregular form, stem) pairs.
        String[] irregularWords = {
            "took","take",
            "taken","take",
        };
        int pairCount = irregularWords.length / 2;
        Hashtable<String,String> table = new Hashtable<String,String>(pairCount);
        // Step over the array two elements at a time; the bound is the array
        // length (not the pair count) so that every pair is inserted.
        for (int i = 0; i + 1 < irregularWords.length; i += 2)
        {
            table.put(irregularWords[i], irregularWords[i + 1]);
        }
        return table;
    }
}