package org.basex.util.ft;
import java.util.Collections;
import java.util.LinkedList;
import org.basex.util.Util;
/**
 * Base class for stemmers: wraps a full-text iterator and applies
 * {@link #stem(byte[])} to every token or span the iterator produces.
 * It also keeps the static registry of stemmer implementations.
 *
 * @author BaseX Team 2005-12, BSD License
 * @author Dimitar Popov
 */
public abstract class Stemmer extends LanguageImpl {
  /** Registry of the stemmer implementations that could be loaded. */
  static final LinkedList<Stemmer> IMPL = new LinkedList<Stemmer>();

  /*
   * Fills the registry: the built-in stemmers are always added, the optional
   * ones only if they report themselves as available. Afterwards the list is
   * sorted by the natural ordering of its elements (precedence).
   */
  static {
    // stemmers that are always present
    IMPL.add(new EnglishStemmer(null));
    IMPL.add(new GermanStemmer(null));
    IMPL.add(new DummyStemmer(null));

    // optional stemmers
    if(SnowballStemmer.available()) {
      IMPL.add(new SnowballStemmer());
    }
    if(LuceneStemmer.available()) {
      IMPL.add(new LuceneStemmer());
    }
    if(WordnetStemmer.available()) {
      IMPL.add(new WordnetStemmer());
    }

    // bring the registry into precedence order
    Collections.sort(IMPL);
  }

  /** Iterator that delivers the tokens to be stemmed; may be {@code null}. */
  private final FTIterator iter;

  /**
   * Creates a stemmer without a full-text iterator. The iteration methods
   * dereference the iterator without a check, so they must not be called
   * on an instance created this way.
   */
  Stemmer() {
    this(null);
  }

  /**
   * Creates a stemmer that reads its input from the given iterator.
   * @param ft full-text iterator (stored as is, may be {@code null})
   */
  Stemmer(final FTIterator ft) {
    iter = ft;
  }

  /**
   * Tells whether at least one registered stemmer supports a language.
   * @param l language to look for
   * @return {@code true} if a registered stemmer supports the language
   */
  public static boolean supportFor(final Language l) {
    boolean found = false;
    for(final Stemmer stemmer : IMPL) {
      if(stemmer.supports(l)) {
        found = true;
        break;
      }
    }
    return found;
  }

  /**
   * Factory method to be provided by each implementation.
   * @param l language
   * @param fti full-text iterator
   * @return stemmer
   */
  abstract Stemmer get(final Language l, final FTIterator fti);

  /**
   * Reduces a single word to its stem.
   * @param word word to be stemmed
   * @return stem of the specified word
   */
  protected abstract byte[] stem(final byte[] word);

  /**
   * Initializes the wrapped iterator with the specified text.
   * @param txt text to be passed on to the iterator
   * @return self reference
   */
  @Override
  public final Stemmer init(final byte[] txt) {
    iter.init(txt);
    return this;
  }

  /**
   * Delegates to the wrapped iterator.
   * @return result of the iterator's {@code hasNext()} call
   */
  @Override
  public final boolean hasNext() {
    return iter.hasNext();
  }

  /**
   * Fetches the next span from the wrapped iterator and replaces its text
   * with the stemmed text. The span object itself is modified and returned.
   * @return span with stemmed text
   */
  @Override
  public final FTSpan next() {
    final FTSpan span = iter.next();
    final byte[] stemmed = stem(span.text);
    span.text = stemmed;
    return span;
  }

  /**
   * Fetches the next token from the wrapped iterator and stems it.
   * @return stemmed token
   */
  @Override
  public final byte[] nextToken() {
    final byte[] token = iter.nextToken();
    return stem(token);
  }

  /**
   * Returns the name delivered by {@code Util.name(this)} with all
   * occurrences of {@code "Stemmer"} removed.
   * @return shortened name
   */
  @Override
  public String toString() {
    return Util.name(this).replace("Stemmer", "");
  }
}