package org.basex.util.ft;
import static org.basex.util.Token.*;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.URL;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import org.basex.util.Reflect;
/**
* Stemmer implementation using the WordNet stemmer.
* The WordNet stemmer is developed by George A. Miller and is based on
* the WordNet 3.0 License: {@code http://wordnet.princeton.edu/}.
*
* @author BaseX Team 2005-12, BSD License
* @author Dimitar Popov
*/
final class WordnetStemmer extends Stemmer {
/** Name of the package of the WordNet stemmer. */
private static final String PATTERN = "edu.mit.jwi.%";
/** Path to the WordNet dictionary files. */
private static final String PATH = "etc/wndict";
/** WordnetStemmer class. */
private static final Constructor<?> CTR;
/** WordnetStemmer.findStems method. */
private static final Method FIND_STEMS;
/** WordNet dictionary instance. */
private static final Object DICT;
static {
// don't try to find the other classes if Dictionary is not found:
if(Reflect.available(PATTERN, "Dictionary")) {
FIND_STEMS = null;
CTR = null;
DICT = null;
} else {
final Class<?> dict = Reflect.find(PATTERN, "Dictionary");
final Class<?> wn = Reflect.find(PATTERN, "morph.WordnetStemmer");
CTR = Reflect.find(wn, Reflect.find(PATTERN, "IDictionary"));
FIND_STEMS = Reflect.method(wn, "findStems", String.class);
DICT = newDict(dict);
}
}
/**
* Create new instance of the WordNet dictionary.
* @param dct dictionary class
* @return new instance of the WordNet dictionary
*/
private static Object newDict(final Class<?> dct) {
try {
final Constructor<?> ctr = Reflect.find(dct, URL.class);
final Object dict = Reflect.get(ctr, new URL("file", null, PATH));
return Reflect.invoke(Reflect.method(dct, "open"), dict);
} catch(final Exception ex) {
return null;
}
}
/**
* Checks if the library is available.
* @return result of check
*/
static boolean available() {
return DICT != null;
}
/** Instance of WordNet stemmer. */
private Object stemmer;
/** Empty constructor. */
WordnetStemmer() {
}
/**
* Constructs a WordNet stemmer. Call {@link #available()} first to
* check if the library is available.
* @param fti full-text iterator
*/
private WordnetStemmer(final FTIterator fti) {
super(fti);
stemmer = Reflect.get(CTR, DICT);
}
@Override
Stemmer get(final Language l, final FTIterator fti) {
return new WordnetStemmer(fti);
}
@Override
public boolean supports(final Language lang) {
return lang.equals(Language.get("en"));
}
@Override
protected byte prec() {
return 30;
}
@Override
Collection<Language> languages() {
final HashSet<Language> ln = new HashSet<Language>();
ln.add(Language.get("en"));
return ln;
}
@Override
protected byte[] stem(final byte[] word) {
@SuppressWarnings("unchecked")
final List<String> l = (List<String>)
Reflect.invoke(FIND_STEMS, stemmer, string(word));
final byte[] result = l.isEmpty() ? word : token(l.get(0));
return result.length == 0 ? word : result;
}
}