package org.basex.util.ft;
import static org.basex.util.Token.*;
import java.lang.reflect.Method;
import java.util.Collection;
import java.util.HashMap;
import org.basex.util.Reflect;
import org.basex.util.Util;
/**
* Stemmer implementation using the Lucene stemmer contributions.
* The Lucene stemmers are licensed under the Apache License:
* {@code http://lucene.apache.org/}.
*
* @author BaseX Team 2005-12, BSD License
* @author Christian Gruen
*/
final class LuceneStemmer extends Stemmer {
  /**
   * Name pattern of the Lucene stemmer classes. The {@code %} placeholder is
   * completed with a language-specific part such as {@code de.German} when the
   * pattern is handed to {@link Reflect}.
   */
  private static final String PATTERN = "org.apache.lucene.analysis.%Stemmer";
  /** Stemmer classes which the Lucene library provides, keyed by language. */
  private static final HashMap<Language, StemmerClass> CLASSES =
    new HashMap<Language, StemmerClass>();

  /** Stemmer class corresponding to the required properties. */
  private StemmerClass clazz;
  /** Stemmer instance. */
  private Object stemmer;

  static {
    // only try to register stemmers if the German stemmer is reported as
    // available; otherwise, the list of stemmer classes stays empty
    if(Reflect.available(PATTERN, "de.German")) {
      // languages for which the class name cannot be derived from the language
      add(Language.get("cs"), "cz.Czech");
      add(Language.get("es"), "es.SpanishLight");
      add(Language.get("fi"), "fi.FinnishLight");
      add(Language.get("hu"), "hu.HungarianLight");
      add(Language.get("it"), "it.ItalianLight");
      add(Language.get("pt"), "br.Brazilian");
      add(Language.get("sv"), "sv.SwedishLight");
      // languages for which the class name follows the "code.Name" scheme
      add("ar", "bg", "de", "fr", "hi", "lv", "nl", "ru");
    }
  }

  /**
   * Checks if stemmer classes are available for the specified language codes,
   * and adds them to the list of stemmers. The class name of each stemmer is
   * built from the language code, a dot and the string representation of the
   * language. Codes for which no language is found are skipped.
   * @param lang language codes
   */
  private static void add(final String... lang) {
    for(final String ln : lang) {
      final Language l = Language.get(ln);
      if(l != null) add(l, l.code() + '.' + l);
    }
  }

  /**
   * Checks if a stemmer class is available, and adds it to the list of
   * stemmers. A {@code stem(String)} method is preferred; if it does not
   * exist, {@code stem(char[], int)} is used instead. A debug message is
   * written if neither the class nor one of the methods is found.
   * @param lang language
   * @param name language-specific part of the class name
   */
  private static void add(final Language lang, final String name) {
    final Class<?> clz = Reflect.find(PATTERN, name);
    if(clz == null) {
      Util.debug("Could not initialize \"%\" Lucene stemmer class.", lang);
      return;
    }

    // first choice: string-based method; fallback: character array method
    Method m = Reflect.method(clz, "stem", String.class);
    final boolean ch = m == null;
    if(ch) m = Reflect.method(clz, "stem", char[].class, int.class);
    if(m == null) {
      Util.debug("Could not initialize \"%\" Lucene stemmer method.", lang);
      return;
    }
    CLASSES.put(lang, new StemmerClass(clz, m, ch));
  }

  /**
   * Checks if the library is available, i.e., if at least one stemmer class
   * has been registered.
   * @return result of check
   */
  static boolean available() {
    return !CLASSES.isEmpty();
  }

  /** Empty constructor. */
  LuceneStemmer() {
  }

  /**
   * Constructs a stemmer instance. Call {@link #available()} first to
   * check if the library is available. The language must be one of the
   * registered languages (see {@link #supports(Language)}); otherwise, no
   * stemmer class is found and the constructor fails.
   * @param lang language of the text to stem
   * @param fti full-text iterator
   */
  private LuceneStemmer(final Language lang, final FTIterator fti) {
    super(fti);
    clazz = CLASSES.get(lang);
    stemmer = Reflect.get(clazz.clz);
  }

  @Override
  Collection<Language> languages() {
    return CLASSES.keySet();
  }

  @Override
  Stemmer get(final Language l, final FTIterator fti) {
    return new LuceneStemmer(l, fti);
  }

  @Override
  public boolean supports(final Language lang) {
    return CLASSES.containsKey(lang);
  }

  @Override
  protected byte prec() {
    return 5;
  }

  /**
   * Stems the specified word by reflectively invoking the Lucene stemmer.
   * If the invocation yields no result, the word is returned unchanged.
   * @param word word to be stemmed
   * @return stemmed word, or the original word if no result was obtained
   */
  @Override
  protected byte[] stem(final byte[] word) {
    final String input = string(word);
    final String result;
    if(clazz.chars) {
      // stem(char[], int) modifies the array and returns the new length
      final char[] ch = input.toCharArray();
      final Object len = Reflect.invoke(clazz.stem, stemmer, ch, ch.length);
      // the invocation may yield null (the string-based variant is checked for
      // this as well): do not unbox blindly, but keep the original word
      if(!(len instanceof Integer)) return word;
      result = new String(ch, 0, (Integer) len);
    } else {
      result = (String) Reflect.invoke(clazz.stem, stemmer, input);
    }
    return result == null ? word : token(result);
  }

  /** Structure, containing the stemmer class and its stemming method. */
  private static final class StemmerClass {
    /** Class implementing the stemmer. */
    final Class<?> clz;
    /** Method {@code stem}. */
    final Method stem;
    /** Indicates if stemming is done via a character array instead of a string. */
    final boolean chars;

    /**
     * Constructor. Makes the stemming method accessible for reflective calls.
     * @param sc class implementing the stemmer
     * @param stm method {@code stem}
     * @param ch indicator for stemming via character array
     */
    StemmerClass(final Class<?> sc, final Method stm, final boolean ch) {
      clz = sc;
      stem = stm;
      chars = ch;
      stem.setAccessible(true);
    }
  }
}