/*
* #!
* Ontopia Classify
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.topicmaps.classify;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import net.ontopia.utils.OntopiaRuntimeException;
import org.tartarus.snowball.SnowballProgram;
/**
 * INTERNAL: Term stemmer that delegates to a Snowball stemmer. The
 * language key passed to the constructor selects the
 * <code>org.tartarus.snowball.ext</code> stemmer class, which is loaded
 * and instantiated reflectively.
 */
public class SnowballStemmer implements TermStemmerIF {

  /** Maps supported language keys to Snowball stemmer class names. */
  private static final Map<String, String> languages;

  static {
    languages = new HashMap<String, String>();
    languages.put("dk", "org.tartarus.snowball.ext.DanishStemmer");
    // ISO 639-1 code for Danish; "dk" is kept for backward compatibility.
    languages.put("da", "org.tartarus.snowball.ext.DanishStemmer");
    languages.put("nl", "org.tartarus.snowball.ext.DutchStemmer");
    languages.put("en", "org.tartarus.snowball.ext.EnglishStemmer");
    languages.put("fi", "org.tartarus.snowball.ext.FinnishStemmer");
    languages.put("fr", "org.tartarus.snowball.ext.FrenchStemmer");
    languages.put("de2", "org.tartarus.snowball.ext.German2Stemmer");
    languages.put("de", "org.tartarus.snowball.ext.GermanStemmer");
    languages.put("it", "org.tartarus.snowball.ext.ItalianStemmer");
    languages.put("kp", "org.tartarus.snowball.ext.KpStemmer");
    languages.put("lovins", "org.tartarus.snowball.ext.LovinsStemmer");
    languages.put("no", "org.tartarus.snowball.ext.NorwegianStemmer");
    languages.put("porter", "org.tartarus.snowball.ext.PorterStemmer");
    languages.put("pt", "org.tartarus.snowball.ext.PortugueseStemmer");
    languages.put("ru", "org.tartarus.snowball.ext.RussianStemmer");
    languages.put("es", "org.tartarus.snowball.ext.SpanishStemmer");
    languages.put("se", "org.tartarus.snowball.ext.SwedishStemmer");
    // ISO 639-1 code for Swedish; "se" is kept for backward compatibility.
    languages.put("sv", "org.tartarus.snowball.ext.SwedishStemmer");
  }

  /** The Snowball stemmer instance that performs the actual stemming. */
  protected SnowballProgram stemmer;

  /**
   * INTERNAL: Creates a stemmer for the given language key.
   *
   * @param lang one of the keys registered in the language table, e.g.
   *        "en", "de", "fr" or the algorithm names "porter" and "lovins"
   * @throws OntopiaRuntimeException if the key is not registered, or if
   *         the stemmer class cannot be loaded or instantiated
   */
  public SnowballStemmer(String lang) {
    String stemClassName = languages.get(lang);
    if (stemClassName == null) {
      throw new OntopiaRuntimeException("Unknown language: '" + lang + "'");
    }
    try {
      // asSubclass performs a checked narrowing, so no unchecked cast is
      // needed; a class that is not a SnowballProgram fails here.
      Class<? extends SnowballProgram> stemClass =
          Class.forName(stemClassName).asSubclass(SnowballProgram.class);
      // Class.newInstance() is deprecated; go through the no-arg constructor.
      this.stemmer = stemClass.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new OntopiaRuntimeException(e);
    }
  }

  /**
   * INTERNAL: Stems the given term and returns the stem in lower case.
   * Each call sets the term on the shared {@link #stemmer} instance before
   * running it. NOTE(review): presumably this makes a single instance
   * unsuitable for concurrent use - confirm against callers.
   *
   * @param term the term to stem
   * @return the lower-cased stem, or null if the stemmer reports no
   *         current value
   */
  public String stem(String term) {
    stemmer.setCurrent(term);
    stemmer.stem();
    String stem = stemmer.getCurrent();
    // Lower-case with a fixed locale so the result does not depend on the
    // JVM default locale (e.g. Turkish dotless-i case mapping).
    return (stem == null ? null : stem.toLowerCase(Locale.ROOT));
  }
}