/*
* Copyright 2011 Greg Milette and Adam Stroud
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package root.gast.speech.text.match;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
//Note: org.tartarus is part of the lucene contrib project
import org.tartarus.snowball.ext.EnglishStemmer;
/**
 * A {@link WordMatcher} that encodes words with an English stemmer before
 * comparing them.
 * <p>
 * Every target word given to a constructor is lower-cased and stemmed before
 * it is handed to the superclass, and every word passed to
 * {@link #isIn(String)} is encoded the same way before the superclass lookup.
 * Lower-casing happens first because the Snowball stemmer is defined over
 * lower-case text; without it, input such as "Walking" or "WALKING" would not
 * produce the same stem as "walk".
 *
 * @author Greg Milette &#60;<a href="mailto:gregorym@gmail.com">gregorym@gmail.com</a>&#62;
 */
public class StemmedWordMatcher extends WordMatcher
{
    /**
     * Creates a matcher from a variable number of target words.
     *
     * @param wordsIn the target words; each one is lower-cased and stemmed
     * @throws NullPointerException if any word is {@code null}
     */
    public StemmedWordMatcher(String... wordsIn)
    {
        this(Arrays.asList(wordsIn));
    }

    /**
     * Creates a matcher from a list of target words.
     *
     * @param wordsIn the target words; each one is lower-cased and stemmed
     *                before being passed to the superclass constructor
     * @throws NullPointerException if the list or any word in it is {@code null}
     */
    public StemmedWordMatcher(List<String> wordsIn)
    {
        super(encode(wordsIn));
    }

    /**
     * Encodes every word of the input, preserving order.
     *
     * @param input the words to encode
     * @return a new list holding the encoded form of each input word
     */
    private static List<String> encode(List<String> input)
    {
        List<String> encoded = new ArrayList<String>(input.size());
        for (String in : input)
        {
            encoded.add(stem(in));
        }
        return encoded;
    }

    /**
     * Encodes the given word and delegates the lookup to the superclass.
     *
     * @param word the word to look for
     * @return whatever the superclass reports for the encoded word
     * @throws NullPointerException if {@code word} is {@code null}
     */
    @Override
    public boolean isIn(String word)
    {
        return super.isIn(stem(word));
    }

    /**
     * Lower-cases the word and runs the Snowball English stemmer (shipped
     * with Lucene) over it.
     *
     * @param word the word to encode
     * @return the stemmed, lower-cased word, or just the lower-cased word
     *         when the stemmer reports that it did nothing
     * @throws NullPointerException if {@code word} is {@code null}
     */
    private static String stem(String word)
    {
        // Normalize case first so that target words and queried words end up
        // with the same stem regardless of how they were capitalized.
        // Locale.ENGLISH keeps the result independent of the device locale.
        String normalized = word.toLowerCase(Locale.ENGLISH);

        // The stemmer holds the current word as state (setCurrent/getCurrent),
        // so a fresh instance per call keeps this static helper free of
        // shared mutable state.
        EnglishStemmer stemmer = new EnglishStemmer();
        stemmer.setCurrent(normalized);
        if (!stemmer.stem())
        {
            return normalized;
        }
        return stemmer.getCurrent();
    }
}