package org.apache.solr.analysis.author;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
public class TestAuthorTransliterationFilter extends BaseTokenStreamTestCase {
final class TestFilter extends TokenFilter {
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public TestFilter(TokenStream input) {
super(input);
}
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
typeAtt.setType(AuthorUtils.AUTHOR_INPUT);
return true;
}
}
public void testAuthorSynonyms() throws Exception {
Reader reader = new StringReader("Müller, Bill");
Tokenizer tokenizer = new KeywordTokenizer();
tokenizer.setReader(reader);
AuthorTransliterationFactory factory = new AuthorTransliterationFactory(new HashMap<String,String>());
TokenStream stream = factory.create(new TestFilter(tokenizer));
String[] expected = { "Müller, Bill", "Mueller, Bill", "Muller, Bill" };
assertTokenStreamContents(stream, expected);
}
public void testAccents() throws Exception {
Reader reader = new StringReader("Jeřábková, Tereza");
Tokenizer tokenizer = new KeywordTokenizer();
tokenizer.setReader(reader);
AuthorTransliterationFactory factory = new AuthorTransliterationFactory(new HashMap<String,String>());
TokenStream stream = factory.create(new TestFilter(tokenizer));
String[] expected = { "Jeřábková, Tereza", "Jerhaebkovae, Tereza", "Jerabkova, Tereza"};
assertTokenStreamContents(stream, expected);
}
}