package org.gbif.checklistbank.lucene;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/**
 * Tests {@link ScientificNameAnalyzer} by running raw scientific name strings through its
 * token stream and comparing the emitted terms with the expected normalized form.
 */
public class ScientificNameAnalyzerTest {

  private final ScientificNameAnalyzer analyzer = new ScientificNameAnalyzer();

  /**
   * Feeds a range of name spellings (double letters, i/y/j variants, hybrid markers,
   * ligatures, differing epithet endings) through the analyzer and checks the normalized output.
   */
  @Test
  public void testAnalyzer() throws Exception {
    assertAnalyzer(null, "");
    assertAnalyzer("abies", "Abies ");
    assertAnalyzer("abies", "Abiies ");
    assertAnalyzer("abyes", "Abyes ");
    assertAnalyzer("abyes alba", "Abyes albus");
    assertAnalyzer("abyes albieta", "Abyes albieta");
    assertAnalyzer("abies albieta", "Abies albijeta");
    assertAnalyzer("abies albieta", "Abies albyeta");
    assertAnalyzer("abies alba", " \txAbies × ållbbus\t");
    assertAnalyzer("abies alba", " \txAbies × ållbbus\t");
    assertAnalyzer("rhachis taktos", "Rhachis taktos");
    assertAnalyzer("hieracium sabauda", "Hieracium sabaudum");
    assertAnalyzer("hieracium scorzoneraefolia", "Hieracium scorzoneræfolium");
    assertAnalyzer("hieracium scorzonerifolia", "Hieracium scorzonerifolium");
    assertAnalyzer("macrozamia platirachis", "Macrozamia platyrachis");
    assertAnalyzer("macrozamia platirachis", "Macrozamia platyrhachis");
    assertAnalyzer("cycas circinalis", "Cycas circinalis");
    assertAnalyzer("cycas circinalis", "Cycas circinnalis");
    assertAnalyzer("isolona perieri", "Isolona perrieri");
    assertAnalyzer("isolona perieri", "Isolona perrierii");
    assertAnalyzer("carex caioueti", "Carex ×cayouettei");
    assertAnalyzer("platanus hispanica", "Platanus x hispanica");
    assertAnalyzer("eragrostis browni", "Eragrostis brownii");
    assertAnalyzer("eragrostis browni", "Eragrostis brownei");
    assertAnalyzer("eragrostis browni", "Eragrostis brownei");
  }

  /**
   * Runs {@code name} through the analyzer and asserts that the concatenated terms equal
   * {@code expected}.
   *
   * @param expected the expected analyzer output, or {@code null} when no term text is expected
   * @param name the raw name to analyze
   * @throws Exception if the token stream cannot be read
   */
  void assertAnalyzer(String expected, String name) throws Exception {
    // use lucene analyzer to normalize input without using the full query parser
    StringBuilder sb = new StringBuilder();
    try (TokenStream stream = analyzer.tokenStream("anything", new StringReader(name))) {
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
      stream.reset();
      // Terms are concatenated without a separator.
      // NOTE(review): expectations containing a space presumably rely on the analyzer
      // emitting the whole name as a single term - confirm against ScientificNameAnalyzer.
      while (stream.incrementToken()) {
        sb.append(termAtt.toString());
      }
      stream.end();
    }
    // The empty-input case expects null, so treat "no term text" as null before comparing.
    String actual = sb.length() == 0 ? null : sb.toString();
    // Compare against the analyzer output; the previous check compared expected with itself
    // and therefore could never fail.
    assertEquals("Unexpected analyzer output for input \"" + name + "\"", expected, actual);
  }
}