package org.xbib.elasticsearch.index.analysis.decompound.fst;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.common.settings.Settings;
import org.junit.Test;
import org.xbib.elasticsearch.MapperTestUtils;
import org.xbib.elasticsearch.index.analysis.BaseTokenStreamTest;
import java.io.IOException;
/**
 * Test for the {@code fst_decompound} token filter when used inside a custom analyzer.
 *
 * <p>The analyzer under test is assembled from index settings: a {@code standard} tokenizer
 * followed by the {@code fst_decompound} and {@code unique} filters. The test feeds one German
 * sentence through it and compares the emitted terms with a fixed list.
 */
public class FstDecompoundTokenFilterTests extends BaseTokenStreamTest {

    /**
     * Analyzes a German sentence containing several compound words and asserts the exact
     * sequence of terms produced by the analyzer.
     *
     * @throws IOException declared because the analyzer lookup and the token stream assertion
     *                     may throw it
     */
    @Test
    public void test() throws IOException {
        // Custom analyzer definition: standard tokenizer, then fst_decompound, then unique.
        Settings analyzerSettings = Settings.builder()
                .put("index.analysis.analyzer.myanalyzer.type", "custom")
                .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
                .put("index.analysis.analyzer.myanalyzer.filter.0", "fst_decompound")
                .put("index.analysis.analyzer.myanalyzer.filter.1", "unique")
                .build();
        String analyzerName = "myanalyzer";
        Analyzer analyzer = MapperTestUtils.analyzer(analyzerSettings, analyzerName);

        String sentence =
                "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";

        // Expected terms in emission order; each row starts with a word of the sentence,
        // followed by the additional lower-cased terms expected right after it.
        String[] expectedTerms = {
                "Die",
                "Jahresfeier", "jahres", "feier", "jahre", "jahr",
                "der",
                "Rechtsanwaltskanzleien", "rechts", "anwalts", "kanzleien", "recht", "anwalt",
                "auf",
                "dem",
                "Donaudampfschiff", "donau", "dampf", "schiff",
                "hat",
                "viel",
                "Ökosteuer", "ökos", "teuer",
                "gekostet"
        };

        assertAnalyzesTo(analyzer, sentence, expectedTerms);
    }
}