package org.xbib.elasticsearch.common.fsa;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * One-off tool that compiles the bundled plain-text lemmatization tables
 * into binary FSA files under {@code build/}, one per language.
 */
public class FsaCompilerTool {

    /**
     * Builds a lemmatization FSA for every supported language.
     * <p>
     * For each language code, reads the classpath resource
     * {@code /lemmatize/lemmatization-<lang>.txt} (UTF-8), loads it into a
     * {@link Dictionary} via {@code loadLinesReverse}, and writes the
     * resulting automaton to {@code build/lemmatization-<lang>.fsa}.
     *
     * @throws IOException if a source resource is missing, or if reading
     *                     the resource or writing the output file fails
     */
    public void buildLemmatizeFSA() throws IOException {
        String[] langs = {
                "ast", "bg", "cs", "cy", "de", "en", "es", "et", "fa", "fr", "ga", "gd",
                "gl", "gv", "hu", "it", "pl", "pt", "ro", "sk", "sl", "sv", "uk"
        };
        for (String lang : langs) {
            Dictionary dictionary = new Dictionary();
            String resource = "/lemmatize/lemmatization-" + lang + ".txt";
            Path path = Paths.get("build/lemmatization-" + lang + ".fsa");
            // The build directory may not exist yet; Files.newOutputStream
            // does not create parent directories itself.
            if (path.getParent() != null) {
                Files.createDirectories(path.getParent());
            }
            // getResourceAsStream returns null (not an exception) when the
            // resource is absent; fail with a message naming the resource
            // instead of an opaque NPE in the try-with-resources header.
            InputStream inputStream = getClass().getResourceAsStream(resource);
            if (inputStream == null) {
                throw new IOException("resource not found on classpath: " + resource);
            }
            try (DataOutputStream dataOutputStream =
                         new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(path)));
                 Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) {
                dictionary.loadLinesReverse(reader);
                dictionary.fsa().write(dataOutputStream);
            }
        }
    }
}