package org.xbib.elasticsearch.index.analysis.icu.tools;
import com.ibm.icu.text.RuleBasedBreakIterator;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
/**
* Utility to convert RuleBasedBreakIterator (.rbbi) files into binary compiled form (.brk).
*/
public class RBBIRuleCompiler {
private static final Logger logger = LogManager.getLogger(RBBIRuleCompiler.class.getName());
public void compile(Path inputPath, Path outputPath) throws IOException {
String rules = getRules(inputPath);
try (OutputStream os = Files.newOutputStream(outputPath)) {
new RuleBasedBreakIterator(rules);
RuleBasedBreakIterator.compileRules(rules, os);
} catch (IllegalArgumentException e) {
logger.error(e.getMessage(), e);
}
}
private String getRules(Path rulePath) throws IOException {
StringBuilder rules = new StringBuilder();
try (InputStream inputStream = Files.newInputStream(rulePath);
BufferedReader cin =
new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
String line;
while ((line = cin.readLine()) != null) {
if (!line.startsWith("#")) {
rules.append(line);
rules.append('\n');
}
}
}
return rules.toString();
}
}