/*
* Copyright 2014-15 Skynav, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY SKYNAV, INC. AND ITS CONTRIBUTORS “AS IS” AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL SKYNAV, INC. OR ITS CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.skynav.ttpe.text;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Map;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.skynav.ttv.util.IOUtil;
import com.skynav.ttv.util.Reporter;
/**
 * Named provider of {@link LineBreakIterator} instances.
 *
 * <p>Instances are obtained through {@link #getInstance(String)} and are cached per name.
 * A breaker whose name maps to a rules file loads a rule based ICU break iterator from the
 * compiled ({@value #RULES_BINARY_EXT}) form of that file, resolved as a class resource; any
 * other name falls back to ICU's character break iterator.</p>
 *
 * <p>The {@link #main(String[])} entry point compiles a rules source file into the binary form
 * consumed at load time.</p>
 *
 * <p>No synchronization is performed on the instance cache or on the lazily loaded iterator.</p>
 */
public class LineBreaker {

    /** Name of the character encoding used to read rules source files. */
    private static final String DEFAULT_INPUT_ENCODING = "utf-8";

    /** Charset used to read rules source files; platform default if the named charset is unavailable. */
    private static final Charset defaultInputEncoding;

    static {
        Charset encoding;
        try {
            encoding = Charset.forName(DEFAULT_INPUT_ENCODING);
        } catch (RuntimeException e) {
            encoding = Charset.defaultCharset();
        }
        defaultInputEncoding = encoding;
    }

    /** File name extension of rules in source (text) form. */
    public static final String RULES_SOURCE_EXT = "txt";

    /** File name extension of rules in compiled (binary) form. */
    public static final String RULES_BINARY_EXT = "dat";

    /** Pairs of { breaker name, rules file base name }; a null base name means "no rules file". */
    private static final String[][] rulesFileNames = new String[][] {
        { "uax14",  "icu-brkiter-uax14" },
        { "scalar", null },
    };

    /** Breaker name to rules file base name, built from {@link #rulesFileNames}. */
    private static final Map<String,String> rulesFileNameMap;

    /** Cache of breakers already created by {@link #getInstance(String)}, keyed by name. */
    private static final Map<String,LineBreaker> breakers;

    static {
        rulesFileNameMap = new java.util.HashMap<String,String>();
        for (String[] entry : rulesFileNames) {
            rulesFileNameMap.put(entry[0], entry[1]);
        }
        breakers = new java.util.HashMap<String,LineBreaker>();
    }

    /** Name this breaker was created with; used to look up its rules file. */
    private final String name;

    /** Lazily loaded iterator; null until first successfully loaded or after {@link #clear()}. */
    private LineBreakIterator iterator;

    private LineBreaker(String name) {
        this.name = name;
    }

    /**
     * @return the name this breaker was created with
     */
    public String getName() {
        return name;
    }

    /**
     * Obtain this breaker's iterator, loading it on first use.
     *
     * @param reporter reporter used to log the outcome of loading a rules file
     * @return the cached or newly loaded iterator, or null if the rules file could not be loaded
     */
    public LineBreakIterator getIterator(Reporter reporter) {
        return maybeLoad(reporter);
    }

    /**
     * Discard the cached iterator so that the next {@link #getIterator(Reporter)} call loads it again.
     */
    public void clear() {
        iterator = null;
    }

    /**
     * Return the cached iterator, or create and cache one if none is cached.
     *
     * @param reporter reporter used to log the outcome of loading a rules file
     * @return the iterator, or null if an I/O error prevented loading the compiled rules
     */
    private LineBreakIterator maybeLoad(Reporter reporter) {
        if (iterator != null)
            return iterator;
        BreakIterator bi = null;
        InputStream is = null;
        URL rulesLocator = null;
        try {
            rulesLocator = getRulesLocator(name, RULES_BINARY_EXT);
            if (rulesLocator != null) {
                is = rulesLocator.openStream();
                bi = RuleBasedBreakIterator.getInstanceFromCompiledRules(is);
                reporter.logInfo(reporter.message("*KEY*", "Loaded rules based break iterator from ''{0}''.", rulesLocator.toString()));
            } else {
                // no rules file is associated with this name, so use ICU's character iterator
                bi = BreakIterator.getCharacterInstance();
            }
        } catch (IOException e) {
            // previously swallowed silently; report it so that a null result can be diagnosed
            reporter.logInfo(reporter.message("*KEY*", "Unable to load rules based break iterator from ''{0}'': {1}", String.valueOf(rulesLocator), e.toString()));
        } finally {
            IOUtil.closeSafely(is);
        }
        if (bi == null)
            return null;
        iterator = new LineBreakIterator(bi);
        return iterator;
    }

    /**
     * Locate the rules resource associated with a breaker name.
     *
     * @param name breaker name
     * @param extension file name extension (without the dot) of the desired rules form
     * @return the resource URL, or null if the name has no rules file or the resource is not found
     */
    private URL getRulesLocator(String name, String extension) {
        String rulesFileName = rulesFileNameMap.get(name);
        if (rulesFileName == null)
            return null;
        return getClass().getResource(rulesFileName + "." + extension);
    }

    /**
     * Obtain the breaker for a name, creating and caching it on first request.
     *
     * @param name breaker name; null is treated as the empty string
     * @return the breaker associated with the name, never null
     */
    public static LineBreaker getInstance(String name) {
        if (name == null)
            name = "";
        LineBreaker lb = breakers.get(name);
        if (lb == null) {
            lb = new LineBreaker(name);
            breakers.put(name, lb);
        }
        return lb;
    }

    /**
     * Read an entire rules source file into a string, terminating every line with a newline.
     *
     * @param inputFilePath path of the rules source file
     * @return the rules text
     * @throws IOException if the file cannot be opened or read
     */
    private static String readRules(String inputFilePath) throws IOException {
        InputStream is = null;
        BufferedReader r = null;
        try {
            is = new FileInputStream(inputFilePath);
            r = new BufferedReader(new InputStreamReader(is, defaultInputEncoding));
            StringBuilder rules = new StringBuilder();
            String line;
            while ((line = r.readLine()) != null) {
                rules.append(line);
                rules.append('\n');
            }
            return rules.toString();
        } finally {
            IOUtil.closeSafely(r);
            IOUtil.closeSafely(is);
        }
    }

    /**
     * Command line tool that compiles a rules source file into its binary form.
     *
     * <p>Expects exactly two arguments, the input (source) path and the output (binary) path;
     * otherwise a usage message is written to standard error. I/O failures are reported on
     * standard error.</p>
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        if (args.length == 2) {
            String inputFilePath = args[0];
            String outputFilePath = args[1];
            OutputStream os = null;
            try {
                // read the rules before creating the output so a read failure leaves no empty output file
                String rules = readRules(inputFilePath);
                os = new FileOutputStream(outputFilePath);
                RuleBasedBreakIterator.compileRules(rules, os);
            } catch (IOException e) {
                // previously swallowed silently; tell the user the compile did not succeed
                System.err.println("Error: unable to compile rules from '" + inputFilePath + "' to '" + outputFilePath + "': " + e);
            } finally {
                IOUtil.closeSafely(os);
            }
        } else {
            System.err.println("Usage: java -cp ... com.skynav.ttpe.text.LineBreaker [INPUT-FILE-PATH] [OUTPUT-FILE-PATH]");
        }
    }

}