package com.formulasearchengine.mathosphere.mlp.text;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.CharUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Map;
/**
 * Translates Unicode code points into TeX fragments.
 *
 * <p>Printable ASCII characters are passed through unchanged. Every other code point is looked
 * up in a table that is loaded lazily, on first use, from the classpath resource
 * {@code unicode2tex.csv} (columns {@code unicode} - a hexadecimal code point - and
 * {@code latex}). Code points without a table entry translate to the empty string.
 */
public class UnicodeMap {
  private static final Logger LOGGER = LoggerFactory.getLogger(UnicodeMap.class);

  /** Classpath location of the code point to TeX table. */
  private static final String RESOURCE = "unicode2tex.csv";

  /** Code point to TeX table; {@code null} until the first lookup triggers {@link #buildMap()}. */
  private static Map<Integer, String> MAP = null;

  /**
   * Loads the translation table from {@link #RESOURCE} into {@link #MAP}.
   *
   * <p>A missing resource or an I/O failure is logged and leaves the table with whatever rows
   * were read up to that point (possibly none), so lookups degrade to "unmapped" instead of
   * failing.
   */
  private static void buildMap() {
    // see table here
    // http://unicode-table.com/en/blocks/letterlike-symbols/
    ImmutableMap.Builder<Integer, String> unicode2tex = ImmutableMap.builder();
    // try-with-resources tolerates a null resource, which is what a missing file yields.
    try (InputStream stream = UnicodeMap.class.getClassLoader().getResourceAsStream(RESOURCE)) {
      if (stream == null) {
        LOGGER.error("unicode2tex-problem: resource {} not found on classpath", RESOURCE);
      } else {
        // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
        try (Reader in = new InputStreamReader(stream, StandardCharsets.UTF_8)) {
          Iterable<CSVRecord> records = CSVFormat.RFC4180.withHeader().parse(in);
          for (CSVRecord record : records) {
            String sUni = record.get("unicode");
            String sTex = record.get("latex");
            unicode2tex.put(Integer.parseInt(sUni, 16), sTex);
          }
        }
      }
    } catch (IOException e) {
      LOGGER.error("unicode2tex-problem", e);
    }
    MAP = unicode2tex.build();
  }

  /**
   * Translates every code point of {@code in} and concatenates the results.
   *
   * <p>The concatenation is trimmed. If the input consists of exactly one code point, a pair of
   * braces enclosing the whole result is removed.
   *
   * @param in the text to translate; must not be {@code null}
   * @return the TeX representation of {@code in}
   */
  public static String string2TeX(String in) {
    int[] chars = in.codePoints().toArray();
    StringBuilder res = new StringBuilder();
    for (int code : chars) {
      res.append(char2TeX(code));
    }
    String s = res.toString().trim();
    if (chars.length == 1) {
      // A lone symbol does not need the protective group added by char2TeX.
      s = s.replaceAll("^\\{(.*)\\}$", "$1");
    }
    return s;
  }

  /**
   * Translates a single Unicode code point.
   *
   * @param codePoint the code point to translate
   * @return the character itself if it is printable ASCII; otherwise the table entry, wrapped in
   *     braces unless it is a single character or already ends with a closing brace; or the
   *     empty string if the table has no entry
   */
  public static String char2TeX(int codePoint) {
    if (MAP == null) {
      buildMap();
    }
    // Only narrow to char once the value is known to fit: casting a supplementary code point
    // would keep just its low 16 bits and could make it look like an ASCII character.
    if (Character.isBmpCodePoint(codePoint) && CharUtils.isAsciiPrintable((char) codePoint)) {
      return CharUtils.toString((char) codePoint);
    }
    final String tex = MAP.get(codePoint);
    if (tex != null) {
      if (tex.endsWith("}") || tex.length() == 1) {
        return tex;
      }
      return "{" + tex + "}";
    }
    LOGGER.debug("invalid char {}", codePoint);
    return "";
  }
}