package com.formulasearchengine.mathosphere.mlp.text;
import org.apache.commons.io.IOUtils;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import static org.junit.Assert.assertEquals;
/**
 * Tests for {@link UnicodeUtils#normalizeString(String)}.
 *
 * <p>Most tests read fixture lines from classpath resources resolved relative to
 * {@link UnicodeUtils}. The tests expect the following layout of {@code unicode.txt}
 * (zero-based line indices):
 * <ul>
 *   <li>0-25: Latin alphabets, even lines normalizing to capitals, odd lines to smalls</li>
 *   <li>26-35: Greek alphabets, even lines normalizing to capitals, odd lines to smalls</li>
 *   <li>36-40: digit strings normalizing to {@code 0123456789}</li>
 * </ul>
 * Each line of {@code unicode2.txt} is expected to hold the normalized form and the raw
 * input, separated by a single space.
 */
public class UnicodeUtilsTest {
    private static final Logger LOGGER = LoggerFactory.getLogger(UnicodeUtilsTest.class);

    /** Expected normalization results for the fixture lines. */
    private static final String CAPITALS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    private static final String SMALLS = "abcdefghijklmnopqrstuvwxyz";
    private static final String GREEK_CAPITALS = "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡϴΣΤΥΦΧΨΩ\u2207";
    private static final String GREEK_SMALLS = "αβγδεζηθικλμνξοπρςστυφχψω∂ϵϑϰϕϱϖ";
    private static final String DIGITS = "0123456789";

    /** A single supplementary-plane character (U+1D400, given as a surrogate pair) must map to "A". */
    @Test
    public void normalizeString_boldA() {
        String boldA = "\uD835\uDC00";
        String result = UnicodeUtils.normalizeString(boldA);
        assertEquals("A", result);
    }

    /** Lines 0-25 of the default fixture alternate between capital and small Latin alphabets. */
    @Test
    public void normalizeString_mathLatinLetters() throws Exception {
        List<String> lines = readTestData();
        for (int i = 0; i < 26; i++) {
            String expected = i % 2 == 0 ? CAPITALS : SMALLS;
            String mathString = lines.get(i);
            LOGGER.info("test data {}", mathString);
            String result = UnicodeUtils.normalizeString(mathString);
            // The message pinpoints the offending fixture line when the loop fails.
            assertEquals("unicode.txt line index " + i, expected, result);
        }
    }

    /** Lines 26-35 of the default fixture alternate between capital and small Greek alphabets. */
    @Test
    public void normalizeString_greekLetters() throws Exception {
        List<String> lines = readTestData();
        for (int i = 26; i < 36; i++) {
            String expected = i % 2 == 0 ? GREEK_CAPITALS : GREEK_SMALLS;
            String mathString = lines.get(i);
            LOGGER.info("test data {}", mathString);
            String result = UnicodeUtils.normalizeString(mathString);
            assertEquals("unicode.txt line index " + i, expected, result);
        }
    }

    /** Lines 36-40 of the default fixture must all normalize to the ASCII digits. */
    @Test
    public void normalizeString_digits() throws Exception {
        List<String> lines = readTestData();
        for (int i = 36; i < 41; i++) {
            String mathString = lines.get(i);
            LOGGER.info("test data {}", mathString);
            String result = UnicodeUtils.normalizeString(mathString);
            assertEquals("unicode.txt line index " + i, DIGITS, result);
        }
    }

    /** Every line of {@code unicode2.txt} is read as {@code "<expected> <input>"}. */
    @Test
    public void normalize_letterLikeSymbols() throws Exception {
        List<String> lines = readTestData("unicode2.txt");
        for (String line : lines) {
            String[] split = line.split(" ");
            String expected = split[0];
            String actual = UnicodeUtils.normalizeString(split[1]);
            assertEquals("unicode2.txt line '" + line + "'", expected, actual);
        }
    }

    /**
     * Reads all lines of a classpath resource located relative to {@link UnicodeUtils}.
     *
     * @param res resource name passed to {@link Class#getResourceAsStream(String)}
     * @return the lines of the resource, decoded as UTF-8
     * @throws IOException if the resource is missing or cannot be read
     */
    private List<String> readTestData(String res) throws IOException {
        // try-with-resources closes the stream even when reading fails.
        try (InputStream is = UnicodeUtils.class.getResourceAsStream(res)) {
            if (is == null) {
                throw new IOException("Test resource not found: " + res);
            }
            // Explicit charset: the fixtures are non-ASCII, so the platform default
            // must not influence how they are decoded.
            return IOUtils.readLines(is, StandardCharsets.UTF_8);
        }
    }

    /**
     * Reads the default fixture {@code unicode.txt}.
     *
     * @return the lines of the default fixture
     * @throws IOException if the resource is missing or cannot be read
     */
    private List<String> readTestData() throws IOException {
        return readTestData("unicode.txt");
    }
}