import static org.junit.Assert.*;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import org.opensextant.util.TextUtils;
/**
 * JUnit tests exercising {@link TextUtils}: formatting-space counting, tokenizing,
 * emoticon removal, character trimming, script detection, language-code lookup
 * and upper/lower case measurement.
 *
 * <p>Assertions use {@code assertEquals}/{@code assertFalse} rather than
 * {@code assertTrue} on a hand-built boolean, so that a failure reports the
 * expected and actual values.
 */
public class TestTextUtils {

    /**
     * Writes a diagnostic line to standard output.
     *
     * @param m the text to print
     */
    private void print(String m) {
        System.out.println(m);
    }

    /** Checks the formatting-space count for a newline and for a newline plus vertical tab (U+000B). */
    @Test
    public void testFormatting() {
        assertEquals(1, TextUtils.countFormattingSpace("a\nb"));
        assertEquals(2, TextUtils.countFormattingSpace("a\n\u000Bb"));
    }

    /**
     * Smoke test: tokenizes a whitespace-only string and prints the result.
     * Makes no assertion; it only fails if the call throws.
     */
    @Test
    public void testTokens() {
        print(StringUtils.join(TextUtils.tokensRight("\n "), ","));
    }

    /**
     * Smoke test: runs the three tokenizers over text with leading whitespace,
     * CRLF line endings and trailing whitespace, plus an empty string and a string
     * without any end-of-line. Output is printed for inspection; nothing is asserted.
     */
    @Test
    public void testEOL() {
        String buf = "\t ABC\r\n\r\n123 x y z ";
        print(StringUtils.join(TextUtils.tokens(buf), ","));
        print(StringUtils.join(TextUtils.tokensRight(buf), ","));
        print(StringUtils.join(TextUtils.tokensLeft(buf), ","));
        print(StringUtils.join(TextUtils.tokensRight(""), ","));
        print(StringUtils.join(TextUtils.tokensRight("ABC_NO_EOL"), ","));
    }

    /**
     * Smoke test: strips emoticons from an emoji-only string and prints what remains.
     */
    @Test
    public void testRemoveSomeEmoticon() {
        String result = TextUtils.removeEmoticons("😪😔😱😱😱");
        System.out.println("Any emojis left? " + result);
        // NOTE(review): nothing is asserted here. A zero-length check on the result
        // was previously disabled in this test -- presumably the result is not an
        // empty string. Confirm the expected output and assert it.
    }

    /** Checks left-trimming of a character set and removal of a character set anywhere in the text. */
    @Test
    public void testRemoveLeft() {
        // Trim left: only the leading '-' goes, leaving 5 characters.
        int count = TextUtils.removeAnyLeft("-+*ABC", "-").length();
        assertEquals(5, count);

        // Trim left: leading '-' and '+' go, leaving 4 characters.
        count = TextUtils.removeAnyLeft("-+*ABC", "+-").length();
        assertEquals(4, count);

        // Remove any of the listed chars from the string; yields ABC.
        count = TextUtils.removeAny("-+*ABC", "+ - * (^%").length();
        assertEquals(3, count);
    }

    /** Checks Latin/ASCII detection and diacritic replacement on mixed-script text. */
    @Test
    public void testScriptDetection() {
        assertTrue(TextUtils.isLatin("Ö"));
        assertTrue(TextUtils.isLatin("a Ö 5 !"));
        assertFalse(TextUtils.isLatin("a Ö 杨寨 5 !"));

        // Latin letters lose their diacritics; the Chinese characters are expected unchanged.
        String t = TextUtils.replaceDiacritics("a Ö ø Ø é å Å 杨寨 5 ! ē M ē ā");
        assertEquals("Diacritics not replaced!", "a O o O e a A 杨寨 5 ! e M e a", t);

        assertFalse(TextUtils.isASCII("xÖx"));
        assertTrue(TextUtils.isLatin("O a b c d O"));
    }

    /** Checks that several codes and a language name resolve to the expected language name. */
    @Test
    public void testLanguageCodes() {
        assertEquals("Chinese", TextUtils.getLanguage("chi").getName());
        assertEquals("French", TextUtils.getLanguage("fre").getName());
        assertEquals("French", TextUtils.getLanguage("fra").getName());
        assertEquals("French", TextUtils.getLanguage("FRENCH").getName());
    }

    /**
     * Checks document-level case measurement and the string-level
     * {@code isUpper}/{@code isLower} predicates across Latin, Arabic and Chinese text.
     */
    @Test
    public void testCase() {
        String mostlyUpper = "This IS MOSTLY 898 UPPER Case data $%%";
        String mostlyLower = "This is mostly lower cased data çx®tÇ 512131";

        // NOTE(review): each block below is skipped when measureCase returns null, so
        // the assertions inside can pass vacuously. Consider assertNotNull once the
        // contract of measureCase is confirmed.

        /*
         * UPPER CASE tests. Mostly upper case vs. all upper case.
         */
        int[] checkCase = TextUtils.measureCase(mostlyUpper);
        if (checkCase != null) {
            print("NOT uppercase\t" + mostlyUpper);
            assertTrue(TextUtils.isUpperCaseDocument(checkCase));
        }

        checkCase = TextUtils.measureCase(mostlyUpper.toUpperCase());
        if (checkCase != null) {
            print("IS uppercase\t" + mostlyUpper.toUpperCase());
            assertTrue(TextUtils.isUpperCaseDocument(checkCase));
        }

        /*
         * LOWER CASE tests. Mostly lower case vs. all lower case.
         */
        checkCase = TextUtils.measureCase(mostlyLower);
        if (checkCase != null) {
            print("NOT lower\t" + mostlyLower);
            assertFalse(TextUtils.isLowerCaseDocument(checkCase));
        }

        checkCase = TextUtils.measureCase(mostlyLower.toLowerCase());
        if (checkCase != null) {
            print("IS lower\t" + mostlyLower.toLowerCase());
            assertTrue(TextUtils.isLowerCaseDocument(checkCase));
        }

        // String-level lower-case checks.
        assertFalse(TextUtils.isLower("Abc"));
        assertTrue(TextUtils.isLower("abc"));
        assertFalse(TextUtils.isLower("a b c 9 1$% Ö"));
        assertTrue(TextUtils.isLower("a b c 9 1$% ø"));

        // String-level upper-case checks.
        assertFalse(TextUtils.isUpper("ABc"));
        assertFalse(TextUtils.isUpper("abc"));
        assertTrue(TextUtils.isUpper("A B C 9 1$% Ö"));
        assertFalse(TextUtils.isUpper("A B C 9 1$% ø"));

        // Arabic-only and Chinese-only text is expected not to count as upper case.
        String arabicText = "المناطق:";
        assertFalse(TextUtils.isUpper(arabicText));

        String chineseText = "杨寨";
        assertFalse(TextUtils.isUpper(chineseText));

        // With a single Latin letter alongside, that letter is expected to decide the result.
        chineseText = "a 杨寨";
        assertTrue(TextUtils.isLower(chineseText));
        chineseText = "A 杨寨";
        assertTrue(TextUtils.isUpper(chineseText));

        // Neither upper nor lower: mixed case.
        String latinText = "ø Ø";
        assertFalse(TextUtils.isUpper(latinText));
        assertFalse(TextUtils.isLower(latinText));

        assertTrue(TextUtils.isLower("øh baby"));
        assertTrue(TextUtils.isUpper("ØH BABY"));
    }
}