/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2013 Alex Buloichik
2016 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.util;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.Locale;
import org.junit.Test;
/**
* Tests for (some) static utility methods.
*
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Aaron Madlon-Kay
*/
public class StringUtilTest {
/**
 * Exercises {@code StringUtil.isSubstringAfter} on a fixed six-character
 * text at several positions.
 */
@Test
public void testIsSubstringAfter() {
    final String digits = "123456";

    // "67" cannot be found at index 5: the text ends after the '6'.
    assertFalse(StringUtil.isSubstringAfter(digits, 5, "67"));

    // Candidates that are fully present starting at the given index.
    assertTrue(StringUtil.isSubstringAfter(digits, 5, "6"));
    assertTrue(StringUtil.isSubstringAfter(digits, 4, "56"));
    assertTrue(StringUtil.isSubstringAfter(digits, 0, "12"));
    assertTrue(StringUtil.isSubstringAfter(digits, 1, "23"));
}
/**
 * Exercises {@code StringUtil.isTitleCase} with ASCII words and with the
 * LJ digraph, which has distinct upper, title and lower case code points.
 */
@Test
public void testIsTitleCase() {
    final String upperLj = "\u01c7"; // LATIN CAPITAL LETTER LJ (U+01C7)
    final String titleLj = "\u01C8"; // LATIN CAPITAL LETTER L WITH SMALL LETTER J (U+01C8)
    final String lowerLj = "\u01c9"; // LATIN SMALL LETTER LJ (U+01C9)

    // ASCII words that must not be reported as title case.
    for (String word : new String[] { "foobar", "fooBar", "f1obar", "FooBar" }) {
        assertFalse(StringUtil.isTitleCase(word));
    }

    // ASCII words that must be reported as title case.
    for (String word : new String[] { "Fo1bar", "Foobar" }) {
        assertTrue(StringUtil.isTitleCase(word));
    }

    // Title case digraph in front of lower case letters, then preceded by a lower case letter.
    assertTrue(StringUtil.isTitleCase(titleLj + "bcd"));
    assertFalse(StringUtil.isTitleCase("a" + titleLj + "bcd"));

    // Each form of the digraph on its own: only the title case form qualifies.
    assertTrue(StringUtil.isTitleCase(titleLj));
    assertFalse(StringUtil.isTitleCase(upperLj));
    assertFalse(StringUtil.isTitleCase(lowerLj));
}
/**
 * Exercises {@code StringUtil.isSubstringBefore} on a fixed six-character
 * text at several positions.
 */
@Test
public void testIsSubstringBefore() {
    final String digits = "123456";

    // Only one character ("1") precedes index 1, so "01" cannot match there.
    assertFalse(StringUtil.isSubstringBefore(digits, 1, "01"));

    // Candidates that end exactly at the given index.
    assertTrue(StringUtil.isSubstringBefore(digits, 1, "1"));
    assertTrue(StringUtil.isSubstringBefore(digits, 2, "12"));
    assertTrue(StringUtil.isSubstringBefore(digits, 6, "56"));
    assertTrue(StringUtil.isSubstringBefore(digits, 5, "45"));
}
/**
 * Checks the case predicates on characters outside the Basic Multilingual
 * Plane, each of which is written here as a surrogate pair.
 */
@Test
public void testUnicodeNonBMP() {
    final String boldCapitalA = "\uD835\uDC00"; // MATHEMATICAL BOLD CAPITAL A (U+1D400)
    final String boldSmallA = "\uD835\uDC1A"; // MATHEMATICAL BOLD SMALL A (U+1D41A)

    // A single capital
    assertTrue(StringUtil.isUpperCase(boldCapitalA));
    assertFalse(StringUtil.isLowerCase(boldCapitalA));
    assertTrue(StringUtil.isTitleCase(boldCapitalA));

    // Two capitals
    final String twoCapitals = boldCapitalA + boldCapitalA;
    assertTrue(StringUtil.isUpperCase(twoCapitals));
    assertFalse(StringUtil.isLowerCase(twoCapitals));
    assertFalse(StringUtil.isTitleCase(twoCapitals));

    // A single small letter
    assertFalse(StringUtil.isUpperCase(boldSmallA));
    assertTrue(StringUtil.isLowerCase(boldSmallA));
    assertFalse(StringUtil.isTitleCase(boldSmallA));

    // Capital followed by small letter
    final String capitalThenSmall = boldCapitalA + boldSmallA;
    assertFalse(StringUtil.isUpperCase(capitalThenSmall));
    assertFalse(StringUtil.isLowerCase(capitalThenSmall));
    assertTrue(StringUtil.isTitleCase(capitalThenSmall));

    // Small letter followed by capital
    final String smallThenCapital = boldSmallA + boldCapitalA;
    assertFalse(StringUtil.isUpperCase(smallThenCapital));
    assertFalse(StringUtil.isLowerCase(smallThenCapital));
    assertFalse(StringUtil.isTitleCase(smallThenCapital));
}
/**
 * Checks that a trailing digit does not change how the four case
 * predicates classify a word.
 */
@Test
public void testAlphanumericStringCase() {
    // All letters upper case
    final String allUpper = "MQL5";
    assertTrue(StringUtil.isUpperCase(allUpper));
    assertFalse(StringUtil.isLowerCase(allUpper));
    assertFalse(StringUtil.isTitleCase(allUpper));
    assertFalse(StringUtil.isMixedCase(allUpper));

    // All letters lower case
    final String allLower = "mql5";
    assertFalse(StringUtil.isUpperCase(allLower));
    assertTrue(StringUtil.isLowerCase(allLower));
    assertFalse(StringUtil.isTitleCase(allLower));
    assertFalse(StringUtil.isMixedCase(allLower));

    // First letter upper case, the rest lower case
    final String titled = "Mql5";
    assertFalse(StringUtil.isUpperCase(titled));
    assertFalse(StringUtil.isLowerCase(titled));
    assertTrue(StringUtil.isTitleCase(titled));
    assertFalse(StringUtil.isMixedCase(titled));

    // Upper case letter in the middle
    final String mixed = "mQl5";
    assertFalse(StringUtil.isUpperCase(mixed));
    assertFalse(StringUtil.isLowerCase(mixed));
    assertFalse(StringUtil.isTitleCase(mixed));
    assertTrue(StringUtil.isMixedCase(mixed));
}
/**
 * Checks how the case utilities treat {@code null} and the empty string:
 * {@code null} is expected to raise a {@link NullPointerException}, while
 * the empty string is expected to belong to no case category and to be
 * returned unchanged by {@code toTitleCase}.
 */
@Test
public void testEmptyStringCase() {
    // A typed variable keeps the calls unambiguous when passing null.
    final String nullText = null;

    // The calls below are expected to throw, so their return values are
    // deliberately not asserted: if one returns normally, the test must
    // fail with the explicit message from fail().
    try {
        StringUtil.isUpperCase(nullText);
        fail("Should throw an NPE");
    } catch (NullPointerException ex) {
        // OK
    }
    try {
        StringUtil.isLowerCase(nullText);
        fail("Should throw an NPE");
    } catch (NullPointerException ex) {
        // OK
    }
    try {
        StringUtil.isTitleCase(nullText);
        fail("Should throw an NPE");
    } catch (NullPointerException ex) {
        // OK
    }
    try {
        StringUtil.toTitleCase(nullText, Locale.ENGLISH);
        fail("Should throw an NPE");
    } catch (NullPointerException ex) {
        // OK
    }

    // The empty string has no letters, so no case predicate may accept it.
    final String empty = "";
    assertFalse(StringUtil.isUpperCase(empty));
    assertFalse(StringUtil.isLowerCase(empty));
    assertFalse(StringUtil.isTitleCase(empty));
    assertEquals("", StringUtil.toTitleCase("", Locale.ENGLISH));
}
/**
 * Checks {@code StringUtil.isWhiteSpace} with {@code null}, the empty
 * string, ordinary spaces and several Unicode space characters.
 */
@Test
public void testIsWhiteSpace() {
    // The call is expected to throw, so its return value is not asserted:
    // if it returns normally the test fails with the explicit message.
    try {
        StringUtil.isWhiteSpace(null);
        fail("Should throw an NPE");
    } catch (NullPointerException ex) {
        // OK
    }

    assertFalse(StringUtil.isWhiteSpace(""));
    assertTrue(StringUtil.isWhiteSpace(" "));
    assertFalse(StringUtil.isWhiteSpace(" a "));

    // SPACE (U+0020) + IDEOGRAPHIC SPACE (U+3000)
    assertTrue(StringUtil.isWhiteSpace(" \u3000"));

    // We consider these whitespace but Character.isWhitespace(int) doesn't:
    // NO-BREAK SPACE (U+00A0) + FIGURE SPACE (U+2007) + NARROW NO-BREAK SPACE (U+202F)
    assertTrue(StringUtil.isWhiteSpace("\u00a0\u2007\u202f"));
}
/**
 * Checks which letter patterns {@code StringUtil.isMixedCase} accepts.
 */
@Test
public void testIsMixedCase() {
    // Upper and lower case letters combined in a non-title-case pattern.
    for (String mixed : new String[] { "ABc", "aBc" }) {
        assertTrue(StringUtil.isMixedCase(mixed));
    }

    // A leading capital followed by lower case is title case, not mixed.
    assertFalse(StringUtil.isMixedCase("Abc"));

    // Leading non-letter characters should not affect the result.
    assertTrue(StringUtil.isMixedCase(" {ABc"));
}
/**
 * Checks that a string with no letters at all is rejected by every case
 * predicate.
 */
@Test
public void testNonWordCase() {
    final String punctuation = "{";
    assertFalse(StringUtil.isLowerCase(punctuation));
    assertFalse(StringUtil.isUpperCase(punctuation));
    assertFalse(StringUtil.isTitleCase(punctuation));
    assertFalse(StringUtil.isMixedCase(punctuation));
}
/**
 * Checks {@code StringUtil.toTitleCase} for ASCII words in every starting
 * case, for a digraph with a dedicated title case form, for
 * locale-sensitive casing, and for input with leading or only non-letter
 * characters.
 */
@Test
public void testToTitleCase() {
    Locale locale = Locale.ENGLISH;

    // Each distinct starting case is listed once; all must yield "Abc".
    for (String input : new String[] { "abc", "ABC", "Abc", "aBC" }) {
        assertEquals("Abc", StringUtil.toTitleCase(input, locale));
    }
    assertEquals("A", StringUtil.toTitleCase("a", locale));

    // LATIN SMALL LETTER NJ (U+01CC) -> LATIN CAPITAL LETTER N WITH SMALL LETTER J (U+01CB)
    assertEquals("\u01CB", StringUtil.toTitleCase("\u01CC", locale));

    // LATIN SMALL LETTER I (U+0069) -> LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130) in Turkish
    assertEquals("\u0130jk", StringUtil.toTitleCase("ijk", new Locale("tr")));

    // Non-letters in front
    assertEquals("'Good day, sir.'", StringUtil.toTitleCase("'GOOD DAY, SIR.'", locale));

    // No letters at all: the input is expected back unchanged
    String test = "!@#$%^&*()-=\"\\";
    assertEquals(test, StringUtil.toTitleCase(test, locale));
}
/**
 * Checks that {@code StringUtil.compressSpaces} turns line breaks and tabs
 * into single spaces and drops leading and trailing whitespace.
 */
@Test
public void testCompressSpace() {
    // Spaces, LF and CRLF between words, plus surrounding spaces.
    final String multiLine = " One Two\nThree Four\r\nFive ";
    assertEquals("One Two Three Four Five", StringUtil.compressSpaces(multiLine));

    // A tab between words.
    final String tabbed = "Six\tseven";
    assertEquals("Six seven", StringUtil.compressSpaces(tabbed));
}
/**
 * Checks {@code StringUtil.isValidXMLChar} at code points on both sides of
 * the boundaries between accepted and rejected values.
 */
@Test
public void testIsValidXMLChar() {
    // Code points the method is expected to accept.
    final int[] valid = { 0x09, 0x0A, 0x0D, 0x21, 0xE000, 0x10000 };
    for (int codePoint : valid) {
        assertTrue(StringUtil.isValidXMLChar(codePoint));
    }

    // Code points the method is expected to reject: a control character,
    // a surrogate, U+FFFE, and a value above the Unicode range.
    final int[] invalid = { 0x01, 0xD800, 0xFFFE, 0x110000 };
    for (int codePoint : invalid) {
        assertFalse(StringUtil.isValidXMLChar(codePoint));
    }
}
/**
 * Checks that {@code StringUtil.capitalizeFirst} changes only the first
 * character and leaves the case of the remaining characters alone.
 */
@Test
public void testCapitalizeFirst() {
    Locale locale = Locale.ENGLISH;

    // Each distinct input is listed once; the tail keeps its original case.
    assertEquals("Abc", StringUtil.capitalizeFirst("abc", locale));
    assertEquals("ABC", StringUtil.capitalizeFirst("ABC", locale));
    assertEquals("Abc", StringUtil.capitalizeFirst("Abc", locale));
    assertEquals("AbC", StringUtil.capitalizeFirst("abC", locale));
    assertEquals("A", StringUtil.capitalizeFirst("a", locale));

    // LATIN SMALL LETTER NJ (U+01CC) -> LATIN CAPITAL LETTER N WITH SMALL LETTER J (U+01CB)
    assertEquals("\u01CB", StringUtil.capitalizeFirst("\u01CC", locale));

    // LATIN SMALL LETTER I (U+0069) -> LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130) in Turkish
    assertEquals("\u0130jk", StringUtil.capitalizeFirst("ijk", new Locale("tr")));
}
/**
 * Checks {@code StringUtil.matchCapitalization}: the first argument is
 * expected to be re-cased according to the case pattern of the second, or
 * returned as-is when no pattern applies.
 */
@Test
public void testMatchCapitalization() {
    final Locale english = Locale.ENGLISH;
    final String word = "foo";

    // Null or empty matchTo: the text comes back untouched.
    assertEquals(word, StringUtil.matchCapitalization(word, null, english));
    assertEquals(word, StringUtil.matchCapitalization(word, "", english));

    // matchTo begins with the text itself: the text comes back untouched.
    assertEquals(word, StringUtil.matchCapitalization(word, word + "BAR", english));

    // Title case matchTo.
    assertEquals("Foo", StringUtil.matchCapitalization(word, "Abc", english));
    assertEquals("Foo", StringUtil.matchCapitalization(word, "A", english));

    // Lower case matchTo.
    assertEquals("foo", StringUtil.matchCapitalization("FOO", "lower", english));
    assertEquals("foo", StringUtil.matchCapitalization("fOo", "l", english));

    // Upper case matchTo.
    assertEquals("FOO", StringUtil.matchCapitalization(word, "UPPER", english));
    assertEquals("FOO", StringUtil.matchCapitalization("fOo", "UP", english));
    // A single capital letter is interpreted as title case.
    assertEquals("FOo", StringUtil.matchCapitalization("fOo", "U", english));

    // Mixed case or caseless matchTo: the text comes back untouched.
    assertEquals(word, StringUtil.matchCapitalization(word, "bAzZ", english));
    assertEquals(word, StringUtil.matchCapitalization(word, ".", english));
}
/**
 * Checks that {@code StringUtil.firstN} counts whole code points, so a
 * surrogate pair is never split.
 */
@Test
public void testFirstN() {
    final String boldCapitalA = "\uD835\uDC00"; // MATHEMATICAL BOLD CAPITAL A (U+1D400)
    final String pair = boldCapitalA + boldCapitalA;

    assertTrue(StringUtil.firstN(pair, 0).isEmpty());
    assertEquals(boldCapitalA, StringUtil.firstN(pair, 1));
    // Asking for as many as, or more than, the available code points returns everything.
    assertEquals(pair, StringUtil.firstN(pair, 2));
    assertEquals(pair, StringUtil.firstN(pair, 100));
}
/**
 * Checks that {@code StringUtil.truncate} measures length in code points
 * and that the truncation marker counts towards the requested length.
 */
@Test
public void testTruncateString() {
    // MATHEMATICAL BOLD CAPITAL A (U+1D400) x3
    String test = "\uD835\uDC00\uD835\uDC00\uD835\uDC00";

    // A length of zero is expected to be rejected.
    try {
        StringUtil.truncate(test, 0);
        fail("Should throw an IndexOutOfBoundsException");
    } catch (IndexOutOfBoundsException ex) {
        // OK
    }

    // Length 1 leaves room for the marker only.
    assertEquals(String.valueOf(StringUtil.TRUNCATE_CHAR), StringUtil.truncate(test, 1));
    // Length 2 keeps one whole code point plus the marker.
    assertEquals("\uD835\uDC00" + StringUtil.TRUNCATE_CHAR, StringUtil.truncate(test, 2));
    // The text already fits, so it is returned unchanged.
    assertEquals(test, StringUtil.truncate(test, 3));
    assertEquals(test, StringUtil.truncate(test, 100));
}
/**
 * Checks the expected output of {@code StringUtil.normalizeWidth} for text
 * that mixes ASCII with full-width and half-width forms.
 */
@Test
public void testNormalizeWidth() {
    // Input dominated by full-width and compatibility forms.
    final String wideInput = "Foo 123 " // ASCII
            + "\uFF26\uFF4F\uFF4F\u3000\uFF11\uFF12\uFF13 " // Full-width alphanumerics
            + "\uFF01\uFF1F\uFF08\uFF09 " // Full-width punctuation
            + "\u3371 " // Squared Latin Abbreviations
            + "\u2100 " // Letter-Like Symbols
            + "\u30AC\u30D1\u30AA " // Katakana
            + "\uD55C\uAD6D\uC5B4 " // Full-width Hangul
            + "\u314E\u314F\u3134"; // Full-width Jamo
    final String wideExpected =
            "Foo 123 Foo 123 !?() hPa a/c \u30AC\u30D1\u30AA \uD55C\uAD6D\uC5B4 \u314E\u314F\u3134";
    assertEquals(wideExpected, StringUtil.normalizeWidth(wideInput));

    // Input that also contains half-width Katakana and Jamo.
    final String narrowInput = "\uFF26\uFF4F\uFF4F\u3000\uFF11\uFF12\uFF13 " // Full-width alphanumerics
            + "Foo 123 !?() " // ASCII
            + "\uFF76\uFF9E\uFF8A\uFF9F\uFF75 " // Half-width Katakana
            + "\uFFBE\uFFC2\uFFA4"; // Half-width Jamo
    final String narrowExpected = "Foo 123 Foo 123 !?() \u30AC\u30D1\u30AA \u314E\uFFC2\u3134";
    assertEquals(narrowExpected, StringUtil.normalizeWidth(narrowInput));

    // Full-width letters, digits and punctuation only.
    final String fullWidthAscii = "\uff21\uff22\uff23\uff0e\uff11\uff12\uff13\uff04\uff01";
    assertEquals("ABC.123$!", StringUtil.normalizeWidth(fullWidthAscii));

    // Katakana in full-width and half-width forms, one followed by a
    // combining voiced sound mark, then an ideographic space.
    final String kanaInput = "\u30a2\uff71\u30ac\uff76\u3099\u3000";
    assertEquals("\u30a2\u30a2\u30ac\u30ac ", StringUtil.normalizeWidth(kanaInput));
}
/**
 * Checks that {@code StringUtil.rstrip} removes trailing spaces only,
 * keeps leading spaces and a trailing no-break space, and rejects
 * {@code null}.
 */
@Test
public void testRstrip() {
    assertEquals("", StringUtil.rstrip(""));
    assertEquals("", StringUtil.rstrip(" "));
    assertEquals("ABC", StringUtil.rstrip("ABC"));
    assertEquals("ABC", StringUtil.rstrip("ABC "));
    // Leading whitespace must survive.
    assertEquals(" ABC", StringUtil.rstrip(" ABC "));
    assertEquals("ABC", StringUtil.rstrip("ABC "));
    assertEquals("ABC\u00a0", StringUtil.rstrip("ABC\u00a0")); // U+00A0 NO-BREAK SPACE

    try {
        StringUtil.rstrip(null);
        fail("Should throw an NPE");
    } catch (NullPointerException ex) {
        // Should fail when stripping null string.
    }
}
}