package wikokit.base.wikipedia.util;
import wikokit.base.wikipedia.util.StringUtilRegular;
import wikokit.base.wikipedia.language.Encodings;
import junit.framework.*;
/**
 * JUnit 3 tests for the static string helpers of
 * {@code wikokit.base.wikipedia.util.StringUtilRegular}.
 *
 * <p>Every test prints the name of the method under test to standard output
 * before running its assertions.
 */
public class StringUtilRegularTest extends TestCase {

    /**
     * Wiki-text fixture shared by the header-related tests. The offsets noted
     * on the right are character indexes into the concatenated string.
     */
    private static final String TEXT_WITH_HEADERS =
            "text before \n" +                      // offsets 0..12
            "\n" +                                  // offset 13 (empty line)
            "===Bibliography===\n" +                // header starts at offset 14
            "* N\n" +                               // offsets 33..36
            "== Links ==\n" +                       // header starts at offset 37
            "[[Category:Musical instruments]]\n";

    /** Index in {@link #TEXT_WITH_HEADERS} at which "===Bibliography===" starts. */
    private static final int BIBLIOGRAPHY_HEADER_POS = 14;

    /** Index in {@link #TEXT_WITH_HEADERS} at which "== Links ==" starts. */
    private static final int LINKS_HEADER_POS = 37;

    /**
     * Number of characters added to a header offset so that the next search
     * starts inside that header line rather than at its first character.
     */
    private static final int STEP_INTO_HEADER = 5;

    /**
     * Creates the test case.
     *
     * @param testName name of the test method to run, passed on to {@link TestCase}
     */
    public StringUtilRegularTest(String testName) {
        super(testName);
    }

    /** No fixture needs to be prepared; all tests work on local or constant data. */
    protected void setUp() throws Exception {
    }

    /** Nothing to release after a test. */
    protected void tearDown() throws Exception {
    }

    /**
     * Builds the suite containing every {@code test*} method of this class.
     *
     * @return the suite for this test class
     */
    public static Test suite() {
        return new TestSuite(StringUtilRegularTest.class);
    }

    /**
     * Test of stripNonWordLetters: the array passed in is expected to be
     * modified in place so that surrounding whitespace characters are gone
     * while an inner hyphen is kept.
     */
    public void testStripNonWordLetters() {
        System.out.println("stripNonWordLetters");

        String[] words = {"\nword1", "\t word-long2\r\n"};
        String[] expected = {"word1", "word-long2"};

        StringUtilRegular.stripNonWordLetters(words);

        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], words[i]);
        }
    }

    /**
     * Test of getLettersTillSpace: leading whitespace is expected to be
     * skipped and the result to end before the next whitespace character.
     */
    public void testGetLettersTillSpace() {
        System.out.println("getLettersTillSpace");

        assertEquals("word1",
                StringUtilRegular.getLettersTillSpace("\nword1 word2"));

        // A hyphen does not terminate the word here.
        assertEquals("word-long2",
                StringUtilRegular.getLettersTillSpace("\t word-long2\r\n hello"));
    }

    /**
     * Test of getLettersTillHyphen: leading whitespace is expected to be
     * skipped and the result to end before the first hyphen.
     */
    public void testGetLettersTillHyphen() {
        System.out.println("getLettersTillHyphen");

        assertEquals("word1",
                StringUtilRegular.getLettersTillHyphen("\nword1-word2"));

        assertEquals("word",
                StringUtilRegular.getLettersTillHyphen("\t word-long2\r\n hello"));
    }

    /**
     * Test of encodeRussianToLatinitsa: Cyrillic text is expected to be
     * transliterated to Latin letters, while characters that are already
     * Latin are expected to stay as they are.
     */
    public void testEncodeRussianToLatinitsa() {
        System.out.println("encodeRussianToLatinitsa");

        // Case 1: the source string is handed to the encoder unchanged.
        String russian = "А потом он аккуpатно заменИл PS на ЗЫ и сохpанил письмо.";
        String expectedLatin = "A potom on akkupatno zamenIl PS na ZY i soxpanil pis'mo.";
        String actual = StringUtilRegular.encodeRussianToLatinitsa(
                russian, Encodings.enc_java_default, Encodings.enc_int_default);
        assertEquals(expectedLatin, actual);

        // Case 2: the source string is first converted with Encodings.FromTo.
        // NOTE(review): case 1 skips this conversion although both cases pass
        // the same encoding arguments - confirm the asymmetry is intentional.
        russian = "В связи с установившейся в системном блоке жарой системный таймер переходит на летнее время.";
        expectedLatin = "V svyazi s ustanovivshejsya v sistemnom bloke zharoj sistemnyj tajmer perexodit na letnee vremya.";
        russian = Encodings.FromTo(russian, Encodings.enc_java_default, Encodings.enc_int_default);
        actual = StringUtilRegular.encodeRussianToLatinitsa(
                russian, Encodings.enc_java_default, Encodings.enc_int_default);
        assertEquals(expectedLatin, actual);
    }

    /**
     * Test of getFirstHeaderPosition: the expected result is the index at
     * which the first header at or after the start offset begins.
     */
    public void testGetFirstHeaderPosition() {
        System.out.println("getFirstHeaderPosition");

        int firstHeader = StringUtilRegular.getFirstHeaderPosition(0, TEXT_WITH_HEADERS);
        assertEquals(BIBLIOGRAPHY_HEADER_POS, firstHeader);

        // Continue from inside the first header so that it is not found again.
        int secondHeader = StringUtilRegular.getFirstHeaderPosition(
                firstHeader + STEP_INTO_HEADER, TEXT_WITH_HEADERS);
        assertEquals(LINKS_HEADER_POS, secondHeader);
    }

    /**
     * Test of getTextTillFirstHeaderPosition: the expected result is the text
     * between the start offset and the next header, or the whole remainder
     * when no header follows. The comparison ignores case.
     */
    public void testGetTextTillFirstHeaderPosition() {
        System.out.println("getTextTillFirstHeaderPosition");

        String beforeFirstHeader =
                StringUtilRegular.getTextTillFirstHeaderPosition(0, TEXT_WITH_HEADERS);
        assertTrue("unexpected text before the first header: <" + beforeFirstHeader + ">",
                beforeFirstHeader.equalsIgnoreCase("text before \n\n"));

        // Start inside the last header: no further header follows.
        String afterLastHeader = StringUtilRegular.getTextTillFirstHeaderPosition(
                LINKS_HEADER_POS + STEP_INTO_HEADER, TEXT_WITH_HEADERS);
        assertTrue("unexpected text after the last header: <" + afterLastHeader + ">",
                afterLastHeader.equalsIgnoreCase("nks ==\n[[Category:Musical instruments]]\n"));
    }

    /**
     * Test of getTextTillFirstHeaderOrEmptyLine: the synonym list is expected
     * to be returned without the trailing line break(s), whether it is
     * followed by an empty line and a header or by a header only.
     * The comparison ignores case.
     */
    public void testGetTextTillFirstHeaderOrEmptyLine() {
        System.out.println("getTextTillFirstHeaderOrEmptyLine");

        String synonyms = "# [[кандия]] (церк.)\n" +
                          "# -\n" +
                          "# -";

        // test 1: an empty line precedes the header
        String withEmptyLine = synonyms.concat("\n\n" +
                                               "====Антонимы====\n");
        String result1 = StringUtilRegular.getTextTillFirstHeaderOrEmptyLine(0, withEmptyLine);
        assertTrue("unexpected text before the empty line: <" + result1 + ">",
                result1.equalsIgnoreCase(synonyms));

        // test 2: the empty line is absent, only the header follows
        String withHeaderOnly = synonyms.concat("\n" +
                                                "====Антонимы====\n");
        String result2 = StringUtilRegular.getTextTillFirstHeaderOrEmptyLine(0, withHeaderOnly);
        assertTrue("unexpected text before the header: <" + result2 + ">",
                result2.equalsIgnoreCase(synonyms));
    }
}