/*
* EncodingsTest.java
* JUnit based test
*/
package wikokit.base.wikipedia.language;
//import wikipedia.language.Encodings;
import wikokit.base.wikipedia.language.Encodings;
import junit.framework.*;
/*import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Set;*/
import java.util.Map;
public class EncodingsTest extends TestCase {
public EncodingsTest(String testName) {
super(testName);
}
protected void setUp() throws Exception {
}
protected void tearDown() throws Exception {
}
public void testGetEncodings() {
System.out.println("getEncodings");
Map expResult = null;
Map result = Encodings.getEncodings();
for(Object o:result.keySet()) {
//System.out.println(o.toString());
}
}
public void testFromTo() {
System.out.println("FromTo");
long t_start, t_end;
float t_work;
t_start = System.currentTimeMillis();
String text = "text";
for(int i=0; i<300000; i++) {
String encode_from = "UTF8";
String encode_to = "ISO8859_1";
text = Encodings.FromTo(text, encode_from, encode_to);
text = Encodings.FromTo(text, encode_to, encode_from);
}
t_end = System.currentTimeMillis();
t_work = (t_end - t_start)/1000f; // in sec
System.out.println("FromTo() total time: " + t_work + "sec.");
}
/*
public void testFromToFast() {
System.out.println("FromToFast");
long t_start, t_end;
float t_work;
t_start = System.currentTimeMillis();
String text = "text";
for(int i=0; i<300000; i++) {
EncodingType encode_from = EncodingType.get("UTF8");
EncodingType encode_to = EncodingType.get("ISO8859_1");
text = Encodings.FromToFast(text, encode_from, encode_to);
text = Encodings.FromToFast(text, encode_to, encode_from);
}
t_end = System.currentTimeMillis();
t_work = (t_end - t_start)/1000f; // in sec
System.out.println("FromToFast() total time: " + t_work + "sec.");
}*/
}
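/*
 * NOTE: everything below is disabled scratch code kept for reference only.
 * It sits outside the class, refers to names that are not defined in this
 * file (rsDB, availcs) and declares "aa" twice, so it cannot be re-enabled
 * as-is.
 *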
static void dumpEncodings(String db_str,String str_enc) {
String sText;
System.out.println(" ****** Start ****** " );
System.out.println(" * * " );
System.out.println("rsDB.getBytes "+str_enc+" :" + db_str);
//sText = Encodings.FromTo(db_str,"ISO8859_1","UTF8"); System.out.println("ISO8859_1 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"Cp1252","UTF8"); System.out.println("Cp1252 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"Cp850","UTF8"); System.out.println("Cp850 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"Cp855","UTF8"); System.out.println("Cp855 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"Cp852","UTF8"); System.out.println("Cp852 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"WINDOWS-1252","UTF8"); System.out.println("WINDOWS-1252 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"Cp866","UTF8"); System.out.println("Cp866 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"windows-1251","UTF8"); System.out.println("windows-1251 to UTF8 : " + sText);
//sText = Encodings.FromTo(db_str,"koi8r","UTF8"); System.out.println("koi8r to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"cp866","UTF8"); System.out.println("cp866 to UTF8 : " + sText);
//sText = Encodings.FromTo(db_str,"latin1","UTF8"); System.out.println("latin1 to UTF8 : " + sText);
//sText = Encodings.FromTo(db_str,"latin2","UTF8"); System.out.println("latin2 to UTF8 : " + sText);
System.out.println(" * * " );
sText = Encodings.FromTo(db_str,"UTF8","ISO8859_1"); System.out.println("UTF8 to ISO8859_1 : " + sText);
sText = Encodings.FromTo(db_str,"Cp1251","UTF8"); System.out.println("Cp1251 to UTF8 : " + sText);
sText = Encodings.FromTo(db_str,"UTF8","Cp1251"); System.out.println("UTF8 to Cp1251 : " + sText);
sText = Encodings.FromTo(db_str,"Cp1251","ISO8859_1"); System.out.println("Cp1251 to ISO8859_1: " + sText);
sText = Encodings.FromTo(db_str,"ISO8859_1","Cp1251"); System.out.println("ISO8859_1 to Cp1251: " + sText);
sText = Encodings.FromTo(db_str,"UTF8","Cp1252"); System.out.println("UTF8 to Cp1252 : " + sText);
sText = Encodings.FromTo(db_str,"ISO8859_1","Cp1252"); System.out.println("ISO8859_1 to Cp1252: " + sText);
sText = Encodings.FromTo(db_str,"Cp1252","ISO8859_1"); System.out.println("Cp1252 to ISO8859_1: " + sText);
sText = Encodings.FromTo(db_str,"Cp1251","Cp1252"); System.out.println("Cp1251 to Cp1252 : " + sText);
sText = Encodings.FromTo(db_str,"Cp1252","Cp1251"); System.out.println("Cp1252 to Cp1251 : " + sText);
System.out.println(" * * " );
System.out.println(" ****** End ****** " );
}
byte[] aa;
String sText1;
Encodings e = new Encodings();
byte[] aa = rsDB.getBytes( "Text" );
if ( ( aa == null ) || ( aa.length == 0 ) ) sText = null;
else
{
String db_str = Encodings.bytesTo( aa, e.GetDBEnc());
sText = e.EncodeFromDB(db_str);
};
aa = rsDB.getBytes( "Text" );
if ( ( aa == null ) || ( aa.length == 0 ) ) sText = null;
else sText = new String( aa, "UTF-8" ); //
aa = null;
sText = rsDB.getString( "Text" );
aa = sText.getBytes( "UTF-8" );
if ( ( aa == null ) || ( aa.length == 0 ) ) sText1 = null;
else sText1 = new String( aa, "UTF-8" );
aa = null;
System.out.println( sText ); // + " " + sText.length()
System.out.println( sText1 );
// sText = new String( rsDB.getBytes( "Text" ), "Cp1251" );
String db_str;
String str_enc;
//sText = rsDB.getString( "Text" );
//Encodings e = new Encodings();
//String db_str = Encodings.bytesTo(rsDB.getBytes("Text"), e.GetDBEnc());
//db_str = Encodings.bytesTo(rsDB.getBytes("Text"), "ISO8859_1");
//db_str = rsDB.getString("Text");
//db_str = Encodings.bytesTo(rsDB.getBytes("Text"), "UTF8");
//sText = e.EncodeFromDB(db_str);
db_str = rsDB.getString("Text");
if ( null != db_str) {
System.out.println("");
System.out.println(" ****** Start ************************************************ " );
System.out.println(" * * " );
for (Object o:availcs.keySet()) {
String so = (String)o;
if(0 == so.compareToIgnoreCase("ISO-2022-CN") ||
0 == so.compareToIgnoreCase("x-JISAutoDetect")) {
continue;
}
db_str = Encodings.bytesTo(rsDB.getBytes("Text"), so);
String db_str2 = Encodings.FromTo(db_str, so, "UTF8");
System.out.println(o + ": " + db_str + ": " + db_str2);
}
str_enc = "String";
dumpEncodings(db_str, str_enc);
str_enc = "UTF8";
db_str = Encodings.bytesTo(rsDB.getBytes("Text"), str_enc);
dumpEncodings(db_str, str_enc);
str_enc = "ISO8859_1";
db_str = Encodings.bytesTo(rsDB.getBytes("Text"), str_enc);
dumpEncodings(db_str, str_enc);
str_enc = "cp866";
db_str = Encodings.bytesTo(rsDB.getBytes("Text"), str_enc);
dumpEncodings(db_str, str_enc);
str_enc = "Cp1251";
db_str = Encodings.bytesTo(rsDB.getBytes("Text"), str_enc);
dumpEncodings(db_str, str_enc);
str_enc = "Cp1252";
db_str = Encodings.bytesTo(rsDB.getBytes("Text"), str_enc);
dumpEncodings(db_str, str_enc);
}
*/