package net.varkhan.base.conversion.character;
import junit.framework.TestCase;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;
/**
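* <b>Unit tests for {@link UTF8Encoder}</b>.
* <p/>
* Encodes sample strings to byte arrays, output streams and byte buffers, and
* checks the results by decoding them back or by comparing them against a
* {@link CharsetEncoder} reference.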
*
* @author varkhan
* @date 11/23/13
* @time 4:23 PM
*/
public class UTF8EncoderTest extends TestCase {

    /** Charset used to decode the encoder's output back into strings. */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Encodes two short strings through each of the three {@code encode} overloads
     * (byte array, output stream, byte buffer) and checks both the reported byte
     * count and the decoded round-trip.
     *
     * @throws Exception if the encoder under test fails
     */
    public void testEncode() throws Exception {
        UTF8Encoder<Object> enc = new UTF8Encoder<Object>();

        // Byte-array variant
        assertEquals("Foo bar baz", new String(enc.encode("Foo bar baz", null), UTF8));
        assertEquals("Foo bar $$þ", new String(enc.encode("Foo bar $$\u00fe", null), UTF8));

        // Stream variant: 10 single-byte characters plus U+00FE (2 bytes) = 12 bytes
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        assertEquals(12, enc.encode("Foo bar $$\u00fe", out, null));
        assertEquals("Foo bar $$þ", new String(out.toByteArray(), UTF8));

        // Buffer variant
        ByteBuffer buf = ByteBuffer.allocate(50);
        assertEquals(12, enc.encode("Foo bar $$\u00fe", buf, null));
        assertEquals("Foo bar $$þ", written(buf));
    }

    /**
     * Encodes a set of non-ASCII strings and compares every {@code encode} overload,
     * as well as {@code length}, against the bytes produced by {@link #encode_native(String)}.
     *
     * @throws Exception if the encoder under test fails
     */
    public void testEncode2() throws Exception {
        String[] ss = { "دبي", "الشرقيه",
                        "aköy", "zığ", "büyük", "Çor", "niğ",
                        "一", "你好", "龵", "ホ", "࿊",
                        "ſt", "⣿", "꜕", "\uE425>"
        };
        UTF8Encoder<Object> enc = new UTF8Encoder<Object>();
        for (int i = 0; i < ss.length; i++) {
            String s = ss[i];
            byte[] expected = encode_native(s);
            assertEquals("encode_native().length<>length(): \""+s+"\"", expected.length, enc.length(s, null));

            // Encode once and reuse the result for both the comparison and the failure message
            byte[] actual = enc.encode(s, null);
            String detail = "encode_native(decode()) \""+s+"\" "+
                            "\n\t"+Arrays.toString(expected)+
                            "\n\t"+Arrays.toString(actual)+
                            "\n";
            assertArrayEquals(detail, expected, actual);

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            assertEquals(detail, expected.length, enc.encode(s, out, null));
            assertEquals(s, new String(out.toByteArray(), UTF8));

            ByteBuffer bby = ByteBuffer.allocate(50);
            assertEquals(detail, expected.length, enc.encode(s, bby, null));
            assertEquals(s, written(bby));
        }
    }

    /**
     * Decodes the bytes written so far into an array-backed buffer.
     * <p/>
     * The third argument of {@code new String(byte[], int, int, Charset)} is a length,
     * not an end index, so it is {@code position()} alone; the array offset only
     * shifts the starting point.
     *
     * @param buf an array-backed buffer that was filled starting at position 0
     * @return the bytes in {@code [0, position())} decoded as UTF-8
     */
    private static String written(ByteBuffer buf) {
        return new String(buf.array(), buf.arrayOffset(), buf.position(), UTF8);
    }

    /**
     * Fails with {@code message} when the two byte arrays differ, reporting their
     * {@link Arrays#toString(byte[])} forms as the expected and actual values.
     *
     * @param message  the failure message
     * @param expected the reference bytes
     * @param actual   the bytes under test
     */
    private void assertArrayEquals(String message, byte[] expected, byte[] actual) {
        if (!Arrays.equals(expected, actual)) {
            assertEquals(message, Arrays.toString(expected), Arrays.toString(actual));
        }
    }

    /**
     * Encodes a string with a {@link CharsetEncoder} obtained from
     * {@code UTF8Decoder.UTF_8}, to serve as the reference result.
     *
     * @param str the string to encode
     * @return a byte array holding exactly the encoded bytes
     * @throws Error if the encoder reports a {@link CharacterCodingException}
     */
    public static byte[] encode_native(String str) {
        CharsetEncoder ce = UTF8Decoder.UTF_8.newEncoder();
        char[] chars = str.toCharArray();
        CharBuffer cb = CharBuffer.wrap(chars, 0, chars.length);
        // Worst-case sizing: every char taking the encoder's maximum byte count
        byte[] bytes = new byte[(int) (chars.length * ce.maxBytesPerChar())];
        ByteBuffer bb = ByteBuffer.wrap(bytes);
        try {
            CoderResult cr = ce.encode(cb, bb, true);
            if (!cr.isUnderflow()) cr.throwException();
            cr = ce.flush(bb);
            if (!cr.isUnderflow()) cr.throwException();
        } catch (CharacterCodingException x) {
            // NOTE(review): a CharsetEncoder reports malformed input by default, so this
            // is presumably reachable for strings with unpaired surrogates; none of the
            // fixtures used in this class contain any.
            throw new Error(x);
        }
        // Trim to the bytes actually written, unless the buffer was filled completely
        return (bb.position() >= bytes.length) ? bytes : Arrays.copyOf(bytes, bb.position());
    }
}