package net.varkhan.base.conversion.character;

import junit.framework.TestCase;

import java.io.ByteArrayInputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;

/**
 * <b>Unit tests for {@link UTF8Decoder}</b>.
 * <p/>
 * Each test encodes a reference string to UTF-8 bytes with the JDK, decodes those
 * bytes through the {@code UTF8Decoder.decode(...)} overloads (byte array, byte array
 * range, {@link java.io.InputStream} and {@link ByteBuffer}), and checks that the
 * result equals the reference string.
 *
 * @author varkhan
 * @date 11/23/13
 * @time 4:23 PM
 */
public class UTF8DecoderTest extends TestCase {

    /** JDK UTF-8 charset, looked up once and used to produce the reference bytes in {@link #testDecode()}. */
    private static final Charset JDK_UTF_8 = Charset.forName("UTF-8");

    /**
     * Decodes two short strings (one pure ASCII, one containing a two-byte character)
     * through every {@code decode} overload exercised by this class.
     */
    public void testDecode() throws Exception {
        UTF8Decoder<Object> dec = new UTF8Decoder<Object>();
        assertEquals("Foo bar baz", dec.decode("Foo bar baz".getBytes(JDK_UTF_8), null));

        byte[] buf = "Foo bar $$þ".getBytes(JDK_UTF_8);
        assertEquals("Foo bar $$þ", dec.decode(buf, 0, buf.length, null));
        assertEquals("Foo bar $$þ", dec.decode(new ByteArrayInputStream(buf), null));
        assertEquals("Foo bar $$þ", dec.decode(ByteBuffer.wrap(buf), null));
    }

    /**
     * Round-trips a set of non-ASCII samples: each string is encoded with
     * {@link #encode_native(String)} and decoded back from a byte array, an input
     * stream and a byte buffer.
     */
    public void testDecode2() throws Exception {
        String[] ss = {
                "دبي",
                "الشرقيه",
                "aköy",
                "zığ",
                "büyük",
                "Çor",
                "niğ",
                "一",
                "你好",
                "龵",
                "ホ",
                "࿊",
                "ſt",
                "⣿",
                "꜕",
                "\uE425>"
        };
        UTF8Decoder<Object> dec = new UTF8Decoder<Object>();
        for (String s : ss) {
            byte[] buf = encode_native(s);
            // A fresh stream / buffer is created for every decode call because reading consumes them.
            assertEquals(
                    mismatch("byte[]", s, dec.decode(buf, null)),
                    s, dec.decode(buf, null));
            assertEquals(
                    mismatch("InputStream", s, dec.decode(new ByteArrayInputStream(buf), null)),
                    s, dec.decode(new ByteArrayInputStream(buf), null));
            assertEquals(
                    mismatch("ByteBuffer", s, dec.decode(ByteBuffer.wrap(buf), null)),
                    s, dec.decode(ByteBuffer.wrap(buf), null));
        }
    }

    /**
     * Builds the assertion message for a round-trip check.
     *
     * @param source   the kind of input the decoder read from (named in the message)
     * @param expected the original string
     * @param actual   what the decoder produced
     * @return a multi-line description of the comparison
     */
    private static String mismatch(String source, String expected, Object actual) {
        return "decode(encode_native()) from " + source + " \"" + expected + "\" "
                + "\n\texpected: " + expected
                + "\n\tactual:   " + actual
                + "\n";
    }

    /**
     * Encodes a string with a {@link CharsetEncoder} obtained from
     * {@code UTF8Decoder.UTF_8}, to provide reference bytes for the decoder.
     *
     * @param str the string to encode
     * @return the encoded bytes, trimmed to the number of bytes actually written
     * @throws AssertionError if the encoder reports a character coding error
     */
    public static byte[] encode_native(String str) {
        CharsetEncoder ce = UTF8Decoder.UTF_8.newEncoder();
        char[] chars = str.toCharArray();
        CharBuffer cb = CharBuffer.wrap(chars, 0, chars.length);
        // Worst-case output size, so the encoder should never overflow the buffer.
        byte[] bytes = new byte[(int) (chars.length * ce.maxBytesPerChar())];
        ByteBuffer bb = ByteBuffer.wrap(bytes);
        try {
            CoderResult cr = ce.encode(cb, bb, true);
            if (!cr.isUnderflow()) cr.throwException();
            cr = ce.flush(bb);
            if (!cr.isUnderflow()) cr.throwException();
        } catch (CharacterCodingException x) {
            // Assuming UTF8Decoder.UTF_8 is a standard java.nio.charset.Charset, a freshly
            // created encoder REPORTS malformed input (it does not substitute), so this is
            // reachable for strings such as an unpaired surrogate. Surface it with the
            // offending input and the original cause.
            AssertionError failure = new AssertionError("Cannot encode test string \"" + str + "\"");
            failure.initCause(x);
            throw failure;
        }
        return (bb.position() >= bytes.length) ? bytes : Arrays.copyOf(bytes, bb.position());
    }
}