/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.j2objc.nio.charset; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; import junit.framework.TestCase; import org.junit.Assert; /** * Test encoding and decoding of J2ObjC's charset implementations. * * @author Keith Stanger */ public class CharsetTest extends TestCase { private void assertCorrectDecoding(String expected, byte[] bytes, String charsetName) throws IOException { Charset cs = Charset.forName(charsetName); CharsetDecoder decoder = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); assertEquals(expected, decoder.decode(ByteBuffer.wrap(bytes)).toString()); assertEquals(expected, cs.decode(ByteBuffer.wrap(bytes)).toString()); assertEquals(expected, new String(bytes, charsetName)); assertEquals(expected, new String(bytes, cs)); } public void testDecoding() throws IOException { // UTF-8 with some invalid bytes. byte[] invalidUtf8 = { 91, 92, -1, -40, -1, -32, 1, 16, 74, 0, 70, -27, -101, 73, 70, -28, -72, -83, -27, -101 }; assertCorrectDecoding( "[\\\ufffd\ufffd\ufffd\ufffd\u0001\u0010J\0F\ufffdIF中\ufffd", invalidUtf8, "UTF-8"); // UTF-16 with different byte order marks. assertCorrectDecoding("abc", new byte[] { -2, -1, 0, 97, 0, 98, 0, 99 }, "UTF-16"); assertCorrectDecoding("abc", new byte[] { -1, -2, 97, 0, 98, 0, 99, 0 }, "UTF-16"); assertCorrectDecoding("abc", new byte[] { 0, 97, 0, 98, 0, 99 }, "UTF-16"); // UTF-16 with explicit endianness. assertCorrectDecoding("abc", new byte[] { 0, 97, 0, 98, 0, 99 }, "UTF-16BE"); assertCorrectDecoding("\ufeffabc", new byte[] { -2, -1, 0, 97, 0, 98, 0, 99 }, "UTF-16BE"); assertCorrectDecoding("abc", new byte[] { 97, 0, 98, 0, 99, 0 }, "UTF-16LE"); assertCorrectDecoding("\ufeffabc", new byte[] { -1, -2, 97, 0, 98, 0, 99, 0 }, "UTF-16LE"); // UTF-32 assertCorrectDecoding("abc", new byte[] { 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99 }, "UTF-32"); assertCorrectDecoding("abc", new byte[] { 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99 }, "UTF-32BE"); assertCorrectDecoding("abc", new byte[] { 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0 }, "UTF-32LE"); // Other encodings assertCorrectDecoding("abc", new byte[] { 97, 98, 99 }, "US-ASCII"); assertCorrectDecoding("abc", new byte[] { 97, 98, 99 }, "ISO-8859-1"); assertCorrectDecoding("abc", new byte[] { 97, 98, 99 }, "ISO-8859-2"); assertCorrectDecoding("日本", new byte[] { -58, -4, -53, -36 }, "EUC-JP"); assertCorrectDecoding("日本", new byte[] { -109, -6, -106, 123 }, "SHIFT_JIS"); assertCorrectDecoding( "日本", new byte[] { 27, 36, 66, 70, 124, 75, 92, 27, 40, 66 }, "ISO-2022-JP"); assertCorrectDecoding("öߍ", new byte[] { -10, -33, -115 }, "WINDOWS-1250"); assertCorrectDecoding("фЭЖ", new byte[] { -12, -35, -58 }, "WINDOWS-1251"); assertCorrectDecoding("žºé", new byte[] { -98, -70, -23 }, "WINDOWS-1252"); assertCorrectDecoding("ΔΣΨ", new byte[] { -60, -45, -40 }, "WINDOWS-1253"); assertCorrectDecoding("Ğ¿ÿ", new byte[] { -48, -65, -1 }, "WINDOWS-1254"); assertCorrectDecoding("√ˇà", new byte[] { -61, -1, -120 }, "X-MACROMAN"); } private void assertCorrectEncoding(byte[] expected, String input, String charsetName) throws IOException { Charset cs = Charset.forName(charsetName); CharsetEncoder encoder = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); ByteBuffer bb = encoder.encode(CharBuffer.wrap(input.toCharArray())); byte[] result = new byte[bb.remaining()]; bb.get(result); Assert.assertArrayEquals(expected, result); bb = cs.encode(CharBuffer.wrap(input.toCharArray())); result = new byte[bb.remaining()]; bb.get(result); Assert.assertArrayEquals(expected, result); Assert.assertArrayEquals(expected, input.getBytes(charsetName)); Assert.assertArrayEquals(expected, input.getBytes(cs)); } public void testEncoding() throws IOException { // UTF-16 assertCorrectEncoding(new byte[] { -2, -1, 0, 97, 0, 98, 0, 99 }, "abc", "UTF-16"); assertCorrectEncoding(new byte[] { 0, 97, 0, 98, 0, 99 }, "abc", "UTF-16BE"); assertCorrectEncoding(new byte[] { 97, 0, 98, 0, 99, 0 }, "abc", "UTF-16LE"); // UTF-32 assertCorrectEncoding(new byte[] { 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99 }, "abc", "UTF-32"); assertCorrectEncoding(new byte[] { 0, 0, 0, 97, 0, 0, 0, 98, 0, 0, 0, 99 }, "abc", "UTF-32BE"); assertCorrectEncoding(new byte[] { 97, 0, 0, 0, 98, 0, 0, 0, 99, 0, 0, 0 }, "abc", "UTF-32LE"); // Other encodings assertCorrectEncoding(new byte[] { 97, 98, 99 }, "abc", "US-ASCII"); assertCorrectEncoding(new byte[] { 97, 98, 99 }, "abc", "ISO-8859-1"); assertCorrectEncoding(new byte[] { 97, 98, 99 }, "abc", "ISO-8859-2"); assertCorrectEncoding(new byte[] { -58, -4, -53, -36 }, "日本", "EUC-JP"); assertCorrectEncoding(new byte[] { -109, -6, -106, 123 }, "日本", "SHIFT_JIS"); assertCorrectEncoding( new byte[] { 27, 36, 66, 70, 124, 75, 92, 27, 40, 66 }, "日本", "ISO-2022-JP"); assertCorrectEncoding(new byte[] { -10, -33, -115 }, "öߍ", "WINDOWS-1250"); assertCorrectEncoding(new byte[] { -12, -35, -58 }, "фЭЖ", "WINDOWS-1251"); assertCorrectEncoding(new byte[] { -98, -70, -23 }, "žºé", "WINDOWS-1252"); assertCorrectEncoding(new byte[] { -60, -45, -40 }, "ΔΣΨ", "WINDOWS-1253"); assertCorrectEncoding(new byte[] { -48, -65, -1 }, "Ğ¿ÿ", "WINDOWS-1254"); assertCorrectEncoding(new byte[] { -61, -1, -120 }, "√ˇà", "X-MACROMAN"); // Unmappable character assertCorrectEncoding(new byte[] { 97, 98, 63, 99, 100 }, "ab\uD7C5cd", "ISO-8859-1"); } }