package io.norberg.rut; import org.junit.Test; import java.io.UnsupportedEncodingException; import static com.google.common.net.UrlEscapers.urlPathSegmentEscaper; import static io.norberg.rut.Encoding.decode; import static java.lang.Character.MAX_CODE_POINT; import static java.lang.Character.MAX_SURROGATE; import static java.lang.Character.MIN_CODE_POINT; import static java.lang.Character.MIN_SURROGATE; import static java.lang.Character.toChars; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; import static org.junit.Assert.assertThat; public class EncodingTest { public static final String ONE_BYTE = "$"; public static final String ONE_BYTE_MIN = String.valueOf(toChars(0x0000)); public static final String ONE_BYTE_MAX = String.valueOf(toChars(0x007F)); public static final String ONE_BYTE_ENCODED = "%24"; public static final String ONE_BYTE_MIN_ENCODED = "%00"; public static final String ONE_BYTE_MAX_ENCODED = "%7F"; public static final String ONE_BYTE_ENCODED_OVERLONG2 = "%C0%A4"; public static final String TWO_BYTES = "¢"; public static final String TWO_BYTES_MIN = String.valueOf(toChars(0x0080)); public static final String TWO_BYTES_MAX = String.valueOf(toChars(0x07FF)); // 11000010 10100010 // C2 A2 public static final String TWO_BYTES_ENCODED = "%C2%A2"; public static final String TWO_BYTES_ENCODED_PERCENT_INVALID = "%C2%AG"; // 11000010 10000010 // C2 80 public static final String TWO_BYTES_MIN_ENCODED = "%C2%80"; // 11011111 10111111 // DF BF public static final String TWO_BYTES_MAX_ENCODED = "%DF%BF"; // 11000010 11100010 // C2 E2 public static final String TWO_BYTES_ENCODED_INVALID = "%C2%E2"; // 11100000 10000010 10000000 // E0 9F 80 public static final String TWO_BYTES_ENCODED_OVERLONG3A = "%E0%9F%80"; // 11100000 10000010 10100010 // E0 82 A2 public static final String TWO_BYTES_ENCODED_OVERLONG3B = "%E0%82%A2"; // 11110000 10000000 10000010 10100010 // F0 80 82 A2 public static final String TWO_BYTES_ENCODED_OVERLONG4 = "%F0%80%82%A2"; public static final String THREE_BYTES_A = "€"; public static final String THREE_BYTES_B = String.valueOf(toChars(0x82C)); public static final String THREE_BYTES_MIN = String.valueOf(toChars(0x0800)); public static final String THREE_BYTES_MAX = String.valueOf(toChars(0xFFFF)); // 11100010 10000010 10101100 // E2 82 AC public static final String THREE_BYTES_ENCODED_A = "%E2%82%AC"; public static final String THREE_BYTES_ENCODED_A_PERCENT_INVALID = "%E2%82%AG"; // 11100000 10100000 10000000 // E0 A0 AC public static final String THREE_BYTES_ENCODED_B = "%E0%A0%AC"; // 11100000 10100000 10000000 // E0 A0 80 public static final String THREE_BYTES_MIN_ENCODED = "%E0%A0%80"; // 11101111 10111111 10111111 // EF BF BF public static final String THREE_BYTES_MAX_ENCODED = "%EF%BF%BF"; // 11100010 11000010 10101100 // E2 C2 AC public static final String THREE_BYTES_ENCODED_INVALID1 = "%E2%C2%AC"; // 11100010 10000010 11101100 // E2 82 EC public static final String THREE_BYTES_ENCODED_INVALID2 = "%E2%82%EC"; // 11110000 10000010 10000010 10101100 // F0 82 82 AC public static final String THREE_BYTES_ENCODED_OVERLONG4 = "%F0%82%82%AC"; public static final String FOUR_BYTES = String.valueOf(toChars(0x10348)); public static final String FOUR_BYTES_MIN = String.valueOf(toChars(0x100000)); public static final String FOUR_BYTES_MAX = String.valueOf(toChars(0x10FFFF)); // 11110000 10010000 10001101 10001000 // F0 90 8D 88 public static final String FOUR_BYTES_ENCODED = "%F0%90%8D%88"; public static final String FOUR_BYTES_ENCODED_PERCENT_INVALID = "%F0%90%8D%8G"; // 11110100 10001111 10111111 10111111 // F4 8F BF BF public static final String FOUR_BYTES_MAX_ENCODED = "%F4%8F%BF%BF"; // 11110000 11010000 10001101 10001000 // F0 D0 8D 88 public static final String FOUR_BYTES_ENCODED_INVALID1 = "%F0%D0%8D%88"; // 11110000 10010000 11001101 10001000 // F0 90 CD 88 public static final String FOUR_BYTES_ENCODED_INVALID2 = "%F0%90%CD%88"; // 11110000 10010000 10001101 11001000 // F0 90 8D C8 public static final String FOUR_BYTES_ENCODED_INVALID3 = "%F0%90%8D%C8"; // 11110100 10010000 10000000 10000000 // F4 90 80 80 public static final String FOUR_BYTES_ENCODED_TOO_GREAT_F4 = "%F4%90%80%80"; // 11110101 10000000 10000000 10000000 // F5 80 80 80 public static final String FOUR_BYTES_ENCODED_TOO_GREAT_F5 = "%F5%80%80%80"; @Test public void verifyUninstantiable() { Access.verifyUninstantiable(Encoding.class); } @Test public void testDecode1() throws Exception { assertThat(decode(ONE_BYTE_ENCODED).toString(), is(ONE_BYTE)); assertThat(decode(ONE_BYTE_MIN_ENCODED).toString(), is(ONE_BYTE_MIN)); assertThat(decode(ONE_BYTE_MAX_ENCODED).toString(), is(ONE_BYTE_MAX)); } @Test public void testDecode2() throws Exception { assertThat(decode(TWO_BYTES_ENCODED).toString(), is(TWO_BYTES)); assertThat(decode(TWO_BYTES_MIN_ENCODED).toString(), is(TWO_BYTES_MIN)); assertThat(decode(TWO_BYTES_MAX_ENCODED).toString(), is(TWO_BYTES_MAX)); } @Test public void testDecode3() throws Exception { assertThat(decode(THREE_BYTES_ENCODED_A).toString(), is(THREE_BYTES_A)); assertThat(decode(THREE_BYTES_ENCODED_B).toString(), is(THREE_BYTES_B)); assertThat(decode(THREE_BYTES_MIN_ENCODED).toString(), is(THREE_BYTES_MIN)); assertThat(decode(THREE_BYTES_MAX_ENCODED).toString(), is(THREE_BYTES_MAX)); } @Test public void testDecode4() throws Exception { assertThat(decode(FOUR_BYTES_ENCODED).toString(), is(FOUR_BYTES)); assertThat(decode(FOUR_BYTES_MAX_ENCODED).toString(), is(FOUR_BYTES_MAX)); } @Test public void testDecodeMix() { final String expected = "mixed ascii with " + "1 byte sequences (" + ONE_BYTE + ") and " + "2 byte sequences (" + TWO_BYTES + ") and " + "3 byte sequences (" + THREE_BYTES_A + ") and " + "4 byte sequences (" + FOUR_BYTES + ")"; final String encoded = "mixed ascii with " + "1 byte sequences (" + ONE_BYTE_ENCODED + ") and " + "2 byte sequences (" + TWO_BYTES_ENCODED + ") and " + "3 byte sequences (" + THREE_BYTES_ENCODED_A + ") and " + "4 byte sequences (" + FOUR_BYTES_ENCODED + ")"; assertThat(String.valueOf(decode(encoded)), is(expected)); } @Test public void verifyDecode1OverlongFails() throws Exception { assertThat(decode(ONE_BYTE_ENCODED_OVERLONG2), is(nullValue())); } @Test public void verifyDecode2Overlong3Fails() throws Exception { assertThat(decode(TWO_BYTES_ENCODED_OVERLONG3A), is(nullValue())); assertThat(decode(TWO_BYTES_ENCODED_OVERLONG3B), is(nullValue())); } @Test public void verifyDecode2Overlong4Fails() throws Exception { assertThat(decode(TWO_BYTES_ENCODED_OVERLONG4), is(nullValue())); } @Test public void verifyDecode3OverlongFails() throws Exception { assertThat(decode(THREE_BYTES_ENCODED_OVERLONG4), is(nullValue())); } @Test public void verifyDecode4TooGreatFails() throws Exception { assertThat(decode(FOUR_BYTES_ENCODED_TOO_GREAT_F4), is(nullValue())); assertThat(decode(FOUR_BYTES_ENCODED_TOO_GREAT_F5), is(nullValue())); } @Test public void verifyDecode2InvalidFails() throws Exception { assertThat(decode(TWO_BYTES_ENCODED_INVALID), is(nullValue())); assertThat(decode(TWO_BYTES_ENCODED_PERCENT_INVALID), is(nullValue())); } @Test public void verifyDecode3InvalidFails() throws Exception { assertThat(decode(THREE_BYTES_ENCODED_INVALID1), is(nullValue())); assertThat(decode(THREE_BYTES_ENCODED_INVALID2), is(nullValue())); assertThat(decode(THREE_BYTES_ENCODED_A_PERCENT_INVALID), is(nullValue())); } @Test public void verifyDecode4InvalidFails() throws Exception { assertThat(decode(FOUR_BYTES_ENCODED_INVALID1), is(nullValue())); assertThat(decode(FOUR_BYTES_ENCODED_INVALID2), is(nullValue())); assertThat(decode(FOUR_BYTES_ENCODED_INVALID3), is(nullValue())); assertThat(decode(FOUR_BYTES_ENCODED_PERCENT_INVALID), is(nullValue())); } @Test public void testEntireUnicodeRange() throws UnsupportedEncodingException { for (int i = MIN_CODE_POINT; i < MAX_CODE_POINT; i++) { if (isSurrogate((char) i)) { continue; } final String s = String.valueOf(Character.toChars(i)); final String encoded = urlPathSegmentEscaper().escape(s); final CharSequence decoded = Encoding.decode(encoded); assertThat(decoded.toString(), is(s)); } } public static boolean isSurrogate(char ch) { return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); } }