/* * Copyright (C) 2008 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.common.escape; import com.google.common.annotations.GwtCompatible; import junit.framework.TestCase; /** * Tests for {@link UnicodeEscaper}. * * @author David Beaumont */ @GwtCompatible public class UnicodeEscaperTest extends TestCase { private static final String SMALLEST_SURROGATE = "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE; private static final String LARGEST_SURROGATE = "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE; private static final String TEST_STRING = "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" + SMALLEST_SURROGATE + "0189" + LARGEST_SURROGATE; // Escapes nothing private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() { @Override protected char[] escape(int c) { return null; } }; // Escapes everything except [a-zA-Z0-9] private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() { @Override protected char[] escape(int cp) { return ('a' <= cp && cp <= 'z') || ('A' <= cp && cp <= 'Z') || ('0' <= cp && cp <= '9') ? null : ("[" + String.valueOf(cp) + "]").toCharArray(); } }; public void testNopEscaper() { UnicodeEscaper e = NOP_ESCAPER; assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING)); } public void testSimpleEscaper() { UnicodeEscaper e = SIMPLE_ESCAPER; String expected = "[0]abyz[128][256][2048][4096]ABYZ[65535]" + "[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]" + "0189[" + Character.MAX_CODE_POINT + "]"; assertEquals(expected, escapeAsString(e, TEST_STRING)); } public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer StringBuffer input = new StringBuffer(); StringBuffer expected = new StringBuffer(); for (int i = 256; i < 1024; i++) { input.append((char) i); expected.append("[" + i + "]"); } assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString())); } public void testSurrogatePairs() { UnicodeEscaper e = SIMPLE_ESCAPER; // Build up a range of surrogate pair characters to test final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT; final int max = Character.MAX_CODE_POINT; final int range = max - min; final int s1 = min + (1 * range) / 4; final int s2 = min + (2 * range) / 4; final int s3 = min + (3 * range) / 4; final char[] dst = new char[12]; // Put surrogate pairs at odd indices so they can be split easily dst[0] = 'x'; Character.toChars(min, dst, 1); Character.toChars(s1, dst, 3); Character.toChars(s2, dst, 5); Character.toChars(s3, dst, 7); Character.toChars(max, dst, 9); dst[11] = 'x'; String test = new String(dst); // Get the expected result string String expected = "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x"; assertEquals(expected, escapeAsString(e, test)); } public void testTrailingHighSurrogate() { String test = "abc" + Character.MIN_HIGH_SURROGATE; try { escapeAsString(NOP_ESCAPER, test); fail("Trailing high surrogate should cause exception"); } catch (IllegalArgumentException expected) { // Pass } try { escapeAsString(SIMPLE_ESCAPER, test); fail("Trailing high surrogate should cause exception"); } catch (IllegalArgumentException expected) { // Pass } } public void testNullInput() { UnicodeEscaper e = SIMPLE_ESCAPER; try { e.escape((String) null); fail("Null string should cause exception"); } catch (NullPointerException expected) { // Pass } } public void testBadStrings() { UnicodeEscaper e = SIMPLE_ESCAPER; String[] BAD_STRINGS = { String.valueOf(Character.MIN_LOW_SURROGATE), Character.MIN_LOW_SURROGATE + "xyz", "abc" + Character.MIN_LOW_SURROGATE, "abc" + Character.MIN_LOW_SURROGATE + "xyz", String.valueOf(Character.MAX_LOW_SURROGATE), Character.MAX_LOW_SURROGATE + "xyz", "abc" + Character.MAX_LOW_SURROGATE, "abc" + Character.MAX_LOW_SURROGATE + "xyz", }; for (String s : BAD_STRINGS) { try { escapeAsString(e, s); fail("Isolated low surrogate should cause exception [" + s + "]"); } catch (IllegalArgumentException expected) { // Pass } } } public void testFalsePositivesForNextEscapedIndex() { UnicodeEscaper e = new UnicodeEscaper() { // Canonical escaper method that only escapes lower case ASCII letters. @Override protected char[] escape(int cp) { return ('a' <= cp && cp <= 'z') ? new char[] { Character.toUpperCase((char) cp) } : null; } // Inefficient implementation that defines all letters as escapable. @Override protected int nextEscapeIndex(CharSequence csq, int index, int end) { while (index < end && !Character.isLetter(csq.charAt(index))) { index++; } return index; } }; assertEquals("\0HELLO \uD800\uDC00 WORLD!\n", e.escape("\0HeLLo \uD800\uDC00 WorlD!\n")); } public void testCodePointAt_IndexOutOfBoundsException() { try { UnicodeEscaper.codePointAt("Testing...", 4, 2); fail(); } catch (IndexOutOfBoundsException expected) { } } private static String escapeAsString(Escaper e, String s) { return e.escape(s); } }