/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.reflect.Constructor;
import java.lang.reflect.Modifier;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
import org.junit.Test;

/**
 * Unit tests for {@link StringEscapeUtils}.
 * <p>
 * Each escaping flavour (Java, ECMAScript, HTML 4, XML, CSV, JSON) is exercised both through the
 * static {@code String}-returning convenience method and through the matching translator constant
 * writing into a {@link StringWriter}.
 * </p>
 *
 * @version $Id$
 */
public class StringEscapeUtilsTest {

    private final static String FOO = "foo";

    /**
     * Checks that the class has exactly one declared constructor, that this constructor is public,
     * and that the class itself is public and not final.
     */
    @Test
    public void testConstructor() {
        assertNotNull(new StringEscapeUtils());
        final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
        assertEquals(1, cons.length);
        assertTrue(Modifier.isPublic(cons[0].getModifiers()));
        assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
        assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
    }

    // Java
    //--------------------------------------------------------------

    /**
     * Checks Java escaping: {@code null} handling, rejection of a {@code null} writer, and a set of
     * literal, control-character and Unicode inputs.
     *
     * @throws IOException declared by the translator API; not expected here
     */
    @Test
    public void testEscapeJava() throws IOException {
        assertEquals(null, StringEscapeUtils.escapeJava(null));
        try {
            StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }
        try {
            StringEscapeUtils.ESCAPE_JAVA.translate("", null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }

        assertEscapeJava("empty string", "", "");
        assertEscapeJava(FOO, FOO);
        assertEscapeJava("tab", "\\t", "\t");
        assertEscapeJava("backslash", "\\\\", "\\");
        assertEscapeJava("single quote should not be escaped", "'", "'");
        assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
        assertEscapeJava("\\u1234", "\u1234");
        assertEscapeJava("\\u0234", "\u0234");
        assertEscapeJava("\\u00EF", "\u00ef");
        assertEscapeJava("\\u0001", "\u0001");
        assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd");

        assertEscapeJava("He didn't say, \\\"stop!\\\"",
                "He didn't say, \"stop!\"");
        assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
                "This space is non-breaking:\u00a0");
        assertEscapeJava("\\uABCD\\u1234\\u012C",
                "\uABCD\u1234\u012C");
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-421
     */
    @Test
    public void testEscapeJavaWithSlash() {
        final String input = "String with a slash (/) in it";

        final String expected = input;
        final String actual = StringEscapeUtils.escapeJava(input);

        /**
         * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
         * in a Java string.
         */
        assertEquals(expected, actual);
    }

    /**
     * Asserts Java escaping of {@code original} without an extra failure message.
     *
     * @param escaped the expected escaped form
     * @param original the input to escape
     * @throws IOException declared by the translator API; not expected here
     */
    private void assertEscapeJava(final String escaped, final String original) throws IOException {
        assertEscapeJava(null, escaped, original);
    }

    /**
     * Asserts that both {@code StringEscapeUtils.escapeJava(String)} and
     * {@code StringEscapeUtils.ESCAPE_JAVA.translate(CharSequence, Writer)} turn {@code original}
     * into {@code expected}.
     *
     * @param message optional detail appended to the failure message, may be {@code null}
     * @param expected the expected escaped form
     * @param original the input to escape
     * @throws IOException declared by the translator API; not expected here
     */
    private void assertEscapeJava(String message, final String expected, final String original) throws IOException {
        final String converted = StringEscapeUtils.escapeJava(original);
        message = "escapeJava(String) failed" + (message == null ? "" : (": " + message));
        assertEquals(message, expected, converted);

        final StringWriter writer = new StringWriter();
        StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
        assertEquals(expected, writer.toString());
    }

    /**
     * Checks Java unescaping: {@code null} handling, rejection of a {@code null} writer, rejection
     * of a malformed Unicode escape, and a set of control-character and Unicode inputs.
     *
     * @throws IOException declared by the translator API; not expected here
     */
    @Test
    public void testUnescapeJava() throws IOException {
        assertEquals(null, StringEscapeUtils.unescapeJava(null));
        try {
            StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }
        try {
            StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }
        try {
            StringEscapeUtils.unescapeJava("\\u02-3");
            fail();
        } catch (final RuntimeException ex) {
            // expected: the Unicode escape contains a non-hex character
        }

        assertUnescapeJava("", "");
        assertUnescapeJava("test", "test");
        assertUnescapeJava("\ntest\b", "\\ntest\\b");
        assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
        assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
        assertUnescapeJava("", "\\");
        //foo
        assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
        assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
        assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
    }

    /**
     * Asserts Java unescaping of {@code original} without an extra failure message.
     *
     * @param unescaped the expected unescaped form
     * @param original the input to unescape
     * @throws IOException declared by the translator API; not expected here
     */
    private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
        assertUnescapeJava(null, unescaped, original);
    }

    /**
     * Asserts that both {@code StringEscapeUtils.unescapeJava(String)} and
     * {@code StringEscapeUtils.UNESCAPE_JAVA.translate(CharSequence, Writer)} turn {@code original}
     * into {@code unescaped}.
     *
     * @param message optional detail inserted into the failure message, may be {@code null}
     * @param unescaped the expected unescaped form
     * @param original the input to unescape
     * @throws IOException declared by the translator API; not expected here
     */
    private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException {
        final String expected = unescaped;
        final String actual = StringEscapeUtils.unescapeJava(original);

        assertEquals("unescape(String) failed" +
                (message == null ? "" : (": " + message)) +
                ": expected '" + StringEscapeUtils.escapeJava(expected) +
                // we escape this so we can see it in the error message
                "' actual '" + StringEscapeUtils.escapeJava(actual) + "'",
                expected, actual);

        final StringWriter writer = new StringWriter();
        StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
        assertEquals(unescaped, writer.toString());
    }

    // ECMAScript
    //--------------------------------------------------------------

    /**
     * Checks ECMAScript escaping: {@code null} handling, rejection of a {@code null} writer, and
     * escaping of single quotes, double quotes and forward slashes.
     */
    @Test
    public void testEscapeEcmaScript() {
        assertEquals(null, StringEscapeUtils.escapeEcmaScript(null));
        try {
            StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }
        try {
            StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }

        assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
        assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';",
                StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
    }

    // HTML and XML
    //--------------------------------------------------------------

    /**
     * HTML round-trip fixtures. Each row is {@code {message, escaped form, unescaped form}}:
     * escaping column 2 must yield column 1, and unescaping column 1 must yield column 2.
     */
    private static final String[][] HTML_ESCAPES = {
        {"no escaping", "plain text", "plain text"},
        {"no escaping", "plain text", "plain text"},
        {"empty string", "", ""},
        {"null", null, null},
        {"ampersand", "bread &amp; butter", "bread & butter"},
        {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
        {"final character only", "greater than &gt;", "greater than >"},
        {"first character only", "&lt; less than", "< less than"},
        {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
        {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
        {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
    };

    /**
     * Escapes every {@link #HTML_ESCAPES} row with {@code escapeHtml4} and with the
     * {@code ESCAPE_HTML4} translator, and compares against the expected escaped form.
     */
    @Test
    public void testEscapeHtml() {
        for (String[] element : HTML_ESCAPES) {
            final String message = element[0];
            final String expected = element[1];
            final String original = element[2];
            assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original));
            final StringWriter sw = new StringWriter();
            try {
                StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
            } catch (final IOException e) {
                // writing to a StringWriter should never fail; surface it instead of hiding it
                fail("Threw: " + e);
            }
            // a null input writes nothing, so map the empty writer back to null for the comparison
            final String actual = original == null ? null : sw.toString();
            assertEquals(message, expected, actual);
        }
    }

    /**
     * Unescapes every {@link #HTML_ESCAPES} row with {@code unescapeHtml4} and with the
     * {@code UNESCAPE_HTML4} translator, then checks that malformed entities pass through untouched.
     */
    @Test
    public void testUnescapeHtml4() {
        for (String[] element : HTML_ESCAPES) {
            final String message = element[0];
            final String expected = element[2];
            final String original = element[1];
            assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original));

            final StringWriter sw = new StringWriter();
            try {
                StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
            } catch (final IOException e) {
                // writing to a StringWriter should never fail; surface it instead of hiding it
                fail("Threw: " + e);
            }
            // a null input writes nothing, so map the empty writer back to null for the comparison
            final String actual = original == null ? null : sw.toString();
            assertEquals(message, expected, actual);
        }
        // \u00E7 is a cedilla (c with wiggle under)
        // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
        // on some locales
        assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"));

        // incomplete or malformed entities must be left exactly as they are
        assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
        assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
        assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
        assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
    }

    /**
     * Checks that hexadecimal numeric entities are unescaped, first with two fixed samples and then
     * for every pair of adjacent {@code char} values.
     */
    @Test
    public void testUnescapeHexCharsHtml() {
        // Simple easy to grok test
        assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"));
        assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"));
        // Test all Character values:
        for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
            final Character c1 = Character.valueOf(i);
            final Character c2 = Character.valueOf((char)(i+1));
            final String expected = c1.toString() + c2.toString();
            final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";";
            final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";";
            assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2));
        }
    }

    /**
     * Checks that an entity name that is not recognised is left unchanged.
     */
    @Test
    public void testUnescapeUnknownEntity() throws Exception {
        assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
    }

    /**
     * Checks that the HTML 4 entity for the Greek capital letter beta round-trips.
     */
    @Test
    public void testEscapeHtmlVersions() throws Exception {
        assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
        assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));

        // TODO: refine API for escaping/unescaping specific HTML versions
    }

    /**
     * Checks XML escaping and unescaping of the basic entities, numeric entities with leading
     * zeros, the apostrophe entity, and empty and {@code null} inputs, through both the static
     * methods and the translator constants.
     */
    @Test
    public void testEscapeXml() throws Exception {
        assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
        assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));

        assertEquals("XML should not escape >0x7f values",
                "\u00A1", StringEscapeUtils.escapeXml("\u00A1"));
        assertEquals("XML should be able to unescape >0x7f values",
                "\u00A0", StringEscapeUtils.unescapeXml("&#160;"));
        assertEquals("XML should be able to unescape >0x7f values with one leading 0",
                "\u00A0", StringEscapeUtils.unescapeXml("&#0160;"));
        assertEquals("XML should be able to unescape >0x7f values with two leading 0s",
                "\u00A0", StringEscapeUtils.unescapeXml("&#00160;"));
        assertEquals("XML should be able to unescape >0x7f values with three leading 0s",
                "\u00A0", StringEscapeUtils.unescapeXml("&#000160;"));

        assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
        assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
        assertEquals("", StringEscapeUtils.escapeXml(""));
        assertEquals(null, StringEscapeUtils.escapeXml(null));
        assertEquals(null, StringEscapeUtils.unescapeXml(null));

        StringWriter sw = new StringWriter();
        try {
            StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
        } catch (final IOException e) {
            // writing to a StringWriter should never fail; surface it instead of hiding it
            fail("Threw: " + e);
        }
        assertEquals("XML was escaped incorrectly", "&lt;abc&gt;", sw.toString() );

        sw = new StringWriter();
        try {
            StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
        } catch (final IOException e) {
            // writing to a StringWriter should never fail; surface it instead of hiding it
            fail("Threw: " + e);
        }
        assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() );
    }

    /**
     * Tests Supplementary characters.
     * <p>
     * From http://www.w3.org/International/questions/qa-escapes
     * </p>
     * <blockquote>
     * Supplementary characters are those Unicode characters that have code points higher than the characters in
     * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
     * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
     * - you must use the single, code point value for that character. For example, use &amp;&#35;x233B4&#59; rather than
     * &amp;&#35;xD84C&#59;&amp;&#35;xDFB4&#59;.
     * </blockquote>
     * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
     * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
     */
    @Test
    public void testEscapeXmlSupplementaryCharacters() {
        final CharSequenceTranslator escapeXml =
            StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );

        // U+233B4 is 144308 in decimal: one escape for the code point, not one per surrogate
        assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
                escapeXml.translate("\uD84C\uDFB4"));
    }

    /**
     * Checks an XML escaper extended with numeric escapers for the code point ranges that the XML
     * {@code Char} production excludes, while tab, line feed and carriage return pass through.
     */
    @Test
    public void testEscapeXmlAllCharacters() {
        // http://www.w3.org/TR/xml/#charsets says:
        // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
        // excluding the surrogate blocks, FFFE, and FFFF. */
        final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
                .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
                        NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));

        assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
        assertEquals("\t", escapeXml.translate("\t")); // 0x9
        assertEquals("\n", escapeXml.translate("\n")); // 0xA
        assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
        assertEquals("\r", escapeXml.translate("\r")); // 0xD
        assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
        assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
    }

    /**
     * Reverse of {@link #testEscapeXmlSupplementaryCharacters()}: a single numeric entity for a
     * supplementary code point must unescape to its surrogate pair.
     *
     * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
     */
    @Test
    public void testUnescapeXmlSupplementaryCharacters() {
        assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
                StringEscapeUtils.unescapeXml("&#144308;") );
    }

    // Tests issue #38569
    // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569
    /**
     * Checks that an ampersand that does not start an entity is left alone while the real entities
     * around it are still unescaped.
     */
    @Test
    public void testStandaloneAmphersand() {
        assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
        assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
        assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
        assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
    }

    /**
     * Checks that a bare ampersand followed by an ampersand entity unescapes to two ampersands.
     */
    @Test
    public void testLang313() {
        assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
    }

    // CSV
    //--------------------------------------------------------------

    /**
     * Checks {@code escapeCsv(String)}: values containing a comma, line break or double quote are
     * wrapped in double quotes with embedded quotes doubled; other values are returned unchanged.
     */
    @Test
    public void testEscapeCsvString() throws Exception {
        assertEquals("foo.bar",            StringEscapeUtils.escapeCsv("foo.bar"));
        assertEquals("\"foo,bar\"",        StringEscapeUtils.escapeCsv("foo,bar"));
        assertEquals("\"foo\nbar\"",       StringEscapeUtils.escapeCsv("foo\nbar"));
        assertEquals("\"foo\rbar\"",       StringEscapeUtils.escapeCsv("foo\rbar"));
        assertEquals("\"foo\"\"bar\"",     StringEscapeUtils.escapeCsv("foo\"bar"));
        assertEquals("",   StringEscapeUtils.escapeCsv(""));
        assertEquals(null, StringEscapeUtils.escapeCsv(null));
    }

    /**
     * Same cases as {@link #testEscapeCsvString()} through the {@code ESCAPE_CSV} translator; a
     * {@code null} input is expected to write nothing.
     */
    @Test
    public void testEscapeCsvWriter() throws Exception {
        checkCsvEscapeWriter("foo.bar",            "foo.bar");
        checkCsvEscapeWriter("\"foo,bar\"",        "foo,bar");
        checkCsvEscapeWriter("\"foo\nbar\"",       "foo\nbar");
        checkCsvEscapeWriter("\"foo\rbar\"",       "foo\rbar");
        checkCsvEscapeWriter("\"foo\"\"bar\"",     "foo\"bar");
        checkCsvEscapeWriter("", null);
        checkCsvEscapeWriter("", "");
    }

    /**
     * Asserts that {@code ESCAPE_CSV} writes {@code expected} when given {@code value}.
     *
     * @param expected the expected writer content
     * @param value the input to escape, may be {@code null}
     */
    private void checkCsvEscapeWriter(final String expected, final String value) {
        try {
            final StringWriter writer = new StringWriter();
            StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
            assertEquals(expected, writer.toString());
        } catch (final IOException e) {
            fail("Threw: " + e);
        }
    }

    /**
     * Checks {@code unescapeCsv(String)}: enclosing quotes are removed only when the content
     * contains a comma, line break or double quote; otherwise the value is returned unchanged.
     */
    @Test
    public void testUnescapeCsvString() throws Exception {
        assertEquals("foo.bar",          StringEscapeUtils.unescapeCsv("foo.bar"));
        assertEquals("foo,bar",      StringEscapeUtils.unescapeCsv("\"foo,bar\""));
        assertEquals("foo\nbar",     StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
        assertEquals("foo\rbar",     StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
        assertEquals("foo\"bar",   StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
        assertEquals("",   StringEscapeUtils.unescapeCsv(""));
        assertEquals(null, StringEscapeUtils.unescapeCsv(null));

        assertEquals("\"foo.bar\"",          StringEscapeUtils.unescapeCsv("\"foo.bar\""));
    }

    /**
     * Same cases as {@link #testUnescapeCsvString()} through the {@code UNESCAPE_CSV} translator;
     * a {@code null} input is expected to write nothing.
     */
    @Test
    public void testUnescapeCsvWriter() throws Exception {
        checkCsvUnescapeWriter("foo.bar",        "foo.bar");
        checkCsvUnescapeWriter("foo,bar",        "\"foo,bar\"");
        checkCsvUnescapeWriter("foo\nbar",       "\"foo\nbar\"");
        checkCsvUnescapeWriter("foo\rbar",       "\"foo\rbar\"");
        checkCsvUnescapeWriter("foo\"bar",       "\"foo\"\"bar\"");
        checkCsvUnescapeWriter("", null);
        checkCsvUnescapeWriter("", "");

        checkCsvUnescapeWriter("\"foo.bar\"",    "\"foo.bar\"");
    }

    /**
     * Asserts that {@code UNESCAPE_CSV} writes {@code expected} when given {@code value}.
     *
     * @param expected the expected writer content
     * @param value the input to unescape, may be {@code null}
     */
    private void checkCsvUnescapeWriter(final String expected, final String value) {
        try {
            final StringWriter writer = new StringWriter();
            StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
            assertEquals(expected, writer.toString());
        } catch (final IOException e) {
            fail("Threw: " + e);
        }
    }

    // Regression tests
    //--------------------------------------------------------------

    /**
     * Tests // https://issues.apache.org/jira/browse/LANG-480
     *
     * @throws java.io.UnsupportedEncodingException
     */
    @Test
    public void testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingException {
        // this is the utf8 representation of the character:
        // COUNTING ROD UNIT DIGIT THREE
        // in Unicode
        // codepoint: U+1D362
        final byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };

        final String original = new String(data, "UTF8");

        final String escaped = StringEscapeUtils.escapeHtml4( original );
        assertEquals( "High Unicode should not have been escaped", original, escaped);

        final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
        assertEquals( "High Unicode should have been unchanged", original, unescaped);

// TODO: I think this should hold, needs further investigation
//        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
//        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-339
     */
    @Test
    public void testEscapeHiragana() {
        // Some random Japanese Unicode characters
        final String original = "\u304B\u304C\u3068";
        final String escaped = StringEscapeUtils.escapeHtml4(original);
        assertEquals( "Hiragana character Unicode behaviour should not be being escaped by escapeHtml4",
        original, escaped);

        final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );

        assertEquals( "Hiragana character Unicode behaviour has changed - expected no unescaping", escaped, unescaped);
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-708
     *
     * @throws IOException
     *             if an I/O error occurs
     */
    @Test
    public void testLang708() throws IOException {
        // IOUtils.toString(InputStream, String) leaves the stream open, so close it here
        final FileInputStream in = new FileInputStream("src/test/resources/lang-708-input.txt");
        final String input;
        try {
            input = IOUtils.toString(in, "UTF-8");
        } finally {
            in.close();
        }
        final String escaped = StringEscapeUtils.escapeEcmaScript(input);
        // just the end:
        assertTrue(escaped, escaped.endsWith("}]"));
        // a little more:
        assertTrue(escaped, escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"));
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-720
     */
    @Test
    public void testLang720() {
        final String input = new StringBuilder("\ud842\udfb7").append("A").toString();
        final String escaped = StringEscapeUtils.escapeXml(input);
        assertEquals(input, escaped);
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-911
     */
    @Test
    public void testLang911() {
        final String bellsTest = "\ud83d\udc80\ud83d\udd14";
        final String value = StringEscapeUtils.escapeJava(bellsTest);
        final String valueTest = StringEscapeUtils.unescapeJava(value);
        assertEquals(bellsTest, valueTest);
    }

    // JSON
    //--------------------------------------------------------------

    /**
     * Checks JSON escaping: {@code null} handling, rejection of a {@code null} writer, and escaping
     * of double quotes, control characters, backslash and forward slash (single quotes are kept).
     */
    @Test
    public void testEscapeJson() {
        assertEquals(null, StringEscapeUtils.escapeJson(null));
        try {
            StringEscapeUtils.ESCAPE_JSON.translate(null, null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }
        try {
            StringEscapeUtils.ESCAPE_JSON.translate("", null);
            fail();
        } catch (final IOException ex) {
            fail();
        } catch (final IllegalArgumentException ex) {
            // expected: a null writer must be rejected
        }

        assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));

        final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
        final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";

        assertEquals(expected, StringEscapeUtils.escapeJson(input));
    }

}