/*
 * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4396708
 * @summary Test URL encoder and decoder on a string that contains
 *          surrogate pairs.
 *
 */

import java.io.*;
import java.net.*;

/*
 * Surrogate pairs are two character Unicode sequences where the first
 * character lies in the range [d800, dbff] and the second character lies
 * in the range [dc00, dfff]. They are used as an escaping mechanism to add
 * 1M more characters to Unicode.
 *
 * Each entry of testStrings is URL-encoded as UTF-8 and compared with the
 * entry at the same index in correctEncodings; the encoded form is then
 * decoded again and must equal the original string.
 */
public class SurrogatePairs {

    /* Inputs: boundary surrogate pairs, alone and next to other characters. */
    static String[] testStrings = {
        "\uD800\uDC00",
        "\uD800\uDFFF",
        "\uDBFF\uDC00",
        "\uDBFF\uDFFF",
        "1\uDBFF\uDC00",
        "@\uDBFF\uDC00",
        "\uDBFF\uDC001",
        "\uDBFF\uDC00@",
        "\u0101\uDBFF\uDC00",
        "\uDBFF\uDC00\u0101"
    };

    /* Expected URL encodings, index-aligned with testStrings. */
    static String[] correctEncodings = {
        "%F0%90%80%80",
        "%F0%90%8F%BF",
        "%F4%8F%B0%80",
        "%F4%8F%BF%BF",
        "1%F4%8F%B0%80",
        "%40%F4%8F%B0%80",
        "%F4%8F%B0%801",
        "%F4%8F%B0%80%40",
        "%C4%81%F4%8F%B0%80",
        "%F4%8F%B0%80%C4%81"
    };

    /*
     * Runs the round-trip check for every test string, pairing it with the
     * expected encoding stored at the same index.
     */
    public static void main(String[] args) throws Exception {
        int index = 0;
        for (String input : testStrings) {
            test(input, correctEncodings[index++]);
        }
    }

    /*
     * Encodes str as UTF-8, fails if the result differs from
     * correctEncoding, then decodes it and fails if the decoded text differs
     * from str. Progress is reported on standard output.
     */
    private static void test(String str, String correctEncoding)
        throws Exception {

        System.out.println("Unicode bytes of test string are: "
                           + getHexBytes(str));

        String encoded = URLEncoder.encode(str, "UTF-8");
        System.out.println("URLEncoding is: " + encoded);

        if (!encoded.equals(correctEncoding)) {
            throw new Exception("The encoding is incorrect!" +
                                " It should be " + correctEncoding);
        }
        System.out.println("The encoding is correct!");

        String decoded = URLDecoder.decode(encoded, "UTF-8");
        System.out.println("Unicode bytes for URLDecoding are: "
                           + getHexBytes(decoded));

        if (!str.equals(decoded)) {
            throw new Exception("The decoded is not equal to the original");
        }
        System.out.println("The decoding is correct");

        System.out.println("---");
    }

    /*
     * Renders each char of s as four lowercase hexadecimal digits followed
     * by a single space, e.g. "d800 dc00 ".
     */
    private static String getHexBytes(String s) throws Exception {
        StringBuilder hex = new StringBuilder();
        for (char unit : s.toCharArray()) {
            // Walk the 16-bit value from its highest nibble to its lowest.
            for (int shift = 12; shift >= 0; shift -= 4) {
                hex.append(Character.forDigit((unit >> shift) & 0x0f, 16));
            }
            hex.append(' ');
        }
        return hex.toString();
    }
}