/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jackrabbit.webdav.util; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.ByteArrayOutputStream; import java.io.UnsupportedEncodingException; import java.util.BitSet; /** * <code>EncodeUtil</code> provides helper methods for URL encoding and decoding * (copied from jcr-commons jackrabbit.util.Text). * * @see <a href="https://issues.apache.org/jira/browse/JCR-2897">JCR-2897</a>. */ public final class EncodeUtil { /** * logger instance */ private static final Logger log = LoggerFactory.getLogger(EncodeUtil.class); /** * hextable used for {@link #escape(String, char, boolean)} */ public static final char[] hexTable = "0123456789abcdef".toCharArray(); /** * The list of characters that are not encoded by the <code>escape()</code> * and <code>unescape()</code> METHODS. They contains the characters as * defined 'unreserved' in section 2.3 of the RFC 2396 'URI generic syntax': * <p> * <pre> * unreserved = alphanum | mark * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" * </pre> */ private static BitSet URISave; /** * Same as {@link #URISave} but also contains the '/' */ private static BitSet URISaveEx; static { URISave = new BitSet(256); int i; for (i = 'a'; i <= 'z'; i++) { URISave.set(i); } for (i = 'A'; i <= 'Z'; i++) { URISave.set(i); } for (i = '0'; i <= '9'; i++) { URISave.set(i); } URISave.set('-'); URISave.set('_'); URISave.set('.'); URISave.set('!'); URISave.set('~'); URISave.set('*'); URISave.set('\''); URISave.set('('); URISave.set(')'); URISaveEx = (BitSet) URISave.clone(); URISaveEx.set('/'); } /** * Does a URL encoding of the <code>string</code>. The characters that * don't need encoding are those defined 'unreserved' in section 2.3 of * the 'URI generic syntax' RFC 2396. * * @param string the string to encode * @return the escaped string * @throws NullPointerException if <code>string</code> is <code>null</code>. */ public static String escape(String string) { return escape(string, '%', false); } /** * Does a URL encoding of the <code>path</code>. The characters that * don't need encoding are those defined 'unreserved' in section 2.3 of * the 'URI generic syntax' RFC 2396. In contrast to the * {@link #escape(String)} method, not the entire path string is escaped, * but every individual part (i.e. the slashes are not escaped). * * @param path the path to encode * @return the escaped path * @throws NullPointerException if <code>path</code> is <code>null</code>. */ public static String escapePath(String path) { return escape(path, '%', true); } /** * Does an URL encoding of the <code>string</code> using the * <code>escape</code> character. The characters that don't need encoding * are those defined 'unreserved' in section 2.3 of the 'URI generic syntax' * RFC 2396, but without the escape character. If <code>isPath</code> is * <code>true</code>, additionally the slash '/' is ignored, too. * * @param string the string to encode. * @param escape the escape character. * @param isPath if <code>true</code>, the string is treated as path * @return the escaped string * @throws NullPointerException if <code>string</code> is <code>null</code>. */ private static String escape(String string, char escape, boolean isPath) { try { BitSet validChars = isPath ? URISaveEx : URISave; byte[] bytes = string.getBytes("utf-8"); StringBuffer out = new StringBuffer(bytes.length); for (byte aByte : bytes) { int c = aByte & 0xff; if (validChars.get(c) && c != escape) { out.append((char) c); } else { out.append(escape); out.append(hexTable[(c >> 4) & 0x0f]); out.append(hexTable[(c) & 0x0f]); } } return out.toString(); } catch (UnsupportedEncodingException e) { throw new InternalError(e.toString()); } } /** * Does a URL decoding of the <code>string</code>. Please note that in * opposite to the {@link java.net.URLDecoder} it does not transform the + * into spaces. * * @param string the string to decode * @return the decoded string * @throws NullPointerException if <code>string</code> is <code>null</code>. * @throws ArrayIndexOutOfBoundsException if not enough character follow an * escape character * @throws IllegalArgumentException if the 2 characters following the escape * character do not represent a hex-number. */ public static String unescape(String string) { return unescape(string, '%'); } /** * Does a URL decoding of the <code>string</code> using the * <code>escape</code> character. Please note that in opposite to the * {@link java.net.URLDecoder} it does not transform the + into spaces. * * @param string the string to decode * @param escape the escape character * @return the decoded string * @throws NullPointerException if <code>string</code> is <code>null</code>. * @throws IllegalArgumentException if the 2 characters following the escape * character do not represent a hex-number * or if not enough characters follow an * escape character */ private static String unescape(String string, char escape) { try { byte[] utf8 = string.getBytes("utf-8"); // Check whether escape occurs at invalid position if ((utf8.length >= 1 && utf8[utf8.length - 1] == escape) || (utf8.length >= 2 && utf8[utf8.length - 2] == escape)) { throw new IllegalArgumentException("Premature end of escape sequence at end of input"); } ByteArrayOutputStream out = new ByteArrayOutputStream(utf8.length); for (int k = 0; k < utf8.length; k++) { byte b = utf8[k]; if (b == escape) { out.write((decodeDigit(utf8[++k]) << 4) + decodeDigit(utf8[++k])); } else { out.write(b); } } return new String(out.toByteArray(), "utf-8"); } catch (UnsupportedEncodingException e) { throw new InternalError(e.toString()); } } private static byte decodeDigit(byte b) { if (b >= 0x30 && b <= 0x39) { return (byte) (b - 0x30); } else if (b >= 0x41 && b <= 0x46) { return (byte) (b - 0x37); } else if (b >= 0x61 && b <= 0x66) { return (byte) (b - 0x57); } else { throw new IllegalArgumentException("Escape sequence is not hexadecimal: " + (char)b); } } /** * Private constructor: avoid instantiation. */ private EncodeUtil() { } }