// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.gdata.util.httputil; import com.google.common.annotations.VisibleForTesting; import com.google.gdata.util.common.base.PercentEscaper; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.BitSet; import java.util.logging.Level; import java.util.logging.Logger; /** * This class has been <b>deprecated</b>; use {@link * com.google.gdata.util.common.base.CharEscapers#uriEscaper()}, * {@link com.google.gdata.util.common.base.CharEscapers#cppUriEscaper()} or create your * own custom {@link com.google.gdata.util.common.base.PercentEscaper}. * * <p>Almost every use of FastURLEncoder can now be replaced with an instance of * the PercentEscaper class, which is much faster. * * <p>In most cases it should be possible to use the static instances available * from {@link com.google.gdata.util.common.base.CharEscapers} but it is also possible to * create your own escaper with custom behaviour. * * <p>See <a href="https://docs.google.com/a/google.com/View?docID=ahmsnsb8b5_85dwj83whg"> * Deprecating FastURLEncoder</a> for more information. * * <p>Note that the new uriEscaper only escapes using UTF-8 encoding and while * no examples of other encodings were found when preparing this class for * deprecation, it's possible that some instance were missed. If you have a * valid reason to escape URIs via an encoding other than UTF-8 please let * the java-libraries-team know. * * <p>FastURLEncoder is intended as a replacement for the slow and inefficient * java.net.URLEncoder. There are a few differences though: * <ul> * <li> URLEncoder.encode(String) uses the platform's default encoding * while FastURLEncoder.encode(String) always uses UTF-8. The default * encoding is unpredictable and so it shouldn't be used anyway. * <li> FastURLEncoder allocates much less memory. In my tests I escaped * 81735 bytes of data 20 bytes at a time. URLEncoder allocated over * 200 MB! FastURLEncoder allocated much less (probably about 500 kB). * <li> FastURLEncoder is over 30 times as fast. * <li> FastURLEncoder (optionally) lets you specify which octets should and * shouldn't be escaped and also whether spaces should be escaped as "+" or * "%20". * </ul> * * <p>It is possible that URLEncoder is doing really complicated stuff for * a reason and that I just don't understand why. If you are unsure of * FastURLEncoder just call FastURLEncoder.setVerifyAgainstJava(true). This * will run both versions and verify that the outputs are the same. * Of course this will be slow but it is useful for testing. I wouldn't * be surprised if the two differ for non-latin1, non-utf-8 encodings. * * <p>FastURLEncoder requires jdk 1.5. * * @see java.net.URLEncoder * */ public class FastURLEncoder { private static boolean verifyAgainstJava = false; private FastURLEncoder() { } /** * Set this to 'true' if you are not certain that FastURLEncoder is * going to do the right thing for you and want to test for a while. * Set to 'false' if you want the speed and memory benefits of * FastURLEncoder. If this is set to 'true' and FastURLEncoder disagrees * with URLEncoder then FastURLEncoder will log a * java.util.logging.Level.SEVERE message and return the value provided * by URLEncoder. */ @VisibleForTesting static void setVerifyAgainstJava(boolean shouldVerify) { verifyAgainstJava = shouldVerify; } /** * @return 'true' if we are going to verify all results against URLEncoder. */ @VisibleForTesting static boolean getVerifyAgainstJava() { return verifyAgainstJava; } /** * URL-escapes s by encoding it with the specified character encoding, and * then escaping all octets not included in safeOctets. * * @param s String to encode. * @param encoding character encoding to use (e.g., "UTF-8") * @param safeOctets set of octets that should not be escaped. * @param plusForSpace whether octet 0x20, i.e. "space", should be encoded as * a plus sign rather than "%20". Note that this parameter is effectively * ignored if 0x20 is in safeOctets. * * @return the encoded version of {@code s}. Will return {@code s} * itself if no encoding is necessary. * * @throws UnsupportedEncodingException if {@code encoding} is not supported. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()} * or create an instance of {@link com.google.gdata.util.common.base.PercentEscaper}. * See {@link FastURLEncoder} for more details. */ @Deprecated public static String encode(final String s, final String encoding, BitSet safeOctets, boolean plusForSpace) throws UnsupportedEncodingException { StringBuilder out = new StringBuilder(s.length() * 2); boolean needsEncoding; try { needsEncoding = encode(s, encoding, safeOctets, plusForSpace, out); } catch (UnsupportedEncodingException e) { throw e; } catch (IOException e) { throw new AssertionError(e); } if (needsEncoding) { return out.toString(); } else { return s; } } /** * URL-escapes s by encoding it with the specified character encoding, * escaping all octets not included in safeOctets, and then outputting * the result to an Appendable. * * @param s String to encode. * @param encoding character encoding to use (e.g., "UTF-8") * @param safeOctets set of octets that should not be escaped. * @param plusForSpace whether octet 0x20, i.e. "space", should be encoded as * a plus sign rather than "%20". Note that this parameter is effectively * ignored if 0x20 is in safeOctets. * @param out the Appendable destination for the encoded string. * * @return true if {@code s} did need escaping, false otherwise. In * other words, this returns false only if {@code s} was output to * {@code out} verbatim. * * @throws UnsupportedEncodingException if {@code encoding} is not supported. * @throws IOException if {@code out} does so when appended to. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()} * or create an instance of {@link com.google.gdata.util.common.base.PercentEscaper}. * See {@link FastURLEncoder} for more details. */ @Deprecated public static boolean encode(final String s, final String encoding, BitSet safeOctets, boolean plusForSpace, Appendable out) throws UnsupportedEncodingException, IOException { byte[] data = s.getBytes(encoding); boolean containsSpace = false; int outputLength = 0; for (int i = 0; i < data.length; i++) { int c = data[i]; if (c < 0) c += 256; // convert from [-128, 127] to [0, 255] if (safeOctets.get(c)) { out.append((char)c); outputLength += 1; } else if (plusForSpace && (c == ' ')) { containsSpace = true; out.append('+'); outputLength += 1; } else { out.append('%'); out.append(HEX_DIGITS[c >> 4]); out.append(HEX_DIGITS[c & 0xf]); outputLength += 3; } } return containsSpace || (outputLength != s.length()); } /** * This should be a direct replacement for java.net.URLEncoder.encode(). * @see java.net.URLEncoder#encode(String, String) * @param s String to encode. * @param encoding character encoding to use (e.g., "UTF-8") * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()}. * See {@link FastURLEncoder} for more details. */ @Deprecated public static String encode(String s, String encoding) throws UnsupportedEncodingException { String result = encode(s, encoding, DEFAULT_SAFE_OCTETS, true); if (verifyAgainstJava) { String jresult = URLEncoder.encode(s, encoding); if (!jresult.equals(result)) { Logger.getLogger(FastURLEncoder.class.getName()). log(Level.SEVERE, "FastURLEncoder does not match java. Java: '" + jresult + "' FastURLEncoder: '" + result + "'"); return jresult; } } return result; } /** * This should be a direct replacement for java.net.URLEncoder.encode(), * but appends its output to an Appendable. * * @see java.net.URLEncoder#encode(String, String) * @param s String to encode. * @param encoding character encoding to use (e.g., "UTF-8") * @param out the Appendable destination for the encoded string. * * @throws UnsupportedEncodingException if {@code encoding} is not supported. * @throws IOException if {@code out} does so when appended to. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()}. * See {@link FastURLEncoder} for more details. */ @Deprecated public static void encode(String s, String encoding, Appendable out) throws UnsupportedEncodingException, IOException { /* * Note that this method never compares its result with Java's * encoding; it does not not respect the verifyAgainstJava value */ encode(s, encoding, DEFAULT_SAFE_OCTETS, true, out); } /** * Shortcut for encode(s, "UTF-8"). * This is very similiar to java.net.URLEncoder.encode() except that it * uses UTF-8 instead of the platform's default encoding. * @see java.net.URLEncoder#encode(String) * @param s String to encode. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()}. */ @Deprecated public static String encode(String s) { try { return encode(s, "UTF-8"); } catch (UnsupportedEncodingException e) { throw new AssertionError(e); } } /** * Shortcut for encode(s, "UTF-8", out). * This is very similiar to java.net.URLEncoder.encode() except that it * uses UTF-8 instead of the platform's default encoding. * @see java.net.URLEncoder#encode(String) * @param s String to encode. * @param out the Appendable destination for the encoded string. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()}. * See {@link FastURLEncoder} for more details. */ @Deprecated public static void encode(String s, Appendable out) throws IOException { try { /* * Note that this method never compares its result with Java's * encoding; it does not not respect the verifyAgainstJava value */ encode(s, "UTF-8", out); } catch (UnsupportedEncodingException e) { throw new AssertionError(e); } } /** * Shortcut for encode(s, "UTF-8"). * This is very similiar to java.net.URLEncoder.encode() except that it * uses UTF-8 instead of the platform's default encoding. * @see java.net.URLEncoder#encode(String) * @param s String to encode. * @param safeOctets set of octets that should not be escaped. * @param plusForSpace whether octet 0x20, i.e., "space", should be encoded as * a plus sign rather than "%20". Note that this parameter is effectively * ignored if 0x20 is in safeOctets. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()}. * or create an instance of {@link com.google.gdata.util.common.base.PercentEscaper}. * See {@link FastURLEncoder} for more details. */ @Deprecated public static String encode(String s, BitSet safeOctets, boolean plusForSpace) { try { return encode(s, "UTF-8", safeOctets, plusForSpace); } catch (UnsupportedEncodingException e) { throw new AssertionError(e); } } /** java.net.URLEncoder uses upper-case hex digits so we should too. */ private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; /** * These octets all go directly into the URL, all others are escaped. */ private static final BitSet DEFAULT_SAFE_OCTETS = new BitSet(256); static { // These characters are specified as unreservered in RFC 2396: // "-", "_", ".", "!", "~", "*", "'", "(", ")", // "0".."9", "A".."Z", "a".."z" // But wait... Java also escapes !, ~, ', (, and ) // I'm only going to include -, _, ., and * to be consistent with java for (int i = '0'; i <= '9'; i++) DEFAULT_SAFE_OCTETS.set(i); for (int i = 'A'; i <= 'Z'; i++) DEFAULT_SAFE_OCTETS.set(i); for (int i = 'a'; i <= 'z'; i++) DEFAULT_SAFE_OCTETS.set(i); DEFAULT_SAFE_OCTETS.set('-'); DEFAULT_SAFE_OCTETS.set('_'); DEFAULT_SAFE_OCTETS.set('.'); DEFAULT_SAFE_OCTETS.set('*'); } /** * These octets mimic the ones escaped by the C++ webutil/url URL class -- * the kGoogle1Escape set. * To produce the same escaping as C++, use this BitSet with the plusForSpace * option. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#cppUriEscaper()} */ @Deprecated public static final BitSet CPLUSPLUS_COMPAT_SAFE_OCTETS = new BitSet(256); static { CPLUSPLUS_COMPAT_SAFE_OCTETS.set('!'); CPLUSPLUS_COMPAT_SAFE_OCTETS.set(')'); CPLUSPLUS_COMPAT_SAFE_OCTETS.set('('); CPLUSPLUS_COMPAT_SAFE_OCTETS.set('*'); CPLUSPLUS_COMPAT_SAFE_OCTETS.set(','); CPLUSPLUS_COMPAT_SAFE_OCTETS.set('-'); CPLUSPLUS_COMPAT_SAFE_OCTETS.set('.'); CPLUSPLUS_COMPAT_SAFE_OCTETS.set('/'); for (int i = '0'; i <= '9'; i++) CPLUSPLUS_COMPAT_SAFE_OCTETS.set(i); CPLUSPLUS_COMPAT_SAFE_OCTETS.set(':'); for (int i = 'A'; i <= 'Z'; i++) CPLUSPLUS_COMPAT_SAFE_OCTETS.set(i); CPLUSPLUS_COMPAT_SAFE_OCTETS.set('_'); for (int i = 'a'; i <= 'z'; i++) CPLUSPLUS_COMPAT_SAFE_OCTETS.set(i); CPLUSPLUS_COMPAT_SAFE_OCTETS.set('~'); } /** * Instead of retrieving this set to add your own safe characters, simply * provide your additional safe characters to the * {@link PercentEscaper#PercentEscaper(String, boolean)} constructor. * If you don't need to add your own safe characters, just use * {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()}. * * @return a BitSet suitable for passing to * {@link #encode(String,String,BitSet,boolean)} or * {@link #encode(String,BitSet,boolean)}. It defaults to containing the * octets that would not be escaped by * {@link java.net.URLEncoder#encode(String)}. Callers can edit the * result for specialized purposes. * * @deprecated Use {@link com.google.gdata.util.common.base.CharEscapers#uriEscaper()}. * or create an instance of {@link com.google.gdata.util.common.base.PercentEscaper}. * See {@link FastURLEncoder} for more details. */ @Deprecated public static BitSet createSafeOctetBitSet() { return (BitSet) DEFAULT_SAFE_OCTETS.clone(); } }