/** * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.waveprotocol.wave.model.id; import java.util.HashSet; import java.util.Set; /** * This class is able to percent escape path components of * WaveletId URI, WaveId URI or WaveName URI. * * This class encode/decode strings in the same way as the "segment/segment-nz" * component in "path-absolute" defined in * http://tools.ietf.org/html/rfc3986#section-3.3 * * path-absolute have the following syntax * * path-absolute = "/" [ segment-nz *( "/" segment ) ] * segment = *pchar * segment-nz = 1*pchar * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * pct-encoded = "%" HEXDIG HEXDIG * * This means the following are NOT escaped * * ":" / "@" / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" / * ALPHA / DIGIT / "-" / "." / "_" / "~" * * */ public class URIEncoderDecoder { /** * Use this exception when the encoding is incorrect during encoding/decoding. * * */ public static class EncodingException extends Exception { public EncodingException(String message) { super(message); } public EncodingException(Throwable ex) { super(ex); } public EncodingException(String message, Throwable ex) { super(message, ex); } } /** * We need this interface because there is no common library that works in both GWT * and on server side java code that can percent encoding. * * The encoding to used in percent escape is UTF-8, see http://tools.ietf.org/html/rfc3986. * Java strings are UTF-16. So it's not trivial to rewrite it again here. Instead * we have to inject the percent encoding library at run time. * * GWT (com.google.gwt.http.client.URL) and std java (java.net.URLEncoder) * * * */ public interface PercentEncoderDecoder { /** * Returns a string where all characters are encoded by converting it into * its UTF-8 encoding and then encoding each of the resulting bytes as a * %xx hexadecimal escape sequence. * * The encoding of the following chars are optional * * ":" / "@" / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" / * ALPHA / DIGIT / "-" / "." / "_" / "~" * * @param decodedValue value to be encoded. The behaviour is unspecified if * this is not valid UTF-16. * @return The encoded value * * @throws EncodingException if encoding fails. e.g. unable to a find an appropriate * UTF-8 encoder in the system. */ public String encode(String decodedValue) throws EncodingException; /** * Returns a string where all percent encoded sequences have been converted back * to their original character representations. The charset is UTF-8. * Note, '+' should not be decoded as a space like in URL. * * @param encodedValue to be decoded. * @return The decoded value * * @throws EncodingException if decoding fails. e.g. the hex values following percent in * encodedValue cannot be interpreted as valid UTF-8 or if a percent is not followed * by a hex value */ public String decode(String encodedValue) throws EncodingException; } private static final Set<Character> NOT_ESCAPED = new HashSet<Character>(); static { for (char c = 'a'; c <= 'z'; c++) { NOT_ESCAPED.add(c); } for (char c = 'A'; c <= 'Z'; c++) { NOT_ESCAPED.add(c); } for (char c = '0'; c <= '9'; c++) { NOT_ESCAPED.add(c); } String symbols = ":@!$&'()*+,;=-._~"; for (int i = 0; i < symbols.length(); i++) { NOT_ESCAPED.add(symbols.charAt(i)); } } private final PercentEncoderDecoder percentEncoder; public URIEncoderDecoder(PercentEncoderDecoder percentEncoder) { this.percentEncoder = percentEncoder; } /** * Percent escapes the given string. * @param decodedValue is the value that needs to be percent escaped to satisfy the * requirement for "segment/segment-nz" above. The behaviour is unspecified if * this is not valid UTF-16. * @return The percent escaped value. * * @throws EncodingException if encoding fails. e.g. unable to a find an appropriate * UTF-8 encoder in the system. */ public String encode(String decodedValue) throws EncodingException { StringBuilder out = new StringBuilder(decodedValue.length()); for (int i = 0; i < decodedValue.length();) { char c = decodedValue.charAt(i); if (NOT_ESCAPED.contains(c)) { out.append(c); i++; } else { int j = i; // convert to external encoding before hex conversion do { i++; } while (i < decodedValue.length() && !NOT_ESCAPED.contains((decodedValue.charAt(i)))); out.append(percentEncoder.encode(decodedValue.substring(j, i))); } } return out.toString(); } /** * @param encodedValue this the percent escaped "segment/segment-nz" value. * @return the decoded value. * * @throws EncodingException if decoding fails. e.g. the hex values following percent in * encodedValue cannot be interpreted as valid UTF-8 or if a percent is not followed * by a hex value. */ public String decode(String encodedValue) throws EncodingException { return percentEncoder.decode(encodedValue); } }