/** * Copyright 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.waveprotocol.wave.model.waveref; import org.waveprotocol.wave.model.id.WaveId; import org.waveprotocol.wave.model.id.WaveletId; /** * Implementation of an escaping scheme for encoding and decoding waverefs into * URI path segments and URI query strings * * @author meade@google.com <Edwina Mead> */ public class WaverefEncoder { /** Represents the different escapers available for use. */ private enum EscaperType { PATH, QUERY; } /** * We need this interface because there is no common library that works in * both GWT and on server side java code that can percent encoding. * * GWT (com.google.gwt.http.client.URL) and std java * (c.g.common.base.PercentEscaper) */ public interface PercentEncoderDecoder { /** * Returns a string where all characters are encoded by converting it into * its UTF-8 encoding and then encoding each of the resulting bytes as a * %xx hexadecimal escape sequence. * Safe characters should be -_.!~*'()@:$&,;=+ (ALPHA) (DIGIT) * * @param decodedValue value to be encoded. The behaviour is unspecified if * this is not valid UTF-16. * @return The encoded value */ public String pathEncode(String decodedValue); /** * Returns a string where all characters are encoded by converting it into * its UTF-8 encoding and then encoding each of the resulting bytes as a * %xx hexadecimal escape sequence. * Safe characters should be -_.!~*'()@:$ ,; /?:+ (ALPHA) (DIGIT) * * @param decodedValue value to be encoded. The behaviour is unspecified if * this is not valid UTF-16. * @return The encoded value */ public String queryEncode(String decodedValue); /** * Returns a string where all percent encoded sequences have been converted back * to their original character representations. The charset is UTF-8. * Note, '+' should not be decoded as a space like in URL. * * @param encodedValue to be decoded. * @return The decoded value */ public String decode(String encodedValue); } private final PercentEncoderDecoder encoderDecoder; /** * Creates a waveref encoder. * * @param decoder delegate for %-encoding */ public WaverefEncoder(PercentEncoderDecoder decoder) { this.encoderDecoder = decoder; } /** * Converts a waveref object to a URI path segment for use in a permalink to a * wave, for example: "example.com/w+abcd/~/conv+root/b+1234 * * @param ref the waveref to get converted to a uri path * @return the escaped path string */ public String encodeToUriPathSegment(WaveRef ref) { return encode(ref, EscaperType.PATH); } /** * Converts a WaveRef object to a URI query string to be used in an url * * @param ref the waveref to get converted to a URI query string * @return the escaped URI query string */ public String encodeToUriQueryString(WaveRef ref) { return encode(ref, EscaperType.QUERY); } /** * Percent escapes the given string to be usable in an URI path segment. eg. * the last part of http://wave.google.com/waveref/google.com/w+1234 * Safe non-alphanumeric characters are "-_.!~*'()@:$&,;=+" * * @param str The string to be escaped * @return the percent escaped string */ public String encodeToUriPathSegment(String str) { return encode(str, EscaperType.PATH); } /** * Percent escapes the given string to to be usable in an URI query string. * eg. the last part of * http://wave.google.com/?google.com/w+1234 * Safe non-alphanumeric characters are "-_.!~*'()@:$ ,; /?:+" * * @param str The string to be escaped * @return the percent escaped string */ public String encodeToUriQueryString(String str) { return encode(str, EscaperType.QUERY); } /** * Converts a waveref object to a URI path segment for use in a permalink to a * wave, or to a URI query string. * * @param ref the waveref to get converted to a uri path or uri query string * @param escaperType the type of escaping to be used, PATH or QUERY * @return the percent escaped string */ private String encode(WaveRef ref, EscaperType escaperType) { StringBuilder result = new StringBuilder(encode(ref.getWaveId().getDomain(), escaperType)); result.append("/").append(encode(ref.getWaveId().getId(), escaperType)); if (ref.hasWaveletId()) { if(ref.getWaveletId().getDomain().equals(ref.getWaveId().getDomain())) { result.append("/~"); } else { result.append("/").append(encode(ref.getWaveletId().getDomain(), escaperType)); } result.append("/").append(encode(ref.getWaveletId().getId(), escaperType)); } if (ref.hasDocumentId()) { result.append("/").append(encode(ref.getDocumentId(), escaperType)); } return result.toString(); } /** * Percent escapes the given wave domain or wave ID to a string usable in an * URI path segment. eg. the last part of * http://wave.google.com/waveref/google.com/w+1234 Safe non-alphanumeric for * path segments are "-_.!~*'()@:$&,;=+" and for query strings, they are * "-_.!~*'()@:$ ,; /?:+" * * @param str The string to be escaped * @param escaperType the type of escaping to be used, PATH or QUERY * @return the percent escaped string */ private String encode(String str, EscaperType escaperType) { if (escaperType == EscaperType.PATH) { return encoderDecoder.pathEncode(str); } else { return encoderDecoder.queryEncode(str); } } /** * Takes a path segment which includes the domain and wave ID,and optionally * the wavelet domain + ID and the blip ID, and converts it to a waveref * object. Further tokens after 5 are silently ignored. Examples of valid URI * path segments are: example.com/w+abcd example.com/w+abcd/~/conv+root * example.com/w+abcd/~/conv+root/b+45kg * * @param path The path containing the domain and wave ID. * @return The corresponding WaveRef object, or null if the path was invalid. * @throws InvalidWaveRefException If the path contains less than 2 tokens or 3 tokens. */ public WaveRef decodeWaveRefFromPath(String path) throws InvalidWaveRefException { String[] tokens = path.split("/"); if (tokens.length < 2 || tokens.length == 3) { throw new InvalidWaveRefException(path, "Invalid number of tokens in path given to decodeWaveRefFromPath"); } if (tokens[0].length() == 0 || tokens[1].length() == 0) { throw new InvalidWaveRefException(path, "The wave domain and the" + "wave Id must not be empty."); } String waveDomain = decode(tokens[0].toLowerCase()); String waveIdStr = decode(tokens[1]); WaveId waveId = null; try { waveId = WaveId.of(waveDomain, waveIdStr); } catch (IllegalArgumentException e) { throw new InvalidWaveRefException(path, "Invalid WaveID:" + e.getMessage()); } if (tokens.length == 2) { return WaveRef.of(waveId); } if (tokens[2].length() == 0 || tokens[3].length() == 0) { throw new InvalidWaveRefException(path, "The wavelet domain and the" + "wavelet Id must not be empty."); } String waveletDomain = decode(tokens[2]); if (waveletDomain.equals("~")) { waveletDomain = waveDomain; } String waveletIdStr = decode(tokens[3]); WaveletId waveletId = null; try { waveletId = WaveletId.of(waveletDomain, waveletIdStr); } catch (IllegalArgumentException e) { throw new InvalidWaveRefException(path, "Invalid WaveletID:" + e.getMessage()); } if (tokens.length == 4) { return WaveRef.of(waveId, waveletId); } else { String documentId = tokens[4]; // When there is a specification for document IDs, validate here. return WaveRef.of(waveId, waveletId, documentId); } } /** * Percent-decodes a US-ASCII string into a Unicode string. * * @param string a percent-encoded US-ASCII string * @return the decoded string */ public String decode(String string) { return encoderDecoder.decode(string); } }