/* Copyright (c) 2008 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.gdata.util.common.base;
import static com.google.gdata.util.common.base.Preconditions.checkNotNull;
import java.io.IOException;
/**
* An object that converts literal text into a format safe for inclusion in a
* particular context (such as an XML document). Typically (but not always), the
* inverse process of "unescaping" the text is performed automatically by the
* relevant parser.
*
* <p>For example, an XML escaper would convert the literal string {@code
* "Foo<Bar>"} into {@code "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from
* being confused with an XML tag. When the resulting XML document is parsed,
* the parser API will return this text as the original literal string {@code
* "Foo<Bar>"}.
*
* <p>A {@code CharEscaper} instance is required to be stateless, and safe when
* used concurrently by multiple threads.
*
* <p>Several popular escapers are defined as constants in the class {@link
* CharEscapers}. To create your own escapers, use {@link
* CharEscaperBuilder}, or extend this class and implement the {@link
* #escape(char)} method.
*
*
*/
public abstract class CharEscaper implements Escaper {

  /**
   * Returns the escaped form of a given literal string.
   *
   * <p>The string is scanned with {@link #escape(char)}; if no character needs
   * escaping the very same {@code String} instance is returned, otherwise the
   * work is handed to {@link #escapeSlow(String, int)} starting at the first
   * character that needs escaping.
   *
   * @param string the literal string to be escaped
   * @return the escaped form of {@code string}
   * @throws NullPointerException if {@code string} is null
   */
  public String escape(String string) {
    // Inlineable fast-path loop which hands off to escapeSlow() only if needed.
    // The length() call is what enforces the documented NullPointerException.
    int length = string.length();
    for (int index = 0; index < length; index++) {
      if (escape(string.charAt(index)) != null) {
        return escapeSlow(string, index);
      }
    }
    return string;
  }

  /**
   * Returns an {@code Appendable} instance which automatically escapes all
   * text appended to it before passing the resulting text to an underlying
   * {@code Appendable}.
   *
   * <p>The methods of the returned object will propagate any exceptions thrown
   * by the underlying {@code Appendable}, and will throw {@link
   * NullPointerException} if asked to append {@code null}, but do not otherwise
   * throw any exceptions.
   *
   * <p>The escaping behavior is identical to that of {@link #escape(String)},
   * so the following code is always equivalent to {@code
   * escaper.escape(string)}: <pre>   {@code
   *
   *   StringBuilder sb = new StringBuilder();
   *   escaper.escape(sb).append(string);
   *   return sb.toString();}</pre>
   *
   * @param out the underlying {@code Appendable} to append escaped output to
   * @return an {@code Appendable} which passes text to {@code out} after
   *     escaping it
   * @throws NullPointerException if {@code out} is null.
   */
  public Appendable escape(final Appendable out) {
    checkNotNull(out);

    return new Appendable() {
      /** Escapes and forwards the whole sequence; a null {@code csq} fails on {@code length()}. */
      public Appendable append(CharSequence csq) throws IOException {
        return append(csq, 0, csq.length());
      }

      /**
       * Escapes and forwards {@code csq[start, end)}. Runs of characters that
       * need no escaping are forwarded to {@code out} as a single sub-range
       * append; only replacements are written character by character.
       */
      public Appendable append(CharSequence csq, int start, int end)
          throws IOException {
        // Start of the current run of characters that have needed no escaping.
        int unescapedChunkStart = start;
        for (int i = start; i < end; i++) {
          char[] escaped = escape(csq.charAt(i));
          if (escaped != null) {
            // Flush the pending unescaped run, then emit the replacement.
            if (unescapedChunkStart < i) {
              out.append(csq, unescapedChunkStart, i);
            }
            outputChars(escaped);
            unescapedChunkStart = i + 1;
          }
        }
        // Flush whatever unescaped tail remains.
        if (unescapedChunkStart < end) {
          out.append(csq, unescapedChunkStart, end);
        }
        return this;
      }

      /** Forwards {@code c} unchanged, or its replacement if it needs escaping. */
      public Appendable append(char c) throws IOException {
        char[] escaped = escape(c);
        if (escaped == null) {
          out.append(c);
        } else {
          outputChars(escaped);
        }
        return this;
      }

      /** Writes every character of {@code chars} to the underlying appendable. */
      private void outputChars(char[] chars) throws IOException {
        for (char c : chars) {
          out.append(c);
        }
      }
    };
  }

  /**
   * Returns the escaped form of a given literal string, starting at the given
   * index. This method is called by the {@link #escape(String)} method when it
   * discovers that escaping is required. It is protected to allow subclasses
   * to override the fastpath escaping function to inline their escaping test.
   * See {@link CharEscaperBuilder} for an example usage.
   *
   * <p>Characters before {@code index} are copied to the result verbatim.
   *
   * <p>This method is safe to re-enter on the same thread: if {@link
   * #escape(char)} itself triggers another {@code escapeSlow} call (for
   * example by delegating to a different {@code CharEscaper}), the nested call
   * works on its own buffer rather than on the one this call is filling.
   *
   * @param s the literal string to be escaped
   * @param index the index to start escaping from
   * @return the escaped form of {@code s}
   * @throws NullPointerException if {@code s} is null
   */
  protected String escapeSlow(String s, int index) {
    int slen = s.length();

    // Check the cached buffer out of the thread-local while we use it. A
    // nested escapeSlow() on this thread then sees null and allocates a
    // private buffer instead of overwriting the output we are assembling.
    char[] pooled = DEST_TL.get();
    char[] dest;
    if (pooled == null) {
      // The cached buffer is already in use further up this thread's stack.
      dest = new char[DEST_INITIAL_SIZE];
    } else {
      dest = pooled;
      DEST_TL.set(null);
    }

    try {
      int destSize = dest.length;
      int destIndex = 0;   // next free position in dest
      int lastEscape = 0;  // first source index not yet copied into dest

      // Loop through the rest of the string, replacing when needed into the
      // destination buffer, which gets grown as needed as well.
      for (; index < slen; index++) {
        // Get a replacement for the current character.
        char[] r = escape(s.charAt(index));

        // If no replacement is needed, just continue.
        if (r == null) {
          continue;
        }

        int rlen = r.length;
        int charsSkipped = index - lastEscape; // Characters we skipped over.

        // This is the size needed to add the replacement, not the full
        // size needed by the string. We only regrow when we absolutely must.
        int sizeNeeded = destIndex + charsSkipped + rlen;
        if (destSize < sizeNeeded) {
          destSize = sizeNeeded + (slen - index) + DEST_PAD;
          dest = growBuffer(dest, destIndex, destSize);
        }

        // If we have skipped any characters, we need to copy them now.
        if (charsSkipped > 0) {
          s.getChars(lastEscape, index, dest, destIndex);
          destIndex += charsSkipped;
        }

        // Copy the replacement into the dest buffer; an empty replacement
        // simply drops the character.
        if (rlen > 0) {
          System.arraycopy(r, 0, dest, destIndex, rlen);
          destIndex += rlen;
        }
        lastEscape = index + 1;
      }

      // Copy leftover characters if there are any.
      int charsLeft = slen - lastEscape;
      if (charsLeft > 0) {
        int sizeNeeded = destIndex + charsLeft;
        if (destSize < sizeNeeded) {
          // Regrow and copy, expensive! No padding as this is the final copy.
          dest = growBuffer(dest, destIndex, sizeNeeded);
        }
        s.getChars(lastEscape, slen, dest, destIndex);
        destIndex = sizeNeeded;
      }
      return new String(dest, 0, destIndex);
    } finally {
      // Hand the original cached buffer back (never a grown one), even if
      // escape(char) threw.
      if (pooled != null) {
        DEST_TL.set(pooled);
      }
    }
  }

  /**
   * Returns the escaped form of the given character, or {@code null} if this
   * character does not need to be escaped. If an empty array is returned, this
   * effectively strips the input character from the resulting text.
   *
   * <p>If the character does not need to be escaped, this method should return
   * {@code null}, rather than a one-character array containing the character
   * itself. This enables the escaping algorithm to perform more efficiently.
   *
   * <p>An escaper is expected to be able to deal with any {@code char} value,
   * so this method should not throw any exceptions.
   *
   * @param c the character to escape if necessary
   * @return the replacement characters, or {@code null} if no escaping was
   *     needed
   */
  protected abstract char[] escape(char c);

  /**
   * Helper method to grow the character buffer as needed, this only happens
   * once in a while so it's ok if it's in a method call. If the index passed
   * in is 0 then no copying will be done.
   *
   * @param dest the buffer currently in use
   * @param index the number of leading characters of {@code dest} to preserve
   * @param size the length of the new buffer
   * @return a new array of length {@code size} whose first {@code index}
   *     characters equal those of {@code dest}
   */
  private static char[] growBuffer(char[] dest, int index, int size) {
    char[] copy = new char[size];
    if (index > 0) {
      System.arraycopy(dest, 0, copy, 0, index);
    }
    return copy;
  }

  /**
   * The amount of padding to use when growing the escape buffer.
   */
  private static final int DEST_PAD = 32;

  /**
   * Length, in characters, of the buffer cached per thread and of the
   * private buffer allocated by a re-entrant {@code escapeSlow} call.
   */
  private static final int DEST_INITIAL_SIZE = 1024;

  /**
   * A thread-local destination buffer to keep us from creating new buffers.
   * The starting size is 1024 characters. If we grow past this we don't
   * put it back in the threadlocal, we just keep going and grow as needed.
   *
   * <p>While {@code escapeSlow} is using the buffer the thread-local holds
   * {@code null}, which marks it as checked out.
   */
  private static final ThreadLocal<char[]> DEST_TL = new ThreadLocal<char[]>() {
    @Override
    protected char[] initialValue() {
      return new char[DEST_INITIAL_SIZE];
    }
  };
}