/**
* Copyright 2008 - CommonCrawl Foundation
*
* CommonCrawl licenses this file to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.commoncrawl.util;
import java.util.LinkedList;
import java.util.List;
/**
* Utility methods for escaping characters in strings.
*
* @author Albert Chern
*/
public class EscapeUtils {
/**
* A backslash, the character used to begin an escape sequence.
*/
public static final char ESCAPE = '\\';
/**
* Given a string and a set of characters to escape, returns an escaped
* version of the string.
*
* @see #ESCAPE
* @see #unescape
*
* @param s
* the string to escape characters in
* @param charsToEscape
* the set of characters that need to be escaped
*
* @return an escaped version of the string
*/
public static String escape(String s, char[] charsToEscape) {
StringBuilder buf = new StringBuilder(s.length());
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (c == ESCAPE || hasChar(charsToEscape, c)) {
buf.append(ESCAPE);
}
buf.append(c);
}
return buf.toString();
}
/**
* Given an escaped string returned by {@link #escape} and the original set of
* characters to escape, returns the original string.
*
* @see #ESCAPE
* @see #escape
*
* @param s
* the escaped string
* @param charsToEscape
* the set of characters that need to be escaped
*
* @return the original unescaped string
*/
public static String unescape(String s, char[] charsToEscape) {
StringBuilder buf = new StringBuilder(s.length());
boolean inEscapeSequence = false;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (inEscapeSequence) {
if (c != ESCAPE && !hasChar(charsToEscape, c)) {
throw new IllegalArgumentException(c
+ " is not a valid escape sequence character");
}
buf.append(c);
inEscapeSequence = false;
} else if (hasChar(charsToEscape, c)) {
throw new IllegalArgumentException(c + " must be escaped");
} else if (c == ESCAPE) {
inEscapeSequence = true;
} else {
buf.append(c);
}
}
if (inEscapeSequence) {
throw new IllegalArgumentException("Unterminated escape sequence");
}
return buf.toString();
}
/**
* Checks if a <tt>char[]</tt> contains a particular character.
*
* @param chars
* the array to search in
* @param target
* the <tt>char</tt> to search for
*
* @return <tt>true</tt> if <tt>chars</tt> contains <tt>target</tt>, or
* <tt>false</tt> otherwise
*/
private static boolean hasChar(char[] chars, char target) {
// The escape set will most likely be small, so just loop through it
for (char c : chars) {
if (c == target) {
return true;
}
}
return false;
}
/**
* Concatenates multiple strings into one string with each string separated by
* the separator character and any ocurrences of the separator character in
* the original strings escaped.
*
* @see #split
*
* @param separator
* the separator character
* @param strings
* the strings to concatenate
*
* @return the original strings concatenated and separated by
* <tt>separator</tt>
*/
public static String concatenate(char separator, String... strings) {
char[] charsToEscape = new char[] { separator };
StringBuilder buf = new StringBuilder();
for (String s : strings) {
if (buf.length() > 0) {
buf.append(',');
}
buf.append(escape(s, charsToEscape));
}
return buf.toString();
}
/**
* Splits a string returned by {@link #concatenate} into its original
* constituents.
*
* @see #concatenate
*
* @param separator
* the character that was originally used in concatenation
* @param s
* the concatenated string to split
*
* @return a <tt>String[]</tt> with the original constituents
*/
public static String[] split(char separator, String s) {
char[] charsToEscape = new char[] { separator };
List<String> strings = new LinkedList<String>();
StringBuilder field = new StringBuilder();
boolean inEscapeSequence = false;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (!inEscapeSequence && c == separator) {
strings.add(unescape(field.toString(), charsToEscape));
field.setLength(0);
} else {
field.append(c);
inEscapeSequence = (c == ESCAPE && !inEscapeSequence);
}
}
strings.add(unescape(field.toString(), charsToEscape));
return strings.toArray(new String[strings.size()]);
}
}