/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*/
/**
* Licensed under the GNU LESSER GENERAL PUBLIC LICENSE, Version 3, 29 June 2007;
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.gnu.org/licenses/lgpl-3.0.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.thingml.compilers.utils;
/*******************************************************************************
* Copyright (c) 2006-2009
* Software Technology Group, Dresden University of Technology
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option) any
* later version. This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See the GNU Lesser General Public License for more details. You should have
* received a copy of the GNU Lesser General Public License along with this
* program; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* Contributors:
* Software Technology Group - TU Dresden, Germany
* - initial API and implementation
******************************************************************************/
/**
* A CharacterEscaper can be used to escape and unescape special characters
* in Java strings and character literals. Among these special characters are
* tabs, single and double quotes, line breaks and backslashes.
*/
public class CharacterEscaper {

    private static final char BACKSLASH = '\\';

    /**
     * Expands the escape sequences found in the given string into the characters
     * they denote and returns the result. This is quite similar to the
     * functionality found in property file handling.
     * <p>
     * The following sequences are recognized:
     * <ul>
     * <li>unicode escapes: a backslash, the letter {@code u} and exactly four
     * hexadecimal digits (upper or lower case)</li>
     * <li>octal escapes from \0 to \377: a backslash, a digit between 0 and 3 and
     * up to two further octal digits</li>
     * <li>the single character escapes \b \t \n \f \r \" \' and \\</li>
     * </ul>
     * Any bogus or incomplete escape code (including a lone backslash at the very
     * end of the input) remains in place unchanged.
     * <p>
     * When files are provided in another encoding, they can be converted to ascii using
     * the native2ascii tool (a java sdk binary). This tool will escape all the
     * non Latin1 ASCII characters and convert the file into Latin1 with unicode escapes.
     *
     * This code is from http://www.antlr.org/wiki/display/ST/unicode_escapes but was
     * modified and extended to support other escaped characters.
     *
     * @param source
     *            string with escape sequences, must not be <code>null</code>
     * @return
     *            string with all recognized escape sequences expanded
     * @throws NullPointerException
     *             if <code>source</code> is <code>null</code>
     *
     * @author Caleb Lyness (modified by Mirko Seifert)
     */
    public static String unescapeEscapedCharacters(String source) {
        final int srcLen = source.length();
        final StringBuilder buffer = new StringBuilder(srcLen);
        int i = 0;
        while (i < srcLen) {
            char c = source.charAt(i++);
            // A backslash only starts an escape if at least one character follows;
            // a trailing backslash is copied verbatim instead of reading past the end.
            if (c == BACKSLASH && i < srcLen) {
                final char nc = source.charAt(i);
                switch (nc) {
                case 'u': {
                    // i points at the 'u'; the four hex digits start right after it
                    final int v = parseFourHexDigits(source, i + 1);
                    if (v >= 0) { // We got a full conversion
                        c = (char) v; // Use the converted char
                        i += 5; // skip the 'u' and the four digits
                    }
                    // otherwise the backslash is kept and the 'u' is copied
                    // as an ordinary character by the next iteration
                    break;
                }
                // octal characters: \0 to \377
                case '0':
                case '1':
                case '2':
                case '3': {
                    // Note: shifting left by 3 is the same as multiplying by 8
                    int v = 0; // Accumulator
                    int digits = 0;
                    while (digits < 3 && i + digits < srcLen) {
                        final char d = source.charAt(i + digits);
                        if (d < '0' || d > '7') {
                            break; // not an octal digit, it belongs to the following text
                        }
                        v = (v << 3) + (d - '0');
                        digits++;
                    }
                    c = (char) v; // Use the converted char
                    i += digits; // skip exactly the digits that were consumed
                    break;
                }
                // escape sequences: \b \t \n \f \r \" \' \\
                case BACKSLASH:
                    // escaped backslash: emit one backslash, skip the second one
                    i++;
                    break;
                case 'b':
                    c = '\b';
                    i++;
                    break;
                case 't':
                    c = '\t';
                    i++;
                    break;
                case 'n':
                    c = '\n';
                    i++;
                    break;
                case 'f':
                    c = '\f';
                    i++;
                    break;
                case 'r':
                    c = '\r';
                    i++;
                    break;
                case '\"':
                    c = '\"';
                    i++;
                    break;
                case '\'':
                    c = '\'';
                    i++;
                    break;
                default:
                    // unknown escape: keep the backslash, the next iteration
                    // copies the following character unchanged
                    break;
                }
            }
            buffer.append(c);
        }
        return buffer.toString();
    }

    /**
     * Escapes the special characters in the given string such that the result
     * can be passed to {@link #unescapeEscapedCharacters(String)} to obtain the
     * original string again.
     * <p>
     * Backslashes, backspaces, tabs, line feeds, form feeds, carriage returns,
     * double quotes and single quotes are replaced by their two character escape
     * sequence (\\ \b \t \n \f \r \" and \'). All other characters are copied
     * unchanged.
     *
     * @param source
     *            the string to escape, must not be <code>null</code>
     * @return
     *            the escaped string
     * @throws NullPointerException
     *             if <code>source</code> is <code>null</code>
     */
    public static String escapeEscapedCharacters(String source) {
        final int srcLen = source.length();
        // a little head room, because every escaped character grows by one
        final StringBuilder buffer = new StringBuilder(srcLen + 16);
        // A single pass guarantees that the backslashes inserted for one
        // character are never escaped a second time.
        for (int i = 0; i < srcLen; i++) {
            final char c = source.charAt(i);
            switch (c) {
            case BACKSLASH:
                buffer.append(BACKSLASH).append(BACKSLASH);
                break;
            case '\b':
                buffer.append(BACKSLASH).append('b');
                break;
            case '\t':
                buffer.append(BACKSLASH).append('t');
                break;
            case '\n':
                buffer.append(BACKSLASH).append('n');
                break;
            case '\f':
                buffer.append(BACKSLASH).append('f');
                break;
            case '\r':
                buffer.append(BACKSLASH).append('r');
                break;
            case '\"':
                buffer.append(BACKSLASH).append('\"');
                break;
            case '\'':
                buffer.append(BACKSLASH).append('\'');
                break;
            default:
                buffer.append(c);
                break;
            }
        }
        return buffer.toString();
    }

    /**
     * Parses exactly four hexadecimal digits starting at the given index.
     *
     * @return the parsed value (0 to 0xFFFF) or -1 if fewer than four characters
     *         are available or one of them is not a hexadecimal digit
     */
    private static int parseFourHexDigits(String source, int start) {
        if (start + 4 > source.length()) {
            return -1; // truncated escape at the end of the input
        }
        // Note: shifting left by 4 is the same as multiplying by 16
        int v = 0;
        for (int k = start; k < start + 4; k++) {
            final int digit = hexValue(source.charAt(k));
            if (digit < 0) {
                return -1; // almost but no go
            }
            v = (v << 4) + digit;
        }
        return v;
    }

    /**
     * Returns the value of the given ASCII hexadecimal digit or -1 if the
     * character is not one. Only 0-9, a-f and A-F are accepted.
     */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }
}