/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. */ /** * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE, Version 3, 29 June 2007; * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.gnu.org/licenses/lgpl-3.0.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.thingml.compilers.utils; /******************************************************************************* * Copyright (c) 2006-2009 * Software Technology Group, Dresden University of Technology * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) any * later version. This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License for more details. 
// * You should have received a copy of the GNU Lesser General Public License along with this
// * program; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
// * Suite 330, Boston, MA 02111-1307 USA
// *
// * Contributors:
// *   Software Technology Group - TU Dresden, Germany
// *      - initial API and implementation
// ******************************************************************************/

/**
 * A CharacterEscaper can be used to escape and unescape special characters
 * in Java strings and character literals. Among these special characters are
 * tabs, single and double quotes, line breaks and backslashes.
 */
public class CharacterEscaper {

    private static final char BACKSLASH = '\\';

    /**
     * Expands the escape sequences found in the input string and returns the
     * result. This is quite similar to the functionality found in property
     * file handling.
     * <p>
     * The following sequences, each introduced by a backslash, are expanded:
     * <ul>
     * <li>a {@code u} followed by exactly four hexadecimal digits (upper or
     *     lower case) becomes the character with that code,</li>
     * <li>an octal number of one to three digits whose first digit is
     *     {@code 0} to {@code 3} becomes the character with that code,</li>
     * <li>{@code b}, {@code t}, {@code n}, {@code f}, {@code r}, a double
     *     quote, a single quote and a second backslash become backspace, tab,
     *     line feed, form feed, carriage return, the quote itself and a single
     *     backslash respectively.</li>
     * </ul>
     * Any bogus escape codes remain in place. This includes a backslash that
     * is the last character of the input and a unicode escape that is
     * truncated or contains a non-hexadecimal character.
     * <p>
     * When files are provided in another encoding, they can be converted to ascii using
     * the native2ascii tool (a java sdk binary). This tool will escape all the
     * non Latin1 ASCII characters and convert the file into Latin1 with unicode escapes.
     * <p>
     * This code is from http://www.antlr.org/wiki/display/ST/unicode_escapes but was
     * modified and extended to support other escaped characters.
     *
     * @param source
     *      string with escape sequences
     * @return
     *      string with all recognised escape sequences expanded
     * @throws NullPointerException
     *      if {@code source} is {@code null}
     *
     * @author Caleb Lyness (modified by Mirko Seifert)
     */
    public static String unescapeEscapedCharacters(String source) {
        final int srcLen = source.length();
        final StringBuilder buffer = new StringBuilder(srcLen);

        int i = 0;
        while (i < srcLen) {
            char c = source.charAt(i++);
            // A backslash that ends the input has nothing to escape; it is
            // copied through like any other character instead of reading
            // past the end of the string.
            if (c == BACKSLASH && i < srcLen) {
                final char nc = source.charAt(i);
                switch (nc) {
                    case 'u': {
                        // The four hex digits start right after the 'u'.
                        final int code = parseFourHexDigits(source, i + 1);
                        if (code >= 0) {
                            c = (char) code; // use the converted char
                            i += 5;          // skip the 'u' and the four digits
                        }
                        // Otherwise the backslash is emitted as is and the
                        // following characters are copied by later iterations.
                        break;
                    }
                    // octal characters: \0 to \377
                    case '0':
                    case '1':
                    case '2':
                    case '3': {
                        // Consume up to three octal digits, starting with nc.
                        // Note: shifting left by 3 is the same as multiplying by 8.
                        int code = 0;
                        int digits = 0;
                        while (digits < 3 && i + digits < srcLen) {
                            final int d = source.charAt(i + digits) - '0';
                            if (d < 0 || d > 7) {
                                break; // not an octal digit, it belongs to the plain text
                            }
                            code = (code << 3) + d;
                            digits++;
                        }
                        c = (char) code;
                        i += digits; // skip the digits that were consumed
                        break;
                    }
                    // escape sequences: \b \t \n \f \r \" \' \\
                    case BACKSLASH: {
                        // An escaped backslash: keep the first one (already
                        // in c) and skip the second.
                        i++;
                        break;
                    }
                    case 'b': {
                        c = '\b';
                        i++;
                        break;
                    }
                    case 't': {
                        c = '\t';
                        i++;
                        break;
                    }
                    case 'n': {
                        c = '\n';
                        i++;
                        break;
                    }
                    case 'f': {
                        c = '\f';
                        i++;
                        break;
                    }
                    case 'r': {
                        c = '\r';
                        i++;
                        break;
                    }
                    case '\"': {
                        c = '\"';
                        i++;
                        break;
                    }
                    case '\'': {
                        c = '\'';
                        i++;
                        break;
                    }
                    default: {
                        // Unknown escape: the backslash stays in place.
                        break;
                    }
                }
            }
            buffer.append(c);
        }
        return buffer.toString();
    }

    /**
     * Escapes backslashes, backspaces, tabs, line feeds, form feeds, carriage
     * returns, double quotes and single quotes by replacing each of them with
     * a backslash followed by the corresponding escape letter (or by the
     * character itself for backslash and quotes). All other characters are
     * copied unchanged.
     *
     * @param source
     *      string to escape
     * @return
     *      the escaped string
     * @throws NullPointerException
     *      if {@code source} is {@code null}
     */
    public static String escapeEscapedCharacters(String source) {
        final int srcLen = source.length();
        final StringBuilder buffer = new StringBuilder(srcLen + 16);
        for (int i = 0; i < srcLen; i++) {
            final char c = source.charAt(i);
            switch (c) {
                case BACKSLASH:
                    buffer.append("\\\\");
                    break;
                case '\b':
                    buffer.append("\\b");
                    break;
                case '\t':
                    buffer.append("\\t");
                    break;
                case '\n':
                    buffer.append("\\n");
                    break;
                case '\f':
                    buffer.append("\\f");
                    break;
                case '\r':
                    buffer.append("\\r");
                    break;
                case '\"':
                    buffer.append("\\\"");
                    break;
                case '\'':
                    buffer.append("\\'");
                    break;
                default:
                    buffer.append(c);
                    break;
            }
        }
        return buffer.toString();
    }

    /**
     * Reads exactly four ASCII hexadecimal digits from {@code source},
     * beginning at index {@code start}.
     *
     * @return the value of the four digits (0 to 0xFFFF), or -1 if fewer than
     *         four characters are available or one of them is not a hex digit
     */
    private static int parseFourHexDigits(String source, int start) {
        if (start + 4 > source.length()) {
            return -1;
        }
        int value = 0;
        for (int k = start; k < start + 4; k++) {
            final int d = hexDigitValue(source.charAt(k));
            if (d < 0) {
                return -1;
            }
            // Note: shifting left by 4 is the same as multiplying by 16.
            value = (value << 4) + d;
        }
        return value;
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit
     * (0-9, a-f, A-F), or -1 if the character is not one.
     */
    private static int hexDigitValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }
}