/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.atlas.lib; import org.apache.jena.atlas.AtlasException ; import org.apache.jena.atlas.io.AWriter ; import org.apache.jena.atlas.io.OutputUtils ; import org.apache.jena.atlas.io.StringWriterI ; /** String escape utilities */ public class EscapeStr { /* * Escape characters in a string according to Turtle rules. */ public static String stringEsc(String s) { AWriter w = new StringWriterI() ; stringEsc(w, s, Chars.CH_QUOTE2, true, CharSpace.UTF8) ; return w.toString() ; } /** Write a string - basic escaping, no quote escaping. */ public static void stringEsc(AWriter out, String s, boolean asciiOnly) { int len = s.length() ; for (int i = 0; i < len; i++) { char c = s.charAt(i); // \\ Escape always possible. if (c == '\\') { out.print('\\') ; out.print(c) ; continue ; } switch(c) { case '\n': out.print("\\n"); continue; case '\t': out.print("\\t"); continue; case '\r': out.print("\\r"); continue; case '\f': out.print("\\f"); continue; default: // Drop through } if ( !asciiOnly ) out.print(c); else writeCharAsASCII(out, c) ; } } public static void stringEsc(AWriter out, String s, char quoteChar, boolean singleLineString) { stringEsc(out, s, quoteChar, singleLineString, CharSpace.UTF8); } public static void stringEsc(AWriter out, String s, char quoteChar, boolean singleLineString, CharSpace charSpace) { boolean ascii = ( CharSpace.ASCII == charSpace ) ; int len = s.length() ; int quotesInARow = 0 ; for (int i = 0; i < len; i++) { char c = s.charAt(i); // \\ Escape always possible. if (c == '\\') { out.print('\\') ; out.print(c) ; continue ; } if ( ! singleLineString ) { // Multiline string. if ( c == quoteChar ) { quotesInARow++ ; if ( quotesInARow == 3 ) { out.print("\\"); out.print(quoteChar); quotesInARow = 0; continue; } } else { quotesInARow = 0 ; } } else { if ( c == quoteChar ) { out.print("\\"); out.print(c) ; continue ; } switch(c) { case '\n': out.print("\\n"); continue; case '\t': out.print("\\t"); continue; case '\r': out.print("\\r"); continue; case '\f': out.print("\\f"); continue; default: // Drop through } } if ( !ascii ) out.print(c); else writeCharAsASCII(out, c) ; } } /** Write a string with Unicode to ASCII conversion using \-u escapes */ public static void writeASCII(AWriter out, String s) { int len = s.length() ; for (int i = 0; i < len; i++) { char c = s.charAt(i); writeCharAsASCII(out, c); } } /** Write a character with Unicode to ASCII conversion using \-u escapes */ public static void writeCharAsASCII(AWriter out, char c) { if ( c >= 32 && c < 127 ) out.print(c); else { // Outside the charset range. // Does not cover beyond 16 bits codepoints directly // (i.e. \U escapes) but Java keeps these as surrogate // pairs and will print as characters out.print("\\u") ; OutputUtils.printHex(out, c, 4) ; } } // Utilities to remove escapes /** Replace \ escapes (\\u, \t, \n etc) in a string */ public static String unescapeStr(String s) { return unescapeStr(s, '\\') ; } /** Replace \ escapes (\\u, \t, \n etc) in a string */ public static String unescapeStr(String s, char escapeChar) { return unescape(s, escapeChar, false) ; } // Main worker function for unescaping strings. public static String unescape(String s, char escape, boolean pointCodeOnly) { int i = s.indexOf(escape) ; if ( i == -1 ) return s ; // Dump the initial part straight into the string buffer StringBuilder sb = new StringBuilder(s.substring(0,i)) ; for ( ; i < s.length() ; i++ ) { char ch = s.charAt(i) ; if ( ch != escape ) { sb.append(ch) ; continue ; } // Escape if ( i >= s.length()-1 ) throw new AtlasException("Illegal escape at end of string") ; char ch2 = s.charAt(i+1) ; i = i + 1 ; // \\u and \\U if ( ch2 == 'u' ) { // i points to the \ so i+6 is next character if ( i+4 >= s.length() ) throw new AtlasException("\\u escape too short") ; int x = Hex.hexStringToInt(s, i+1, 4) ; sb.append((char)x) ; // Jump 1 2 3 4 -- already skipped \ and u i = i+4 ; continue ; } if ( ch2 == 'U' ) { // i points to the \ so i+6 is next character if ( i+8 >= s.length() ) throw new AtlasException("\\U escape too short") ; int x = Hex.hexStringToInt(s, i+1, 8) ; // Convert to UTF-16 codepoint pair. sb.append((char)x) ; // Jump 1 2 3 4 5 6 7 8 -- already skipped \ and u i = i+8 ; continue ; } // Are we doing just point code escapes? // If so, \X-anything else is legal as a literal "\" and "X" if ( pointCodeOnly ) { sb.append('\\') ; sb.append(ch2) ; i = i + 1 ; continue ; } // Not just codepoints. Must be a legal escape. char ch3 = 0 ; switch (ch2) { case 'n': ch3 = '\n' ; break ; case 't': ch3 = '\t' ; break ; case 'r': ch3 = '\r' ; break ; case 'b': ch3 = '\b' ; break ; case 'f': ch3 = '\f' ; break ; case '\'': ch3 = '\'' ; break ; case '\"': ch3 = '\"' ; break ; case '\\': ch3 = '\\' ; break ; default: throw new AtlasException("Unknown escape: \\"+ch2) ; } sb.append(ch3) ; } return sb.toString() ; } }