/* * Copyright 2012-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package com.facebook.buck.util; import com.facebook.buck.util.environment.Platform; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.CharMatcher; import com.google.common.base.Function; import com.google.common.base.Preconditions; import java.io.File; import java.nio.file.Path; import java.util.Iterator; public final class Escaper { private static final char MAKEFILE_ESCAPE_CHAR = '\\'; /** Utility class: do not instantiate. */ private Escaper() {} /** The quoting style to use when escaping. */ public static enum Quoter { SINGLE { @Override public String quote(String str) { return '\'' + str.replace("\'", "'\\''") + '\''; } }, DOUBLE { @Override public String quote(String str) { return '"' + str.replace("\"", "\\\"") + '"'; } }, DOUBLE_WINDOWS_JAVAC { @Override public String quote(String str) { return '"' + str.replace("\\", "\\\\") + '"'; } }; /** @return the string with this quoting style applied. */ public abstract String quote(String str); } /** * Escapes the special characters identified the {@link CharMatcher}, using single quotes. * * @param matcher identifies characters to be escaped * @param str string to quote * @return possibly quoted string */ public static String escape(Quoter quoter, CharMatcher matcher, String str) { if (matcher.matchesAnyOf(str) || str.isEmpty()) { return quoter.quote(str); } else { return str; } } /** * @return a escaper function using the given quote style and escaping characters determined by * the given matcher. */ public static Function<String, String> escaper(final Quoter quoter, final CharMatcher matcher) { return input -> escape(quoter, matcher, input); } public static Function<String, String> javacEscaper() { if (Platform.detect() == Platform.WINDOWS) { return Escaper.escaper( Quoter.DOUBLE_WINDOWS_JAVAC, CharMatcher.anyOf("#'").or(CharMatcher.whitespace())); } else { return Escaper.escaper( Escaper.Quoter.DOUBLE, CharMatcher.anyOf("#\"'").or(CharMatcher.whitespace())); } } private static final CharMatcher BASH_SPECIAL_CHARS = CharMatcher.anyOf("<>|!?*[]$\\(){}\"'`&;=").or(CharMatcher.whitespace()); /** * Bash quoting {@link com.google.common.base.Function Function} which can be passed to {@link * com.google.common.collect.Iterables#transform Iterables.transform()}. */ public static final Function<String, String> BASH_ESCAPER = escaper(Quoter.SINGLE, BASH_SPECIAL_CHARS); /** * CreateProcess (Windows) quoting {@link com.google.common.base.Function Function} which can be * passed to {@link com.google.common.collect.Iterables#transform Iterables.transform()}. */ public static final Function<String, String> CREATE_PROCESS_ESCAPER = WindowsCreateProcessEscape::quote; /** * Platform-aware shell quoting {@link com.google.common.base.Function Function} which can be * passed to {@link com.google.common.collect.Iterables#transform Iterables.transform()} * TODO(sdwilsh): get proper cmd.EXE escaping implemented on Windows */ public static final Function<String, String> SHELL_ESCAPER = Platform.detect() == Platform.WINDOWS ? CREATE_PROCESS_ESCAPER : BASH_ESCAPER; /** * Escaper for argfiles for clang and gcc. * * <p>Based on the following docs in the gcc manual: * * <p>{@literal @file} Read command-line options from file. The options read are inserted in place * of the original {@literal @file} option. If file does not exist, or cannot be read, then the * option will be treated literally, and not removed. * * <p>Options in file are separated by whitespace. A whitespace character may be included in an * option by surrounding the entire option in either single or double quotes. Any character * (including a backslash) may be included by prefixing the character to be included with a * backslash. The file may itself contain additional {@literal @file} options; any such options * will be processed recursively. */ public static final Function<String, String> ARGFILE_ESCAPER = escaper(Quoter.DOUBLE, CharMatcher.anyOf("\"\\").or(CharMatcher.whitespace())); /** * Quotes a string to be passed to the shell, if necessary. This works for the appropriate shell * regardless of the platform it is run on. * * @param str string to escape * @return possibly escaped string */ public static String escapeAsShellString(String str) { return SHELL_ESCAPER.apply(str); } /** * Quotes a string to be passed to Bash, if necessary. Uses single quotes to prevent variable * expansion, `...` evaluation etc. * * @param str string to quote * @return possibly quoted string */ public static String escapeAsBashString(String str) { if (Platform.detect() == Platform.WINDOWS) { return CREATE_PROCESS_ESCAPER.apply(str); } else { return escape(Quoter.SINGLE, BASH_SPECIAL_CHARS, str); } } public static String escapeAsBashString(Path path) { return escapeAsBashString(path.toString()); } // Adapted from org.apache.commons.lang.StringEscapeUtils /** * @return a double-quoted string with metacharacters and quotes escaped with a backslash; * non-ASCII characters escaped as \u */ public static String escapeAsPythonString(String str) { StringBuilder builder = new StringBuilder(); builder.append('"'); for (Character ch : str.toCharArray()) { // Handle Unicode. if (ch > 0xfff) { builder.append("\\u" + hex(ch)); } else if (ch > 0xff) { builder.append("\\u0" + hex(ch)); } else if (ch > 0x7f) { builder.append("\\u00" + hex(ch)); } else if (ch < 32) { switch (ch) { case '\b': builder.append('\\'); builder.append('b'); break; case '\n': builder.append('\\'); builder.append('n'); break; case '\t': builder.append('\\'); builder.append('t'); break; case '\f': builder.append('\\'); builder.append('f'); break; case '\r': builder.append('\\'); builder.append('r'); break; default: if (ch > 0xf) { builder.append("\\u00" + hex(ch)); } else { builder.append("\\u000" + hex(ch)); } break; } } else { switch (ch) { case '\'': builder.append('\\'); builder.append('\''); break; case '"': builder.append('\\'); builder.append('"'); break; case '\\': builder.append('\\'); builder.append('\\'); break; default: builder.append(ch); break; } } } builder.append('"'); return builder.toString(); } private static boolean shouldEscapeMakefileString(String escapees, String blob, int index) { Preconditions.checkArgument(blob.length() > index); for (int i = index; i < blob.length(); i++) { if (escapees.indexOf(blob.charAt(i)) != -1) { return true; } if (blob.charAt(i) != MAKEFILE_ESCAPE_CHAR) { return false; } } return true; } private static String escapeAsMakefileString(String escapees, String str) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < str.length(); i++) { if (shouldEscapeMakefileString(escapees, str, i)) { builder.append(MAKEFILE_ESCAPE_CHAR); } builder.append(str.charAt(i)); } return builder.toString().replace("$", "$$"); } /** * @return an escaped string suitable for use in a GNU makefile on the right side of a variable * assignment. */ public static String escapeAsMakefileValueString(String str) { return escapeAsMakefileString("#", str); } /** * Escapes forward slashes in a Path as a String that is safe to consume with other tools (such as * gcc). On Unix systems, this is equivalent to {@link java.nio.file.Path Path.toString()}. * * @param path the Path to escape * @return the escaped Path */ public static String escapePathForCIncludeString(Path path) { if (File.separatorChar != '\\') { return path.toString(); } StringBuilder result = new StringBuilder(); if (path.startsWith(File.separator)) { result.append("\\\\"); } for (Iterator<Path> iterator = path.iterator(); iterator.hasNext(); ) { result.append(iterator.next()); if (iterator.hasNext()) { result.append("\\\\"); } } if (path.getNameCount() > 0 && path.endsWith(File.separator)) { result.append("\\\\"); } return result.toString(); } @VisibleForTesting static String hex(char ch) { return Integer.toHexString(ch).toUpperCase(); } /** * Unescape a path string obtained from preprocessor output, as in: <a * href="https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html"> * https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html </a>. * * @see #decodeNumericEscape(StringBuilder, String, int, int, int, int) */ public static String unescapeLineMarkerPath(String escaped) { StringBuilder ret = new StringBuilder(); for (int i = 0; i < escaped.length(); /* i incremented below */ ) { // consume character, advance index char c = escaped.charAt(i); i++; if (c != '\\') { ret.append(c); } else { // So sayeth the GCC docs: // "filename will never contain any non-printing characters; they are replaced with octal // escape sequences." -- https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html if (i >= escaped.length()) { throw new IllegalArgumentException("malformed string: ends with single backslash"); } c = escaped.charAt(i); // peek (don't consume) next char switch (c) { // standard escapes: http://en.cppreference.com/w/cpp/language/escape case '\\': case '"': case '\'': case '?': ret.append(c); i++; // consume it break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': i = decodeNumericEscape(ret, escaped, i, /*maxCodeLength=*/ 3, /*base*/ 8); break; case 'x': i = decodeNumericEscape(ret, escaped, i, /*maxCodeLength=*/ 2, /*base*/ 16); break; case 'u': i = decodeNumericEscape( ret, escaped, i, /*maxCodeLength=*/ 4, /*base*/ 16, /*maxCodes*/ 2); break; default: throw new IllegalArgumentException("malformed string: bad char in escape seq: " + c); } } } return ret.toString(); } /** * Decode a numeric escape as explained in this page: <a * href="http://en.cppreference.com/w/cpp/language/escape"> * http://en.cppreference.com/w/cpp/language/escape </a>. The pointed-to substring shouldn't * contain the leading backslash + optional 'x' or 'u'. * * @param out receives decoded characters * @param escaped the string containing the escape sequence * @param pos starting index of escape (but after the backslash) * @param base number base, e.g. 8 for octal, or 16 for hex or unicode. * @param maxCodes maximum number of sequences of escape numbers to decode. This is mainly to * support unicode escape sequences which might represent one or two characters. * @return position to first character just after the consumed numeric code. Is number of consumed * code bytes + {@code pos} argument. */ public static int decodeNumericEscape( StringBuilder out, String escaped, int pos, int maxCodeLength, int base, int maxCodes) { final String table = "0123456789abcdef"; for (int code = 0; code < maxCodes; code++) { char c = 0; boolean valid = false; for (int i = 0; i < maxCodeLength && pos < escaped.length(); i++) { final int digit = table.indexOf(Character.toLowerCase(escaped.charAt(pos))); if (digit == -1 || digit >= base) { break; } // "digit" is a valid value for the digit read, in the range [0, base). // Now we can increment the position, consuming that digit. pos++; c = (char) ((c * base) + digit); valid = true; } if (valid) { out.append(c); } } return pos; } /** * Call {@link #decodeNumericEscape(StringBuilder, String, int, int, int, int)} to parse at most * one escaped character; i.e. calls that method with {@code maxCodes = 1}. */ public static int decodeNumericEscape( StringBuilder out, String escaped, int pos, int maxCodeLength, int base) { return decodeNumericEscape(out, escaped, pos, maxCodeLength, base, 1); } }