/* * Copyright 2015-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package com.facebook.buck.model; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.UnmodifiableIterator; import java.util.NoSuchElementException; import java.util.Optional; import javax.annotation.Nullable; /** * Replace and extracts macros from input strings. * * <p>Examples: $(exe //foo:bar) $(location :bar) $(platform) */ public class MacroFinder { public Optional<MacroMatchResult> match(ImmutableSet<String> macros, String blob) throws MacroException { MacroFinderAutomaton macroFinder = new MacroFinderAutomaton(blob); if (!macroFinder.hasNext()) { return Optional.empty(); } MacroMatchResult result = macroFinder.next(); if (!result.isEscaped() && result.getStartIndex() == 0 && result.getEndIndex() == blob.length() && !macros.contains(result.getMacroType())) { throw new MacroException( String.format("expanding %s: no such macro \"%s\"", blob, result.getMacroType())); } return Optional.of(result); } /** * Expand macros embedded in a string. * * @param replacers a map of macro names to {@link MacroReplacer} objects used to expand them. * @param blob the input string containing macros to be expanded * @param resolveEscaping whether to drop characters used to escape literal uses of `$(...)` * @return a copy of the input string with all macros expanded */ public String replace( ImmutableMap<String, MacroReplacer> replacers, String blob, boolean resolveEscaping) throws MacroException { StringBuilder expanded = new StringBuilder(); // Iterate over all macros found in the string, expanding each found macro. int lastEnd = 0; MacroFinderAutomaton matcher = new MacroFinderAutomaton(blob); while (matcher.hasNext()) { MacroMatchResult matchResult = matcher.next(); // Add everything from the original string since the last match to this one. expanded.append(blob.substring(lastEnd, matchResult.getStartIndex())); // If the macro is escaped, add the macro text (but omit the escaping backslash) if (matchResult.isEscaped()) { expanded.append( blob.substring( matchResult.getStartIndex() + (resolveEscaping ? 1 : 0), matchResult.getEndIndex())); } else { // Call the relevant expander and add the expanded value to the string. MacroReplacer replacer = replacers.get(matchResult.getMacroType()); if (replacer == null) { throw new MacroException( String.format( "expanding %s: no such macro \"%s\"", blob.substring(matchResult.getStartIndex(), matchResult.getEndIndex()), matchResult.getMacroType())); } try { expanded.append(replacer.replace(matchResult.getMacroInput())); } catch (MacroException e) { throw new MacroException( String.format( "expanding %s: %s", blob.substring(matchResult.getStartIndex(), matchResult.getEndIndex()), e.getMessage()), e); } } lastEnd = matchResult.getEndIndex(); } // Append the remaining part of the original string after the last match. expanded.append(blob.substring(lastEnd, blob.length())); return expanded.toString(); } public ImmutableList<MacroMatchResult> findAll(ImmutableSet<String> macros, String blob) throws MacroException { ImmutableList.Builder<MacroMatchResult> results = ImmutableList.builder(); MacroFinderAutomaton matcher = new MacroFinderAutomaton(blob); while (matcher.hasNext()) { MacroMatchResult matchResult = matcher.next(); if (matchResult.isEscaped()) { continue; } if (!macros.contains(matchResult.getMacroType())) { throw new MacroException(String.format("no such macro \"%s\"", matchResult.getMacroType())); } results.add(matchResult); } return results.build(); } /** * A push-down automaton that searches for occurrences of a macro in linear time with respect to * the search string. The automaton keeps track of 5 pieces of state: 1) The current automaton * state 2) The position in the input string 3) The current nested depth of parentheses 4) The * type of quote (if any) which bounds the current sequence 5) The return state to go back to * after an escape sequence ends * * <p>The automaton accumulates intermediate values in the string builder and the match result * builder. The <code>find()</code> method will advance the automaton along the input string until * it finds a macro, and returns a match, or until it reaches the end of the string and returns * null. * * <p>Examples of valid matching patterns: $(macro) $(macro argument) $(macro nested * parens(argument)) $(macro 'ignored paren )') * * <p>If the macro is preceeded by a '\' the match result will be marked as escaped, and the * capture group will include the escaping backslash. Example: \$(macro) * * <p>Here are the state transitions in dot format (with glossing over of saved state): * * <pre> * digraph G { * "SEARCHING" -> "FOUND_DOLLAR" [label="'$'"]; * "FOUND_DOLLAR" -> "SEARCHING" [label="*"]; * "FOUND_DOLLAR" -> "READING_MACRO_NAME" [label="'('"]; * "READING_MACRO_NAME" -> "FOUND_MACRO" [label="')'"]; * "READING_MACRO_NAME" -> "READING_ARGS" [label="\\s"]; * "READING_MACRO_NAME" -> "READING_MACRO_NAME" [label="\\w"]; * "READING_ARGS" -> "FOUND_MACRO" [label="Balanced ')'"]; * "READING_ARGS" -> "READING_QUOTED_ARGS" [label="'|\""]; * "READING_QUOTED_ARGS" -> "READING_ARGS" [label="Matching '|\""]; * "READING_QUOTED_ARGS" -> "READING_QUOTED_ARGS" [label="[^'\"]"]; * "READING_ARGS" -> "READING_ARGS" [label="[^'\")]"]; * "SEARCHING" -> "IN_ESCAPE_SEQUENCE" [label="\\"]; * "IN_ESCAPE_SEQUENCE" -> "FOUND_DOLLAR" [label="$"]; * "READING_ARGS" -> "IN_ESCAPE_ARG_SEQUENCE" [label="\\"]; * "READING_QUOTED_ARGS" -> "IN_ESCAPE_ARG_SEQUENCE" [label="\\"]; * } * </pre> * * Not shown: transitions back from "IN_ESCAPE_SEQUENCE" and "IN_ESCAPE_ARG_SEQUENCE", as they * return to the previous state */ public static class MacroFinderAutomaton extends UnmodifiableIterator<MacroMatchResult> { private enum State { // Looking for the start of a macro SEARCHING, // The last character was a '\' so we're looking for an escaped macro IN_ESCAPE_SEQUENCE, // The last character was a '$' so we're looking for a '(' FOUND_DOLLAR, // We found a '$' followed by a '(' so now we're reading the macro name READING_MACRO_NAME, // We finished the macro name, now we count matching parens until we run out READING_ARGS, // We don't want to count parentheses inside of quotes, so we switch to counting quotes READING_QUOTED_ARGS, // The last character was a '\' so we won't count a ')' or a quote next IN_ESCAPE_ARG_SEQUENCE, // We have found a macro and it is ready to be built FOUND_MACRO, // Then transition back to SEARCHING } private String blob; private State state; private int index; private int parenthesesDepth; private char startQuote; private State returnState; private MacroMatchResult.Builder currentResultBuilder; private StringBuilder buffer; @Nullable private MacroMatchResult next; public MacroFinderAutomaton(String blob) { this.blob = blob; this.parenthesesDepth = 0; this.state = State.SEARCHING; this.index = 0; this.currentResultBuilder = MacroMatchResult.builder(); this.buffer = new StringBuilder(); this.startQuote = '\0'; this.returnState = this.state; // Initialize the iterator next = find(); } @Nullable private MacroMatchResult find() { for (; index < blob.length(); ++index) { state = consumeChar(blob.charAt(index), index); if (state == State.FOUND_MACRO) { state = State.SEARCHING; return currentResultBuilder.build(); } } return null; } @Override public boolean hasNext() { return next != null; } @Override public MacroMatchResult next() { MacroMatchResult toReturn = this.next; next = find(); if (toReturn == null) { throw new NoSuchElementException("No more macro matches"); } return toReturn; } private String takeBuffer() { String result = buffer.toString(); buffer.setLength(0); return result; } private State consumeChar(char c, int index) { switch (this.state) { case SEARCHING: switch (c) { case '\\': returnState = State.SEARCHING; return State.IN_ESCAPE_SEQUENCE; case '$': currentResultBuilder = MacroMatchResult.builder().setStartIndex(index).setEscaped(false); return State.FOUND_DOLLAR; default: return State.SEARCHING; } case IN_ESCAPE_SEQUENCE: // We want to capture the escaped macro, but mark it as escaped switch (c) { case '$': currentResultBuilder = MacroMatchResult.builder().setStartIndex(index - 1).setEscaped(true); return State.FOUND_DOLLAR; default: return returnState; } case FOUND_DOLLAR: switch (c) { case '(': parenthesesDepth += 1; return State.READING_MACRO_NAME; case '\\': returnState = State.SEARCHING; return State.IN_ESCAPE_SEQUENCE; case '$': currentResultBuilder = MacroMatchResult.builder().setStartIndex(index).setEscaped(false); return State.FOUND_DOLLAR; default: return State.SEARCHING; } case READING_MACRO_NAME: switch (c) { case ')': parenthesesDepth -= 1; currentResultBuilder .setMacroInput(ImmutableList.of()) .setMacroType(takeBuffer()) .setEndIndex(index + 1); return State.FOUND_MACRO; case '\t': case ' ': case '\n': case '\r': currentResultBuilder.setMacroType(takeBuffer()); return State.READING_ARGS; default: buffer.append(c); return State.READING_MACRO_NAME; } case READING_ARGS: switch (c) { case ' ': currentResultBuilder.addMacroInput(takeBuffer().trim()); return State.READING_ARGS; case '\\': returnState = State.READING_ARGS; return State.IN_ESCAPE_ARG_SEQUENCE; case '\'': case '"': startQuote = c; buffer.append(c); return State.READING_QUOTED_ARGS; case '(': parenthesesDepth += 1; buffer.append(c); return State.READING_ARGS; case ')': parenthesesDepth -= 1; if (parenthesesDepth == 0) { currentResultBuilder.addMacroInput(takeBuffer().trim()).setEndIndex(index + 1); return State.FOUND_MACRO; } else { buffer.append(c); } return State.READING_ARGS; default: buffer.append(c); return State.READING_ARGS; } case READING_QUOTED_ARGS: switch (c) { case '\\': returnState = State.READING_QUOTED_ARGS; return State.IN_ESCAPE_ARG_SEQUENCE; case '"': case '\'': buffer.append(c); if (c == startQuote) { startQuote = '\0'; return State.READING_ARGS; } else { return State.READING_QUOTED_ARGS; } default: buffer.append(c); return State.READING_QUOTED_ARGS; } case IN_ESCAPE_ARG_SEQUENCE: buffer.append(c); return returnState; case FOUND_MACRO: throw new IllegalStateException( "The state must be reset to searching before more input may be consumed"); } throw new IllegalStateException("Unknown state " + state); } } }