/* * Copyright 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.template.soy.parsepasses.contextautoesc; import com.google.common.collect.ImmutableSet; /** * Some utilities for dealing with JavaScript syntax. * */ final class JsUtil { /** * True iff a slash after the given run of non-whitespace tokens starts a regular expression * instead of a div operator : (/ or /=). * * <p>This fails on some valid but nonsensical JavaScript programs like {@code x = ++/foo/i} which * is quite different than {@code x++/foo/i}, but is not known to fail on any known useful * programs. It is based on the draft <a * href="http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html">JavaScript 2.0 * lexical grammar</a> and requires one token of lookbehind. * * @param jsTokens A run of non-whitespace, non-comment, non string tokens not including the '/' * character. Non-empty. */ public static boolean isRegexPreceder(String jsTokens) { // Tokens that precede a regular expression in JavaScript. // "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=", "+", "+=", ",", // "-", "-=", "->", ".", "..", "...", "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", // "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^", "^^=", // "{", "|", "|=", "||", "||=", "~", // "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", // "throw", "try", "typeof" int jsTokensLen = jsTokens.length(); char lastChar = jsTokens.charAt(jsTokensLen - 1); switch (lastChar) { case '=': case '#': case '%': case '&': case '(': case '*': case ',': case '<': case '>': case '?': case ':': case ';': case '^': case '{': case '|': case '}': case '~': case '[': return true; case '+': case '-': // ++ and -- are not int signStart = jsTokensLen - 1; // Count the number of adjacent dashes or pluses. while (signStart > 0 && jsTokens.charAt(signStart - 1) == lastChar) { --signStart; } int numAdjacent = jsTokensLen - signStart; // True for odd numbers since "---" is the same as "-- -". // False for even numbers since "----" is the same as "-- --" which ends with a decrement, // not a minus sign. return (numAdjacent & 1) == 1; case '.': if (jsTokensLen == 1) { return true; } // There is likely to be a .. or ... operator in newer versions of EcmaScript. char ch = jsTokens.charAt(jsTokensLen - 2); return !('0' <= ch && ch <= '9'); default: // Look for one of the keywords above. int wordStart = jsTokensLen; while (wordStart > 0 && Character.isJavaIdentifierPart(jsTokens.charAt(wordStart - 1))) { --wordStart; } return REGEX_PRECEDER_KEYWORDS.contains(jsTokens.substring(wordStart)); } } private static final ImmutableSet<String> REGEX_PRECEDER_KEYWORDS = ImmutableSet.of( "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", "throw", "try", "typeof"); private JsUtil() { // Not instantiable. } }