/* * Copyright (C) 2015 Google, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.aitorvs.autoparcel.internal.codegen; /** * A simplistic Java scanner. This scanner returns a sequence of tokens that can be used to * reconstruct the source code. Since the source code is coming from a string, the scanner in fact * just returns token boundaries rather than the tokens themselves. * * <p>We are not dealing with arbitrary user code so we can assume there are no exotic things like * tabs or Unicode escapes that resolve into quotes. The purpose of the scanner here is to * return a sequence of offsets that split the string up in a way that allows us to work with * spaces without having to worry whether they are inside strings or comments. The particular * properties we use are that every string and character literal and every comment is a single * token; every newline plus all following indentation is a single token; and every other string * of consecutive spaces outside a comment or literal is a single token. That means that we can * safely compress a token that starts with a space into a single space, without falsely removing * indentation or changing the contents of strings. * * <p>In addition to real Java syntax, this scanner recognizes tokens of the form * {@code `text`}, which are used in the templates to wrap fully-qualified type names, so that they * can be extracted and replaced by imported names if possible. 
/*
 *
 * @author \u00c9amonn McManus
 */
// This scanner is different from the one in EclipseHackTokenizer (which is only needed for
// EclipseHack). The needs of the two scanners are very different: EclipseHackTokenizer is only
// needed to scan through an existing source file to find abstract method declarations, so it
// can discard everything that isn't needed for that, including comments and string literals for
// example. Meanwhile, this scanner needs to return a sequence of tokens that can be used to
// reconstruct the source code. EclipseHackTokenizer also operates on a Reader (which in practice is
// coming from a file), while here we already have the source code in a String, which means that we
// can just return token boundaries rather than the tokens themselves.
class JavaScanner {
  /** The text being scanned. The constructor guarantees that it ends with a newline. */
  private final String s;

  /**
   * Creates a scanner over the given source text.
   *
   * @param s the text to scan; a newline is appended if it does not already end with one
   */
  JavaScanner(String s) {
    // The guaranteed trailing newline means a '/' is never the last character, so tokenEnd can
    // look one character ahead without checking for the end of the string.
    this.s = s.endsWith("\n") ? s : s + "\n";
  }

  /**
   * Returns the offset just past the token that begins at {@code start}.
   *
   * <p>A token is one of: a space or newline together with all the spaces that follow it; a whole
   * block comment; a line comment up to but excluding its newline; a whole literal delimited by
   * {@code '}, {@code "} or {@code `}; or any other single character. A block comment or quoted
   * literal that is never closed is treated as extending to the end of the text.
   *
   * @param start offset of the first character of the token
   * @return the exclusive end offset of the token, or the length of the (newline-terminated) text
   *     if {@code start} is at or beyond its end
   * @throws IndexOutOfBoundsException if {@code start} is negative
   */
  int tokenEnd(int start) {
    int length = s.length();
    if (start >= length) {
      return length;
    }
    switch (s.charAt(start)) {
      case ' ':
      case '\n':
        return spaceEnd(start);
      case '/':
        // Safe lookahead: the text ends with '\n', so a '/' always has a successor.
        char next = s.charAt(start + 1);
        if (next == '*') {
          return blockCommentEnd(start);
        }
        if (next == '/') {
          return lineCommentEnd(start);
        }
        return start + 1;
      case '\'':
      case '"':
      case '`':
        return quoteEnd(start);
      default:
        // Every other character is considered to be its own token.
        return start + 1;
    }
  }

  /**
   * Returns the end of a whitespace token: the space or newline at {@code start} plus every space
   * (but no further newline) that immediately follows it.
   */
  private int spaceEnd(int start) {
    assert s.charAt(start) == ' ' || s.charAt(start) == '\n';
    int end = start + 1;
    while (end < s.length() && s.charAt(end) == ' ') {
      end++;
    }
    return end;
  }

  /**
   * Returns the offset just past the delimiter that closes the block comment opening at
   * {@code start}, or the length of the text if the comment is never closed.
   */
  private int blockCommentEnd(int start) {
    assert s.charAt(start) == '/' && s.charAt(start + 1) == '*';
    // Search from start + 2 so that the '*' of the opening delimiter cannot also be taken as the
    // first character of the closing one.
    int close = s.indexOf("*/", start + 2);
    return (close < 0) ? s.length() : close + 2;
  }

  /**
   * Returns the offset of the newline that terminates the line comment opening at {@code start}.
   * The newline itself is not part of the comment token.
   */
  private int lineCommentEnd(int start) {
    assert s.charAt(start) == '/' && s.charAt(start + 1) == '/';
    int newline = s.indexOf('\n', start + 2);
    assert newline > 0; // Always found, because the text ends with a newline.
    return newline;
  }

  /**
   * Returns the offset just past the quote character that closes the literal opening at
   * {@code start}, or the length of the text if the literal is never closed. A backslash inside
   * the literal escapes the character after it, so an escaped quote does not end the literal.
   */
  private int quoteEnd(int start) {
    char quote = s.charAt(start);
    assert quote == '\'' || quote == '"' || quote == '`';
    int length = s.length();
    int i = start + 1;
    while (i < length) {
      char c = s.charAt(i);
      if (c == quote) {
        return i + 1;
      }
      // Step over the escaped character as well as the backslash.
      i += (c == '\\') ? 2 : 1;
    }
    // Unterminated literal: the token runs to the end of the text.
    return length;
  }
}