/* * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.apache.flex.compiler.internal.parsing.as; import java.io.Closeable; import java.io.FileNotFoundException; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Stack; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.flex.compiler.clients.ASC; import org.apache.flex.compiler.constants.IASKeywordConstants; import org.apache.flex.compiler.filespecs.IFileSpecification; import org.apache.flex.compiler.internal.parsing.ITokenStreamFilter; import org.apache.flex.compiler.internal.parsing.SourceFragmentsReader; import org.apache.flex.compiler.internal.parsing.TokenBase; import org.apache.flex.compiler.internal.units.ASCompilationUnit; import org.apache.flex.compiler.parsing.IASToken; import org.apache.flex.compiler.parsing.IASTokenizer; import org.apache.flex.compiler.parsing.IASToken.ASTokenKind; import org.apache.flex.compiler.problems.CyclicalIncludesProblem; import org.apache.flex.compiler.problems.ExpectXmlBeforeNamespaceProblem; import org.apache.flex.compiler.problems.FileNotFoundProblem; import org.apache.flex.compiler.problems.ICompilerProblem; import org.apache.flex.compiler.problems.InternalCompilerProblem2; import org.apache.flex.compiler.problems.UnexpectedTokenProblem; import org.apache.flex.utils.ILengthAwareReader; import org.apache.flex.utils.NonLockingStringReader; import org.apache.flex.utils.ILengthAwareReader.InputType; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; /** * This Tokenizer provides tokens to be used by various clients, most notably * the ASParser. Given the nature of ambiguities in the ActionScript 3 language, * this tokenizer also serves to disambiguate tokens based on a combination of * look behind and lookahead. For all cases of ambiguity, only one token is * needed for look behind, and in our worst case, n tokens forwards where n is * the number of tokens that can be produced. Some other state is kept in order * to know which type of container we may exist in (function, class, interface, * etc). We buffer LA token results to avoid unneeded lookahead */ public class StreamingASTokenizer implements ASTokenTypes, IASTokenizer, Closeable { private static final String FOR_EACH = "for each"; private static final String XML = "xml"; private static final String DEFAULT_XML_NAMESPACE = "default xml namespace"; private static final String ZERO = "0"; /** * Map from keyword text to token type. * <p> * We use a HashMap here to avoid slowing down the performance of the * underlying lexer. 
We avoid the "longest match" problem, which would otherwise * require a lot of rescanning at the lexer level to distinguish keywords from * identifiers. And since hash map lookup is constant time, this is (in theory) * faster than doing it in the scanner, since we're not bound by i/o or * state-machine backtracking. */ private static final Map<String, Integer> keywordToTokenMap = new ImmutableMap.Builder<String, Integer>() .put(IASKeywordConstants.AS, TOKEN_KEYWORD_AS) .put(IASKeywordConstants.IS, TOKEN_KEYWORD_IS) .put(IASKeywordConstants.INSTANCEOF, TOKEN_KEYWORD_INSTANCEOF) .put(IASKeywordConstants.IN, TOKEN_KEYWORD_IN) .put(IASKeywordConstants.DELETE, TOKEN_KEYWORD_DELETE) .put(IASKeywordConstants.TYPEOF, TOKEN_KEYWORD_TYPEOF) .put(IASKeywordConstants.CONST, TOKEN_KEYWORD_CONST) .put(IASKeywordConstants.GET, TOKEN_RESERVED_WORD_GET) .put(IASKeywordConstants.IMPLEMENTS, TOKEN_RESERVED_WORD_IMPLEMENTS) .put(IASKeywordConstants.IMPORT, TOKEN_KEYWORD_IMPORT) .put(IASKeywordConstants.USE, TOKEN_KEYWORD_USE) .put(IASKeywordConstants.EXTENDS, TOKEN_RESERVED_WORD_EXTENDS) .put(IASKeywordConstants.NEW, TOKEN_KEYWORD_NEW) .put(IASKeywordConstants.DYNAMIC, TOKEN_MODIFIER_DYNAMIC) .put(IASKeywordConstants.FINAL, TOKEN_MODIFIER_FINAL) .put(IASKeywordConstants.NATIVE, TOKEN_MODIFIER_NATIVE) .put(IASKeywordConstants.OVERRIDE, TOKEN_MODIFIER_OVERRIDE) .put(IASKeywordConstants.STATIC, TOKEN_MODIFIER_STATIC) .put(IASKeywordConstants.VIRTUAL, TOKEN_MODIFIER_VIRTUAL) .put(IASKeywordConstants.SET, TOKEN_RESERVED_WORD_SET) // Keywords with special token types that affect subsequent blocks .put(IASKeywordConstants.CATCH, TOKEN_KEYWORD_CATCH) .put(IASKeywordConstants.CLASS, TOKEN_KEYWORD_CLASS) .put(IASKeywordConstants.FUNCTION, TOKEN_KEYWORD_FUNCTION) .put(IASKeywordConstants.INTERFACE, TOKEN_KEYWORD_INTERFACE) .put(IASKeywordConstants.PACKAGE, TOKEN_KEYWORD_PACKAGE) // #120009: allow "var" inside parameter list, even though it's not // valid AS (don't turn the subsequent function block open into a block open) .put(IASKeywordConstants.VAR, TOKEN_KEYWORD_VAR) .put(IASKeywordConstants.FALSE, TOKEN_KEYWORD_FALSE) .put(IASKeywordConstants.NULL, TOKEN_KEYWORD_NULL) .put(IASKeywordConstants.TRUE, TOKEN_KEYWORD_TRUE) .put(IASKeywordConstants.PUBLIC, HIDDEN_TOKEN_BUILTIN_NS) .put(IASKeywordConstants.PRIVATE, HIDDEN_TOKEN_BUILTIN_NS) .put(IASKeywordConstants.PROTECTED, HIDDEN_TOKEN_BUILTIN_NS) .put(IASKeywordConstants.INTERNAL, HIDDEN_TOKEN_BUILTIN_NS) .put(IASKeywordConstants.INCLUDE, TOKEN_KEYWORD_INCLUDE) // Keywords for statements that affect subsequent blocks .put(IASKeywordConstants.DO, TOKEN_KEYWORD_DO) .put(IASKeywordConstants.WHILE, TOKEN_KEYWORD_WHILE) .put(IASKeywordConstants.BREAK, TOKEN_KEYWORD_BREAK) .put(IASKeywordConstants.CONTINUE, TOKEN_KEYWORD_CONTINUE) .put(IASKeywordConstants.GOTO, TOKEN_RESERVED_WORD_GOTO) .put(IASKeywordConstants.FOR, TOKEN_KEYWORD_FOR) .put(StreamingASTokenizer.FOR_EACH, TOKEN_KEYWORD_FOR) .put(IASKeywordConstants.EACH, TOKEN_RESERVED_WORD_EACH) .put(IASKeywordConstants.WITH, TOKEN_KEYWORD_WITH) .put(IASKeywordConstants.ELSE, TOKEN_KEYWORD_ELSE) .put(IASKeywordConstants.IF, TOKEN_KEYWORD_IF) .put(IASKeywordConstants.SWITCH, TOKEN_KEYWORD_SWITCH) .put(IASKeywordConstants.CASE, TOKEN_KEYWORD_CASE) .put(IASKeywordConstants.DEFAULT, TOKEN_KEYWORD_DEFAULT) .put(IASKeywordConstants.TRY, TOKEN_KEYWORD_TRY) .put(IASKeywordConstants.FINALLY, TOKEN_KEYWORD_FINALLY) // Keywords with a generic keyword token type that have no effect // on subsequent blocks.
.put(IASKeywordConstants.NAMESPACE, TOKEN_RESERVED_WORD_NAMESPACE) .put(IASKeywordConstants.CONFIG, TOKEN_RESERVED_WORD_CONFIG) .put(IASKeywordConstants.THROW, TOKEN_KEYWORD_THROW) .put(IASKeywordConstants.SUPER, TOKEN_KEYWORD_SUPER) .put(IASKeywordConstants.THIS, TOKEN_KEYWORD_THIS) .put(IASKeywordConstants.VOID, TOKEN_KEYWORD_VOID) .put(IASKeywordConstants.RETURN, TOKEN_KEYWORD_RETURN) .build(); /** * Configuration for our tokenizer */ private static final class TokenizerConfig { /** * Flag that lets us ignore keywords for more general string parsing */ public boolean ignoreKeywords = false; /** * Flag that lets us be aware of metadata */ public boolean findMetadata = true; /** * Flag indicating that we are tokenizing full content/files, and not * segments */ public boolean completeContent = true; /** * IFilter for old APIs */ public ITokenStreamFilter filter; /** * Flag indicating we should collect comments */ public boolean collectComments = false; /** * Flag indicating we follow include statements, including their tokens */ public boolean followIncludes = true; } private Reader reader; //underlying lexer private RawASTokenizer tokenizer; //last exception to prevent us from looping forever private Exception lastException = null; //LA buffer private final List<ASToken> lookAheadBuffer; private int bufferSize = 0; //maintain size ourselves since it's faster //last token we encountered, used for lookback private ASToken lastToken; private int offsetAdjustment; //for offset adjustment private int lineAdjustment = 0; private int columnAdjustment = 0; private IncludeHandler includeHandler; /** * The forked tokenizer for included files. If not null, {@link #next()} * will return a token from this tokenizer. * <p> * After all the tokens are returned from the included source file, * {@link #closeIncludeTokenizer()} closes the tokenizer and sets this field * to null. */ private StreamingASTokenizer forkIncludeTokenizer; /** * Flag to indicate whether we have encountered include statements */ private boolean hasEncounteredIncludeStatements = false; private TokenizerConfig config; /** * Source file path. This is used for resolving included file paths. * {@link #StreamingASTokenizer(IFileSpecification)} and * {@link #StreamingASTokenizer(IFileSpecification, Stack)} set the value. */ private String sourcePath; /** * Lexer problems. * */ private final List<ICompilerProblem> problems = new ArrayList<ICompilerProblem>(); /** * Imaginary tokens generated for {@code asc -in} option. */ private Iterator<ASToken> ascIncludeImaginaryTokens; /** * You should probably not use this constructor. There is some legacy code * that uses this constructor, but that code should be updated to use one of * the static create methods below. * <p> * TODO: make this private. */ public StreamingASTokenizer(final Reader reader) { this(); setReader(reader); } /** * A pool to reduce the number of duplicate string literals created */ private final HashMap<String, String> stringPool; /** * You should probably not use this constructor. There is a lot of code that * uses this constructor, but that code should be updated to use one of the * static create methods below. * <p> * TODO: make this private. */ public StreamingASTokenizer() { tokenizer = new RawASTokenizer(); config = new TokenizerConfig(); lookAheadBuffer = new ArrayList<ASToken>(5); includeHandler = IncludeHandler.creatDefaultIncludeHandler(); stringPool = new HashMap<String, String>(); // Initialize string pool with keyword strings.
The keyword strings // are declared as constants which are automatically "interned". for (final String keyword : keywordToTokenMap.keySet()) { stringPool.put(keyword, keyword); } } /** * Creates a tokenizer suitable for the MXML indexing code. * * @param fileName Path of the file whose script fragments the * new tokenizer will tokenize. * @return A new tokenizer suitable for tokenizing script fragments in an * MXML document that is being tokenized for the full text search index. */ public static StreamingASTokenizer createForMXMLIndexing(String fileName) { StreamingASTokenizer result = new StreamingASTokenizer(); result.setPath(fileName); result.includeHandler.enterFile(result.sourcePath); return result; } /** * Fork a new tokenizer when an "include" directive is found. This method * will pass the {@code StructureTracker} of the current tokenizer down to * the forked tokenizer. * * @param currentTokenizer Current tokenizer. * @param fileSpec File specification of the included file. * @param includeHandler Include handler. * @return A tokenizer for the included file. * @throws FileNotFoundException Error. */ private static StreamingASTokenizer createForIncludeFile( final StreamingASTokenizer currentTokenizer, final IFileSpecification fileSpec, final IncludeHandler includeHandler) throws FileNotFoundException { final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler); return tokenizer; } /** * Create a tokenizer from a source file. This is the lexer entry-point used * by {@link ASCompilationUnit}. * * @param fileSpec File specification that provides the reader and the file path. * @param includeHandler Include handler. * @throws FileNotFoundException error */ protected static StreamingASTokenizer create( final IFileSpecification fileSpec, final IncludeHandler includeHandler) throws FileNotFoundException { assert fileSpec != null : "File specification can't be null."; assert includeHandler != null : "Include handler can't be null."; final StreamingASTokenizer tokenizer = new StreamingASTokenizer(); tokenizer.setReader(fileSpec.createReader()); tokenizer.setPath(fileSpec.getPath()); tokenizer.includeHandler = includeHandler; tokenizer.includeHandler.enterFile(tokenizer.sourcePath); return tokenizer; } /** * Create a tokenizer for {@code ASParser#parseFile()}. * * @param fileSpec File specification that provides the reader and the file path. * @param includeHandler Include handler. * @param followIncludes True if included files are also parsed. * @param includedFiles A list of included file paths. * @return Lexer. * @throws FileNotFoundException error */ protected static StreamingASTokenizer createForASParser( final IFileSpecification fileSpec, final IncludeHandler includeHandler, final boolean followIncludes, final List<String> includedFiles) throws FileNotFoundException { final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler); tokenizer.setFollowIncludes(followIncludes); final ImmutableList.Builder<ASToken> imaginaryTokensBuilder = new ImmutableList.Builder<ASToken>(); for (final String filename : includedFiles) { imaginaryTokensBuilder.add(new ASToken( ASTokenTypes.TOKEN_KEYWORD_INCLUDE, 0, 0, 0, 0, "include")); imaginaryTokensBuilder.add(new ASToken( ASTokenTypes.TOKEN_LITERAL_STRING, 0, 0, 0, 0, '"' + filename + '"')); } tokenizer.ascIncludeImaginaryTokens = imaginaryTokensBuilder.build().iterator(); return tokenizer; } /** * This creator doesn't "enter file" on creation.
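* <p> * (The caller is then presumably responsible for balancing * {@code IncludeHandler#enterFile} and {@code leaveFile} itself; contrast * with {@link #create}, which calls {@code enterFile} on construction.)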
*/ protected static StreamingASTokenizer createForInlineScriptScopeBuilding( final Reader reader, final String path, final IncludeHandler includeHandler, final int offsetAdjustment, final int lineAdjustment, final int columnAdjustment) { assert reader != null : "Reader can't be null"; assert path != null : "Path can't be null"; assert includeHandler != null : "IncludeHandler can't be null"; final StreamingASTokenizer tokenizer = new StreamingASTokenizer(); tokenizer.setReader(reader); tokenizer.setPath(path); tokenizer.includeHandler = includeHandler; tokenizer.setSourcePositionAdjustment( offsetAdjustment, lineAdjustment, columnAdjustment); return tokenizer; } /** * Create a tokenizer to parse an expression. */ protected static StreamingASTokenizer createForInlineExpressionParsing( final Reader reader, final String path ) { assert reader != null : "Reader can't be null"; assert path != null : "Path can't be null"; final StreamingASTokenizer tokenizer = new StreamingASTokenizer(); tokenizer.setReader(reader); tokenizer.setPath(path); tokenizer.includeHandler.enterFile(path); // Have to do this to get the tokenizer to work right - some things, like function expressions, // won't tokenize correctly unless the last token is '=' or some other special token. tokenizer.lastToken = new ASToken(ASTokenTypes.TOKEN_OPERATOR_ASSIGNMENT, -1, -1, -1, -1, "="); return tokenizer; } /** * This method can create a {@code StreamingASTokenizer} with optional * "follow includes". If {@code IncludeHandler} is not null, it will follow * {@code include} directives. * * @param reader Input to the tokenizer. * @param path File path of the input. * @param includeHandler If not null, the created tokenizer will follow * {@code include} directives. * @return A {@code StreamingASTokenizer}. */ public static StreamingASTokenizer createForRepairingASTokenizer( final Reader reader, final String path, final IncludeHandler includeHandler) { assert path != null || includeHandler == null : "We need a source path to follow includes"; final StreamingASTokenizer tokenizer = new StreamingASTokenizer(); tokenizer.setReader(reader); tokenizer.setPath(path); if (includeHandler != null) { tokenizer.includeHandler = includeHandler; includeHandler.enterFile(path); } return tokenizer; } /** * Sets the {@link Reader} that supplies the content to this tokenizer. It * is up to the client to close any previous readers that have been in use. * It is also up to the client to close the reader once it has been used. * * @param reader a {@link Reader} */ public void setReader(final Reader reader) { setReader(reader, 0, 0, 0); } /** * Sets the {@link Reader} that supplies the content to this tokenizer. It * is up to the client to close any previous readers that have been in use. * It is also up to the client to close the reader once it has been used. * * @param reader a {@link Reader} * @param offset Offset adjustment. If the specified reader is reading from * a string extracted from a source file, this should be the offset of the * first character read from the reader in the source file. * @param line Line adjustment.
* @param column Column adjustment */ public void setReader(final Reader reader, int offset, int line, int column) { this.reader = reader; tokenizer = new RawASTokenizer(); tokenizer.yyreset(reader); tokenizer.setCollectComments(config.collectComments); setSourcePositionAdjustment(offset, line, column); } /** * Sets the path to the file this tokenizer is scanning * * @param path a file path */ @Override public void setPath(String path) { assert path != null : "path of tokenizer shouldn't be null"; sourcePath = path; tokenizer.setSourcePath(path); } /** * Allows for the adjustment of offset, line and column information when * parsing subsequences of text. This should be called before tokenization * has started * * @param offset The offset where the fragment starts. * @param line The line where the fragment starts. This should be a * ZERO-based line number * @param column The column where the fragment starts. This should be a * ZERO-based column number */ public void setSourcePositionAdjustment(int offset, int line, int column) { offsetAdjustment = offset; lineAdjustment = line; columnAdjustment = column; } /** * Sets whether comments are collected: single line and multi-line. * Default is <code>false</code> * * @param collect true if we should collect comments */ @Override public void setCollectComments(final boolean collect) { config.collectComments = collect; if (tokenizer != null) tokenizer.setCollectComments(collect); } /** * Sets whether we follow include statements, including their tokens. * Default is <code>true</code> * * @param followIncludes true if we should follow includes */ @Override public void setFollowIncludes(final boolean followIncludes) { config.followIncludes = followIncludes; } /** * Closes the underlying reader */ @Override public void close() throws IOException { if (tokenizer != null) { tokenizer.reset(); tokenizer.yyclose(); //close the reader } } /** * Sets whether we ignore keywords while scanning. Default is * <code>false</code> * * @param ignore true if we should ignore keywords */ public void setIgnoreKeywords(final boolean ignore) { config.ignoreKeywords = ignore; } /** * Sets whether we are scanning a full file, or a fragment. Default is * <code>true</code> * * @param full true if we are scanning a full file. */ public void setScanningFullContent(final boolean full) { config.completeContent = full; } /** * Sets whether we will find metadata constructs. Default is * <code>true</code> * * @param aware true if we will find metadata */ public void setIsMetadataAware(final boolean aware) { config.findMetadata = aware; } /** * Sets the {@link ITokenStreamFilter} used to filter out unwanted tokens * * @param filter the token filter to alter the stream returned from the * tokenizer */ public void setTokenFilter(ITokenStreamFilter filter) { config.filter = filter; } /** * Sets the include handler used by this tokenizer to get * {@link IFileSpecification} for included files. * * @param handler {@link IncludeHandler} this tokenizer should use. */ public void setIncludeHandler(IncludeHandler handler) { includeHandler = handler; } /** * Indicates whether we have tokenization problems.
Can be called once scanning * has begun * * @return true if problems have been encountered */ public boolean hasTokenizationProblems() { return tokenizer.hasProblems() || problems.size() > 0; } /** * Indicates whether this tokenizer has encountered include statements, * regardless of whether it is set to follow them or not * * @return true if we have encountered includes */ public boolean hasEncounteredIncludeStatements() { return hasEncounteredIncludeStatements; } /** * Returns a collection of problems that have been encountered while * scanning. * * @return a list of problems, never null */ public List<ICompilerProblem> getTokenizationProblems() { ArrayList<ICompilerProblem> problems = new ArrayList<ICompilerProblem>(this.problems); problems.addAll(tokenizer.getProblems()); return problems; } public ASToken[] getTokens(final Reader reader, ITokenStreamFilter filter) { setReader(reader); List<ASToken> tokenList = initializeTokenList(reader); ASToken token = null; do { token = next(); if (token != null && filter.accept(token)) tokenList.add(token.clone()); //make a copy because of object pool } while (token != null); return tokenList.toArray(new ASToken[0]); } @Override public ASToken[] getTokens(final Reader reader) { if (config.filter != null) return getTokens(reader, config.filter); setReader(reader); List<ASToken> tokenList = initializeTokenList(reader); ASToken token = null; do { token = next(); if (token != null) tokenList.add(token.clone()); //copy ctor because of object pool } while (token != null); return tokenList.toArray(new ASToken[0]); } /** * Creates an empty token list whose initial capacity is estimated from the * length of the given reader's content. * * @param reader reader that will supply the tokens * @return an empty, appropriately pre-sized token list */ private List<ASToken> initializeTokenList(final Reader reader) { List<ASToken> tokenList; int listSize = 8012; if (reader instanceof NonLockingStringReader) { //we know the length of this string. For a string of length x, there are roughly x/5 tokens that //can be constructed from that string. Size the array appropriately. listSize = 5; if (((NonLockingStringReader)reader).getLength() > 0) { listSize = Math.max((int)((NonLockingStringReader)reader).getLength() / 5, 5); } } else if (reader instanceof ILengthAwareReader && ((ILengthAwareReader)reader).getInputType() == InputType.FILE) { listSize = 9; if (((ILengthAwareReader)reader).getLength() > 0) { listSize = Math.max((int)((ILengthAwareReader)reader).getLength() / 9, 9); } } tokenList = new ArrayList<ASToken>(listSize); return tokenList; } @Override public IASToken[] getTokens(final String range) { return getTokens(new NonLockingStringReader(range)); } /** * Returns the next token that can be produced from the underlying reader * * @param filter an {@link ITokenStreamFilter} to restrict the tokens that * are returned * @return an ASToken, or null if no more tokens can be produced */ public final ASToken next(final ITokenStreamFilter filter) { ASToken retVal = null; while (true) { retVal = next(); if (retVal == null || filter.accept(retVal)) { break; } } return retVal; } /** * Returns the next token that can be produced from the underlying reader. * <p> * If the forked "include file tokenizer" is open (not null), return the * next token from it. If the forked tokenizer reaches the end of the * included file, close (set to null) the forked tokenizer and return tokens * from the main source file. * * @return an ASToken, or null if no more tokens can be produced */ public final ASToken next() { ASToken retVal = null; // If the lexer for the included file is open, read from the included tokenizer. boolean consumeSemi = false; try {
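// Tokens come from three places, in priority order: the forked
// include-file tokenizer (if one is open), then the lookahead buffer,
// then the underlying reader of the main file.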
if (forkIncludeTokenizer != null) { retVal = forkIncludeTokenizer.next(); // Check if the forked tokenizer reached EOF. if (retVal == null) { closeIncludeTokenizer(); // We should consume the next semicolon we find. // Most include statements are terminated with a semicolon, // and because we read the contents of the included file, // this could cause problems with a semicolon in a place // we don't want it. consumeSemi = true; } else return retVal; } if (bufferSize > 0) { retVal = lookAheadBuffer.remove(0); bufferSize--; } else { retVal = nextTokenFromReader(); } if (retVal == null) return null; final int tokenType = retVal.getType(); switch (tokenType) { // if we're seeing each in this part of the loop, it's not a // syntactic keyword // since we do lookahead when we see "for", checking for "each" case TOKEN_RESERVED_WORD_EACH: treatKeywordAsIdentifier(retVal); processUserDefinedNamespace(retVal, 0); return retVal; case TOKEN_KEYWORD_INCLUDE: { if (lastToken != null) { int lastTokenType = lastToken.getType(); switch (lastTokenType) { case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_FUNCTION: case TOKEN_RESERVED_WORD_GET: case TOKEN_RESERVED_WORD_SET: case TOKEN_OPERATOR_MEMBER_ACCESS: { retVal.setType(TOKEN_IDENTIFIER); return retVal; } } } // "followIncludes=false" is usually used by the code model // partitioner, which wants the raw "include" token. if (!config.followIncludes) return retVal; final ASToken token = LT(1); // "include" at EOF is always a keyword if (token == null) return retVal; if (!matches(token, TOKEN_LITERAL_STRING)) { treatKeywordAsIdentifier(retVal); // it's an identifier processUserDefinedNamespace(retVal, 0); } else { hasEncounteredIncludeStatements = true; // Consume the file path after the include token. consume(1); final String filenameTokenText = token.getText(); final String includeString = filenameTokenText.substring(1, filenameTokenText.length() - 1); if (sourcePath == null) throw new NullPointerException("Source file is needed for resolving included file path."); IFileSpecification includedFileSpec = null; //respond to problems from our file handler includedFileSpec = includeHandler.getFileSpecificationForInclude(sourcePath, includeString); if (includedFileSpec == null) { ICompilerProblem problem = new FileNotFoundProblem(token, filenameTokenText); //the text will be the path not found problems.add(problem); retVal = next(); return retVal; } if (includeHandler.isCyclicInclude(includedFileSpec.getPath())) { ICompilerProblem problem = new CyclicalIncludesProblem(token); problems.add(problem); retVal = next(); return retVal; } else { // Fork a tokenizer for the included file try { forkIncludeTokenizer = createForIncludeFile(this, includedFileSpec, includeHandler); retVal = forkIncludeTokenizer.next(); } catch (FileNotFoundException fnfe) { includeHandler.handleFileNotFound(includedFileSpec); ICompilerProblem problem = new FileNotFoundProblem(token, includedFileSpec.getPath()); problems.add(problem); retVal = next(); return retVal; } } } // Recover from compiler problems and continue. if (retVal == null) { // Included file is empty. closeIncludeTokenizer(); // Fall back to main source.
retVal = this.next(); } return retVal; } case TOKEN_RESERVED_WORD_CONFIG: if (matches(LT(1), TOKEN_RESERVED_WORD_NAMESPACE)) { //we have a config namespace retVal.setType(TOKEN_RESERVED_WORD_CONFIG); return retVal; } treatKeywordAsIdentifier(retVal); //identifier processUserDefinedNamespace(retVal, 0); return retVal; case HIDDEN_TOKEN_BUILTIN_NS: if (matches(LT(1), TOKEN_OPERATOR_NS_QUALIFIER)) { //we have public:: and this structure is not an annotation but a name ref retVal.setType(TOKEN_NAMESPACE_NAME); return retVal; } retVal.setType(TOKEN_NAMESPACE_ANNOTATION); return retVal; case TOKEN_MODIFIER_DYNAMIC: case TOKEN_MODIFIER_FINAL: case TOKEN_MODIFIER_NATIVE: case TOKEN_MODIFIER_OVERRIDE: case TOKEN_MODIFIER_STATIC: case TOKEN_MODIFIER_VIRTUAL: { // previous token is either a modifier or a namespace, or if // null, assume keyword // next token is from a definition or a modifier or a namespace final ASToken nextToken = LT(1); if (nextToken != null) { switch (nextToken.getType()) { case TOKEN_KEYWORD_CLASS: case TOKEN_KEYWORD_FUNCTION: case TOKEN_KEYWORD_INTERFACE: case TOKEN_RESERVED_WORD_NAMESPACE: case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_CONST: case TOKEN_MODIFIER_DYNAMIC: case TOKEN_MODIFIER_FINAL: case TOKEN_MODIFIER_NATIVE: case TOKEN_MODIFIER_OVERRIDE: case TOKEN_MODIFIER_STATIC: case TOKEN_MODIFIER_VIRTUAL: case TOKEN_NAMESPACE_ANNOTATION: case TOKEN_NAMESPACE_NAME: case HIDDEN_TOKEN_BUILTIN_NS: return retVal; case TOKEN_IDENTIFIER: if (isUserDefinedNamespace(nextToken, 1)) // we're already looking ahead one so make sure we look ahead one further return retVal; default: // Not applicable to other token types. break; } } treatKeywordAsIdentifier(retVal); processUserDefinedNamespace(retVal, 0); return retVal; } //we combine +/- for numeric literals here case TOKEN_OPERATOR_MINUS: case TOKEN_OPERATOR_PLUS: { if (lastToken == null || !lastToken.canPreceedSignedOperator()) { final ASToken nextToken = LT(1); if (nextToken != null) { switch (nextToken.getType()) { case TOKEN_LITERAL_NUMBER: case TOKEN_LITERAL_HEX_NUMBER: retVal.setEnd(nextToken.getEnd()); final StringBuilder builder = new StringBuilder(retVal.getText()); builder.append(nextToken.getText()); retVal.setText(poolString(builder.toString())); consume(1); retVal.setType(nextToken.getType()); break; default: // ignore other tokens break; } } } return retVal; } //RECOGNIZE: for each case TOKEN_KEYWORD_FOR: { final ASToken token = LT(1); if (matches(token, TOKEN_RESERVED_WORD_EACH)) { retVal.setEnd(token.getEnd()); retVal.setText(FOR_EACH); consume(1); return retVal; } if (lastToken != null) { int lastTokenType = lastToken.getType(); switch (lastTokenType) { case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_FUNCTION: case TOKEN_RESERVED_WORD_GET: case TOKEN_RESERVED_WORD_SET: case TOKEN_OPERATOR_MEMBER_ACCESS: retVal.setType(TOKEN_IDENTIFIER); } } return retVal; } //RECOGNIZE: default xml namespace //default xml namespace must exist on the same line case TOKEN_KEYWORD_DEFAULT: { final ASToken maybeNS = LT(2); final boolean foundTokenNamespace = maybeNS != null && maybeNS.getType() == TOKEN_RESERVED_WORD_NAMESPACE; final ASToken maybeXML = LT(1); if (foundTokenNamespace) { final boolean foundTokenXML = maybeXML != null && maybeXML.getType() == TOKEN_IDENTIFIER && XML.equals(maybeXML.getText()); if (!foundTokenXML) { final ICompilerProblem problem = new ExpectXmlBeforeNamespaceProblem(maybeNS); problems.add(problem); } //combine all of these tokens together retVal.setEnd(maybeNS.getEnd()); retVal.setText(DEFAULT_XML_NAMESPACE);
retVal.setType(TOKEN_DIRECTIVE_DEFAULT_XML); consume(2); } // if this isn't "default xml namespace" then // see if it is the default case in a switch // otherwise, assume it is an identifierName else if (maybeXML != null && maybeXML.getType() != TOKEN_COLON) retVal.setType(TOKEN_IDENTIFIER); else if (lastToken != null) { int lastTokenType = lastToken.getType(); switch (lastTokenType) { case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_FUNCTION: case TOKEN_RESERVED_WORD_GET: case TOKEN_RESERVED_WORD_SET: case TOKEN_OPERATOR_MEMBER_ACCESS: retVal.setType(TOKEN_IDENTIFIER); } } return retVal; } case TOKEN_KEYWORD_VOID: { //check for void 0 final ASToken token = LT(1); if (matches(token, TOKEN_LITERAL_NUMBER) && ZERO.equals(token.getText())) { retVal.setType(TOKEN_VOID_0); combineText(retVal, token); consume(1); } //check for void(0) else if (matches(token, TOKEN_PAREN_OPEN)) { final ASToken zeroT = LT(2); if (matches(zeroT, TOKEN_LITERAL_NUMBER) && ZERO.equals(zeroT.getText())) { final ASToken closeParenT = LT(3); if (matches(closeParenT, TOKEN_PAREN_CLOSE)) { combineText(retVal, token); combineText(retVal, zeroT); combineText(retVal, closeParenT); retVal.setType(TOKEN_VOID_0); consume(3); } } } return retVal; } case TOKEN_IDENTIFIER: { //check for user-defined namespace before we return anything processUserDefinedNamespace(retVal, 0); return retVal; } //this is for metadata processing case TOKEN_SQUARE_OPEN: { retVal = tryParseMetadata(retVal); return retVal; } case HIDDEN_TOKEN_STAR_ASSIGNMENT: { //this is to solve an ambiguous case, where we can't tell the difference between //var foo:*=null and foo *= null; retVal.setType(TOKEN_OPERATOR_STAR); retVal.setEnd(retVal.getEnd() - 1); retVal.setText("*"); //add the equals final ASToken nextToken = tokenizer.buildToken(TOKEN_OPERATOR_ASSIGNMENT, retVal.getEnd() + 1, retVal.getEnd() + 2, retVal.getLine(), retVal.getColumn(), "="); nextToken.setSourcePath(sourcePath); addTokenToBuffer(nextToken); return retVal; } case TOKEN_SEMICOLON: if (consumeSemi) { return next(); } return retVal; case TOKEN_VOID_0: case TOKEN_LITERAL_REGEXP: case TOKEN_COMMA: case TOKEN_COLON: case TOKEN_PAREN_OPEN: case TOKEN_PAREN_CLOSE: case TOKEN_SQUARE_CLOSE: case TOKEN_ELLIPSIS: case TOKEN_OPERATOR_PLUS_ASSIGNMENT: case TOKEN_OPERATOR_MINUS_ASSIGNMENT: case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT: case TOKEN_OPERATOR_DIVISION_ASSIGNMENT: case TOKEN_OPERATOR_MODULO_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT: case TOKEN_OPERATOR_STAR: case TOKEN_OPERATOR_NS_QUALIFIER: case TOKEN_ASDOC_COMMENT: case TOKEN_OPERATOR_DIVISION: case TOKEN_OPERATOR_MODULO: case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT: case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT: case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT: case TOKEN_OPERATOR_LESS_THAN: case TOKEN_OPERATOR_GREATER_THAN: case TOKEN_OPERATOR_LESS_THAN_EQUALS: case TOKEN_OPERATOR_GREATER_THAN_EQUALS: case TOKEN_OPERATOR_EQUAL: case TOKEN_OPERATOR_NOT_EQUAL: case TOKEN_OPERATOR_STRICT_EQUAL: case TOKEN_OPERATOR_STRICT_NOT_EQUAL: case TOKEN_OPERATOR_BITWISE_AND: case TOKEN_OPERATOR_BITWISE_XOR: case TOKEN_OPERATOR_BITWISE_OR: case TOKEN_OPERATOR_LOGICAL_AND: case TOKEN_OPERATOR_LOGICAL_OR: case TOKEN_OPERATOR_LOGICAL_AND_ASSIGNMENT: case TOKEN_OPERATOR_LOGICAL_OR_ASSIGNMENT: case
TOKEN_TYPED_COLLECTION_OPEN: case TOKEN_TYPED_COLLECTION_CLOSE: case TOKEN_OPERATOR_MEMBER_ACCESS: case TOKEN_RESERVED_WORD_NAMESPACE: case TOKEN_RESERVED_WORD_GET: case TOKEN_RESERVED_WORD_SET: case TOKEN_OPERATOR_ASSIGNMENT: case TOKEN_TYPED_LITERAL_CLOSE: case TOKEN_TYPED_LITERAL_OPEN: case TOKEN_OPERATOR_TERNARY: case TOKEN_OPERATOR_DECREMENT: case TOKEN_OPERATOR_INCREMENT: case TOKEN_OPERATOR_ATSIGN: case TOKEN_OPERATOR_BITWISE_NOT: case TOKEN_OPERATOR_LOGICAL_NOT: case TOKEN_E4X_BINDING_CLOSE: case TOKEN_E4X_BINDING_OPEN: case TOKEN_OPERATOR_DESCENDANT_ACCESS: case TOKEN_NAMESPACE_ANNOTATION: case TOKEN_NAMESPACE_NAME: case TOKEN_BLOCK_OPEN: case TOKEN_BLOCK_CLOSE: case TOKEN_KEYWORD_FUNCTION: return retVal; case HIDDEN_TOKEN_MULTI_LINE_COMMENT: case HIDDEN_TOKEN_SINGLE_LINE_COMMENT: if (tokenizer.isCollectingComments()) { return retVal; } assert (false); return null; case TOKEN_KEYWORD_INSTANCEOF: case TOKEN_KEYWORD_AS: case TOKEN_KEYWORD_IN: case TOKEN_KEYWORD_IS: if (lastToken != null) { int lastTokenType = lastToken.getType(); switch (lastTokenType) { case TOKEN_SEMICOLON: case TOKEN_BLOCK_OPEN: case TOKEN_COMMA: retVal.setType(TOKEN_IDENTIFIER); return retVal; } } else { // we are the first token, so assume identifier retVal.setType(TOKEN_IDENTIFIER); return retVal; } // and fall through case TOKEN_KEYWORD_DELETE: ASToken nextToken = LT(1); if (nextToken != null) { int nextTokenType = nextToken.getType(); switch (nextTokenType) { // if followed by an identifier, assume it is the // keyword and not an identifierName case TOKEN_IDENTIFIER: return retVal; // if followed by a comma or semicolon, it is // probably being used in an expression case TOKEN_COMMA: case TOKEN_SEMICOLON: retVal.setType(TOKEN_IDENTIFIER); return retVal; } } // and fall through case TOKEN_KEYWORD_BREAK: case TOKEN_KEYWORD_CASE: case TOKEN_KEYWORD_CATCH: case TOKEN_KEYWORD_CLASS: case TOKEN_KEYWORD_CONST: case TOKEN_KEYWORD_CONTINUE: case TOKEN_KEYWORD_DO: case TOKEN_KEYWORD_ELSE: case TOKEN_KEYWORD_FALSE: case TOKEN_KEYWORD_FINALLY: case TOKEN_KEYWORD_IF: case TOKEN_KEYWORD_IMPORT: case TOKEN_KEYWORD_INTERFACE: case TOKEN_KEYWORD_NULL: case TOKEN_KEYWORD_PACKAGE: case TOKEN_KEYWORD_SUPER: case TOKEN_KEYWORD_SWITCH: case TOKEN_KEYWORD_THIS: case TOKEN_KEYWORD_TRUE: case TOKEN_KEYWORD_TRY: case TOKEN_KEYWORD_TYPEOF: case TOKEN_KEYWORD_USE: case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_WHILE: case TOKEN_KEYWORD_WITH: case TOKEN_KEYWORD_RETURN: case TOKEN_KEYWORD_THROW: case TOKEN_KEYWORD_NEW: if (lastToken != null) { int lastTokenType = lastToken.getType(); switch (lastTokenType) { case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_FUNCTION: case TOKEN_RESERVED_WORD_GET: case TOKEN_RESERVED_WORD_SET: case TOKEN_OPERATOR_MEMBER_ACCESS: retVal.setType(TOKEN_IDENTIFIER); } } return retVal; default: if (ASToken.isE4X(tokenType)) return retVal; if (retVal.isKeywordOrContextualReservedWord() || retVal.isLiteral()) return retVal; // If we reach here, the token fails to match any processing logic.
final UnexpectedTokenProblem problem = new UnexpectedTokenProblem( retVal, ASTokenKind.UNKNOWN); problems.add(problem); } } catch (final Exception e) { if (lastException != null) { if (lastException.getClass().isInstance(e)) { ICompilerProblem problem = new InternalCompilerProblem2(sourcePath, e, "StreamingASTokenizer"); problems.add(problem); return null; } } else { lastException = e; retVal = null; return next(); } } finally { consumeSemi = false; lastToken = retVal; } return null; } /** * Error recovery: convert the given keyword token into an identifier token, * and log a syntax error. * * @param token Keyword token. */ private void treatKeywordAsIdentifier(final ASToken token) { assert token != null : "token can't be null"; assert token.isKeywordOrContextualReservedWord() : "only transfer reserved words"; if (token.isKeyword()) { final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(token, ASTokenKind.IDENTIFIER); problems.add(problem); } token.setType(TOKEN_IDENTIFIER); } /** * Decide within the current context whether the following content can be * parsed as a metadata tag token. * * @param nextToken The next token coming from * {@link #nextTokenFromReader()}. * @return If the following content can be a metadata tag, the result is a * token of type {@link ASTokenTypes#TOKEN_ATTRIBUTE}. Otherwise, the * argument {@code nextToken} is returned. * @throws Exception Parsing error. */ private ASToken tryParseMetadata(ASToken nextToken) throws Exception { // Do not initialize this variable so that Java flow-analysis can check if // the following rules cover all the possibilities. final boolean isNextMetadata; if (!config.findMetadata) { // The lexer is configured to not recognize metadata. isNextMetadata = false; } else if (lastToken == null) { // An "[" at the beginning of a script is always part of metadata. isNextMetadata = true; } else { switch (lastToken.getType()) { case TOKEN_ASDOC_COMMENT: case TOKEN_SEMICOLON: case TOKEN_ATTRIBUTE: case TOKEN_BLOCK_OPEN: // An "[" after these tokens is always part of a metadata token. isNextMetadata = true; break; case TOKEN_SQUARE_CLOSE: case TOKEN_IDENTIFIER: // "[" following a "]" is an array access. // "[" following an identifier is an array access. isNextMetadata = false; break; case TOKEN_KEYWORD_INCLUDE: case TOKEN_BLOCK_CLOSE: case TOKEN_OPERATOR_STAR: // An "[" after these tokens is part of a metadata token, if // the "[" is on a new line. isNextMetadata = !lastToken.matchesLine(nextToken); break; default: // If we are lexing an entire file // then at this point we "know" that the next token // is not meta-data. if (config.completeContent) { isNextMetadata = false; } else { // In "fragment" mode, which is used by the syntax coloring code // in builder, we assume the following list of tokens cannot // precede meta-data because they all start or occur in expressions.
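// For example, in "x = [0]" the "[" follows an assignment operator
// and opens an array literal, not metadata, which is why
// TOKEN_OPERATOR_ASSIGNMENT appears in the exclusion list below.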
switch (lastToken.getType()) { case TOKEN_OPERATOR_EQUAL: case TOKEN_OPERATOR_TERNARY: case TOKEN_COLON: case TOKEN_OPERATOR_PLUS: case TOKEN_OPERATOR_MINUS: case TOKEN_OPERATOR_STAR: case TOKEN_OPERATOR_DIVISION: case TOKEN_OPERATOR_MODULO: case TOKEN_OPERATOR_BITWISE_AND: case TOKEN_OPERATOR_BITWISE_OR: case TOKEN_KEYWORD_AS: case TOKEN_OPERATOR_BITWISE_XOR: case TOKEN_OPERATOR_LOGICAL_AND: case TOKEN_OPERATOR_LOGICAL_OR: case TOKEN_PAREN_OPEN: case TOKEN_COMMA: case TOKEN_OPERATOR_BITWISE_NOT: case TOKEN_OPERATOR_LOGICAL_NOT: case TOKEN_OPERATOR_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT: case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT: case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT: case TOKEN_OPERATOR_LESS_THAN: case TOKEN_OPERATOR_GREATER_THAN: case TOKEN_OPERATOR_LESS_THAN_EQUALS: case TOKEN_OPERATOR_GREATER_THAN_EQUALS: case TOKEN_OPERATOR_NOT_EQUAL: case TOKEN_OPERATOR_STRICT_EQUAL: case TOKEN_OPERATOR_STRICT_NOT_EQUAL: case TOKEN_OPERATOR_PLUS_ASSIGNMENT: case TOKEN_OPERATOR_MINUS_ASSIGNMENT: case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT: case TOKEN_OPERATOR_DIVISION_ASSIGNMENT: case TOKEN_OPERATOR_MODULO_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT: case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT: isNextMetadata = false; break; default: isNextMetadata = true; break; } } break; } } final ASToken result; if (isNextMetadata) result = consumeMetadata(nextToken); else result = nextToken; return result; } /** * Close the forked include file tokenizer, and set it to null. */ private void closeIncludeTokenizer() { if (forkIncludeTokenizer == null) return; try { problems.addAll(forkIncludeTokenizer.problems); forkIncludeTokenizer.close(); } catch (IOException e) { throw new RuntimeException(e); } includeHandler.leaveFile(forkIncludeTokenizer.getEndOffset()); forkIncludeTokenizer = null; } /** * Consume tokens until the end of a metadata tag is found, collecting them * into a single {@link MetaDataPayloadToken}. If a token that can't exist * in metadata is encountered, the buffered tokens are returned to the * lookahead buffer and the original "[" token is returned unchanged. * * @param startToken the "[" token that opened the metadata. * @return a {@link MetaDataPayloadToken}, or a copy of {@code startToken} * if the content turns out not to be metadata. * @throws Exception Parsing error. */ private final ASToken consumeMetadata(final ASToken startToken) throws Exception { final ASToken originalToken = new ASToken(startToken); MetaDataPayloadToken payload = new MetaDataPayloadToken(originalToken); final ArrayList<ASToken> safetyNet = new ArrayList<ASToken>(5); boolean isMetadata = true; while (true) { tokenizer.setReuseLastToken(); final ASToken next = LT(1); if (next == null) { break; } safetyNet.add(new ASToken(next)); //sadly, we have to deal with the extra object creation if we're wrong payload.addToken(next); //here too if (!next.canExistInMetadata()) { isMetadata = false; //consume the last token we saw so that we don't get ourselves into an infinite loop //it was the last token of the metadata, and this makes "next" the current token. consume(1); break; } consume(1); if (next.getType() == TOKEN_SQUARE_CLOSE) { break; } } if (!isMetadata) { //we're wrong, so let's add back the tokens to our lookahead buffer lookAheadBuffer.addAll(safetyNet); bufferSize = lookAheadBuffer.size(); return originalToken; } return payload; } private final void fill(final int distance) throws Exception { int pos = 0; while (pos < distance) { addTokenToBuffer(nextTokenFromReader()); pos++; } } /** * Adds a token to the lookahead buffer and locks it. * * @param nextToken token to add; may be null at EOF. */ private final void addTokenToBuffer(final ASToken nextToken) { bufferSize++; lookAheadBuffer.add(nextToken); // at EOF, nextToken can be null. if (nextToken != null) nextToken.lock(); } /** * Get the pooled version of a given string.
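* <p> * Repeated calls with equal text return the same {@link String} instance, * so, for example, every occurrence of the identifier text {@code "foo"} * in a file ends up sharing one pooled string.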
* * @param text String literal. * @return Pooled string. */ private final String poolString(final String text) { String pooledString = stringPool.get(text); if (pooledString == null) { stringPool.put(text, text); pooledString = text; } return pooledString; } /** * Get the next token from the source input. If this tokenizer is created * for a source file by {@link ASC}, and there are files included by the * {@code -in} option, the tokenizer will return the * "injected include tokens" before real tokens coming from the JFlex * generated tokenizer. * * @return next token from the source input * @throws IOException error * @see ASCompilationUnit#createMainCompilationUnitForASC() */ private final ASToken nextTokenFromReader() throws IOException { final ASToken nextToken; if (ascIncludeImaginaryTokens != null && ascIncludeImaginaryTokens.hasNext()) nextToken = ascIncludeImaginaryTokens.next(); else if (tokenizer.hasBufferToken()) nextToken = tokenizer.getBufferToken(); else nextToken = tokenizer.nextToken(); if (nextToken != null) { // Converting unicode on-the-fly in the lexer is much slower than // converting it here after the token is made, especially for // identifiers. switch (nextToken.getType()) { case TOKEN_LITERAL_NUMBER: nextToken.setText(poolString(nextToken.getText())); break; case TOKEN_LITERAL_REGEXP: // Any "backslash-u" entities left after "convertUnicode" // are invalid unicode escape sequences. According to AS3 // behavior, the backslash character is dropped. nextToken.setText(poolString(convertUnicode(nextToken.getText()).replaceAll("\\\\u", "u"))); break; case TOKEN_IDENTIFIER: // Intern 'identifiers' and 'keywords'. // 'keywords' are 'identifiers' until they are analyzed. final String originalIdentifierName = nextToken.getText(); final String normalizedIdentifierName = poolString(convertUnicode(originalIdentifierName)); nextToken.setText(normalizedIdentifierName); if (!config.ignoreKeywords) { /** * If the identifier has an escaped unicode sequence, it * can't be a keyword. * <p> * According to ASL syntax spec chapter 3.4: * <blockquote> Unicode escape sequences may be used to * spell the names of identifiers that would otherwise * be keywords. This is in contrast to ECMAScript. * </blockquote> */ if (originalIdentifierName.equals(normalizedIdentifierName)) { // do keyword analysis here final Integer info = keywordToTokenMap.get(nextToken.getText()); if (info != null) nextToken.setType(info); } } break; default: // Ignore other tokens. break; } //so we want to adjust all of our offsets here, BUT //the column is really only valid for the first line, which is line 0. //if we're not the first line, don't bother nextToken.adjustLocation( offsetAdjustment, lineAdjustment, nextToken.getLine() == 0 ?
columnAdjustment : 0); nextToken.storeLocalOffset(); if (includeHandler != null) { nextToken.setSourcePath(includeHandler.getIncludeStackTop()); includeHandler.onNextToken(nextToken); } if (nextToken.getSourcePath() == null) nextToken.setSourcePath(sourcePath); if (reader instanceof SourceFragmentsReader) ((SourceFragmentsReader)reader).adjustLocation(nextToken); } return nextToken; } /** * Consume tokens in the buffer * * @param distance the number of tokens to consume */ private final void consume(int distance) { if (bufferSize >= distance) { for (; distance > 0; distance--) { lookAheadBuffer.remove(bufferSize - 1); bufferSize--; } } } /** * Returns the next token that will be produced by the underlying lexer * * @param distance distance to look ahead * @return an {@link ASToken} * @throws Exception */ private final ASToken LT(final int distance) throws Exception { if (bufferSize < distance) { fill(distance - bufferSize); } return lookAheadBuffer.get(distance - 1); } private static final boolean matches(final ASToken token, final int type) { return token != null && token.getType() == type; } /** * Retrieve the end offset of the file. * <p> * The result is the end offset of the file, not the offset of the last * token; this allows any trailing space to be included so that the parser * can span the result {@code FileNode} to the entire file. * * @return the end offset of the input file */ public final int getEndOffset() { return tokenizer.getOffset() + offsetAdjustment; } /** * Computes whether the following token is a user-defined namespace. This * method calls processUserDefinedNamespace, which will change token types * * @param token token to start our analysis * @param lookaheadOffset offset of the tokens to look at * @return true if we're a user-defined namespace * @throws Exception */ private final boolean isUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception { processUserDefinedNamespace(token, lookaheadOffset); return token.getType() == TOKEN_NAMESPACE_ANNOTATION || token.getType() == TOKEN_NAMESPACE_NAME; } /** * Because AS3 supports qualified/unqualified namespaces as decorators on * definitions, we need to detect them before we even make it to the parser. * These look exactly like names/qnames, and so if they're on the same line * as a definition they might be a namespace name instead of a standard * identifier. This method will detect these cases, and change token types * accordingly * * @param token token to start our analysis * @param lookaheadOffset offset of the tokens to look at * @throws Exception */ private final void processUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception { token.lock(); //determine if we have a user-defined namespace //our first token will be an identifier, and the cases we're looking for are: //1.) user_namespace (function|var|dynamic|static|final|native|override) //2.)
my.pack.user_namespace (function|var|dynamic|static|final|native|override) //option number 1 is probably the 99% case so optimize for it ASToken nextToken = LT(1 + lookaheadOffset); if (token.matchesLine(nextToken)) { // If the next token is an identifier, check to see if it should // be modified to a TOKEN_NAMESPACE_ANNOTATION // This is so that code like: // ns1 ns2 var x; // gets parsed correctly (2 namespace annotations, which is an error) if (nextToken.getType() == TOKEN_IDENTIFIER) processUserDefinedNamespace(nextToken, 1 + lookaheadOffset); switch (nextToken.getType()) { case TOKEN_KEYWORD_FUNCTION: case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_CONST: case TOKEN_RESERVED_WORD_NAMESPACE: case TOKEN_MODIFIER_DYNAMIC: case TOKEN_MODIFIER_FINAL: case TOKEN_MODIFIER_NATIVE: case TOKEN_MODIFIER_OVERRIDE: case TOKEN_MODIFIER_STATIC: case TOKEN_MODIFIER_VIRTUAL: case TOKEN_KEYWORD_CLASS: case TOKEN_KEYWORD_INTERFACE: case TOKEN_NAMESPACE_ANNOTATION: case HIDDEN_TOKEN_BUILTIN_NS: token.setType(TOKEN_NAMESPACE_ANNOTATION); return; case TOKEN_OPERATOR_NS_QUALIFIER: //simple name with a :: binding after it. has to be a NS token.setType(TOKEN_NAMESPACE_NAME); return; } if (nextToken.getType() == TOKEN_OPERATOR_MEMBER_ACCESS) { int nextValidPart = TOKEN_IDENTIFIER; final ArrayList<ASToken> toTransform = new ArrayList<ASToken>(3); toTransform.add(token); toTransform.add(nextToken); int laDistance = lookaheadOffset + 1; while (true) { nextToken = LT(++laDistance); if (token.matchesLine(nextToken)) { if (nextToken.getType() == nextValidPart) { nextValidPart = (nextToken.getType() == TOKEN_IDENTIFIER) ? TOKEN_OPERATOR_MEMBER_ACCESS : TOKEN_IDENTIFIER; toTransform.add(nextToken); } else if (nextValidPart != TOKEN_IDENTIFIER && nextToken.canFollowUserNamespace()) { // Next token is in the follow set of a namespace, // so all the buffered tokens need to be converted // into namespace tokens. for (final ASToken ttToken : toTransform) { if (ttToken.getType() == TOKEN_IDENTIFIER) ttToken.setType(TOKEN_NAMESPACE_ANNOTATION); else ttToken.setType(TOKEN_OPERATOR_MEMBER_ACCESS); } break; } else { break; } } else { break; } } } } } /** * Combines the text of two tokens, adding whitespace between them and * adjusting offsets appropriately * * @param target the base token that we will append the source's text to * @param source the source of the text to add */ private final void combineText(TokenBase target, TokenBase source) { StringBuilder text = new StringBuilder(); text.append(target.getText()); //add whitespace for gaps between tokens for (int i = 0; i < (source.getStart() - target.getEnd()); i++) { text.append(" "); } text.append(source.getText()); target.setText(poolString(text.toString())); target.setEnd(target.getStart() + text.length()); } /** * Unicode pattern for {@code \u0000}. */ private static final Pattern UNICODE_PATTERN = Pattern.compile(BaseRawASTokenizer.PATTERN_U4); /** * Leading characters of a unicode pattern. */ private static final String UNICODE_LEADING_CHARS = "\\u"; /** * Convert escaped unicode sequences in a string. For example: * {@code foo\u0051bar} is converted into {@code fooQbar}. * * @param text input string * @return converted text */ static String convertUnicode(final String text) { // Calling Pattern.matcher() is much slower than String.contains(), so // we need this predicate to skip unnecessary RegEx computation.
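// For example, "fooQbar" contains no "\u" and is returned immediately,
// while "foo\u0051bar" goes through the matcher and is rewritten to
// "fooQbar" without ever building a StringBuilder for the common case.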
if (text.contains(UNICODE_LEADING_CHARS)) { final StringBuilder result = new StringBuilder(); final Matcher matcher = UNICODE_PATTERN.matcher(text); int start = 0; while (matcher.find()) { result.append(text, start, matcher.start()); result.append(Character.toChars(BaseRawASTokenizer.decodeEscapedUnicode(matcher.group()))); start = matcher.end(); } result.append(text, start, text.length()); return result.toString(); } else { return text; } } /** * Gets the source path to the file being tokenized. */ public String getSourcePath() { return sourcePath; } }