/*
 *
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

package org.apache.flex.compiler.internal.parsing.as;

import java.util.regex.Pattern;

import org.apache.flex.compiler.common.ISourceLocation;
import org.apache.flex.compiler.problems.ASDocNotClosedProblem;
import org.apache.flex.compiler.problems.CDataNotClosedProblem;
import org.apache.flex.compiler.problems.CommentNotClosedProblem;
import org.apache.flex.compiler.problems.ICompilerProblem;
import org.apache.flex.compiler.problems.StringLiteralMustBeTerminatedBeforeLineBreakProblem;
import org.apache.flex.compiler.problems.StringLiteralNotClosedProblem;

/**
 * Base class for RawActionScriptTokenizer. Pulling out code into Java backing
 * class so it's easier to deal with. Uses a token pool to avoid object
 * creation. The token pool will keep a buffer of 10 tokens in memory, and it is
 * generally unsafe to hold on to tokens past the point they are needed. If
 * tokens are to be preserved, their copy constructor should be used.
 */
public abstract class BaseRawASTokenizer extends BaseRawTokenizer<ASToken>
{
    /**
     * Tracks if we are in a close tag. Used in the generated code.
     */
    protected boolean isInCloseTag = false;

    /**
     * The depth of open tags, used to determine if we're still in E4X content.
     * Each time we see a tag open, this count is incremented. When we see any
     * kind of tag close ({@code </} or {@code />}) we decrement the counter.
     */
    protected int e4xTagDepth = 0;

    /**
     * The depth of the braces in E4X content. Used to determine when to pop out
     * of E4X databindings and back to E4X content.
     */
    protected int e4xBraceBalance = 0;

    /**
     * State to return to from E4X. Used in the generated code.
     */
    protected int e4xReturnState = RawASTokenizer.E4X;

    /**
     * Depth of typed collection constructs. Used in the generated code.
     */
    protected int typedDepth = 0;

    /**
     * Nested '&lt;' bracket level (for {@code <!DOCTYPE} et al).
     */
    protected int docTypeLevel;

    /**
     * Flag to indicate we should collect comments.
     */
    protected boolean collectComments = false;

    /**
     * Token that we may need to return for rules that may return more than one
     * token.
     */
    protected ASToken bufferToken;

    /**
     * Continues the current aggregate, starting a new one first if there are
     * no aggregate contents yet.
     */
    @Override
    protected void continueAggregate()
    {
        if (!hasAggregateContents())
        {
            super.startAggregate();
        }
        else
        {
            super.continueAggregate();
        }
    }

    /**
     * Convert the unicode escape sequence and append the unicode character to
     * text buffer. The input <b>must</b> have the leading escape character "\".
     * If the decoded value is not a valid code point, a bad-character problem
     * is reported for the current lexer text instead.
     *
     * @param escapeSequence Unicode escape sequence like {@code \u005Cu00FF} or
     * {@code \xFF}.
     */
    protected final void aggregateEscapedUnicodeChar(final String escapeSequence)
    {
        final int unicode = decodeEscapedUnicode(escapeSequence);

        // check to make sure we are in valid unicode range
        if (!Character.isValidCodePoint(unicode))
        {
            addBadCharacterProblem(yytext());
        }
        else
        {
            continueAggregate(Character.toChars(unicode));
        }
    }

    /**
     * Build a token from the aggregation buffer. The aggregation buffer is
     * cleared afterwards and the new token is recorded as the last token.
     *
     * @param type Type of the new token.
     * @return New token.
     */
    @Override
    public final ASToken buildAggregateToken(final int type)
    {
        // The token ends at the marked position of the current read,
        // translated into an absolute offset.
        final ASToken token = fetchToken(
                type,
                aggregateStart,
                getOffset() + (markedPosition() - readStart()),
                aggregateStartLine,
                aggregateStartColumn,
                super.aggregateContents);
        aggregateContents = null;
        setLastToken(token);
        return token;
    }

    /**
     * Build an e4x text token from a given entity type.
     * <p>
     * If text has been aggregated, the token for {@code type} is parked in
     * {@link #bufferToken} and the aggregated text is returned first, as
     * either {@code TOKEN_E4X_TEXT} or {@code TOKEN_E4X_STRING} depending on
     * the current lexer state. Otherwise the token for {@code type} is
     * returned directly.
     *
     * @param type Type of the entity token.
     * @return The aggregated text token if there is aggregated text, otherwise
     * the entity token.
     */
    protected final ASToken buildE4XTextToken(final int type)
    {
        final ASToken token = buildToken(type);
        if (hasAggregateContents())
        {
            bufferToken = token;
            final int ttype;
            if (((RawASTokenizer)this).yystate() == RawASTokenizer.E4XTEXTVALUE)
            {
                ttype = ASTokenTypes.TOKEN_E4X_TEXT;
            }
            else
            {
                ttype = ASTokenTypes.TOKEN_E4X_STRING;
            }
            return buildAggregateToken(ttype);
        }
        else
        {
            aggregateContents = null;
            return token;
        }
    }

    /**
     * Appends the characters of the current read, from its start up to the
     * marked position, to the given builder.
     *
     * @param builder the builder to append to
     */
    @Override
    protected final void fillBuffer(StringBuilder builder)
    {
        builder.append(buffer(), readStart(), markedPosition() - readStart());
    }

    /**
     * Returns the start of the current read.
     *
     * @return a non-negative int
     */
    protected abstract int readStart();

    /**
     * Returns the marked position in the current read.
     *
     * @return a non-negative int
     */
    protected abstract int markedPosition();

    /**
     * Returns the end of the current read.
     *
     * @return a non-negative int
     */
    protected abstract int readEnd();

    /**
     * Returns the current active buffer. This will change and is not constant.
     *
     * @return the current buffer
     */
    protected abstract char[] buffer();

    /**
     * True if we want to collect comments, besides ASDoc. ASDoc comments will
     * always be returned.
     *
     * @param collect <code>true</code> to collect non-ASDoc comments,
     * <code>false</code> to ignore them.
     */
    public void setCollectComments(final boolean collect)
    {
        collectComments = collect;
    }

    /**
     * In E4X, {@code <} is allowed in char and string literals.
     *
     * @param allow true if {@code <} is allowed
     */
    protected abstract void setAllowLTInE4XStringLiterals(boolean allow);

    /**
     * Creates the token pool, which holds 10 tokens.
     *
     * @return a new, empty pool array
     */
    @Override
    protected ASToken[] initTokenPool()
    {
        return new ASToken[10];
    }

    /**
     * @return true if we are still parsing XML content
     */
    public boolean isInXML()
    {
        return e4xTagDepth > 0 && e4xBraceBalance == 0;
    }

    /**
     * @return true if we are parsing inside of an E4x databinding expression
     */
    public boolean isInE4XDatabinding()
    {
        return e4xBraceBalance > 0;
    }

    /**
     * Begins the current state in the lexer.
     *
     * @param state the state to begin
     */
    protected abstract void yybegin(int state);

    /**
     * Returns a char at the given offset into the internal char buffer.
     *
     * @param pos the offset into the buffer
     * @return a char
     */
    protected abstract char yycharat(int pos);

    /**
     * Returns the length of the current read.
     *
     * @return a non-negative int
     */
    protected abstract int yylength();

    /**
     * Creates a new {@link ASToken} from the given values.
     */
    @Override
    protected final ASToken newToken(final int type, final int start, final int end,
            final int line, final int column, final CharSequence text)
    {
        return new ASToken(type, start, end, line, column, text);
    }

    /**
     * @return true if we are collecting comments
     */
    public final boolean isCollectingComments()
    {
        return collectComments;
    }

    /**
     * Returns a clone of the buffered token and clears the buffer.
     * <p>
     * Callers must check {@link #hasBufferToken()} first: this method
     * dereferences the buffered token without a null check.
     *
     * @return a clone of the buffered token
     */
    public final ASToken getBufferToken()
    {
        final ASToken retVal = bufferToken.clone();
        bufferToken = null;
        return retVal;
    }

    /**
     * @return true if a buffered token is waiting to be returned
     */
    public final boolean hasBufferToken()
    {
        return bufferToken != null;
    }

    /**
     * Resets the tokenizer, clearing the E4X brace balance and tag depth in
     * addition to whatever the superclass resets.
     * <p>
     * NOTE(review): the other state fields declared in this class (such as
     * {@code bufferToken}, {@code typedDepth} and {@code docTypeLevel}) are
     * not touched here - confirm that is intended.
     */
    @Override
    public void reset()
    {
        super.reset();
        e4xBraceBalance = 0;
        e4xTagDepth = 0;
    }

    /**
     * Matches escaped unicode sequence like {@code \u005Cu00FF}.
     */
    static final String PATTERN_U4 = "\\\\u[a-fA-F0-9]{4}";

    /**
     * Matches escaped unicode sequence like {@code \xFF}.
     */
    private static final String PATTERN_X2 = "\\\\x[a-fA-F0-9]{2}";

    /**
     * Matches either {@link #PATTERN_U4} or {@link #PATTERN_X2}.
     */
    private static final String PATTERN_UNICODE = String.format("(%s)|(%s)", PATTERN_U4, PATTERN_X2);

    /**
     * Pre-compiled form of {@link #PATTERN_UNICODE}, so the regular expression
     * is not recompiled for every escape sequence decoded. Must be declared
     * after {@link #PATTERN_UNICODE} because static initializers run in
     * textual order.
     */
    private static final Pattern COMPILED_PATTERN_UNICODE = Pattern.compile(PATTERN_UNICODE);

    /**
     * Convert escaped unicode sequence such as {@code \u005Cu00FF} and
     * {@code \xFF} to HTML entities like {@code &#xFF;}.
     *
     * @param escapedUnicode Escaped unicode sequence in the form of either
     * {@code \u005Cu0000} or {@code \xFF}.
     * @return Encoded HTML entity string.
     */
    protected String escapedUnicodeToHtmlEntity(final String escapedUnicode)
    {
        final int unicode = decodeEscapedUnicode(escapedUnicode);
        // %X prints the value itself as upper-case hexadecimal.
        return String.format("&#x%X;", unicode);
    }

    /**
     * Report unexpected line terminators in a string literal.
     */
    protected final void reportInvalidLineTerminatorInStringLiteral()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new StringLiteralMustBeTerminatedBeforeLineBreakProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before reaching the closing quotation
     * mark for a string literal.
     */
    protected final void reportUnclosedStringLiteral()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new StringLiteralNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before ASDoc is closed.
     */
    protected final void reportUnclosedASDoc()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new ASDocNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before Comment is closed.
     */
    protected final void reportUnclosedComment()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new CommentNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before CDATA is closed.
     */
    protected final void reportUnclosedCDATA()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new CDataNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Convert escaped unicode sequence such as {@code \u005Cu00FF} and
     * {@code \xFF} to unicode code point.
     *
     * @param escapedUnicode Escaped unicode sequence in the form of either
     * {@code \u005Cu0000} or {@code \xFF}.
     * @return Unicode number.
     * @throws IllegalArgumentException if {@code escapedUnicode} is null.
     * @throws IllegalStateException if {@code escapedUnicode} does not match
     * either escape sequence form.
     */
    protected static int decodeEscapedUnicode(final String escapedUnicode)
    {
        if (escapedUnicode == null)
        {
            throw new IllegalArgumentException("Escape sequence can't be null");
        }
        if (!COMPILED_PATTERN_UNICODE.matcher(escapedUnicode).matches())
        {
            throw new IllegalStateException("Only call this method from a lexer rule that matches unicode sequence pattern.");
        }

        // Skip the two-character prefix ("\" followed by "u" or "x") and
        // parse the remaining hex digits.
        return Integer.parseInt(escapedUnicode.substring(2), 16);
    }
}