// BaseRawASTokenizer.java example
//
// Explorer
// flex-falcon-master
/*
 *
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

package org.apache.flex.compiler.internal.parsing.as;

import java.util.regex.Pattern;

import org.apache.flex.compiler.common.ISourceLocation;
import org.apache.flex.compiler.problems.ASDocNotClosedProblem;
import org.apache.flex.compiler.problems.CDataNotClosedProblem;
import org.apache.flex.compiler.problems.CommentNotClosedProblem;
import org.apache.flex.compiler.problems.ICompilerProblem;
import org.apache.flex.compiler.problems.StringLiteralMustBeTerminatedBeforeLineBreakProblem;
import org.apache.flex.compiler.problems.StringLiteralNotClosedProblem;

/**
 * Base class for RawActionScriptTokenizer. Pulling out code into Java backing
 * class so it's easier to deal with. Uses a token pool to avoid object
 * creation. The token pool will keep a buffer of 10 tokens in memory, and it is
 * generally unsafe to hold on to tokens passed the point they are needed. If
 * tokens are to be preserved, their copy constructor should be used
 */
public abstract class BaseRawASTokenizer extends BaseRawTokenizer<ASToken>
{
    /**
     * Tracks if we are in a close tag. Used in the generated code
     */
    protected boolean isInCloseTag = false;

    /**
     * The depth of open tags, used to determine if we're still in E4X content.
     * Each time we see a tag open, this count is incremented. When we see any
     * kind of tag close (</ or />) we decrement the counter.
     */
    protected int e4xTagDepth = 0;

    /**
     * The depth of the braces in E4X content. Used to determine when to pop out
     * of E4X databindings and back to E4X content
     */
    protected int e4xBraceBalance = 0;

    /**
     * State to return to from E4X. Used in the generated code
     */
    protected int e4xReturnState = RawASTokenizer.E4X;

    /**
     * Depth of typed collection constructs. Used in the generated code
     */
    protected int typedDepth = 0;

/**
     * Nested '<' bracket level (for <!DOCTYPE et al)
     */
    protected int docTypeLevel;

    /**
     * Flag to indicate we should collect comments
     */
    protected boolean collectComments = false;

    /**
     * token that we may need to return for rules that may return more than one
     * token
     */
    protected ASToken bufferToken;

    @Override
    protected void continueAggregate()
    {
        if (!hasAggregateContents())
            super.startAggregate();
        else
            super.continueAggregate();
    }

    /**
     * Convert the unicode escape sequence and append the unicode character to
     * text buffer. The input <b>must</b> have the leading escape character "\".
     * 
     * @param escapeSequence Unicode escape sequence like {@code \u00FF} or
     * {@code \xFF}.
     */
    protected final void aggregateEscapedUnicodeChar(final String escapeSequence)
    {
        final int unicode = decodeEscapedUnicode(escapeSequence);
        //check to make sure we are in valid unicode range
        if (!Character.isValidCodePoint(unicode))
            addBadCharacterProblem(yytext());
        else
            continueAggregate(Character.toChars(unicode));
    }

    /**
     * Build a token from the aggregation buffer.
     * 
     * @param type Type of the new token.
     * @return New token.
     */
    @Override
    public final ASToken buildAggregateToken(final int type)
    {
        final ASToken token = fetchToken(
                type,
                aggregateStart,
                getOffset() + (markedPosition() - readStart()),
                aggregateStartLine,
                aggregateStartColumn,
                super.aggregateContents);
        aggregateContents = null;
        setLastToken(token);
        return token;
    }

    /**
     * Build an e4x text token from a given entity type.
     */
    protected final ASToken buildE4XTextToken(final int type)
    {
        final ASToken token = buildToken(type);
        if (hasAggregateContents())
        {
            bufferToken = token;
            final int ttype;
            if (((RawASTokenizer)this).yystate() == RawASTokenizer.E4XTEXTVALUE)
                ttype = ASTokenTypes.TOKEN_E4X_TEXT;
            else
                ttype = ASTokenTypes.TOKEN_E4X_STRING;
            return buildAggregateToken(ttype);
        }
        else
        {
            aggregateContents = null;
            return token;
        }
    }

    @Override
    protected final void fillBuffer(StringBuilder builder)
    {
        builder.append(buffer(), readStart(), markedPosition() - readStart());
    }

    /**
     * returns the start of the current read
     * 
     * @return a non-negative int
     */
    protected abstract int readStart();

    /**
     * returns the marked position in the current read
     * 
     * @return a non-negative int
     */
    protected abstract int markedPosition();

    /**
     * returns the end of the current read
     * 
     * @return a non-negative int
     */
    protected abstract int readEnd();

    /**
     * returns the current active buffer. this will change and is not constant
     * 
     * @return the current buffer
     */
    protected abstract char[] buffer();

    /**
     * True if we want to collect comments, besides ASDoc. ASDoc comments will
     * always be returned
     * 
     * @param collect <code>true</code to collect non-ASDoc comments,
     * <code>false</code> to ignore them.
     */
    public void setCollectComments(final boolean collect)
    {
        collectComments = collect;
    }

    /**
     * in E4X, <code><</code> is allowed in char and string literals.
     * 
     * @param allow true if <code><</code> is allowed
     */
    protected abstract void setAllowLTInE4XStringLiterals(boolean allow);

    @Override
    protected ASToken[] initTokenPool()
    {
        return new ASToken[10];
    }

    /**
     * @return true if we are still parsing XML content
     */
    public boolean isInXML()
    {
        return e4xTagDepth > 0 && e4xBraceBalance == 0;
    }

    /**
     * @return true if we are parsing inside of an E4x databinding expression
     */
    public boolean isInE4XDatabinding()
    {
        return e4xBraceBalance > 0;
    }

    /**
     * begins the current state in the lexer
     * 
     * @param state the state to begin
     */
    protected abstract void yybegin(int state);

    /**
     * returns a char at the given offset into the internal char buffer
     * 
     * @param pos the offset into the buffer
     * @return a char
     */
    protected abstract char yycharat(int pos);

    /**
     * returns the length of the current read
     * 
     * @return a non-negative int
     */
    protected abstract int yylength();

    @Override
    protected final ASToken newToken(final int type, final int start, final int end, final int line, final int column, final CharSequence text)
    {
        return new ASToken(type, start, end, line, column, text);
    }

    /**
     * @return true if we are collecting comments
     */
    public final boolean isCollectingComments()
    {
        return collectComments;
    }

    public final ASToken getBufferToken()
    {
        final ASToken retVal = bufferToken.clone();
        bufferToken = null;
        return retVal;
    }

    public final boolean hasBufferToken()
    {
        return bufferToken != null;
    }

    @Override
    public void reset()
    {
        super.reset();
        e4xBraceBalance = 0;
        e4xTagDepth = 0;
    }

    /**
     * Matches escaped unicode sequence like {@code \u00FF}.
     */
    static final String PATTERN_U4 = "\\\\u[a-fA-F0-9]{4}";

    /**
     * Matches escaped unicode sequence like {@code \xFF}.
     */
    private static final String PATTERN_X2 = "\\\\x[a-fA-F0-9]{2}";

    /**
     * Matches either {@link #PATTERN_U4} or {@link #PATTERN_X2}.
     */
    private static final String PATTERN_UNICODE = String.format("(%s)|(%s)", PATTERN_U4, PATTERN_X2);

    /**
     * Convert escaped unicode sequence such as {@code \u00FF} and {@code \xFF}
     * to HTML entities like {@code ÿ}.
     * 
     * @param escapedUnicode Escaped unicode sequence in the form of either
     * {@code \u0000} or {@code \xFF}.
     * @return Encoded HTML entity string.
     */
    protected String escapedUnicodeToHtmlEntity(final String escapedUnicode)
    {
        final int unicode = decodeEscapedUnicode(escapedUnicode);
        return String.format("&#x%H;", unicode);
    }

    /**
     * Report unexpected line terminators in a string literal.
     */
    protected final void reportInvalidLineTerminatorInStringLiteral()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new StringLiteralMustBeTerminatedBeforeLineBreakProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before reaching the closing quotation
     * mark for a string literal.
     */
    protected final void reportUnclosedStringLiteral()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new StringLiteralNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before ASDoc is closed.
     */
    protected final void reportUnclosedASDoc()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new ASDocNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before Comment is closed.
     */
    protected final void reportUnclosedComment()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new CommentNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Report syntax error: input ended before CDATA is closed.
     */
    protected final void reportUnclosedCDATA()
    {
        final ISourceLocation location = getCurrentSourceLocation(0);
        final ICompilerProblem problem = new CDataNotClosedProblem(location);
        getProblems().add(problem);
    }

    /**
     * Convert escaped unicode sequence such as {@code \u00FF} and {@code \xFF}
     * to unicode code point.
     * 
     * @param escapedUnicode Escaped unicode sequence in the form of either
     * {@code \u0000} or {@code \xFF}.
     * @return Unicode number.
     */
    protected static int decodeEscapedUnicode(final String escapedUnicode)
    {
        if (escapedUnicode == null)
            throw new IllegalArgumentException("Escape sequence can't be null");

        if (!escapedUnicode.matches(PATTERN_UNICODE))
            throw new IllegalStateException("Only call this method from a lexer rule that matches unicode sequence pattern.");

        return Integer.parseInt(escapedUnicode.substring(2), 16);
    }

}