/*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.flex.compiler.internal.parsing.mxml;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.flex.compiler.common.MutablePrefixMap;
import org.apache.flex.compiler.common.PrefixMap;
import org.apache.flex.compiler.filespecs.FileSpecification;
import org.apache.flex.compiler.filespecs.IFileSpecification;
import org.apache.flex.compiler.internal.parsing.as.ASTokenTypes;
import org.apache.flex.compiler.parsing.IMXMLToken;
import org.apache.flex.compiler.parsing.IMXMLTokenizer;
import org.apache.flex.compiler.parsing.MXMLTokenTypes;
import org.apache.flex.compiler.problems.ICompilerProblem;
import org.apache.flex.compiler.problems.InternalCompilerProblem2;
import org.apache.flex.utils.NonLockingStringReader;
/**
* Tokenizes MXML files. Uses RawMXMLTokenizer to get basic tokens. Ignores comments (<!--...-->),
* processing instructions (<?...?>), and whitespace. Replaces CDATA tokens with text tokens (strips
* out the CDATA markup).
*/
public class MXMLTokenizer implements IMXMLTokenizer, Closeable
{
/**
 * Start offset (for when you're parsing a section of the document that
 * doesn't start at the beginning). Handed to the underlying tokenizer via
 * {@code setOffset} at the start of {@code parseTokens}.
 */
protected int startOffset;
// Tag nesting counter. Starts at -1, is incremented for every open-tag-start
// token and decremented for every close-tag-start token, so it is 0 while the
// first opened tag is being read.
private int tagDepth = -1;
/**
 * Specifies that we are within a tag's content, i.e. between a tag start
 * token and the matching tag end token.
 */
private boolean inTagContent = false;
// Underlying raw tokenizer that supplies the tokens this class post-processes.
private RawMXMLTokenizer tokenizer;
// Most recent xmlns attribute-name token; cleared when a plain name token is seen.
protected MXMLToken xmlNSToken = null;
// Prefix -> namespace URI entries collected from xmlns attributes seen while tagDepth is 0.
protected MutablePrefixMap rootPrefixMap;
// Token held back by nextToken() so it can be returned right after a repair token.
private MXMLToken postRepairToken = null;
// Whether nextToken() runs the repair analysis; on by default.
private boolean isRepairing = true;
// Set when nextToken() returns a repair token; cleared at the start of parseTokens().
private boolean wasRepaired = false;
// Initial capacity of the token list built by parseTokens().
private static final int SIZE = 100;
// Problems recorded by this class (problems of the underlying tokenizer are kept separately).
private List<ICompilerProblem> problems;
// Source path used when reporting problems.
private String path;
// Last non-null token seen by analyzeForEndTagProblems(); reset to null when parseTokens() finishes normally.
private MXMLToken lastToken = null;
// Sub-system name passed to InternalCompilerProblem2 when reporting internal errors.
private static final String SUB_SYSTEM = "MXMLTokenizer";
/**
* Constructor
*/
public MXMLTokenizer(String path)
{
tokenizer = new RawMXMLTokenizer();
tokenizer.setSourcePath(path);
problems = new ArrayList<ICompilerProblem>();
rootPrefixMap = new MutablePrefixMap();
this.path = path;
}
/**
 * Creates a tokenizer with an empty source path.
 */
public MXMLTokenizer() {
this("");
}
/**
 * Creates a tokenizer whose source path is taken from the given file specification.
 * Only the path is used here; no reader is opened.
 *
 * @param specification file specification whose {@code getPath()} value becomes the source path
 */
public MXMLTokenizer(IFileSpecification specification) {
this(specification.getPath());
}
/**
 * Reparse constructor. Allows you to start the tokenizer with a start
 * offset (for when you're parsing a section of the document that doesn't
 * start at the beginning). The source path is left empty.
 *
 * @param startOffset Start offset
 */
public MXMLTokenizer(int startOffset)
{
this("");
this.startOffset = startOffset;
}
/**
 * Changes the source path, both for problems reported by this class and
 * for the underlying tokenizer.
 *
 * @param path new source path
 */
public void setPath(String path) {
this.path = path;
tokenizer.setSourcePath(path);
}
/**
 * Points the underlying tokenizer at a new input: resets it first, then
 * hands it the reader via {@code yyreset}.
 *
 * @param reader input to tokenize
 */
public void setReader(Reader reader) {
tokenizer.reset();
tokenizer.yyreset(reader);
}
/**
 * Resets the underlying tokenizer and closes it (which closes its reader).
 * Does nothing when there is no underlying tokenizer.
 *
 * @throws IOException if closing the underlying tokenizer fails
 */
@Override
public void close() throws IOException
{
    if (tokenizer == null)
    {
        return;
    }
    tokenizer.reset();
    tokenizer.yyclose(); //close the reader
}
/**
 * Returns the prefix map accumulated by this tokenizer so far. The map is
 * created in the constructor and is not reassigned anywhere in this class.
 *
 * @return the {@link PrefixMap} held in {@code rootPrefixMap}
 */
public PrefixMap getPrefixMap() {
return rootPrefixMap;
}
/**
 * Sets a flag to indicate whether this tokenizer should try to repair its token stream.
 * When the flag is off, {@code nextToken()} bypasses the repair analysis entirely.
 * The flag is on by default.
 *
 * @param isRepairing <code>true</code> to repair, <code>false</code> to not repair
 */
@Override
public void setIsRepairing(boolean isRepairing) {
this.isRepairing = isRepairing;
}
/**
 * Tokenizes everything available from {@code reader} and returns the
 * tokens as an array.
 *
 * @param reader input to tokenize
 * @return the tokens produced by {@link #parseTokens(Reader)}, in order
 */
@Override
public IMXMLToken[] getTokens(Reader reader) {
    return parseTokens(reader).toArray(new IMXMLToken[0]);
}
/**
 * Tokenizes the given text and returns the tokens as an array. The text is
 * wrapped in a {@link NonLockingStringReader} before being parsed.
 *
 * @param range text to tokenize
 * @return the tokens produced by {@link #parseTokens(Reader)}, in order
 */
@Override
public IMXMLToken[] getTokens(String range) {
    final Reader rangeReader = new NonLockingStringReader(range);
    return parseTokens(rangeReader).toArray(new IMXMLToken[0]);
}
/**
 * Determines whether any problems have been recorded, either by the
 * underlying tokenizer or by this class itself.
 *
 * @return true if we have encountered any problems
 */
public boolean hasTokenizationProblems() {
    if (tokenizer.hasProblems())
    {
        return true;
    }
    return !problems.isEmpty();
}
/**
 * Consumes tokens via {@link #nextToken()} until the first token that
 * reports itself as a tag end, or until no more tokens are available, and
 * then returns the root prefix map collected along the way.
 *
 * @return the {@link PrefixMap} held in {@code rootPrefixMap}
 */
public PrefixMap getRootTagPrefixMap() {
    MXMLToken current = nextToken();
    while (current != null && !current.isTagEnd())
    {
        current = nextToken();
    }
    return rootPrefixMap;
}
/**
 * Returns the problems encountered so far: first those recorded by this
 * class, followed by those reported by the underlying tokenizer. A new list
 * is built on every call.
 *
 * @return a {@link List} of {@link ICompilerProblem} objects, possibly empty
 */
public List<ICompilerProblem> getTokenizationProblems() {
    final ArrayList<ICompilerProblem> merged = new ArrayList<ICompilerProblem>();
    merged.addAll(problems);
    merged.addAll(tokenizer.getProblems());
    return merged;
}
/**
 * Produces the next processed token without running any repair logic.
 * <p>
 * Raw tokens are taken from the underlying tokenizer (its buffered token
 * first, when it has one) and passed through {@code processToken}; the
 * first non-null result is returned. Any exception raised along the way is
 * recorded as an {@link InternalCompilerProblem2} and ends the stream.
 *
 * @return an {@link MXMLToken}, or null when the input is exhausted or an exception was recorded
 */
private final MXMLToken nextTokenInternal() {
    try
    {
        for (;;)
        {
            final MXMLToken rawToken = tokenizer.hasBufferToken()
                    ? (MXMLToken)tokenizer.getBufferToken()
                    : (MXMLToken)tokenizer.nextToken();
            if (rawToken == null)
            {
                return null;
            }
            final MXMLToken processed = processToken(rawToken);
            if (processed != null)
            {
                return processed;
            }
        }
    }
    catch (Exception e)
    {
        problems.add(new InternalCompilerProblem2(path, e, SUB_SYSTEM));
        return null;
    }
}
/**
 * Returns the next token that can be produced from the given input.
 * <p>
 * With repairing switched off this is simply the next processed token.
 * With repairing on, a token held back by an earlier repair is returned
 * first; otherwise the next processed token is run through the repair
 * analysis, and if that yields an extra token, the extra token is returned
 * now and the processed token is held back for the following call.
 *
 * @return an {@link MXMLToken} or null when no more tokens can be produced
 */
public MXMLToken nextToken() {
    if (!isRepairing)
    {
        return nextTokenInternal();
    }
    if (postRepairToken != null)
    {
        final MXMLToken deferred = postRepairToken;
        postRepairToken = null;
        return deferred;
    }
    final MXMLToken current = nextTokenInternal();
    final MXMLToken synthesized = analyzeForEndTagProblems(current);
    if (synthesized == null)
    {
        return current;
    }
    postRepairToken = current;
    wasRepaired = true;
    return synthesized;
}
/**
 * Parse the contents of input.
 * <p>
 * Clears the repaired flag, installs the reader, applies the start offset
 * and then pulls tokens with {@link #nextToken()} until it returns null. A
 * clone of every token is handed to {@code buildTokenList}. The underlying
 * tokenizer is always closed afterwards; an {@link IOException} from that
 * close is recorded as a problem rather than thrown.
 *
 * @param input Reader containing file to be parsed
 * @return List of MXMLTokens
 */
public List<MXMLToken> parseTokens(Reader input) {
    wasRepaired = false;
    setReader(input);
    // setReader() resets the tokenizer (setting yychar to 0), so the start
    // offset has to be applied after it.
    tokenizer.setOffset(startOffset);
    final List<MXMLToken> tokens = new ArrayList<MXMLToken>(SIZE);
    try
    {
        for (MXMLToken current = nextToken(); current != null; current = nextToken())
        {
            buildTokenList((MXMLToken)current.clone(), tokens);
        }
        lastToken = null;
        return tokens;
    }
    finally
    {
        try
        {
            tokenizer.yyclose();
        }
        catch (IOException e)
        {
            problems.add(new InternalCompilerProblem2(path, e, SUB_SYSTEM));
        }
    }
}
/**
 * Repair hook called by {@code nextToken()} for every token while repairing
 * is enabled.
 * <p>
 * This method used to make up a fake token when an open tag start followed
 * a token other than whitespace, a processing instruction, a comment, an
 * ASDoc comment, a string, text, CDATA or a tag end. That logic is turned
 * off: the MXMLData already knows how to do this repair, and doing it here
 * loses the information that a repair was done, which causes bugs. The only
 * remaining effect is to remember the last non-null token seen.
 *
 * @param currentToken the token about to be returned to the caller, may be null
 * @return always null (no token is ever added)
 */
// TODO: remove this method together with its call site; it no longer repairs anything.
private MXMLToken analyzeForEndTagProblems(MXMLToken currentToken) {
    if (currentToken == null)
    {
        // A null token leaves lastToken untouched.
        return null;
    }
    lastToken = currentToken;
    return null;
}
/**
 * Determines if any tokens were added as a side effect of repair. This can only be called after a tokenize call.
 * The flag is cleared at the start of every {@code parseTokens} call and set when {@code nextToken} returns an added token.
 *
 * @return true if the token stream was modified
 */
public boolean tokensWereRepaired() {
return wasRepaired;
}
/**
 * Post-processes a raw token and updates the tokenizer's tracking state:
 * <ul>
 * <li>open/close tag starts adjust the tag depth and mark that we are inside a tag</li>
 * <li>tag ends mark that we have left the tag</li>
 * <li>XMLNS tokens are retyped as name tokens and remembered</li>
 * <li>a string that follows a remembered XMLNS token while the tag depth is 0
 * is recorded in the root prefix map, with its first and last characters removed</li>
 * <li>any other token found outside a tag, at a tag depth other than 0 and
 * outside E4X databinding, is retyped as text when it is a literal or an identifier</li>
 * </ul>
 * Note that we don't modify/merge whitespace and text tokens here as there are a number
 * of tests which are sensitive to whitespace, ie MetaMXMLSuite.
 *
 * @param token the raw token to process
 * @return the same token instance, possibly retyped; this implementation never returns null
 */
private MXMLToken processToken(final MXMLToken token) {
    //TODO find xmlns uri values in the lexer instead of here
    switch (token.getType())
    {
        // tags (and also DTD directives)
        case MXMLTokenTypes.TOKEN_OPEN_TAG_START:
            tagDepth++;
            inTagContent = true;
            break;

        case MXMLTokenTypes.TOKEN_CLOSE_TAG_START:
            tagDepth--;
            inTagContent = true;
            break;

        case MXMLTokenTypes.TOKEN_TAG_END:
        case MXMLTokenTypes.TOKEN_EMPTY_TAG_END:
            inTagContent = false;
            break;

        // passed through untouched
        case MXMLTokenTypes.TOKEN_EQUALS:
        case MXMLTokenTypes.TOKEN_CDATA:
            break;

        case MXMLTokenTypes.TOKEN_NAME:
            xmlNSToken = null;
            break;

        case MXMLTokenTypes.TOKEN_XMLNS:
            token.setType(MXMLTokenTypes.TOKEN_NAME);
            xmlNSToken = token;
            break;

        case MXMLTokenTypes.TOKEN_STRING:
            // Only namespaces declared while the tag depth is 0 are tracked.
            if (xmlNSToken != null && tagDepth == 0)
            {
                final String attributeName = xmlNSToken.getText();
                // Longer than "xmlns" means a prefix follows the colon.
                final String prefix = attributeName.length() > 5 ? attributeName.substring(6) : "";
                final String quoted = token.getText();
                final String uri = quoted.length() > 1
                        ? quoted.substring(1, quoted.length() - 1)
                        : "";
                rootPrefixMap.add(prefix, uri);
            }
            break;

        // stuff outside tags
        default:
            if (tagDepth != 0 && !tokenizer.isInE4XDatabinding() && !inTagContent)
            {
                //probably mixed content. Allow it and let it fail downstream if we're wrong
                final boolean textLike = token.isLiteral()
                        || token.getType() == ASTokenTypes.TOKEN_IDENTIFIER;
                if (textLike)
                {
                    token.setType(MXMLTokenTypes.TOKEN_TEXT);
                }
            }
            break;
    }
    return token;
}
/**
 * Appends a token to the list being built; null tokens are skipped.
 * Subclasses should override this method to handle different tokenizing
 * strategies.
 *
 * @param token The current token.
 * @param list The list of tokens being built.
 */
protected void buildTokenList(MXMLToken token, List<MXMLToken> list)
{
    if (token == null)
    {
        return;
    }
    list.add(token);
}
/**
 * Command-line entry point: tokenizes the file named by the first argument
 * and prints each token's dump string on its own line to standard output.
 * <p>
 * When no argument is given a usage line is printed to standard error
 * instead of failing with an {@link ArrayIndexOutOfBoundsException}. When
 * the file cannot be opened, a one-line message is printed to standard
 * error.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the file to tokenize
 */
public static void main(String[] args)
{
    if (args == null || args.length == 0)
    {
        System.err.println("Usage: MXMLTokenizer <path-to-mxml-file>");
        return;
    }

    final FileSpecification fileSpec = new FileSpecification(args[0]);
    final MXMLTokenizer tokenizer = new MXMLTokenizer(fileSpec.getPath());
    try
    {
        List<MXMLToken> tokens = tokenizer.parseTokens(fileSpec.createReader());
        for (MXMLToken token : tokens)
        {
            System.out.println(token.toDumpString());
        }
    }
    catch (FileNotFoundException e)
    {
        System.err.println("Unable to open " + fileSpec.getPath() + ": " + e.getMessage());
    }
    finally
    {
        // parseTokens() already closes the underlying tokenizer; this also
        // covers the case where the reader could not be created.
        IOUtils.closeQuietly(tokenizer);
    }
}
}