/*
* Sonar Web Plugin
* Copyright (C) 2010 Matthijs Galesloot
* dev@sonar.codehaus.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sonar.plugins.web.lex;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.sonar.channel.ChannelDispatcher;
import org.sonar.channel.CodeReader;
import org.sonar.plugins.web.node.Node;
import org.sonar.plugins.web.node.NodeType;
import org.sonar.plugins.web.node.TagNode;
/**
 * Lexical analysis of a web page.
 *
 * <p>The lexer runs a fixed, ordered list of tokenizers over the input and
 * afterwards links the resulting tag nodes into a parent/child hierarchy.
 *
 * @author Matthijs Galesloot
 */
// The tokenizer list is kept as a raw List because it is both cast to
// List<AbstractTokenizer> and handed to the ChannelDispatcher constructor;
// both uses are unchecked conversions.
@SuppressWarnings("unchecked")
public final class PageLexer {

  /**
   * The order of the tokenizers is significant, as they are processed in this order.
   *
   * TextTokenizer must be last, it will always consume the characters until the next token arrives.
   */
  private static final List TOKENIZERS = Arrays.asList(
      /* HTML Comments */
      new CommentTokenizer("<!--", "-->", true),
      /* JSP Comments */
      new CommentTokenizer("<%--", "--%>", false),
      /* HTML Directive */
      new DoctypeTokenizer("<!DOCTYPE", ">"),
      /* XML Directives */
      new DirectiveTokenizer("<?", "?>"),
      /* JSP Directives */
      new DirectiveTokenizer("<%@", "%>"),
      /* JSP Expressions */
      new ExpressionTokenizer("<%", "%>"),
      /* XML and HTML Tags */
      new ElementTokenizer("<", ">"),
      /* Text (for everything else) */
      new TextTokenizer());

  /**
   * Parse a nested node.
   *
   * <p>The tokenizers are tried in their configured order; the first one whose
   * {@code consume} call returns {@code true} ends the scan, so at most one
   * tokenizer reports success per call.
   *
   * @param reader the reader positioned at the nested content
   * @return the nodes added by the tokenizers; empty when no tokenizer added any
   */
  public List<Node> nestedParse(CodeReader reader) {
    List<Node> nodeList = new ArrayList<Node>();
    for (AbstractTokenizer tokenizer : (List<AbstractTokenizer>) TOKENIZERS) {
      if (tokenizer.consume(reader, nodeList)) {
        break;
      }
    }
    return nodeList;
  }

  /**
   * Parse the input into a list of tokens, with parent/child relations between the tokens.
   *
   * <p>The internally created {@link CodeReader} is closed before this method
   * returns, whether parsing succeeds or throws.
   *
   * @param reader the source of the page to analyse
   * @return the nodes in the order they were produced by the tokenizers
   */
  public List<Node> parse(Reader reader) {
    // CodeReader reads the file stream
    CodeReader codeReader = new CodeReader(reader);
    try {
      // ArrayList collects the nodes
      List<Node> nodeList = new ArrayList<Node>();

      // ChannelDispatcher manages the tokenizers
      ChannelDispatcher<List<Node>> channelDispatcher = new ChannelDispatcher<List<Node>>(TOKENIZERS);
      channelDispatcher.consume(codeReader, nodeList);

      createNodeHierarchy(nodeList);
      return nodeList;
    } finally {
      // clean up, also when tokenizing or hierarchy building fails
      codeReader.close();
    }
  }

  /**
   * Scan the nodes and build the hierarchy of parent and child nodes.
   *
   * <p>Only nodes of type {@link NodeType#Tag} take part. A tag that is not an
   * end element receives the currently open tag as parent and becomes the open
   * tag itself. An end element, or a tag for which {@code hasEnd()} is true
   * (presumably a self-closing tag -- confirm against TagNode), closes the
   * currently open tag by stepping back to its parent.
   *
   * @param nodeList the flat list of nodes in document order
   */
  private void createNodeHierarchy(List<Node> nodeList) {
    TagNode current = null;
    for (Node node : nodeList) {
      if (node.getNodeType() == NodeType.Tag) {
        TagNode element = (TagNode) node;

        // start element
        if (!element.isEndElement()) {
          element.setParent(current);
          current = element;
        }

        // end element; the null check tolerates unbalanced end tags
        if ((element.isEndElement() || element.hasEnd()) && current != null) {
          current = current.getParent();
        }
      }
    }
  }
}