/*
 * Copyright 2010 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gradle.build.docs.dsl.docbook;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Converts the main description of a javadoc comment into a stream of tokens.
 *
 * <p>The lexer reads from a {@link JavadocScanner} and reports what it finds to a {@link TokenVisitor}:
 * HTML start/end elements and their attributes, inline javadoc tags, and runs of plain text. HTML comments
 * are skipped. Numeric character references and the named entities {@code amp}, {@code lt}, {@code gt},
 * {@code quot} and {@code apos} are decoded in text and in attribute values; any other entity is passed
 * through unchanged.</p>
 */
class BasicJavadocLexer implements JavadocLexer {
    // NOTE(review): "\\\\?" matches an optional literal backslash, not a slash. End tags still match because
    // "[^<]+?" accepts the '/'. Possibly "/?" was intended - confirm before changing.
    private static final Pattern HTML_ELEMENT = Pattern.compile("(?s)<\\\\?[^<]+?>");
    // Only used with lookingAt() to detect the start of an attribute; the value itself is parsed by hand.
    private static final Pattern ELEMENT_ATTRIBUTE = Pattern.compile("(?s)\\w+(\\s*=\\s*('.*?')|(\".*?\"))?");
    private static final Pattern END_ATTRIBUTE_NAME = Pattern.compile("=|(\\s)|(/>)|>");
    private static final Pattern ATTRIBUTE_SEPARATOR = Pattern.compile("\\s*=\\s*");
    private static final Pattern END_ELEMENT_NAME = Pattern.compile("\\s+|(/>)|>");
    private static final Pattern END_ELEMENT = Pattern.compile("(/>)|>");
    private static final Pattern HTML_ENCODED_CHAR = Pattern.compile("&#\\d+;");
    private static final Pattern HTML_ENTITY = Pattern.compile("&.+?;");
    private static final Pattern TAG = Pattern.compile("(?s)\\{@.+?\\}");
    private static final Pattern END_TAG_NAME = Pattern.compile("(?s)\\s|}");
    private static final Pattern WHITESPACE_WITH_EOL = Pattern.compile("(?s)\\s+");
    private static final String START_HTML_COMMENT = "<!--";
    private static final String END_HTML_COMMENT = "-->";

    /** Named entities that are decoded, keyed by lower-case entity name (without the '&' and ';'). */
    private static final Map<String, String> ENTITIES = new HashMap<String, String>();

    static {
        ENTITIES.put("amp", "&");
        ENTITIES.put("lt", "<");
        ENTITIES.put("gt", ">");
        ENTITIES.put("quot", "\"");
        ENTITIES.put("apos", "'");
    }

    private final JavadocScanner scanner;

    /**
     * Creates a lexer that consumes its input from the given scanner.
     *
     * @param scanner the scanner to read the comment text from
     */
    BasicJavadocLexer(JavadocScanner scanner) {
        this.scanner = scanner;
    }

    /**
     * Hands additional raw comment text to the underlying scanner.
     *
     * @param rawCommentText the text to push
     */
    public void pushText(String rawCommentText) {
        scanner.pushText(rawCommentText);
    }

    /**
     * Consumes everything the scanner has to offer and reports the tokens found to the visitor, finishing
     * with a call to {@link TokenVisitor#onEnd()}.
     *
     * @param visitor receives the tokens in the order they appear in the input
     */
    public void visit(TokenVisitor visitor) {
        while (!scanner.isEmpty()) {
            if (scanner.lookingAt(START_HTML_COMMENT)) {
                skipComment();
            } else if (scanner.lookingAt(HTML_ELEMENT)) {
                parseStartElement(visitor);
            } else if (scanner.lookingAt(TAG)) {
                parseJavadocTag(visitor);
            } else {
                parseText(visitor);
            }
        }
        visitor.onEnd();
    }

    /**
     * Collects a run of plain text up to the next HTML element, javadoc tag or end of input and reports it
     * as a single text token. HTML comments inside the run are dropped without splitting the token.
     */
    private void parseText(TokenVisitor visitor) {
        StringBuilder text = new StringBuilder();
        while (!scanner.isEmpty()) {
            if (scanner.lookingAt(START_HTML_COMMENT)) {
                skipComment();
                continue;
            }
            if (scanner.lookingAt(HTML_ELEMENT) || scanner.lookingAt(TAG)) {
                break;
            }
            parseCharacter(text);
        }
        visitor.onText(text.toString());
    }

    /**
     * Consumes one logical character at the scanner position - a numeric character reference, an entity,
     * or a single raw character - and appends its decoded form to the buffer.
     */
    private void parseCharacter(StringBuilder buffer) {
        if (scanner.lookingAt(HTML_ENCODED_CHAR)) {
            parseHtmlEncodedChar(buffer);
        } else if (scanner.lookingAt(HTML_ENTITY)) {
            parseHtmlEntity(buffer);
        } else {
            buffer.append(scanner.getFirst());
            scanner.next();
        }
    }

    /**
     * Skips an HTML comment. The scanner must be positioned at the comment start marker. An unterminated
     * comment swallows the rest of the input.
     */
    private void skipComment() {
        scanner.next(START_HTML_COMMENT.length());
        while (!scanner.isEmpty() && !scanner.lookingAt(END_HTML_COMMENT)) {
            scanner.next();
        }
        if (!scanner.isEmpty()) {
            scanner.next(END_HTML_COMMENT.length());
        }
    }

    /**
     * Consumes an entity of the form {@code &name;} and appends its replacement to the buffer. The name is
     * looked up case-insensitively. An entity that is not in the table is appended exactly as written,
     * rather than being replaced by the string "null".
     */
    private void parseHtmlEntity(StringBuilder buffer) {
        // skip the '&'
        scanner.next();
        scanner.mark();
        scanner.find(';');
        String name = scanner.region();
        // Locale.ROOT keeps the lookup independent of the default locale (e.g. Turkish dotless i).
        String value = ENTITIES.get(name.toLowerCase(Locale.ROOT));
        if (value != null) {
            buffer.append(value);
        } else {
            buffer.append('&').append(name).append(';');
        }
        // skip the ';'
        scanner.next();
    }

    /**
     * Consumes a decimal character reference of the form {@code &#NNN;} and appends the character it
     * denotes. Code points outside the basic multilingual plane are appended as a surrogate pair. A number
     * that is not a valid Unicode code point is appended exactly as written.
     *
     * @throws NumberFormatException if the digits do not fit into an {@code int}
     */
    private void parseHtmlEncodedChar(StringBuilder buffer) {
        // skip the "&#"
        scanner.next(2);
        scanner.mark();
        scanner.find(';');
        String digits = scanner.region();
        int codePoint = Integer.parseInt(digits);
        if (Character.isValidCodePoint(codePoint)) {
            buffer.appendCodePoint(codePoint);
        } else {
            buffer.append("&#").append(digits).append(';');
        }
        // skip the ';'
        scanner.next();
    }

    /**
     * Consumes an inline javadoc tag. Reports the tag name, then the tag value as text (when there is one),
     * then the end of the tag. The value ends at the first closing brace.
     */
    private void parseJavadocTag(TokenVisitor visitor) {
        // start of tag marker: '{' and '@'
        scanner.next(2);

        // tag name
        scanner.mark();
        scanner.find(END_TAG_NAME);
        String tagName = scanner.region();
        visitor.onStartJavadocTag(tagName);
        scanner.skip(WHITESPACE_WITH_EOL);

        // value
        if (!scanner.lookingAt('}')) {
            scanner.mark();
            scanner.find('}');
            String value = scanner.region();
            visitor.onText(value);
        }

        // end of tag marker
        if (scanner.lookingAt('}')) {
            visitor.onEndJavadocTag(tagName);
            scanner.next();
        }
    }

    /**
     * Consumes an HTML start or end element, including its attributes. Element names are reported in lower
     * case. A self-closing start element is reported as a start element immediately followed by an end
     * element.
     */
    private void parseStartElement(TokenVisitor visitor) {
        // start element marker
        scanner.next();
        boolean isEnd = false;
        if (scanner.lookingAt('/')) {
            isEnd = true;
            scanner.next();
        }

        // element name
        scanner.skip(WHITESPACE_WITH_EOL);
        scanner.mark();
        scanner.find(END_ELEMENT_NAME);
        String elementName = scanner.region().toLowerCase(Locale.ROOT);
        if (isEnd) {
            visitor.onEndHtmlElement(elementName);
        } else {
            visitor.onStartHtmlElement(elementName);
        }

        // attributes
        scanner.skip(WHITESPACE_WITH_EOL);
        while (!scanner.isEmpty() && scanner.lookingAt(ELEMENT_ATTRIBUTE)) {
            // attribute name
            scanner.mark();
            scanner.find(END_ATTRIBUTE_NAME);
            String attrName = scanner.region();

            // separator and value; an attribute without '=' is reported with an empty value
            String attrValue = "";
            if (scanner.lookingAt(ATTRIBUTE_SEPARATOR)) {
                scanner.skip(ATTRIBUTE_SEPARATOR);
                attrValue = parseAttributeValue();
            }

            visitor.onHtmlElementAttribute(attrName, attrValue);
            scanner.skip(WHITESPACE_WITH_EOL);
        }

        if (!isEnd) {
            visitor.onStartHtmlElementComplete(elementName);
        }

        // end element marker
        if (scanner.lookingAt('/')) {
            visitor.onEndHtmlElement(elementName);
        }
        scanner.skip(END_ELEMENT);
    }

    /**
     * Consumes an attribute value, with the scanner positioned just after the '=' separator. A value that
     * starts with a single or double quote runs to the matching quote, which is consumed as well. Any other
     * value runs up to, but not including, the next whitespace or end-of-element marker. Character
     * references and entities inside the value are decoded.
     *
     * @return the decoded value, or an empty string when the input ends right after the separator
     */
    private String parseAttributeValue() {
        StringBuilder value = new StringBuilder();
        if (scanner.isEmpty()) {
            return value.toString();
        }

        char first = scanner.getFirst();
        if (first == '\'' || first == '"') {
            // opening quote
            scanner.next();
            while (!scanner.isEmpty() && !scanner.lookingAt(first)) {
                parseCharacter(value);
            }
            // closing quote; absent when the value is unterminated
            if (!scanner.isEmpty()) {
                scanner.next();
            }
        } else {
            // unquoted value: must not consume the character that ends the element
            while (!scanner.isEmpty() && !scanner.lookingAt(END_ELEMENT_NAME)) {
                parseCharacter(value);
            }
        }
        return value.toString();
    }
}