package net.rubygrapefruit.docs.html;
import net.rubygrapefruit.docs.model.buildable.BuildableDocument;
import net.rubygrapefruit.docs.model.buildable.BuildableParagraph;
import net.rubygrapefruit.docs.parser.*;
import java.io.Reader;
/**
 * Grammar:
 *
 * <pre>{@code
 * doc = element
 * element = empty-element | element-with-content
 * empty-element = '<' element-name '/>'
 * element-with-content = start-element (element | text | cdata)* end-element
 * start-element = '<' element-name '>'
 * end-element = '</' element-name '>'
 * text = (!reserved-char | entity)+
 * cdata = ??
 * }</pre>
 */
public class HtmlParser extends Parser {
// Shared productions. They are created once per parser and reused by the nested production classes below.

// Element-name production; consumed by both the start-tag and end-tag productions.
private final Name name = new Name();
// Whitespace production built over space, tab, carriage return, line feed and form feed.
private final Production<CharStream> whitespace = Productions.matchAtLeastOneOf(' ', '\t', '\r', '\n', '\f');
// Production for a "<!--" ... "-->" comment.
private final Comment comment = new Comment();
// Content that is skipped between elements: built from the whitespace and comment productions.
// Must be declared after both, because its initializer reads those fields.
private final Production<CharStream> ignorableContent = Productions.matchFirstOf(whitespace, comment);
// Produces a Token carrying the tag name for "<name>".
private final StartTag startTag = new StartTag();
// Produces a Token carrying the tag name for "</name>".
private final EndTag endTag = new EndTag();
// Produces a Token for a run of characters up to the next '<'.
private final TextProduction textProduction = new TextProduction();
/**
 * Parses the characters read from {@code input} into {@code document}.
 *
 * @param input the source of the HTML text
 * @param fileName the name used when reporting errors against the input
 * @param document the document that receives the parsed paragraphs and errors
 */
@Override
protected void doParse(Reader input, String fileName, BuildableDocument document) throws Exception {
    // The root production first tries a full document and otherwise falls back to bare body content.
    LenientHtmlDocument rootProduction = new LenientHtmlDocument(document, fileName);
    Buffer source = new Buffer(input);
    source.consume(rootProduction);
}
/**
 * An immutable piece of matched text together with the line and column that were recorded for it.
 */
private static class Token {
    /** The matched text. */
    private final String value;
    /** The line recorded for the match. */
    private final int line;
    /** The column recorded for the match. */
    private final int col;

    private Token(String text, int lineNumber, int columnNumber) {
        value = text;
        line = lineNumber;
        col = columnNumber;
    }
}
/**
 * Root production: attempts to match a complete HTML document and, when that does not match, treats the input
 * as bare body content.
 */
private class LenientHtmlDocument implements Production<CharStream> {
    private final BuildableDocument document;
    private final String fileName;

    private LenientHtmlDocument(BuildableDocument document, String fileName) {
        this.document = document;
        this.fileName = fileName;
    }

    public void match(CharStream stream) {
        boolean matchedDocument = stream.consume(new HtmlDocument(document, fileName));
        if (!matchedDocument) {
            // No doctype or html element found: parse the input as the contents of a body.
            stream.consume(new BodyBody(document));
        }
    }
}
/**
 * Matches an optional doctype followed by an optional {@code html} element, where at least one of the two must
 * be present. Stray text found before the doctype, after the doctype, or after the content is reported as an
 * error on the document.
 */
private class HtmlDocument implements Production<CharStream> {
    private final BuildableDocument document;
    private final String fileName;

    private HtmlDocument(BuildableDocument document, String fileName) {
        this.document = document;
        this.fileName = fileName;
    }

    public void match(CharStream stream) {
        skipIgnorableContent(stream);
        Token textBeforeDocType = stream.consume(textProduction);
        boolean docTypeFound = stream.consume(new Doctype());

        skipIgnorableContent(stream);
        Token textAfterDocType = stream.consume(textProduction);
        boolean htmlElementFound = stream.consume(new HtmlElement(document));

        if (!(docTypeFound || htmlElementFound)) {
            // Neither part of a document was found: undo everything consumed so far and report nothing.
            stream.rewind();
            return;
        }

        skipIgnorableContent(stream);
        Token textAfterContent = stream.consume(textProduction);

        // Errors are reported only once the document is known to match, in source order.
        reportUnexpectedText(textBeforeDocType);
        reportUnexpectedText(textAfterDocType);
        reportUnexpectedText(textAfterContent);
    }

    /** Consumes ignorable content until no more is available. */
    private void skipIgnorableContent(CharStream stream) {
        boolean skipped;
        do {
            skipped = stream.consume(ignorableContent);
        } while (skipped);
    }

    /** Adds an "unexpected text" error for the given token; does nothing when the token is null. */
    private void reportUnexpectedText(Token text) {
        if (text == null) {
            return;
        }
        document.addError(String.format("unexpected text in %s, line %s, column %s.", fileName, text.line,
                text.col));
    }
}
/**
 * Base production for an element with a fixed tag name: a start tag, a body supplied by the subclass, and an
 * end tag.
 */
private abstract class ElementProduction implements Production<CharStream> {
    public void match(CharStream stream) {
        Token opening = stream.consume(startTag);
        if (opening == null) {
            // Not a start tag at all.
            return;
        }

        boolean isExpectedTag = opening.value.equalsIgnoreCase(getTagName());
        if (!isExpectedTag) {
            // A start tag for some other element: give it back to the stream.
            stream.rewind();
            return;
        }

        stream.accept();
        // The body production is requested only after the start tag has been accepted.
        stream.consume(getBody());
        // TODO - handle mismatched end tags here
        stream.consume(endTag);
    }

    /** Returns the production used to match the content between the start and end tags. */
    protected abstract Production<? super CharStream> getBody();

    /** Returns the tag name this element matches; compared ignoring case. */
    protected abstract String getTagName();
}
/**
 * The {@code html} element. Its content is matched by {@link HtmlBody}.
 */
private class HtmlElement extends ElementProduction {
    private final BuildableDocument document;

    private HtmlElement(BuildableDocument document) {
        this.document = document;
    }

    @Override
    protected Production<? super CharStream> getBody() {
        return new HtmlBody(document);
    }

    @Override
    protected String getTagName() {
        return "html";
    }
}
/**
 * Content of the {@code html} element: a {@code body} element, or body content written directly, with
 * ignorable content allowed before and after.
 */
private class HtmlBody implements Production<CharStream> {
    private final BuildableDocument document;

    private HtmlBody(BuildableDocument document) {
        this.document = document;
    }

    public void match(CharStream stream) {
        skipIgnorableContent(stream);
        boolean bodyElementFound = stream.consume(new BodyElement(document));
        if (!bodyElementFound) {
            // No explicit body element: treat what follows as the body content itself.
            stream.consume(new BodyBody(document));
        }
        skipIgnorableContent(stream);
    }

    /** Consumes ignorable content until no more is available. */
    private void skipIgnorableContent(CharStream stream) {
        boolean skipped;
        do {
            skipped = stream.consume(ignorableContent);
        } while (skipped);
    }
}
/**
 * The {@code body} element. Its content is matched by {@link BodyBody}.
 */
private class BodyElement extends ElementProduction {
    private final BuildableDocument document;

    private BodyElement(BuildableDocument document) {
        this.document = document;
    }

    @Override
    protected Production<? super CharStream> getBody() {
        return new BodyBody(document);
    }

    @Override
    protected String getTagName() {
        return "body";
    }
}
/**
 * Body content: any sequence of ignorable content, {@code p} elements and loose text. Each run of loose text
 * becomes a paragraph of its own.
 */
private class BodyBody implements Production<CharStream> {
    private final BuildableDocument document;

    private BodyBody(BuildableDocument document) {
        this.document = document;
    }

    public void match(CharStream stream) {
        for (;;) {
            // Ignorable content is tried first; a paragraph element is only attempted when none was consumed.
            if (stream.consume(ignorableContent) || stream.consume(new ParagraphElement(document))) {
                continue;
            }
            Token looseText = stream.consume(textProduction);
            if (looseText == null) {
                // Nothing recognisable left at this position.
                return;
            }
            document.addParagraph().append(looseText.value);
        }
    }
}
/**
 * The {@code p} element. Its content is appended to a paragraph that is added to the document when the body
 * production is requested.
 */
private class ParagraphElement extends ElementProduction {
    private final BuildableDocument document;

    private ParagraphElement(BuildableDocument document) {
        this.document = document;
    }

    @Override
    protected Production<? super CharStream> getBody() {
        // Each call adds a new paragraph to the document.
        BuildableParagraph paragraph = document.addParagraph();
        return new ParagraphBody(paragraph);
    }

    @Override
    protected String getTagName() {
        return "p";
    }
}
/**
 * Paragraph content: any sequence of comments and text. Comments are dropped and text is appended to the
 * paragraph.
 */
private class ParagraphBody implements Production<CharStream> {
    private final BuildableParagraph paragraph;

    private ParagraphBody(BuildableParagraph paragraph) {
        this.paragraph = paragraph;
    }

    public void match(CharStream stream) {
        for (;;) {
            if (stream.consume(comment)) {
                continue;
            }
            Token text = stream.consume(textProduction);
            if (text == null) {
                // Neither a comment nor text: the paragraph content ends here.
                return;
            }
            paragraph.append(text.value);
        }
    }
}
/**
 * Matches an element name: a run of characters up to the first whitespace character or the first of
 * {@code > < = /}.
 *
 * <p>The terminating whitespace set is space, tab, carriage return, line feed and form feed, i.e. the same
 * characters the parser's whitespace production accepts. Without the last three, a tag written as
 * {@code <p} followed by a line break and {@code >} would produce a name that includes the line break.</p>
 */
private static class Name implements Production<CharStream> {
    public void match(CharStream charStream) {
        // TODO - legal element name characters here
        // Consumes nothing when the first character is already a terminator.
        while (charStream.consumeAnyExcept(' ', '\t', '\r', '\n', '\f', '>', '<', '=', '/')) {
        }
    }
}
/**
 * Matches a start tag of the form {@code <name>}, with optional whitespace before the closing {@code >}.
 * Produces a token holding the value, start line and start column read from the stream right after the name
 * is consumed, or null when the input is not a start tag.
 */
private class StartTag implements ValueProducingProduction<CharStream, Token> {
    public Token match(CharStream charStream) {
        // TODO - whitespace here?
        if (!charStream.consume('<') || !charStream.consume(name)) {
            return null;
        }

        // Capture the name and its position before anything else is consumed.
        String tagName = charStream.getValue();
        int startLine = charStream.getStartLine();
        int startColumn = charStream.getStartColumn();

        charStream.consume(whitespace);
        // TODO - attributes here
        boolean closed = charStream.consume('>');
        return closed ? new Token(tagName, startLine, startColumn) : null;
    }
}
/**
 * Matches an end tag of the form {@code </name>}, with optional whitespace before the closing {@code >}.
 * Produces a token holding the value, start line and start column read from the stream right after the name
 * is consumed, or null when the input is not an end tag.
 */
private class EndTag implements ValueProducingProduction<CharStream, Token> {
    public Token match(CharStream charStream) {
        if (!charStream.consume('<') || !charStream.consume('/')) {
            return null;
        }
        // TODO - whitespace here?
        if (!charStream.consume(name)) {
            return null;
        }

        // Capture the name and its position before anything else is consumed.
        String tagName = charStream.getValue();
        int startLine = charStream.getStartLine();
        int startColumn = charStream.getStartColumn();

        charStream.consume(whitespace);
        boolean closed = charStream.consume('>');
        return closed ? new Token(tagName, startLine, startColumn) : null;
    }
}
/**
 * Matches a comment: {@code <!--}, any characters, then {@code -->}.
 *
 * <p>If the input ends before {@code -->} is found, everything after {@code <!--} is treated as the comment.
 * The loop must stop in that case, because neither the end marker nor another character can be consumed and
 * it would otherwise never terminate.</p>
 */
private static class Comment implements Production<CharStream> {
    private final Production<CharStream> startComment = Productions.match("<!--");
    private final Production<CharStream> endComment = Productions.match("-->");

    public void match(CharStream charStream) {
        if (!charStream.consume(startComment)) {
            return;
        }
        while (!charStream.consume(endComment)) {
            // With no excluded characters this is expected to fail only when no input is left.
            if (!charStream.consumeAnyExcept()) {
                break;
            }
        }
    }
}
/**
 * Matches {@code <!doctype html>}, ignoring case. At least one whitespace character is required between
 * {@code <!doctype} and {@code html}; whitespace before the closing {@code >} is optional.
 *
 * <p>Whitespace here means space, tab, carriage return, line feed or form feed, so a doctype split across
 * lines or separated by tabs is accepted as well as one separated by plain spaces. When the opening
 * {@code <!doctype} matches but the rest does not, the stream is rewound.</p>
 */
private static class Doctype implements Production<CharStream> {
    private final Production<CharStream> startDoctype = Productions.matchIgnoreCase("<!doctype");
    private final Production<CharStream> whitespace = Productions.matchAtLeastOneOf(' ', '\t', '\r', '\n', '\f');
    private final Production<CharStream> type = Productions.matchIgnoreCase("html");
    private final Production<CharStream> endDoctype = Productions.match(">");

    public void match(CharStream charStream) {
        if (!charStream.consume(startDoctype)) {
            return;
        }
        // Mandatory separator between the keyword and the type.
        if (!charStream.consume(whitespace)) {
            charStream.rewind();
            return;
        }
        skipWhitespace(charStream);
        if (!charStream.consume(type)) {
            charStream.rewind();
            return;
        }
        skipWhitespace(charStream);
        if (!charStream.consume(endDoctype)) {
            charStream.rewind();
            return;
        }
    }

    /** Consumes whitespace until no more is available. */
    private void skipWhitespace(CharStream charStream) {
        while (charStream.consume(whitespace)) {
        }
    }
}
/**
 * Matches a run of characters up to, but not including, the next {@code <}.
 */
private static class Text implements Production<CharStream> {
    public void match(CharStream charStream) {
        // TODO - legal text characters here
        boolean consumed;
        do {
            consumed = charStream.consumeAnyExcept('<');
        } while (consumed);
    }
}
/**
 * Wraps {@link Text} so that a match yields a token holding the start line, start column and value read from
 * the stream. Produces null when no text is matched.
 */
private static class TextProduction implements ValueProducingProduction<CharStream, Token> {
    final Text text = new Text();

    public Token match(CharStream charStream) {
        boolean matched = charStream.consume(text);
        if (matched) {
            int startLine = charStream.getStartLine();
            int startColumn = charStream.getStartColumn();
            String content = charStream.getValue();
            return new Token(content, startLine, startColumn);
        }
        return null;
    }
}
}