/*=============================================================================#
# Copyright (c) 2015-2016 David Green and others.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Eclipse Public License v1.0
# which accompanies this distribution, and is available at
# http://www.eclipse.org/legal/epl-v10.html
#
# Contributors:
# David Green - initial API and implementation in Mylyn
# Stephan Wahlbrink (WalWare.de) - revised API and implementation
#=============================================================================*/
package de.walware.docmlet.wikitext.internal.commonmark.core.blocks;
import static com.google.common.base.Preconditions.checkState;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.CDATA_END_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.CDATA_START1_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.CLOSE_TAG_1_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.COMMENT_END_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.COMMENT_START1_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.DECL_END_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.DECL_START1_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.OPEN_TAG_1_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.PI_END_REGEX;
import static de.walware.docmlet.wikitext.internal.commonmark.core.CommonRegex.PI_START1_REGEX;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.mylyn.wikitext.core.parser.DocumentBuilder;
import org.eclipse.mylyn.wikitext.core.parser.DocumentBuilder.BlockType;
import de.walware.jcommons.collections.ImList;
import de.walware.docmlet.wikitext.core.source.EmbeddingAttributes;
import de.walware.docmlet.wikitext.core.source.extdoc.IExtdocMarkupLanguage;
import de.walware.docmlet.wikitext.internal.commonmark.core.CommonmarkLocator;
import de.walware.docmlet.wikitext.internal.commonmark.core.Line;
import de.walware.docmlet.wikitext.internal.commonmark.core.LineSequence;
import de.walware.docmlet.wikitext.internal.commonmark.core.ProcessingContext;
import de.walware.docmlet.wikitext.internal.commonmark.core.SourceBlock;
import de.walware.docmlet.wikitext.internal.commonmark.core.SourceBlockItem;
import de.walware.docmlet.wikitext.internal.commonmark.core.SourceBlocks.SourceBlockBuilder;
/**
 * Source block type for raw HTML blocks.
 * 
 * <p>A line starts an HTML block if it is not blank, is indented by less than 4 and matches one
 * of the seven alternatives of {@link #START_PATTERN}. The matched alternative (the "kind",
 * {@code 1..7}) determines how the end of the block is found: kinds 1 to 5 end with the first
 * line containing the corresponding end marker, kinds 6 and 7 are consumed by
 * {@code advanceNonBlankLines}.</p>
 * 
 * <p>The instance keeps reusable {@link Matcher} objects as fields; matchers are stateful, so an
 * instance is presumably intended to be used by one thread at a time (not enforced here).</p>
 */
public class HtmlBlock extends SourceBlock {
	
	
	/*
	 * Kinds of HTML block starts.
	 * 
	 * The values are the indexes of the capturing groups in START_PATTERN (see getType) and are
	 * also encoded into the embedding descriptor in emit, so they must not be changed.
	 */
	/** Start tag of one of {@link #HTML_1_TAG_NAMES}. */
	private static final byte TYPE_HTML_1= 1;
	/** Comment start. */
	private static final byte TYPE_COMMENT= 2;
	/** Processing instruction start. */
	private static final byte TYPE_PI= 3;
	/** Declaration start. */
	private static final byte TYPE_DECL= 4;
	/** CDATA section start. */
	private static final byte TYPE_CDATA= 5;
	/** Open or closing tag of one of {@link #HTML_BLOCK_TAG_NAMES}. */
	private static final byte TYPE_HTML_BLOCK_TAG= 6;
	/** Any other complete open or closing tag, alone on its line. */
	private static final byte TYPE_OTHER_TAG= 7;
	
	
	/** Tag names (regex alternation) of kind 1; the block ends at the matching closing tag. */
	private static final String HTML_1_TAG_NAMES= "script|pre|style";
	
	/**
	 * Tag names (regex alternation) of kind 6.
	 * 
	 * <p>Includes {@code h1} to {@code h6}; with {@code h1} only, a line like
	 * {@code <h2>Title</h2>} would not be recognized as start of an HTML block.</p>
	 */
	private static final String HTML_BLOCK_TAG_NAMES=
			"address|article|aside|base|basefont|blockquote|body" +
			"|caption|center|col|colgroup" +
			"|dd|details|dialog|dir|div|dl|dt" +
			"|fieldset|figcaption|figure|footer|form|frame|frameset" +
			"|h1|h2|h3|h4|h5|h6|head|header|hr|html|iframe" +
			"|legend|li|link" +
			"|main|menu|menuitem|meta|nav|noframes" +
			"|ol|optgroup|option|p|param" +
			"|section|source|title|summary" +
			"|table|tbody|td|tfoot|th|thead|tr|track" +
			"|ul";
	
	/**
	 * Pattern for the first line of an HTML block (applied with {@link Matcher#matches()}).
	 * 
	 * <p>Exactly one of the capturing groups 1 to 7 participates in a match; its index is the
	 * kind of the block. NOTE(review): this relies on the imported {@code *_REGEX} fragments
	 * not containing capturing groups of their own - confirm in CommonRegex.</p>
	 */
	private static final Pattern START_PATTERN= Pattern.compile("<(?:" +
					"((?:(?i)" + HTML_1_TAG_NAMES + ")(?:[\\s>].*)?)" +
					"|(" + COMMENT_START1_REGEX + ".*)" +
					"|(" + PI_START1_REGEX + ".*)" +
					"|(" + DECL_START1_REGEX + ".*)" +
					"|(" + CDATA_START1_REGEX + ".*)" +
					"|(/?(?:(?i)" + HTML_BLOCK_TAG_NAMES + ")(?:(?:\\s|/?>).*)?)" +
					"|(" + OPEN_TAG_1_REGEX + "\\s*|" + CLOSE_TAG_1_REGEX + "\\s*)" +
				")",
			Pattern.DOTALL );
	
	/** End marker of kind 1: closing tag of one of {@link #HTML_1_TAG_NAMES}. */
	private static final Pattern END_HTML_1_PATTERN= Pattern.compile(
			"</(?:(?i)" + HTML_1_TAG_NAMES + ")>",
			Pattern.DOTALL );
	/** End marker of kind 2 (comment). */
	private static final Pattern END_COMMENT_PATTERN= Pattern.compile(
			COMMENT_END_REGEX,
			Pattern.DOTALL );
	/** End marker of kind 3 (processing instruction). */
	private static final Pattern END_PI_PATTERN= Pattern.compile(
			PI_END_REGEX,
			Pattern.DOTALL );
	/** End marker of kind 4 (declaration). */
	private static final Pattern END_DECL_PATTERN= Pattern.compile(
			DECL_END_REGEX,
			Pattern.DOTALL );
	/** End marker of kind 5 (CDATA section). */
	private static final Pattern END_CDATA_PATTERN= Pattern.compile(
			CDATA_END_REGEX,
			Pattern.DOTALL );
	
	
	/**
	 * Block item of an HTML block; in addition to the inherited state it records the kind of
	 * the block and whether its end marker was found.
	 */
	static final class HtmlBlockItem extends SourceBlockItem<HtmlBlock> {
		
		/** Kind of the block, {@code 1..7} (one of the {@code TYPE_} constants). */
		private byte htmlType;
		
		/**
		 * Set to {@code true} by {@code createItem} if the end marker of a block of kind 1 to 5
		 * was found before the end of the line sequence. Not read inside this file.
		 */
		private boolean isClosed;
		
		public HtmlBlockItem(final HtmlBlock type, final SourceBlockBuilder builder) {
			super(type, builder);
		}
		
	}
	
	
	private final Matcher startMatcher= START_PATTERN.matcher("");
	
	// matchers for the end markers, created on first use (see getEndMatcher)
	private Matcher endHtml1Matcher;
	private Matcher endCommentMatcher;
	private Matcher endPIMatcher;
	private Matcher endDeclMatcher;
	private Matcher endCDATAMatcher;
	
	
	public HtmlBlock() {
	}
	
	
	/**
	 * Checks whether an HTML block can start at the current line of the sequence.
	 * 
	 * @param lineSequence the line sequence, positioned at the candidate line
	 * @param currentBlockItem if not {@code null}, the start is accepted only for kinds which
	 *     can interrupt (all except kind 7); presumably the item of the block currently being
	 *     built, e.g. a paragraph - verify against the caller
	 * @return {@code true} if the current line exists, is not blank, has an indent less than 4
	 *     and matches the start pattern of a permitted kind
	 */
	@Override
	public boolean canStart(final LineSequence lineSequence, final SourceBlockItem<?> currentBlockItem) {
		final Line currentLine= lineSequence.getCurrentLine();
		if (currentLine == null || currentLine.isBlank() || currentLine.getIndent() >= 4) {
			return false;
		}
		final Matcher matcher= currentLine.setupIndent(this.startMatcher);
		return (matcher.matches()
				&& (currentBlockItem == null || canInterrupt(matcher)) );
	}
	
	/**
	 * Returns whether the matched start can begin while another block item is present.
	 * 
	 * @param matcher the start matcher after a successful match
	 * @return {@code false} for kind 7 (group 7 participated in the match), otherwise
	 *     {@code true}
	 */
	private boolean canInterrupt(final Matcher matcher) {
		return (matcher.start(TYPE_OTHER_TAG) == -1);
	}
	
	/**
	 * Creates the item for the HTML block starting at the current line and advances the line
	 * sequence past the lines consumed for the block.
	 * 
	 * <p>For kinds with an end marker (1 to 5) lines are consumed up to and including the first
	 * line containing the marker, or up to the end of the sequence if there is none. The search
	 * begins with the start line itself, so start and end may be on the same line. For the
	 * other kinds the consumption is delegated to {@code advanceNonBlankLines}.</p>
	 * 
	 * @param builder the builder passed on to the new item
	 * @param lineSequence the line sequence, positioned at the start line of the block
	 * @throws IllegalStateException if the current line does not match the start pattern
	 *     (i.e. the method was called without a preceding successful {@code canStart})
	 */
	@Override
	public void createItem(final SourceBlockBuilder builder, final LineSequence lineSequence) {
		final HtmlBlockItem htmlBlockItem= new HtmlBlockItem(this, builder);
		
		final Line startLine= lineSequence.getCurrentLine();
		final Matcher matcher= startLine.setupIndent(this.startMatcher);
		checkState(matcher.matches());
		
		htmlBlockItem.htmlType= getType(matcher);
		final Matcher endMatcher= getEndMatcher(htmlBlockItem.htmlType);
		if (endMatcher == null) {
			advanceNonBlankLines(lineSequence);
			return;
		}
		
		Line line;
		while ((line= lineSequence.getCurrentLine()) != null) {
			// the line belongs to the block in any case, also if it contains the end marker
			lineSequence.advance();
			if (line.setup(endMatcher).find()) {
				htmlBlockItem.isClosed= true;
				break;
			}
		}
	}
	
	/**
	 * Does nothing; this block type contributes nothing to the processing context.
	 */
	@Override
	public void initializeContext(final ProcessingContext context, final SourceBlockItem<?> blockItem) {
	}
	
	/**
	 * Emits the lines of the HTML block unescaped to the document builder, each followed by
	 * a line feed.
	 * 
	 * <p>In mode {@link ProcessingContext#PARSE_SOURCE_STRUCT} the lines are additionally
	 * enclosed in a {@link BlockType#CODE} block carrying {@link EmbeddingAttributes} which
	 * describe the embedded HTML: comment blocks (kind 2) get the comment descriptor, all other
	 * kinds the generic descriptor combined with the kind number.</p>
	 * 
	 * @param context the processing context providing the mode
	 * @param blockItem the item to emit; must be a {@link HtmlBlockItem}
	 * @param locator the locator updated with block and line positions
	 * @param builder the document builder receiving the events
	 */
	@Override
	public void emit(final ProcessingContext context, final SourceBlockItem<?> blockItem,
			final CommonmarkLocator locator, final DocumentBuilder builder) {
		final HtmlBlockItem htmlBlockItem= (HtmlBlockItem) blockItem;
		final ImList<Line> lines= blockItem.getLines();
		
		if (context.getMode() == ProcessingContext.PARSE_SOURCE_STRUCT) {
			final int descr;
			if (htmlBlockItem.htmlType == TYPE_COMMENT) {
				descr= IExtdocMarkupLanguage.EMBEDDED_HTML_COMMENT_BLOCK_DESCR;
			}
			else {
				// encode the kind of the block into the descriptor
				descr= IExtdocMarkupLanguage.EMBEDDED_HTML_OTHER_BLOCK_DESCR
						| ((htmlBlockItem.htmlType << IExtdocMarkupLanguage.EMBEDDED_HTML_DISTINCT_SHIFT) & IExtdocMarkupLanguage.EMBEDDED_HTML_DISTINCT_MASK);
			}
			
			locator.setBlockBegin(blockItem);
			// NOTE(review): the meaning of the last argument (Integer.MIN_VALUE) is not visible
			// here; see EmbeddingAttributes
			builder.beginBlock(BlockType.CODE, new EmbeddingAttributes(
					IExtdocMarkupLanguage.EMBEDDED_HTML, descr,
					lines.get(0).getOffset(), Integer.MIN_VALUE ));
		}
		
		for (final Line line : lines) {
			locator.setLine(line);
			
			builder.charactersUnescaped(line.getText());
			builder.charactersUnescaped("\n");
		}
		
		if (context.getMode() == ProcessingContext.PARSE_SOURCE_STRUCT) {
			locator.setBlockEnd(blockItem);
			builder.endBlock();
		}
	}
	
	
	/**
	 * Determines the kind of HTML block from a successful match of the start pattern.
	 * 
	 * @param matcher the start matcher after a successful match
	 * @return the index of the first of the groups 1 to 6 which participated in the match,
	 *     otherwise 7
	 */
	private byte getType(final Matcher matcher) {
		for (byte type= TYPE_HTML_1; type < TYPE_OTHER_TAG; type++) {
			if (matcher.start(type) != -1) {
				return type;
			}
		}
		return TYPE_OTHER_TAG;
	}
	
	/**
	 * Returns the matcher for the end marker of the specified kind of HTML block.
	 * The matchers are created on first use and reused afterwards.
	 * 
	 * @param type the kind of the block
	 * @return the matcher for kinds 1 to 5, or {@code null} for the other kinds which have no
	 *     end marker (their end is found by {@code advanceNonBlankLines})
	 */
	private Matcher getEndMatcher(final byte type) {
		switch (type) {
		case TYPE_HTML_1:
			if (this.endHtml1Matcher == null) {
				this.endHtml1Matcher= END_HTML_1_PATTERN.matcher("");
			}
			return this.endHtml1Matcher;
		case TYPE_COMMENT:
			if (this.endCommentMatcher == null) {
				this.endCommentMatcher= END_COMMENT_PATTERN.matcher("");
			}
			return this.endCommentMatcher;
		case TYPE_PI:
			if (this.endPIMatcher == null) {
				this.endPIMatcher= END_PI_PATTERN.matcher("");
			}
			return this.endPIMatcher;
		case TYPE_DECL:
			if (this.endDeclMatcher == null) {
				this.endDeclMatcher= END_DECL_PATTERN.matcher("");
			}
			return this.endDeclMatcher;
		case TYPE_CDATA:
			if (this.endCDATAMatcher == null) {
				this.endCDATAMatcher= END_CDATA_PATTERN.matcher("");
			}
			return this.endCDATAMatcher;
		default:
			return null; // blank line
		}
	}
	
}