/******************************************************************************* * Copyright (c) 2015, 2016 David Green. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * David Green - initial API and implementation *******************************************************************************/ package org.eclipse.mylyn.wikitext.commonmark.internal.inlines; import static com.google.common.base.Preconditions.checkNotNull; import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.eclipse.mylyn.wikitext.commonmark.internal.Line; import org.eclipse.mylyn.wikitext.commonmark.internal.ProcessingContext; import org.eclipse.mylyn.wikitext.commonmark.internal.ProcessingContext.NamedUriWithTitle; import org.eclipse.mylyn.wikitext.parser.DocumentBuilder; import org.eclipse.mylyn.wikitext.parser.builder.EntityReferences; import com.google.common.base.CharMatcher; import com.google.common.base.Objects; import com.google.common.base.Optional; import com.google.common.base.Strings; import com.google.common.escape.Escaper; import com.google.common.net.UrlEscapers; public class PotentialBracketEndDelimiter extends InlineWithText { private static final Pattern HTML_ENTITY_PATTERN = Pattern .compile("(&([a-zA-Z][a-zA-Z0-9]{1,32}|#x[a-fA-F0-9]{1,8}|#[0-9]{1,8});)"); static final String ESCAPABLE_CHARACTER_GROUP = "[!\"\\\\#$%&'()*+,./:;<=>?@\\[\\]^_`{|}~-]"; static final String ESCAPED_CHARS = "(?:\\\\" + ESCAPABLE_CHARACTER_GROUP + ")"; static final String CAPTURING_ESCAPED_CHARS = "\\\\(" + ESCAPABLE_CHARACTER_GROUP + ")"; static final String PARENS_TITLE_PART = "(?:\\(((?:" + ESCAPED_CHARS + "|[^\\)])*)\\))"; static final String SINGLE_QUOTED_TITLE_PART = "(?:'((?:" + ESCAPED_CHARS + "|[^'])*)')"; static final String QUOTED_TITLE_PART = "(?:\"((?:" + ESCAPED_CHARS + "|[^\"])*)\")"; static final String BRACKET_URI_PART = "<((?:[^<>\\\\\r\n]|" + ESCAPED_CHARS + ")*?)>"; private static final String IN_PARENS = "\\((?:[^\\\\()]|" + ESCAPED_CHARS + ")*\\)"; static final String NOBRACKET_URI_PART = "((?:[^\\\\\\s()]|" + ESCAPED_CHARS + "|" + IN_PARENS + "|\\\\)+)"; static final String URI_PART = "(?:" + BRACKET_URI_PART + "|" + NOBRACKET_URI_PART + ")"; static final String TITLE_PART = "(?:" + QUOTED_TITLE_PART + "|" + SINGLE_QUOTED_TITLE_PART + "|" + PARENS_TITLE_PART + ")"; final Pattern endPattern = Pattern.compile("\\(\\s*" + URI_PART + "?(?:\\s+" + TITLE_PART + ")?\\s*\\)(.*)", Pattern.DOTALL); final Pattern referenceLabelPattern = Pattern.compile("(\\s*\\[((?:[^\\]]|\\\\]){0,1000})]).*", Pattern.DOTALL); final Pattern referenceDefinitionEndPattern = Pattern .compile(":\\s*" + URI_PART + "?(?:\\s+" + TITLE_PART + ")?\\s*(.*)", Pattern.DOTALL); public PotentialBracketEndDelimiter(Line line, int offset) { super(line, offset, 1, "]"); } @Override public void emit(DocumentBuilder builder) { builder.characters(text); } @Override public void apply(ProcessingContext context, List<Inline> inlines, Cursor cursor) { Optional<PotentialBracketDelimiter> previousDelimiter = findLastPotentialBracketDelimiter(inlines); if (previousDelimiter.isPresent()) { PotentialBracketDelimiter openingDelimiter = previousDelimiter.get(); int indexOfOpeningDelimiter = inlines.indexOf(openingDelimiter); boolean referenceDefinition = cursor.hasNext() && cursor.getNext() == ':' && eligibleForReferenceDefinition(openingDelimiter, cursor); Matcher matcher = cursor.hasNext() ? cursor.matcher(1, referenceDefinition ? referenceDefinitionEndPattern : endPattern) : null; List<Inline> contents = InlineParser .secondPass(inlines.subList(indexOfOpeningDelimiter + 1, inlines.size())); if (!openingDelimiter.isLinkDelimiter() || !containsLink(contents)) { if (!cursor.hasNext() || !checkNotNull(matcher).matches()) { String referenceName = toReferenceName(referenceName(cursor, contents)); int size = 1; if (cursor.hasNext()) { Matcher referenceLabelMatcher = cursor.matcher(1, referenceLabelPattern); if (referenceLabelMatcher.matches()) { String label = referenceLabelMatcher.group(2); if (!label.isEmpty()) { referenceName = toReferenceName(label); } size = referenceLabelMatcher.end(1) - referenceLabelMatcher.start(1) + 1; } } NamedUriWithTitle uriWithTitle = referenceName == null ? null : context.namedUriWithTitle(referenceName); if (uriWithTitle != null) { cursor.advance(size); truncate(inlines, indexOfOpeningDelimiter); int length = getOffset() - openingDelimiter.getOffset(); if (openingDelimiter.isLinkDelimiter()) { inlines.add(new Link(openingDelimiter.getLine(), openingDelimiter.getOffset(), length, uriWithTitle.getUri(), uriWithTitle.getTitle(), contents)); } else { inlines.add(new Image(openingDelimiter.getLine(), openingDelimiter.getOffset(), length, uriWithTitle.getUri(), uriWithTitle.getTitle(), contents)); } return; } } else { String uri = linkUri(matcher); String title = linkTitle(matcher); if (!(referenceDefinition && (Strings.isNullOrEmpty(uri) || hasContentOnSameLine(matcher, cursor)))) { String referenceName = null; if (referenceDefinition) { referenceName = toReferenceName(referenceName(cursor, contents)); } if ((referenceDefinition && referenceName != null) || !referenceDefinition) { int closingLength = matcher.start(6) - matcher.start() + 1; cursor.advance(closingLength); int length = getOffset() - openingDelimiter.getOffset() + closingLength; truncate(inlines, indexOfOpeningDelimiter); if (referenceDefinition) { truncatePrecedingWhitespace(inlines, 3); inlines.add(new ReferenceDefinition(openingDelimiter.getLine(), openingDelimiter.getOffset(), length, uri, title, referenceName)); } else if (openingDelimiter.isImageDelimiter()) { inlines.add(new Image(openingDelimiter.getLine(), openingDelimiter.getOffset(), length, uri, title, contents)); } else { inlines.add(new Link(openingDelimiter.getLine(), openingDelimiter.getOffset(), length, uri, title, contents)); } return; } } } } replaceDelimiter(inlines, indexOfOpeningDelimiter, openingDelimiter); } applyCharacters(context, inlines, cursor); } private String referenceName(Cursor cursor, List<Inline> contents) { if (contents.isEmpty()) { return ""; } int start = cursor.toCursorOffset(contents.get(0).getOffset()); int end = cursor.toCursorOffset(getOffset()); return cursor.getText(start, end); } private boolean containsLink(List<Inline> contents) { for (Inline inline : contents) { if (inline instanceof Link) { return true; } else if (inline instanceof InlineWithNestedContents && containsLink(((InlineWithNestedContents) inline).getContents())) { return true; } } return false; } private void replaceDelimiter(List<Inline> inlines, int index, PotentialBracketDelimiter delimiter) { inlines.set(index, new Characters(delimiter.getLine(), delimiter.getOffset(), delimiter.getLength(), delimiter.getText())); } private boolean hasContentOnSameLine(Matcher matcher, Cursor cursor) { int indexOfContent = matcher.start(6); if (indexOfContent == -1 || matcher.end(6) == indexOfContent) { return false; } int startIndex = titleEndIndex(matcher); if (startIndex == 0) { startIndex = matcher.end(3); if (startIndex == -1) { startIndex = matcher.end(2); } } if (startIndex > 0) { for (int x = startIndex; x < indexOfContent; ++x) { char c = cursor.getChar(x); if (c == '\n') { return false; } if (!Character.isWhitespace(c)) { return false; } } return true; } return false; } private void truncatePrecedingWhitespace(List<Inline> inlines, int length) { if (!inlines.isEmpty()) { Inline last = inlines.get(inlines.size() - 1); if (last instanceof Characters) { Characters characters = (Characters) last; if (characters.getText().length() <= length && CharMatcher.WHITESPACE.matchesAllOf(characters.getText())) { inlines.remove(inlines.size() - 1); } } } } public void truncate(List<Inline> inlines, int indexOfOpeningDelimiter) { while (inlines.size() > indexOfOpeningDelimiter) { inlines.remove(indexOfOpeningDelimiter); } } boolean eligibleForReferenceDefinition(PotentialBracketDelimiter openingDelimiter, Cursor cursor) { boolean linkDelimiter = openingDelimiter.isLinkDelimiter(); if (!linkDelimiter) { return false; } int cursorRelativeOffset = cursor.toCursorOffset(openingDelimiter.getOffset()); for (int x = cursorRelativeOffset - 1; x >= 0; --x) { char c = cursor.getChar(x); if (c == '\n') { return true; } else if (c != ' ') { return false; } if (cursorRelativeOffset - x == 4) { return false; } } int cursorRelativeEndOffset = cursor.toCursorOffset(getOffset()); for (int x = cursorRelativeOffset + 1; x < cursorRelativeEndOffset; ++x) { char c = cursor.getChar(x); if (c == '[' && !precededByBackslashEscape(cursor, x)) { return false; } } return true; } boolean precededByBackslashEscape(Cursor cursor, int originalOffset) { int count = 0; for (int index = originalOffset - 1; index >= 0; --index) { char c = cursor.getChar(index); if (c == '\\') { ++count; } else { break; } } return count % 2 == 1; } private void applyCharacters(ProcessingContext context, List<Inline> inlines, Cursor cursor) { new Characters(getLine(), getOffset(), getLength(), getText()).apply(context, inlines, cursor); } private String linkTitle(Matcher matcher) { String title = matcher.group(3); if (title == null) { title = matcher.group(4); if (title == null) { title = matcher.group(5); if (title == null) { title = ""; } } } String titleWithoutBackslashEscapes = unescapeBackslashEscapes(title); return replaceHtmlEntities(titleWithoutBackslashEscapes, null); } private int titleEndIndex(Matcher matcher) { int index = matcher.end(3); if (index == -1) { index = matcher.end(4); if (index == -1) { index = matcher.end(5); } } return index + 1; } private String linkUri(Matcher matcher) { String uriWithEscapes = matcher.group(1); if (uriWithEscapes == null) { uriWithEscapes = matcher.group(2); } uriWithEscapes = Objects.firstNonNull(uriWithEscapes, ""); return normalizeUri(uriWithEscapes); } private String normalizeUri(String uriWithEscapes) { String uriWithoutBackslashEscapes = unescapeBackslashEscapes(uriWithEscapes); try { String uriWithoutHtmlEntities = replaceHtmlEntities(uriWithoutBackslashEscapes, UrlEscapers.urlFormParameterEscaper()); String decoded = URLDecoder.decode(uriWithoutHtmlEntities, StandardCharsets.UTF_8.name()); Escaper escaper = UrlEscapers.urlFragmentEscaper(); int indexOfHash = decoded.indexOf('#'); if (indexOfHash != -1) { String uri = escaper.escape(decoded.substring(0, indexOfHash)) + '#'; if ((indexOfHash + 1) < decoded.length()) { uri += escaper.escape(decoded.substring(indexOfHash + 1)); } return uri; } return escaper.escape(decoded); } catch (Exception e) { return uriWithoutBackslashEscapes; } } String replaceHtmlEntities(String text, Escaper escaper) { String replaced = ""; int lastEnd = 0; Matcher matcher = HTML_ENTITY_PATTERN.matcher(text); while (matcher.find()) { if (lastEnd < matcher.start(1)) { replaced += text.substring(lastEnd, matcher.start(1)); } String entity = matcher.group(2); String entityTextEquivalent = EntityReferences.instance().equivalentString(entity); replaced += entityTextEquivalent == null ? matcher.group(1) : escaper == null ? entityTextEquivalent : escaper.escape(entityTextEquivalent); lastEnd = matcher.end(1); } if (lastEnd < text.length()) { replaced += text.substring(lastEnd, text.length()); } return replaced; } String toReferenceName(String stringWithBackslashEscapes) { String referenceName = stringWithBackslashEscapes.replaceAll("(?s)\\\\(\\[|\\])", "$1").replaceAll("\\s+", " "); if (CharMatcher.WHITESPACE.matchesAllOf(referenceName)) { return null; } return referenceName; } String unescapeBackslashEscapes(String stringWithBackslashEscapes) { return stringWithBackslashEscapes.replaceAll(CAPTURING_ESCAPED_CHARS, "$1"); } private Optional<PotentialBracketDelimiter> findLastPotentialBracketDelimiter(List<Inline> inlines) { for (int x = inlines.size() - 1; x >= 0; --x) { Inline inline = inlines.get(x); if (inline instanceof PotentialBracketDelimiter) { PotentialBracketDelimiter delimiter = (PotentialBracketDelimiter) inline; return Optional.of(delimiter); } } return Optional.absent(); } }