/*
 * Copyright (c) 2011-2014 Julien Nicoulaud <julien.nicoulaud@gmail.com>
 * Copyright (c) 2015-2015 Vladimir Schneider <vladimir.schneider@gmail.com>
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */
package com.vladsch.idea.multimarkdown.parser;

import com.intellij.openapi.diagnostic.Logger;
import com.intellij.psi.TokenType;
import com.intellij.psi.tree.IElementType;
import com.vladsch.idea.multimarkdown.settings.MultiMarkdownGlobalSettings;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.pegdown.PegDownProcessor;
import org.pegdown.ast.*;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.vladsch.idea.multimarkdown.psi.MultiMarkdownTypes.*;

/**
 * Lexer/parser combination that uses pegdown behind the scenes to do the heavy lifting;
 * here we just fake the lexer tokens from the pegdown AST.
 */
public class MultiMarkdownLexParser { //implements Lexer, PsiParser {
    private static final Logger LOGGER = Logger.getInstance(MultiMarkdownLexParser.class);

    private int currentStringLength;
    //private String currentString;

    private static HashSet<IElementType> excludedTokenTypes = new HashSet<IElementType>();
    private static Map<IElementType, HashSet<IElementType>> overrideExclusions = new HashMap<IElementType, HashSet<IElementType>>();
    private static Map<IElementType, HashMap<IElementType, IElementType>> combinationSplits = new HashMap<IElementType, HashMap<IElementType, IElementType>>();

    protected ArrayList<SegmentedRange> parentRanges = new ArrayList<SegmentedRange>();
    protected int minStackLevel = 0;
    protected int tableRows = 0;
    protected int rowColumns = 0;

    protected static boolean recursingBold = false;
    protected static boolean recursingItalic = false;
    protected static boolean recursingStrike = false;

    protected Map<String, MarkdownASTVisitor.ParserNodeInfo> abbreviations = new HashMap<String, MarkdownASTVisitor.ParserNodeInfo>();
    protected String abbreviationsRegEx = "";
    protected Pattern abbreviationsPattern = null;

    protected boolean githubWikiLinks;
    private boolean parseCalled = false;

    // when an exclusion is added then the parent range will not be punched out by the child;
    // by default a child range punches a hole out of the parent range.
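    // Illustrative example (hypothetical offsets): a BOLD parent covering [10, 20) with a TEXT child
    // covering [12, 15) is, by default, split into BOLD segments [10, 12) and [15, 20) while the child
    // keeps [12, 15). If addExclusion(BOLD, TEXT) has been registered, the TEXT child leaves the BOLD
    // range intact and both token types cover [12, 15), so the highlighter can combine their attributes.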
    static protected void addExclusion(IElementType parent, IElementType child) {
        HashSet<IElementType> childExclusions;
        if (!overrideExclusions.containsKey(child)) {
            childExclusions = new HashSet<IElementType>();
            overrideExclusions.put(child, childExclusions);
        } else {
            childExclusions = overrideExclusions.get(child);
        }
        childExclusions.add(parent);
    }

    static protected void addInlineExclusions(IElementType parent) {
        addInlineExclusions(parent, true);
    }

    static protected void addInlineExclusions(IElementType parent, boolean addEmph) {
        addExclusion(parent, CODE);
        addExclusion(parent, SPECIAL_TEXT);
        addExclusion(parent, TEXT);
        if (addEmph) {
            addExclusion(parent, BOLD);
            addExclusion(parent, BOLD_MARKER);
            addExclusion(parent, BOLDITALIC);
            addExclusion(parent, ITALIC);
            addExclusion(parent, ITALIC_MARKER);
            addExclusion(parent, STRIKETHROUGH);
            addExclusion(parent, STRIKETHROUGH_BOLD);
            addExclusion(parent, STRIKETHROUGH_BOLDITALIC);
            addExclusion(parent, STRIKETHROUGH_ITALIC);
            addExclusion(parent, STRIKETHROUGH_MARKER);
        }
    }

    static protected void addExcludeFromInlines(IElementType parent) {
        addExclusion(BOLD, parent);
        addExclusion(BOLD_MARKER, parent);
        addExclusion(BOLDITALIC, parent);
        addExclusion(ITALIC, parent);
        addExclusion(ITALIC_MARKER, parent);
        addExclusion(STRIKETHROUGH, parent);
        addExclusion(STRIKETHROUGH_BOLD, parent);
        addExclusion(STRIKETHROUGH_BOLDITALIC, parent);
        addExclusion(STRIKETHROUGH_ITALIC, parent);
        addExclusion(STRIKETHROUGH_MARKER, parent);
    }

    static protected boolean isExcluded(IElementType parent, IElementType child) {
        HashSet<IElementType> childExclusions;
        if (child == null || parent == null) return true;
        if (!overrideExclusions.containsKey(child)) return false;
        childExclusions = overrideExclusions.get(child);
        return childExclusions.contains(parent);
    }

    static protected void addCombinationSplit(IElementType resultingType, IElementType elementType1, IElementType elementType2) {
        if (!combinationSplits.containsKey(elementType1)) combinationSplits.put(elementType1, new HashMap<IElementType, IElementType>(2));
        if (!combinationSplits.get(elementType1).containsKey(elementType2)) combinationSplits.get(elementType1).put(elementType2, resultingType);
        if (!combinationSplits.containsKey(elementType2)) combinationSplits.put(elementType2, new HashMap<IElementType, IElementType>(2));
        if (!combinationSplits.get(elementType2).containsKey(elementType1)) combinationSplits.get(elementType2).put(elementType1, resultingType);
    }

    static {
        addCombinationSplit(BOLDITALIC, BOLD, ITALIC);
        addCombinationSplit(STRIKETHROUGH_BOLDITALIC, BOLDITALIC, STRIKETHROUGH);
        addCombinationSplit(STRIKETHROUGH_BOLDITALIC, BOLD, STRIKETHROUGH_ITALIC);
        addCombinationSplit(STRIKETHROUGH_BOLDITALIC, ITALIC, STRIKETHROUGH_BOLD);
        addCombinationSplit(STRIKETHROUGH_BOLDITALIC, STRIKETHROUGH_ITALIC, STRIKETHROUGH_BOLD);
        addCombinationSplit(STRIKETHROUGH_BOLD, BOLD, STRIKETHROUGH);
        addCombinationSplit(STRIKETHROUGH_ITALIC, ITALIC, STRIKETHROUGH);

        // these are not used for highlighting, only to punch out the range of their parents
        excludedTokenTypes.add(TABLE_BODY);
        excludedTokenTypes.add(TABLE_HEADER);

        addExclusion(ANCHOR_LINK, INLINE_HTML);

        // these can affect text and should combine attributes
        addInlineExclusions(TABLE_HEADER);
        addInlineExclusions(TABLE_CELL_RODD_CODD);
        addInlineExclusions(TABLE_CELL_RODD_CEVEN);
        addInlineExclusions(TABLE_CELL_REVEN_CODD);
        addInlineExclusions(TABLE_CELL_REVEN_CEVEN);
        addInlineExclusions(TABLE_CAPTION);

        // task items
        //addInlineExclusions(TASK_ITEM);
        //addInlineExclusions(TASK_DONE_ITEM);
        addInlineExclusions(TASK_ITEM_MARKER);
        addInlineExclusions(TASK_DONE_ITEM_MARKER);
        //addInlineExclusions(FOOTNOTE);

        // let all the inlines not punch through each other
        addInlineExclusions(STRIKETHROUGH_BOLDITALIC, false);
        addExclusion(STRIKETHROUGH_BOLDITALIC, STRIKETHROUGH_BOLD);
        addExclusion(STRIKETHROUGH_BOLDITALIC, STRIKETHROUGH_ITALIC);
        addExclusion(STRIKETHROUGH_BOLDITALIC, STRIKETHROUGH);
        addExclusion(STRIKETHROUGH_BOLDITALIC, BOLDITALIC);
        addExclusion(STRIKETHROUGH_BOLDITALIC, BOLD);
        addExclusion(STRIKETHROUGH_BOLDITALIC, ITALIC);

        addInlineExclusions(STRIKETHROUGH_ITALIC, false);
        addExclusion(STRIKETHROUGH_ITALIC, STRIKETHROUGH);
        addExclusion(STRIKETHROUGH_ITALIC, ITALIC);

        addInlineExclusions(STRIKETHROUGH_BOLD, false);
        addExclusion(STRIKETHROUGH_BOLD, STRIKETHROUGH);
        addExclusion(STRIKETHROUGH_BOLD, BOLD);

        addInlineExclusions(STRIKETHROUGH, false);

        addInlineExclusions(BOLDITALIC, false);
        addExclusion(BOLDITALIC, BOLD);
        addExclusion(BOLDITALIC, ITALIC);

        addInlineExclusions(BOLD, false);
        addInlineExclusions(ITALIC, false);

        // these should override text
        addInlineExclusions(AUTO_LINK);
        addInlineExclusions(ANCHOR_LINK);
        addInlineExclusions(REFERENCE);
        addInlineExclusions(REFERENCE_IMAGE);
        addInlineExclusions(REFERENCE_LINK);
        addInlineExclusions(EXPLICIT_LINK);
        addInlineExclusions(IMAGE);
        addInlineExclusions(ABBREVIATION);
        addInlineExclusions(QUOTE);
        addInlineExclusions(HEADER_LEVEL_1);
        addInlineExclusions(HEADER_LEVEL_2);
        addInlineExclusions(HEADER_LEVEL_3);
        addInlineExclusions(HEADER_LEVEL_4);
        addInlineExclusions(HEADER_LEVEL_5);
        addInlineExclusions(HEADER_LEVEL_6);
        addInlineExclusions(DEFINITION);
        addInlineExclusions(DEFINITION_TERM); // to allow strike, bold and italics to show

        // list item is useless, should not punch out block quote, but it should punch out bullet_list
        // that way only the bullets will be left to punch out the block quote
        addExclusion(BLOCK_QUOTE, LIST_ITEM);
    }

    public @Nullable LexerToken[] parseMarkdown(final RootNode rootNode, char[] currentChars, int pegdownExtensions) {
        assert !parseCalled;
        if (rootNode == null) return null;

        this.currentStringLength = currentChars.length;

        // process tokens right away and return them
        this.githubWikiLinks = (pegdownExtensions & MultiMarkdownLexParserManager.GITHUB_WIKI_LINKS) != 0;

        MarkdownASTVisitor visitor = new MarkdownASTVisitor();
        rootNode.accept(visitor);
        ArrayList<LexerToken> lexerTokens = visitor.getTokens();

        LexerToken[] tokens = new LexerToken[lexerTokens.size()];
        tokens = lexerTokens.toArray(tokens);

        if (tokens.length > 0) {
            Arrays.sort(tokens);

            // now need to step through and merge consecutive tokens
            int iMax = tokens.length;
            LexerToken thisToken = tokens[0];
            lexerTokens = new ArrayList<LexerToken>(iMax);

            for (int i = 1; i < iMax; i++) {
                LexerToken thatToken = tokens[i];

                if (!thatToken.doesExtend(thisToken)) {
                    lexerTokens.add(thisToken);
                    thisToken = thatToken;
                } else {
                    thisToken.getRange().expandToInclude(thatToken.getRange());
                }
            }

            lexerTokens.add(thisToken);

            // now we generate lexemes from the combined optimized tokens
            tokens = new LexerToken[lexerTokens.size()];
            tokens = lexerTokens.toArray(tokens);

            // we create a list of non-intersecting, sorted, ranges
            tokens = splitLexerTokens(tokens);
        }

        parseCalled = true;
        return tokens;
    }
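    // A minimal usage sketch (illustrative only, not part of the plugin's call path; the production
    // entry point presumably goes through MultiMarkdownLexParserManager). It assumes the flags passed in
    // are pegdown Extensions bits plus the plugin's own GITHUB_WIKI_LINKS bit, and that a bare
    // PegDownProcessor built from them is good enough for a demo parse. Note the merge step above:
    // e.g. two adjacent TEXT tokens [0, 5) and [5, 9) are merged into a single TEXT token [0, 9)
    // before the stream is split into non-overlapping lexemes.
    @SuppressWarnings("unused")
    private static LexerToken[] lexMarkdownSketch(String markdownText, int pegdownExtensions) {
        char[] chars = markdownText.toCharArray();
        RootNode root = new PegDownProcessor(pegdownExtensions).parseMarkdown(chars);
        return new MultiMarkdownLexParser().parseMarkdown(root, chars, pegdownExtensions);
    }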
    public static LexerToken getWhiteSpaceToken(int start, int end) {
        return new LexerToken(new Range(start, end), TokenType.WHITE_SPACE);
    }

    public static LexerToken getSkippedSpaceToken(int start, int end) {
        //if (end > currentStringLength) {
        //    int tmp = 0;
        //}
        return new LexerToken(new Range(start, end), NONE);
    }

    public LexerToken[] splitLexerTokens(LexerToken[] tokens) {
        int end = tokens.length;

        if (end > 0) {
            ArrayList<LexerToken> lexemes = new ArrayList<LexerToken>(tokens.length);

            // do all of them
            splitLexemes(lexemes, tokens, 0, Integer.MAX_VALUE);

            LexerToken[] lexerTokens = new LexerToken[lexemes.size()];
            return lexemes.toArray(lexerTokens);
        }
        return null;
    }

    protected int splitLexemes(ArrayList<LexerToken> lexemes, LexerToken[] tokens, int start, int rangeEnd) {
        LexerToken token = tokens[start];
        Range range = token.getRange();
        Range range1;

        if (range.end <= rangeEnd) {
            int end = tokens.length;
            start++;

            for (; start < end && (range1 = tokens[start].getRange()).end <= rangeEnd; start++) {
                if (range.compare(range1) <= 0 && (range.doesNotOverlap(range1) || range.equals(range1))) {
                    // it comes before, add it; if it equals, then skip it
                    if (!range.equals(range1)) {
                        if (range.equals(token.getRange())) {
                            lexemes.add(token);
                        } else {
                            LexerToken newToken = new LexerToken(range, token.getElementType());
                            lexemes.add(newToken);
                        }

                        token = tokens[start];
                        range = token.getRange();
                    }
                } else if (!range.doesContain(range1)) {
                    //if (!range.doesNotOverlap(range1)) {
                    //    int tmp = 0;
                    //    //assert false;
                    //}
                    //if (range.compare(range1) <= 0) {
                    //    int tmp = 0;
                    //    //assert false;
                    //}
                    lexemes.add(token);
                    token = tokens[start];
                    range = token.getRange();
                } else {
                    if (range.doesOverlap(range1) && !range.doesContain(range1)) {
                        // split the range and continue; if it contains it, then skip it
                        Range newRange = new Range(range);
                        newRange.end = range1.start;
                        if (newRange.isEmpty()) {
                            token = tokens[start];
                            range = token.getRange();
                        } else {
                            LexerToken newToken = new LexerToken(newRange, token.getElementType());
                            lexemes.add(newToken);
                            range = new Range(range);
                            range.start = range1.end;
                            if (range.isEmpty()) {
                                token = tokens[start];
                                range = token.getRange();
                            } else {
                                start = splitLexemes(lexemes, tokens, start, range.start);
                            }
                        }
                    }
                }
            }

            if (!range.isEmpty()) {
                if (range.equals(token.getRange())) {
                    lexemes.add(token);
                } else {
                    LexerToken newToken = new LexerToken(range, token.getElementType());
                    lexemes.add(newToken);
                }
            }
        }
        return start;
    }
    protected static class LexerToken implements Comparable<LexerToken> {
        @Override
        public int compareTo(@NotNull LexerToken o) {
            return compare(o);
        }

        private final Range range;
        private final IElementType elementType;
        private int nesting;

        public LexerToken(final Range range, final IElementType elementType) {
            this.range = range;
            this.elementType = elementType;
            this.nesting = Integer.MAX_VALUE;
        }

        public LexerToken(int start, int end, final IElementType elementType) {
            this.range = new Range(start, end);
            this.elementType = elementType;
            this.nesting = Integer.MAX_VALUE;
        }

        public LexerToken(final Range range, final IElementType elementType, int nesting) {
            this.range = range;
            this.elementType = elementType;
            this.nesting = nesting;
        }

        public Range getRange() { return range; }

        public IElementType getElementType() { return elementType; }

        public String toString() {
            return "MultiMarkdownLexParser$LexerToken" + range.toString() + " " + elementType.toString();
        }

        public boolean isWhiteSpace() { return elementType == TokenType.WHITE_SPACE; }

        public boolean isSkippedSpace() { return elementType == NONE; }

        public int compare(LexerToken that) {
            int rangeCompare = this.range.compare(that.range);
            return rangeCompare != 0 ? rangeCompare : (this.nesting < that.nesting ? -1 : (this.nesting > that.nesting ? 1 : 0));
        }

        public boolean doesExtend(LexerToken that) {
            return this.elementType == that.elementType && this.range.isAdjacent(that.range);
        }
    }

    protected void pushRange(Range range, IElementType type) {
        SegmentedRange segmentedRange = new SegmentedRange(range);
        segmentedRange.setTokenType(type);
        parentRanges.add(segmentedRange);
    }

    protected void pushRange(SegmentedRange segmentedRange) {
        parentRanges.add(segmentedRange);
    }

    protected void pushRange(int start, int end, IElementType type) {
        SegmentedRange segmentedRange = new SegmentedRange(start, end);
        segmentedRange.setTokenType(type);
        parentRanges.add(segmentedRange);
    }

    protected SegmentedRange popRange() {
        assert parentRanges.size() > 0;
        //if (parentRanges.size() <= minStackLevel) {
        //    int tmp = 0;
        //}
        return parentRanges.remove(parentRanges.size() - 1);
    }

    protected SegmentedRange getRange() {
        assert parentRanges.size() > 0;
        return parentRanges.get(parentRanges.size() - 1);
    }

    protected class MarkdownASTVisitor implements Visitor {
        protected final ArrayList<LexerToken> tokens = new ArrayList<LexerToken>(100);

        public ArrayList<LexerToken> getTokens() { return tokens; }

        class ParserNodeInfo {
            public int startIndex = 0;
            public int endIndex = 0;
            public StringBuilder text = new StringBuilder();
            public StringBuilder expansion = null;
        }

        protected void collectChildrensText(Node node, ParserNodeInfo nodeInfo) {
            if (node.getClass() == TextNode.class || node.getClass() == SpecialTextNode.class) {
                nodeInfo.text.append(((TextNode) node).getText());
                if (nodeInfo.startIndex == 0) {
                    nodeInfo.startIndex = node.getStartIndex();
                }
                nodeInfo.endIndex = node.getEndIndex();
            } else if (node instanceof SuperNode) {
                for (Node child : node.getChildren()) {
                    collectChildrensText(child, nodeInfo);
                }
            }
        }

        @Override
        public void visit(TocNode node) {
        }
        public void visit(RootNode node) {
            for (AbbreviationNode abbrNode : node.getAbbreviations()) {
                ParserNodeInfo abbrNodeInfo = new ParserNodeInfo();
                ParserNodeInfo expansionNodeInfo = new ParserNodeInfo();

                // need to collect the expanded abbreviation text from abbreviationNodes during child visit
                collectChildrensText(abbrNode, abbrNodeInfo);
                collectChildrensText(abbrNode.getExpansion(), expansionNodeInfo);

                String abbr = abbrNodeInfo.text.toString();
                abbrNodeInfo.expansion = expansionNodeInfo.text;

                if (!abbreviations.containsKey(abbr)) {
                    // do we overwrite the old value, or keep them all for error resolution?
                    abbreviations.put(abbr, abbrNodeInfo);
                    if (abbreviationsRegEx.length() > 0) abbreviationsRegEx += "|";
                    abbreviationsRegEx += "\\b\\Q" + abbr + "\\E\\b";
                }
            }

            for (AbbreviationNode abbreviationNode : node.getAbbreviations()) {
                abbreviationNode.accept(this);
            }

            for (ReferenceNode referenceNode : node.getReferences()) {
                referenceNode.accept(this);
            }

            visitChildren(node);
        }

        public void visit(FootnoteNode node) {
            //ArrayList<Node> children = new ArrayList<Node>(1);
            //children.add(node.getFootnote());
            addTokenWithChildren(node, FOOTNOTE, node.getFootnote().getChildren());
        }

        public void visit(FootnoteRefNode node) { addToken(node, FOOTNOTE_REF); }

        public void visit(TextNode node) {
            if (node instanceof CommentNode) {
                addToken(node, COMMENT);
            } else if (node instanceof WikiPageRefNode) {
                addToken(node, WIKI_LINK_REF);
            } else if (node instanceof WikiPageTitleNode) {
                addToken(node, WIKI_LINK_TEXT);
            } else {
                if (abbreviations.isEmpty()) {
                    addToken(node, TEXT);
                } else {
                    addTextTokenWithAbbreviations(node, TEXT, ABBREVIATED_TEXT);
                }
            }
        }

        protected void addTextTokenWithAbbreviations(TextNode node, IElementType tokenType, IElementType abbreviationType) {
            int endIndex = node.getEndIndex();
            int startIndex = node.getStartIndex();

            // a missing EOL at the end of the input causes pegdown to return a range past the end of the
            // input; IDEA ignores such a range, so clamp it here. :(
            if (endIndex > currentStringLength) endIndex = currentStringLength;

            Range range = new Range(startIndex, endIndex);

            if (!range.isEmpty() && (parentRanges.size() <= 0 || excludeAncestors(range, tokenType))) {
                // wasn't stripped out, set it
                // see if it contains abbreviations, we color them differently from text
                if (abbreviationsPattern == null) {
                    abbreviationsPattern = Pattern.compile(abbreviationsRegEx);
                }

                String nodeText = node.getText(); // currentString.substring(startIndex, endIndex);
                Matcher m = abbreviationsPattern.matcher(nodeText);
                int lastPos = startIndex;

                while (m.find()) {
                    //String found = m.group();
                    int foundStart = startIndex + m.start(0);
                    int foundEnd = startIndex + m.end(0);
                    if (lastPos < foundStart) {
                        range = new Range(lastPos, foundStart);
                        tokens.add(new LexerToken(range, tokenType));
                    }
                    if (foundStart < foundEnd) {
                        range = new Range(foundStart, foundEnd);
                        tokens.add(new LexerToken(range, abbreviationType));
                    }
                    lastPos = foundEnd;
                }

                if (lastPos < endIndex) {
                    range = new Range(lastPos, endIndex);
                    tokens.add(new LexerToken(range, tokenType));
                }
                //System.out.print("adding " + tokenType + " for [" + range.getStart() + ", " + range.getEnd() + ")\n");
            }
        }
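        // Illustrative example (hypothetical offsets): if the document defines the abbreviation "HTML",
        // abbreviationsRegEx becomes "\b\QHTML\E\b", so a TEXT node "plain HTML here" at [0, 15) is
        // emitted as TEXT [0, 6), ABBREVIATED_TEXT [6, 10) and TEXT [10, 15).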
        public void visit(SimpleNode node) {
            switch (node.getType()) {
            case HRule:
                addToken(node, HRULE);
                break;

            case Apostrophe:
            case Ellipsis:
            case Emdash:
            case Endash:
                addToken(node, SMARTS);
                break;

            case Linebreak:
            case Nbsp:
                break;
            }
        }

        public void visit(SuperNode node) { visitChildren(node); }

        public void visit(ParaNode node) { visitChildren(node); }

        public void visit(Node node) { addToken(node, TokenType.ERROR_ELEMENT); }

        public void visit(SpecialTextNode node) {
            if ((node.getEndIndex() - node.getStartIndex() > 1)) addToken(node, SPECIAL_TEXT);
            else visit((TextNode) node); // so that it is handled in TextNode manner
        }

        protected void splitOutMarker(StrongEmphSuperNode node, IElementType markerType) {
            String marker = node.getChars(); // != null ? node.getChars() : "~~";
            int markerLength = marker.length();

            ArrayList<Node> children = new ArrayList<Node>(1);
            children.add(node);

            SuperNode parentNode = new SuperNode(children);
            parentNode.setStartIndex(node.getStartIndex());
            parentNode.setEndIndex(node.getEndIndex());

            // now need to truncate children to this range
            limitChildrensRange(parentNode, node.getStartIndex() + markerLength, node.getEndIndex() - (node.isClosed() ? markerLength : 0));

            addTokenWithChildren(parentNode, markerType);
        }

        /**
         * Split out the lead-in and terminating sequences into a faked parent node and add the new
         * strikethrough_marker token type for the lead and trail chars, with the child text node taking
         * on the strikethrough attribute. That way the lead-in and terminating chars can be colored separately.
         */
        public void visit(StrikeNode node) {
            if (!recursingStrike) {
                recursingStrike = true;
                splitOutMarker(node, STRIKETHROUGH_MARKER);
                recursingStrike = false;
            } else {
                addTokenWithChildren(node, STRIKETHROUGH);
            }
        }

        /**
         * Split out the lead-in and terminating sequences into a faked parent node and add the new
         * bold_marker and italic_marker token types for the lead and trail chars, with the child text node
         * taking on the BOLD and ITALIC tokens. That way the lead-in and terminating chars can be colored separately.
         */
        public void visit(StrongEmphSuperNode node) {
            if (node.isClosed()) {
                IElementType parentTokenType = node.isStrong() ? BOLD_MARKER : ITALIC_MARKER;
                IElementType tokenType = node.isStrong() ? BOLD : ITALIC;

                if (tokenType == BOLD && !recursingBold || tokenType == ITALIC && !recursingItalic) {
                    if (tokenType == BOLD) recursingBold = true;
                    else recursingItalic = true;

                    splitOutMarker(node, parentTokenType);

                    if (tokenType == BOLD) recursingBold = false;
                    else recursingItalic = false;
                } else {
                    addTokenWithChildren(node, tokenType);
                }
            } else {
                // not closed, ignore
                visitChildren(node);
            }
        }
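        // Illustrative example (hypothetical offsets): for "**bold**" at [0, 8), splitOutMarker fakes a
        // BOLD_MARKER parent over [0, 8) and truncates the StrongEmphSuperNode child to [2, 6). The child
        // is then revisited and emits BOLD (and TEXT) over [2, 6), which punches the marker range down to
        // [0, 2) and [6, 8), so only the "**" sequences keep the marker highlighting.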
        protected Node getLastChild(SuperNode node) {
            Node lastChild = null;

            for (; ; ) {
                List<Node> children = node.getChildren();
                int size = children.size();
                if (size <= 0) break;
                lastChild = children.get(size - 1);
                if (!(lastChild instanceof SuperNode)) break;
                node = (SuperNode) lastChild;
            }
            return lastChild;
        }

        protected void limitChildrensRange(SuperNode parentNode, int startIndex, int endIndex) {
            for (Node node : parentNode.getChildren()) {
                if (node.getStartIndex() < startIndex) ((AbstractNode) node).setStartIndex(startIndex);
                if (node.getEndIndex() > endIndex) ((AbstractNode) node).setEndIndex(endIndex);
                if (node instanceof SuperNode) limitChildrensRange((SuperNode) node, startIndex, endIndex);
            }
        }

        public void visit(ExpImageNode node) { addTokenWithChildren(node, IMAGE); }

        public void visit(ExpLinkNode node) { addTokenWithChildren(node, EXPLICIT_LINK); }

        public void visit(final RefLinkNode node) { addTokenWithChildren(node, REFERENCE_LINK); }

        public void visit(ReferenceNode node) { addTokenWithChildren(node, REFERENCE); }

        public void visit(RefImageNode node) { addTokenWithChildren(node, REFERENCE_IMAGE); }

        public void visit(AutoLinkNode node) { addToken(node, AUTO_LINK); }

        public void visit(MailLinkNode node) { addToken(node, MAIL_LINK); }

        public void visit(HeaderNode node) {
            //visitChildren(node);
            switch (node.getLevel()) {
            case 1:
                addTokenWithChildren(node, node.isSetext() ? SETEXT_HEADER_LEVEL_1 : HEADER_LEVEL_1);
                break;
            case 2:
                addTokenWithChildren(node, node.isSetext() ? SETEXT_HEADER_LEVEL_2 : HEADER_LEVEL_2);
                break;
            case 3:
                addTokenWithChildren(node, HEADER_LEVEL_3);
                break;
            case 4:
                addTokenWithChildren(node, HEADER_LEVEL_4);
                break;
            case 5:
                addTokenWithChildren(node, HEADER_LEVEL_5);
                break;
            case 6:
                addTokenWithChildren(node, HEADER_LEVEL_6);
                break;
            }
        }

        public void visit(CodeNode node) { addToken(node, CODE); }

        public void visit(VerbatimNode node) { addToken(node, VERBATIM); }

        public void visit(WikiLinkNode node) {
            String text = node.getText();
            int pos = 0;

            if ((pos = text.indexOf("|")) >= 0) {
                addToken(node.getStartIndex(), node.getStartIndex() + 2, WIKI_LINK_OPEN);

                if (githubWikiLinks) {
                    int anchorPos = text.indexOf('#', pos + 1);

                    addToken(node.getStartIndex() + 2, node.getStartIndex() + 2 + pos, WIKI_LINK_TEXT);
                    addToken(node.getStartIndex() + 2 + pos, node.getStartIndex() + 2 + pos + 1, WIKI_LINK_SEPARATOR);

                    if (anchorPos >= 0) {
                        addToken(node.getStartIndex() + 2 + pos + 1, node.getStartIndex() + 2 + anchorPos, WIKI_LINK_REF);
                        addToken(node.getStartIndex() + 2 + anchorPos, node.getStartIndex() + 2 + anchorPos + 1, WIKI_LINK_REF_ANCHOR_MARKER);
                        addToken(node.getStartIndex() + 2 + anchorPos + 1, node.getEndIndex() - 2, WIKI_LINK_REF_ANCHOR);
                    } else {
                        addToken(node.getStartIndex() + 2 + pos + 1, node.getEndIndex() - 2, WIKI_LINK_REF);
                    }

                    addToken(node.getEndIndex() - 2, node.getEndIndex(), WIKI_LINK_CLOSE);
                } else {
                    int anchorPos = text.indexOf('#');
                    if (anchorPos > pos) anchorPos = -1;

                    if (anchorPos >= 0) {
                        addToken(node.getStartIndex() + 2, node.getStartIndex() + 2 + anchorPos, WIKI_LINK_REF);
                        addToken(node.getStartIndex() + 2 + anchorPos, node.getStartIndex() + 2 + anchorPos + 1, WIKI_LINK_REF_ANCHOR_MARKER);
                        addToken(node.getStartIndex() + 2 + anchorPos + 1, node.getStartIndex() + 2 + pos, WIKI_LINK_REF_ANCHOR);
                    } else {
                        addToken(node.getStartIndex() + 2, node.getStartIndex() + 2 + pos, WIKI_LINK_REF);
                    }

                    addToken(node.getStartIndex() + 2 + pos, node.getStartIndex() + 2 + pos + 1, WIKI_LINK_SEPARATOR);
                    addToken(node.getStartIndex() + 2 + pos + 1, node.getEndIndex() - 2, WIKI_LINK_TEXT);
                    addToken(node.getEndIndex() - 2, node.getEndIndex(), WIKI_LINK_CLOSE);
                }
            } else {
                addToken(node.getStartIndex(), node.getStartIndex() + 2, WIKI_LINK_OPEN);

                int anchorPos = text.indexOf('#');

                if (anchorPos >= 0) {
                    addToken(node.getStartIndex() + 2, node.getStartIndex() + 2 + anchorPos, WIKI_LINK_REF);
                    addToken(node.getStartIndex() + 2 + anchorPos, node.getStartIndex() + 2 + anchorPos + 1, WIKI_LINK_REF_ANCHOR_MARKER);
                    addToken(node.getStartIndex() + 2 + anchorPos + 1, node.getEndIndex() - 2, WIKI_LINK_REF_ANCHOR);
                } else {
                    addToken(node.getStartIndex() + 2, node.getEndIndex() - 2, WIKI_LINK_REF);
                }

                addToken(node.getEndIndex() - 2, node.getEndIndex(), WIKI_LINK_CLOSE);
            }
        }
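        // Illustrative example (hypothetical offsets, GitHub-style wiki links enabled, assuming getText()
        // returns the inner text): "[[Home|Home Page]]" at [0, 18) yields WIKI_LINK_OPEN [0, 2),
        // WIKI_LINK_TEXT [2, 6), WIKI_LINK_SEPARATOR [6, 7), WIKI_LINK_REF [7, 16) and WIKI_LINK_CLOSE [16, 18).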
        public void visit(QuotedNode node) { addTokenWithChildren(node, QUOTE); }

        public void visit(BlockQuoteNode node) {
            // here some children will punch out the block quote's '>' because they span more than one line
            // and know nothing of indentations, so we have to punch out holes for every child line that
            // starts with '>' and optional leading space
            addTokenWithChildren(node, BLOCK_QUOTE);
        }

        public void visit(BulletListNode node) { addTokenWithChildren(node, BULLET_LIST); }

        public void visit(OrderedListNode node) { addTokenWithChildren(node, ORDERED_LIST); }

        public void visit(ListItemNode node) {
            if (node instanceof TaskListNode) {
                SuperNode newNode = new SuperNode();
                TaskListNode taskListNode = (TaskListNode) node;

                // marker is only the marker characters
                newNode.setStartIndex(taskListNode.getStartIndex() - taskListNode.getTaskListMarker().length());
                newNode.setEndIndex(newNode.getStartIndex() + 3);

                // new node is all the text following
                addTokenWithChildren(newNode, (taskListNode.isDone() ? TASK_DONE_ITEM_MARKER : TASK_ITEM_MARKER));
                //addTokenWithChildren(node, (taskListNode.isDone() ? TASK_DONE_ITEM : TASK_ITEM));
                addTokenWithChildren(node, LIST_ITEM);
            } else {
                addTokenWithChildren(node, LIST_ITEM);
            }
        }
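        // Illustrative example (assuming the task-list marker is the three characters "[ ]" or "[x]"):
        // for "- [x] done" the marker token covers just the "[x]" characters, while LIST_ITEM covers
        // the item text that follows.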
        public void visit(DefinitionListNode node) { addTokenWithChildren(node, DEFINITION_LIST); }

        public void visit(DefinitionNode node) { addTokenWithChildren(node, DEFINITION); }

        public void visit(DefinitionTermNode node) { addTokenWithChildren(node, DEFINITION_TERM); }

        public void visit(TableNode node) {
            tableRows = 0;
            addTokenWithChildren(node, TABLE);
        }

        public void visit(TableBodyNode node) { addTokenWithChildren(node, TABLE_BODY); }

        public void visit(TableCellNode node) {
            rowColumns++;
            addTokenWithChildren(node, (tableRows & 1) != 0
                    ? ((rowColumns & 1) != 0 ? TABLE_CELL_RODD_CODD : TABLE_CELL_RODD_CEVEN)
                    : ((rowColumns & 1) != 0 ? TABLE_CELL_REVEN_CODD : TABLE_CELL_REVEN_CEVEN));
        }

        // Not called, TableColumnNode is only used as part of TableNode.getColumns()
        public void visit(TableColumnNode node) { addTokenWithChildren(node, TABLE_COLUMN); }

        public void visit(TableHeaderNode node) { addTokenWithChildren(node, TABLE_HEADER); }

        public void visit(TableRowNode node) {
            tableRows++;
            rowColumns = 0;
            addTokenWithChildren(node, (tableRows & 1) != 0 ? TABLE_ROW_ODD : TABLE_ROW_EVEN);
        }

        public void visit(TableCaptionNode node) { addTokenWithChildren(node, TABLE_CAPTION); }

        abstract class NodeFactory {
            abstract public TextNode newNode(String text);
        }

        class CommentNode extends TextNode {
            public CommentNode(String text) { super(text); }
        }

        class WikiPageRefNode extends TextNode {
            public WikiPageRefNode(String text) { super(text); }
        }

        class WikiPageTitleNode extends TextNode {
            public WikiPageTitleNode(String text) { super(text); }
        }

        protected boolean extractHtmlComments(TextNode node, NodeFactory factory) {
            String html = node.getText();
            Pattern p = Pattern.compile("(\"|\'|<!--|-->)", Pattern.CASE_INSENSITIVE);
            ArrayList<TextNode> nodes = null;
            Matcher m = p.matcher(html);
            TextNode textNode;
            CommentNode commentNode;
            int lastPos = 0;
            boolean inDQuotes = false;
            boolean inSQuotes = false;
            int startComment = -1;
            int endPos;

            while (m.find()) {
                String found = m.group();
                if (inSQuotes && !found.equals("'")) continue;
                if (inDQuotes && !found.equals("\"")) continue;
                if (startComment >= 0 && !found.equals("-->")) continue;

                if (found.equals("\"")) {
                    inDQuotes = !inDQuotes;
                } else if (found.equals("'")) {
                    inSQuotes = !inSQuotes;
                } else if (found.equals("<!--")) {
                    startComment = m.start(0);
                } else if (startComment >= 0 && found.equals("-->")) {
                    // have a comment
                    if (nodes == null) nodes = new ArrayList<TextNode>(10);

                    endPos = startComment;
                    if (lastPos < endPos) {
                        textNode = factory.newNode(html.substring(lastPos, endPos));
                        textNode.setStartIndex(node.getStartIndex() + lastPos);
                        textNode.setEndIndex(node.getStartIndex() + endPos);
                        nodes.add(textNode);
                    }

                    lastPos = startComment;
                    endPos = m.end(0);
                    commentNode = new CommentNode(html.substring(lastPos, endPos));
                    commentNode.setStartIndex(node.getStartIndex() + lastPos);
                    commentNode.setEndIndex(node.getStartIndex() + endPos);
                    nodes.add(commentNode);

                    lastPos = endPos;
                    startComment = -1;
                }
            }

            if (nodes != null) {
                endPos = html.length();
                if (lastPos < endPos) {
                    textNode = factory.newNode(html.substring(lastPos, endPos));
                    textNode.setStartIndex(node.getStartIndex() + lastPos);
                    textNode.setEndIndex(node.getStartIndex() + endPos);
                    nodes.add(textNode);
                }

                for (Node node1 : nodes) {
                    node1.accept(this);
                }
                return true;
            }
            return false;
        }
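        // Illustrative example: for an inline HTML node "<span>a <!-- note --> b</span>" the scan above
        // splits it into INLINE_HTML "<span>a ", COMMENT "<!-- note -->" and INLINE_HTML " b</span>",
        // so HTML comments can be highlighted separately from the surrounding markup.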
        public void visit(HtmlBlockNode node) {
            if (node.getChildren().size() > 1 || !extractHtmlComments(node, new NodeFactory() {
                @Override
                public TextNode newNode(String text) { return new HtmlBlockNode(text); }
            })) addToken(node, HTML_BLOCK);
        }

        public void visit(InlineHtmlNode node) {
            if (node.getChildren().size() > 1 || !extractHtmlComments(node, new NodeFactory() {
                @Override
                public TextNode newNode(String text) { return new InlineHtmlNode(text); }
            })) addToken(node, INLINE_HTML);
        }

        public void visit(AbbreviationNode node) { addTokenWithChildren(node, ABBREVIATION); }

        public void visit(AnchorLinkNode node) { addToken(node, ANCHOR_LINK); }

        protected void visitChildren(SuperNode node) { visitChildren(node.getChildren()); }

        protected void visitChildren(List<Node> children) {
            // here we combine multiple segments of TextNode and SpecialText into a single TextNode
            int startIndex = 0, endIndex = 0;
            String combinedText = null;
            Node lastTextNode = null;

            for (Node child : children) {
                boolean processed = false;

                if (child.getClass() == TextNode.class || (child.getClass() == SpecialTextNode.class && child.getEndIndex() - child.getStartIndex() <= 1)) {
                    if (combinedText != null) {
                        // combine range and text, if possible
                        if (endIndex == child.getStartIndex()) {
                            // combine
                            endIndex = child.getEndIndex();
                            combinedText += ((TextNode) child).getText();
                            lastTextNode = null;
                            processed = true;
                        } else {
                            // insert collected up to now
                            if (lastTextNode != null) {
                                lastTextNode.accept(this);
                                lastTextNode = null;
                            } else {
                                TextNode newNode = new TextNode(combinedText);
                                newNode.setStartIndex(startIndex);
                                newNode.setEndIndex(endIndex);
                                newNode.accept(this);
                            }
                            combinedText = null;
                        }
                    }

                    if (combinedText == null) {
                        startIndex = child.getStartIndex();
                        endIndex = child.getEndIndex();
                        combinedText = ((TextNode) child).getText();
                        lastTextNode = child;
                        processed = true;
                    }
                }

                if (!processed) {
                    if (combinedText != null) {
                        // process accumulated to date
                        if (lastTextNode != null) {
                            lastTextNode.accept(this);
                        } else {
                            TextNode newNode = new TextNode(combinedText);
                            newNode.setStartIndex(startIndex);
                            newNode.setEndIndex(endIndex);
                            newNode.accept(this);
                        }
                        combinedText = null;
                        lastTextNode = null;
                    }
                    child.accept(this);
                }
            }

            if (combinedText != null) {
                // process the last combined
                if (lastTextNode != null) {
                    lastTextNode.accept(this);
                } else {
                    TextNode newNode = new TextNode(combinedText);
                    newNode.setStartIndex(startIndex);
                    newNode.setEndIndex(endIndex);
                    newNode.accept(this);
                }
            }
        }

        protected boolean excludeAncestors(Range range, IElementType type) {
            for (SegmentedRange parentRange : parentRanges) {
                if (parentRange != null && parentRange.isExcludedBy(type)) {
                    //System.out.println("Excluding parent " + parentRange + " by " + type + " " + range);
                    parentRange.exclude(range);
                    //System.out.println("  Excluded parent " + parentRange + " by " + type + " " + range + "\n");
                }
            }
            return true;
        }

        protected boolean excludeAncestors(SegmentedRange segmentedRange) {
            for (SegmentedRange parentRange : parentRanges) {
                if (parentRange != null && parentRange.isExcludedBy(segmentedRange.getTokenType())) {
                    //System.out.println("Excluding parent " + parentRange + " by " + segmentedRange);
                    parentRange.exclude(segmentedRange);
                    //System.out.println("  Excluded parent " + parentRange + " by " + segmentedRange + "\n");
                }
            }
            return true;
        }

        protected void addSplitCombinations() {
            if (parentRanges.size() > 0) {
                SegmentedRange segmentedRange = parentRanges.get(parentRanges.size() - 1);
                IElementType tokenType = segmentedRange.getTokenType();
                //System.out.println("split combos " + segmentedRange.toString());

                if (combinationSplits.containsKey(tokenType)) {
                    SegmentedRange combinationRange = null;

                    // here we have to combine bold and italic into bolditalic, etc of our parent ranges
                    for (SegmentedRange parentRange : parentRanges) {
                        if (tokenType != parentRange.getTokenType() && !parentRange.isEmpty()
                                && combinationSplits.containsKey(tokenType)
                                && combinationSplits.get(tokenType).containsKey(parentRange.getTokenType())) {
                            // we will create an intersection and make it punch through parents and this range
                            tokenType = combinationSplits.get(tokenType).get(parentRange.getTokenType());
                            SegmentedRange splitRange = new SegmentedRange();
                            splitRange.setTokenType(tokenType);
                            splitRange.addIntersections(segmentedRange, parentRange);
                            //System.out.println("Add intersections of " + segmentedRange + " by " + parentRange + " = " + splitRange);
                            segmentedRange = combinationRange = splitRange;
                            if (segmentedRange.isEmpty()) break;
                            // continue to make combinations with other parents
                        }
                    }

                    if (combinationRange != null && !combinationRange.isEmpty()) pushRange(combinationRange);
                }
            }
        }
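        // Illustrative example (in outline): while visiting "*outer **inner** text*", BOLD is pushed while
        // ITALIC is already on the parent stack; combinationSplits maps (BOLD, ITALIC) to BOLDITALIC, so a
        // BOLDITALIC segment is synthesized over the intersection of the two ranges and later punches that
        // overlap out of both the BOLD and the ITALIC ranges.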
        // to overcome the problem that a parent has a wider range than the child, we selectively punch out
        // the child's range from the parent's so that we can eliminate the parent's highlighting on the child text
        protected void addTokenWithChildren(Node node, IElementType tokenType) {
            addTokenWithChildren(node, tokenType, node.getChildren());
        }

        protected void addTokenWithChildren(Node node, IElementType tokenType, List<Node> children) {
            addTokenWithChildren(node.getStartIndex(), node.getEndIndex(), tokenType, children);
        }

        protected void addTokenWithChildren(int startIndex, int endIndex, IElementType tokenType, List<Node> children) {
            //if (node.getEndIndex() > currentStringLength) {
            //    ((SuperNode) node).setEndIndex(currentStringLength);
            //    if (node.getStartIndex() >= node.getEndIndex()) {
            //        return;
            //    }
            //}
            int entryStackLevel = parentRanges.size();
            Range range = new Range(startIndex, endIndex);
            //System.out.println("addTokenWithChildren " + tokenType + range);

            // a missing EOL at the end of the input causes pegdown to return a range past the end of the
            // input; IDEA ignores such a range, so clamp it here. :(
            if (range.getEnd() > currentStringLength) range.setEnd(currentStringLength);

            if (!range.isEmpty()) {
                pushRange(range, tokenType);
                //System.out.println("  addTokenWithChildren:pushed(" + tokenType + range + ")[" + parentRanges.size() + "]");
                int prevMinStackLevel = minStackLevel;
                int stackLevel = parentRanges.size();
                minStackLevel = stackLevel;

                // add split combinations to the parent stack, then children
                addSplitCombinations();
                //if (parentRanges.size() < stackLevel) {
                //    int tmp = 0;
                //}
                visitChildren(children);
                minStackLevel = prevMinStackLevel;

                // leave self on stack so it gets punched out by combinations, pop the combinations, we will now add them
                //if (parentRanges.size() - stackLevel < 0) {
                //    int tmp = 0;
                //}
                ArrayList<SegmentedRange> fakeRanges = new ArrayList<SegmentedRange>(parentRanges.size() - stackLevel);
                while (stackLevel < parentRanges.size()) fakeRanges.add(popRange());

                for (SegmentedRange segmentedRange : fakeRanges) {
                    addSegmentedToken(segmentedRange, true);
                }

                //System.out.println("  addTokenWithChildren:popping[" + parentRanges.size() + "]");
                SegmentedRange segmentedRange = popRange();
                //System.out.println("  addTokenWithChildren:popped(" + segmentedRange + ")[" + parentRanges.size() + "]");
                addSegmentedToken(segmentedRange, true);
            }
            //if (entryStackLevel != parentRanges.size()) {
            //    int tmp = 0;
            //    //assert false;
            //}
        }

        protected void addSegmentedToken(SegmentedRange segmentedRange, boolean excludeAncestors) {
            IElementType tokenType = segmentedRange.getTokenType();
            boolean renderRange = !excludedTokenTypes.contains(tokenType);

            if (parentRanges.size() <= 0) excludeAncestors = false;

            for (Range range : segmentedRange.getSegments()) {
                // now exclude from ancestors what is left by the children
                if (!excludeAncestors || excludeAncestors(range, tokenType)) {
                    // wasn't stripped out, set it
                    if (renderRange) {
                        tokens.add(new LexerToken(range, tokenType, parentRanges.size()));
                        //System.out.print("adding " + tokenType + " for [" + range.getStart() + ", " + range.getEnd() + ")\n");
                    }
                }
            }
        }
        protected void addToken(Node node, IElementType tokenType) {
            addToken(node.getStartIndex(), node.getEndIndex(), tokenType);
        }

        protected void addToken(Range range, IElementType tokenType) {
            addToken(range.getStart(), range.getEnd(), tokenType);
        }

        protected void addToken(int startIndex, int endIndex, IElementType tokenType) {
            //if (tokenType == QUOTE) {
            //    int tmp = 0;
            //}
            // a missing EOL at the end of the input causes pegdown to return a range past the end of the
            // input; IDEA ignores such a range, so clamp it here. :(
            if (endIndex > currentStringLength) endIndex = currentStringLength;

            Range range = new Range(startIndex, endIndex);

            if (!range.isEmpty() && (parentRanges.size() <= 0 || excludeAncestors(range, tokenType))) {
                // wasn't stripped out, set it
                tokens.add(new LexerToken(range, tokenType));
                //System.out.print("adding " + tokenType + " for [" + range.getStart() + ", " + range.getEnd() + ")\n");
            }
        }
    }

    /**
     * Helper Classes
     */
    static class Range {
        protected int start;
        protected int end;

        public int getStart() { return start; }

        public void setStart(int start) { this.start = start; }

        public int getEnd() { return end; }

        public void setEnd(int end) { this.end = end; }

        public Range(int start, int end) {
            this.start = start;
            this.end = end;
        }

        public Range(Range that) {
            this.start = that.start;
            this.end = that.end;
        }

        public boolean doesNotOverlap(Range that) { return that.end <= start || that.start >= end; }

        public boolean doesOverlap(Range that) { return !(that.end <= start || that.start >= end); }

        public boolean isEqual(Range that) { return end == that.end && start == that.start; }

        public boolean doesContain(Range that) { return end >= that.end && start <= that.start; }

        public boolean doesProperlyContain(Range that) { return end > that.end && start < that.start; }

        public boolean isEmpty() { return start >= end; }

        public boolean intersect(Range that) {
            if (start < that.start) start = that.start;
            if (end > that.end) end = that.end;
            if (start >= end) start = end = 0;
            return !isEmpty();
        }

        public boolean exclude(Range that) {
            // let's make sure we don't need to split into 2 ranges
            //assert (doesOverlap(that) && !doesProperlyContain(that));
            if (start >= that.start && start < that.end) start = that.end;
            if (end <= that.end && end > that.start) end = that.start;
            if (start >= end) start = end = 0;
            return !isEmpty();
        }

        public int compare(Range that) {
            if (this.start < that.start) {
                return -1;
            } else if (this.start > that.start) {
                return 1;
            } else if (this.end > that.end) {
                return -1;
            } else if (this.end < that.end) {
                return 1;
            }
            return 0;
        }

        @Override
        public String toString() { return "[" + start + ", " + end + ")"; }

        public boolean isAdjacent(Range that) { return this.start == that.end || this.end == that.start; }

        public void expandToInclude(Range that) {
            if (this.start > that.start) this.start = that.start;
            if (this.end < that.end) this.end = that.end;
        }
    }

    static class SegmentedRange {
        protected ArrayList<Range> segments;
        protected IElementType tokenType;

        public ArrayList<Range> getSegments() { return segments; }

        public IElementType getTokenType() { return tokenType; }

        public void setTokenType(IElementType tokenType) { this.tokenType = tokenType; }

        public boolean isEmpty() { return segments.isEmpty(); }

        SegmentedRange() { segments = new ArrayList<Range>(); }

        SegmentedRange(int start, int end) {
            segments = new ArrayList<Range>(1);
            segments.add(0, new Range(start, end));
        }

        SegmentedRange(Range range) {
            segments = new ArrayList<Range>(1);
            segments.add(0, range);
        }

        SegmentedRange(ArrayList<Range> ranges) { segments = new ArrayList<Range>(ranges); }

        SegmentedRange(SegmentedRange that) { segments = new ArrayList<Range>(that.segments); }

        public boolean doesContain(Range range) {
            for (Range range1 : segments) {
                if (range1.doesContain(range)) return true;
            }
            return false;
        }

        public void addIntersections(Range range, SegmentedRange segmentedRange) {
            for (Range range1 : segmentedRange.getSegments()) {
                if (range.doesOverlap(range1)) {
                    Range newRange = new Range(range);
                    newRange.intersect(range1);
                    segments.add(newRange);
                }
            }
        }
        public void addIntersections(SegmentedRange segmentedRange1, SegmentedRange segmentedRange2) {
            for (Range range1 : segmentedRange1.getSegments()) {
                for (Range range2 : segmentedRange2.getSegments()) {
                    if (range1.doesOverlap(range2)) {
                        Range newRange = new Range(range1);
                        newRange.intersect(range2);
                        segments.add(newRange);
                    }
                }
            }
        }

        public SegmentedRange exclude(SegmentedRange segmentedRange) {
            for (Range range : segmentedRange.getSegments()) {
                exclude(range);
            }
            return this;
        }

        public SegmentedRange exclude(Range range) {
            int i, iMax = segments.size();

            for (i = 0; i < iMax; i++) {
                Range range1 = segments.get(i);

                if (range1.doesOverlap(range)) {
                    if (range1.doesContain(range)) {
                        if (range1.doesProperlyContain(range)) {
                            // split range1 into 2 and add the new one
                            Range newRange1 = new Range(range1);
                            Range newRange2 = new Range(range1);
                            newRange1.setEnd(range.getStart());
                            newRange2.setStart(range.getEnd());
                            segments.set(i, newRange1);
                            i++;
                            segments.add(i, newRange2);
                            iMax++;
                        } else {
                            if (range1.isEqual(range)) {
                                // remove, they are the same
                                segments.remove(i);
                                i--;
                                iMax--;
                            } else {
                                // truncate range1 and replace
                                Range newRange1 = new Range(range1);
                                newRange1.exclude(range);
                                segments.set(i, newRange1);
                            }
                        }
                    } else if (range.doesContain(range1)) {
                        // delete it
                        segments.remove(i);
                        i--;
                        iMax--;
                    } else {
                        // they overlap but neither contains the other; truncate range1 and replace
                        Range newRange1 = new Range(range1);
                        newRange1.exclude(range);
                        segments.set(i, newRange1);
                    }
                }
            }
            return this;
        }

        protected boolean isExcludedBy(IElementType child) { return !isExcluded(tokenType, child); }

        @Override
        public String toString() {
            String out = "" + tokenType + " ";
            if (!isEmpty()) for (Range range : segments) out += range.toString();
            else out += "<empty>";
            return out;
        }
    }

    /*
    static class SplitSet {
        private IElementType[] elementSet;
        private IElementType resultingType;

        public SplitSet(IElementType[] elementSet, IElementType resultingType) {
            this.elementSet = elementSet;
            this.resultingType = resultingType;
        }

        public boolean hasExactSet(ArrayList<IElementType> elementSet) {
            for (IElementType elementType : this.elementSet) {
                boolean hadElement = false;
                for (IElementType elementType1 : elementSet) {
                    if (elementType == elementType1) { hadElement = true; break; }
                }
                if (!hadElement) return false;
            }
            for (IElementType elementType : elementSet) {
                boolean hadElement = false;
                for (IElementType elementType1 : this.elementSet) {
                    if (elementType == elementType1) { hadElement = true; break; }
                }
                if (!hadElement) return false;
            }
            return true;
        }

        public boolean hasExactSet(IElementType... elementSet) {
            for (IElementType elementType : this.elementSet) {
                boolean hadElement = false;
                for (IElementType elementType1 : elementSet) {
                    if (elementType == elementType1) { hadElement = true; break; }
                }
                if (!hadElement) return false;
            }
            for (IElementType elementType : elementSet) {
                boolean hadElement = false;
                for (IElementType elementType1 : this.elementSet) {
                    if (elementType == elementType1) { hadElement = true; break; }
                }
                if (!hadElement) return false;
            }
            return true;
        }

        public boolean containsElement(IElementType elementType) {
            boolean hadElement = false;
            for (IElementType elementType1 : elementSet) {
                if (elementType == elementType1) { hadElement = true; break; }
            }
            return hadElement;
        }
    }
    */
}