/** * Copyright (c) 2011, 2012 Cloudsmith Inc. and other contributors, as listed below. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Cloudsmith * */ package org.cloudsmith.geppetto.pp.dsl.ppdoc; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.cloudsmith.geppetto.pp.dsl.services.PPGrammarAccess; import org.eclipse.xtext.IGrammarAccess; import org.eclipse.xtext.nodemodel.INode; import com.google.common.collect.Lists; import com.google.inject.Inject; /** * Provides parsing of puppet documentation. * Supports the following RDoc constructs * <ul> * <li>headings, =heading1, ==heading2, to =====heading5. Additional = treated same as heading5</li> * <li>bold, <b>*bold*</b></li> * <li>italic, <i>_italic_</i></li> * <li>fixed/code, <tt>+fixed+</tt></li> * <li>stop doc mode, -- * <li>start doc mode, ++ * <li>preformatted, indent more than <i>natural margin</i> (i.e. 2 spaces or one)</li> * <li>lists with hanging indents, starting with '*', '-' '<digit>.' or '<letter>.'</li> * <li>definitions with hanging idents - [label] or label::</li> * </ul> * <p> * Note that parsing is simplistic and combinations of bold/italic/fixed will be shown in the "outermost" style i.e. * *_text_* is shown with only bold, and not (bold+italic). (Somewhat untested, so there could be some surprising * results). Also unsupported is the use of bold/italic/fixed in headers. * </p> * <p> * The parser produces a list of {@link DocNode} instances, where each node holds a sequence of the text, an offset (in * the text document), a length, and a style int. The result also contains the "comment parts" (i.e. '/' '*' '#' and * whitespace) that are not considered part of the documentation text. These nodes have the style HIDDEN. To get only * the documentation text, simply concatenate all non HIDDEN nodes. Node text contains NL. * </p> */ public class PPDocumentationParser { public static class DocNode { int offset; int length; int style; String text; public DocNode(int offset, int length, int style, String text) { this.offset = offset; this.length = length; this.style = style; this.text = text; } public int getLength() { return length; } public int getOffset() { return offset; } public int getStyle() { return style; } public String getText() { return text; } } private PPGrammarAccess ga; /** Style for comment characters and whitespace (i.e. not part of documentation). */ public static final int HIDDEN = 0x0; /** Regular documentation text */ public static final int PLAIN = 0x1; /** preformatted/verbatim text */ public static final int VERBATIM = 0x2; /** documentation comments between -- and ++ lines */ public static final int COMMENT = 0x3; // a comment inside the doc public static final int HEADING_1 = 0x10; public static final int HEADING_2 = 0x20; public static final int HEADING_3 = 0x30; public static final int HEADING_4 = 0x40; public static final int HEADING_5 = 0x50; /** bold span */ public static final int BOLD = 0x100; /** italic span */ public static final int ITALIC = 0x200; /** span of same type as preformatted/code/fixed */ public static final int FIXED = 0x400; /** Indicates a style (BOLD, ITALIC, FIXED) should be turned on */ public static final int ON = 0x1000; /** Indicates a style (BOLD, ITALIC, FIXED) should be turned off */ public static final int OFF = 0x2000; public static final Pattern nlPattern = Pattern.compile("\n"); public static final Pattern leadingWs = Pattern.compile("^(\\s*\\*)?(.*)", Pattern.DOTALL); public static final Pattern mlCommentTail = Pattern.compile("\\s*?\\*/\\s*$", Pattern.DOTALL); public static final Pattern headingPattern = Pattern.compile("^\\s{0,1}(=+).*", Pattern.DOTALL); public static final Pattern stylePattern = Pattern.compile("(?:\\*.*?\\*)|(?:_.*?_)|(?:\\+.*?\\+)"); public static final Pattern stopDocPattern = Pattern.compile("^\\s{0,1}\\-\\-\\s*", Pattern.DOTALL); public static final Pattern startDocPattern = Pattern.compile("^\\s{0,1}\\+\\+\\s*", Pattern.DOTALL); public static final Pattern hangingIndent = Pattern.compile("\\s*(\\*|\\-|[a-z0-9]\\.|\\[.*?\\]|.*?::)\\s*"); public static final Pattern blankLine = Pattern.compile("\\s*", Pattern.DOTALL); private int naturalMargin = 1; private int currentIndent = naturalMargin; private List<Integer> indentStack = new ArrayList<Integer>(); @Inject PPDocumentationParser(IGrammarAccess ga) { this.ga = (PPGrammarAccess) ga; } private void addStyleNodes(List<DocNode> result, int offset, String line) { // blank lines have no effect on indentation, and are always styled as plain if(blankLine.matcher(line).matches()) { result.add(new DocNode(offset, line.length(), PLAIN, line)); return; } int indent = indentation(line); if(indent < currentIndent) popIndentTo(indent); if(indent > currentIndent) { // // if line starts with more than a single space (or tab), it is verbatim // if(line.startsWith(" ") || line.startsWith(" \t") || line.startsWith("\t")) { result.add(new DocNode(offset, line.length(), VERBATIM, line)); return; } Matcher mr = hangingIndent.matcher(line); if(mr.lookingAt()) pushIndent(mr.end()); mr = headingPattern.matcher(line); if(mr.find()) { int headingLevel = mr.group(1).length(); int style = Math.min(HEADING_1 * headingLevel, HEADING_5); result.add(new DocNode(offset, line.length(), style, line)); // ignore fixed, italic and bold in headings for now popIndentTo(0); // pops to natural margin return; } mr = stylePattern.matcher(line); boolean match = mr.find(); while(match) { StringBuffer matched = new StringBuffer(); mr.appendReplacement(matched, ""); // before the style int matchLength = matched.length(); if(matchLength > 0) { result.add(new DocNode(offset, matchLength, PLAIN, matched.toString())); offset += matchLength; } String styled = mr.group(); int style = 0; if(styled.startsWith("*")) style = BOLD; else if(styled.startsWith("_")) style = ITALIC; else if(styled.startsWith("+")) style = FIXED; result.add(new DocNode(offset, styled.length(), style, styled)); offset += styled.length(); match = mr.find(); } StringBuffer matched = new StringBuffer(); mr.appendTail(matched); int matchLength = matched.length(); if(matchLength > 0) result.add(new DocNode(offset, matchLength, PLAIN, matched.toString())); } /** * Returns the number of space characters at the start of the string. * * @param line * @return */ private int indentation(String line) { int i = 0; for(; i < line.length(); i++) if(line.charAt(i) != ' ') return i; return i; } public List<DocNode> parse(List<INode> nodes) { naturalMargin = 1; indentStack.clear(); pushIndent(naturalMargin); if(nodes.size() > 0) { if(nodes.size() == 1 && nodes.get(0).getGrammarElement() == ga.getML_COMMENTRule()) return processMLComment(nodes.get(0)); return processSLSequence(nodes); } return Collections.emptyList(); } private int popIndentTo(int indent) { for(int i = indentStack.size() - 1; i > 0; i--) { if(indentStack.get(i) > indent) indentStack.remove(i); else break; } currentIndent = indentStack.get(indentStack.size() - 1); return currentIndent; } private List<DocNode> processMLComment(INode node) { List<DocNode> result = Lists.newArrayList(); String allText = node.getText(); int start = allText.indexOf("/*"); result.add(new DocNode(node.getOffset(), start + 2, HIDDEN, allText.substring(0, start + 2))); int offset = node.getOffset() + start + 2; Matcher matcher = mlCommentTail.matcher(allText); matcher.find(); int tail = matcher.start(); final DocNode endNode = new DocNode( node.getOffset() + tail, allText.length() - tail, HIDDEN, allText.substring(tail, allText.length())); allText = allText.substring(start + 2, tail); int commentLevel = 0; String[] lines = nlPattern.split(allText, 0); for(int i = 0; i < lines.length; i++) { String line = lines[i]; if(lines.length > 1 && i < lines.length - 1) line = line + "\n"; // must add the newline back (needed in the resulting nodes) Matcher mr = leadingWs.matcher(line); String leading = ""; String remainder = line; if(mr.matches()) { leading = mr.group(1); if(leading != null && leading.length() > 0) { result.add(new DocNode(offset, leading.length(), HIDDEN, leading)); offset += leading.length(); remainder = mr.group(2); } } // Input between "--" and "++" are comments in the doc if(startDocPattern.matcher(remainder).matches()) { commentLevel = Math.max(commentLevel - 1, 0); result.add(new DocNode(offset, remainder.length(), COMMENT, remainder)); offset += remainder.length(); continue; } if(stopDocPattern.matcher(remainder).matches()) { commentLevel++; result.add(new DocNode(offset, remainder.length(), COMMENT, remainder)); offset += remainder.length(); continue; } if(commentLevel > 0) result.add(new DocNode(offset, remainder.length(), COMMENT, remainder)); else addStyleNodes(result, offset, remainder); offset += remainder.length(); } result.add(endNode); return result; } private List<DocNode> processSLSequence(List<INode> nodes) { List<DocNode> result = Lists.newArrayList(); for(INode node : nodes) { String allText = node.getText(); result.add(new DocNode(node.getOffset(), 1, HIDDEN, allText.substring(0, 1))); int offset = node.getOffset() + 1; String line = allText.substring(1); int commentLevel = 0; Matcher mr = leadingWs.matcher(line); String leading = ""; String remainder = line; if(mr.matches()) { leading = mr.group(1); if(leading != null && leading.length() > 0) { result.add(new DocNode(offset, leading.length(), HIDDEN, leading)); offset += leading.length(); remainder = mr.group(2); } } // Input between "--" and "++" are comments in the doc if(startDocPattern.matcher(remainder).matches()) { commentLevel = Math.max(commentLevel - 1, 0); result.add(new DocNode(offset, remainder.length(), COMMENT, remainder)); offset += remainder.length(); continue; } if(stopDocPattern.matcher(remainder).matches()) { commentLevel++; result.add(new DocNode(offset, remainder.length(), COMMENT, remainder)); offset += remainder.length(); continue; } if(commentLevel > 0) result.add(new DocNode(offset, remainder.length(), COMMENT, remainder)); else addStyleNodes(result, offset, remainder); offset += remainder.length(); } return result; } private void pushIndent(int indent) { currentIndent = indent; indentStack.add(indent); } }