/**
* Copyright (c) 2012 Cloudsmith Inc. and other contributors, as listed below.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Cloudsmith
*
*/
package org.cloudsmith.xtext.dommodel.formatter.comments;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.cloudsmith.xtext.dommodel.formatter.context.IFormattingContext;
import org.cloudsmith.xtext.dommodel.formatter.css.Alignment;
import org.cloudsmith.xtext.textflow.CharSequences;
import org.cloudsmith.xtext.textflow.TextFlow;
import com.google.common.collect.Lists;
/**
* <p>
* This comment processor is used to parse and format comments spanning multiple lines. It is a low-level class that can be used to implement higher
* order comment formatting strategies.
* </p>
* <p>
* Comments may be a sequence of single line comments, or a multi line comment. When processing a sequence of single line comments, everything in the
* passed region to format must be comment text or whitespace - input in this example:
*
* <pre>
* # comment
* a = 10 # comment
* b = 20 # comment
* </pre>
*
* will result in something like:
*
* <pre>
* # comment
* # a = 10 # comment
* # b = 20 # comment
* # comment
* </pre>
* <p>
* <b>Usage:</b> An instance of the class is created with an ICommentContext that describes how the comment should be parsed. This context includes
* the relative starting position of the comment sequence. One of the classes implementing this interface may be used as a convenience. As an example,
* given the text:
* </p>
*
* <pre>
* {@code
* a = 10 /*
* * comment starts here
* * and continues here,
* * but is not aligned ok
* and some lines do not start with the repeating character
* ∗/
* }
* </pre>
*
* Is processed by providing the starting line offset 7. When formatting with the same context, the result is:
*
* <pre>
* {@code
* a = 10 /*
* * comment starts here
* * and continues here
* * but is not aligned ok
* * and some lines do not start with the repeating character
* ∗/
* }
* </pre>
*
* To relocate the comment, or output it in a different style, use a second {@link ICommentContainerInformation} in the format call.
* The same input example as before can be output to look like this:
*
* <pre>
* {@code
* a = 10 #
* # comment starts here
* # and continues here
* # but is not aligned ok
* # and some lines do not start with the repeating character
* #
* }
* </pre>
* <p>
* The comment processor has special processing of comment lines that consists of the same repeated character. All such lines are formatted without a
* left margin, and such lines longer than 5 characters are truncated instead of folded/wrapped when not fitting within the max constraints.
* </p>
* <p>
* It is possible to constrain trailing empty lines min/max.
* </p>
* <p>
* Note that the result will contain leading whitespace up to the position passed as the starting position. When appending the text to an existing
* flow already at this position, the method {@link CharSequences#trimLeft(CharSequence)} can be used to adjust the text to this position.
* </p>
* TODO: Needs to know about the ICommentFormatterAdvice !
*/
public class CommentProcessor {
public static class CommentFormattingOptions {
private int maxWidth;
private int minEmptyTrailing;
private int maxEmptyTrailing;
private boolean retainInline;
private ICommentFormatterAdvice advice;
public CommentFormattingOptions(ICommentFormatterAdvice advice, int maxWidth) {
this(advice, maxWidth, 1, 1, true);
}
public CommentFormattingOptions(ICommentFormatterAdvice advice, int maxWidth, int trailing) {
this(advice, maxWidth, trailing, trailing, true);
}
public CommentFormattingOptions(ICommentFormatterAdvice advice, int maxWidth, int minEmptyTrailing,
int maxEmptyTrailing) {
this(advice, maxWidth, minEmptyTrailing, maxEmptyTrailing, true);
}
public CommentFormattingOptions(ICommentFormatterAdvice advice, int maxWidth, int minEmptyTrailing,
int maxEmptyTrailing, boolean retainInline) {
this.maxWidth = maxWidth;
this.minEmptyTrailing = minEmptyTrailing;
this.maxEmptyTrailing = maxEmptyTrailing;
this.retainInline = retainInline;
this.advice = advice;
}
/**
* @return the maxEmptyTrailing
*/
public int getMaxEmptyTrailing() {
return maxEmptyTrailing;
}
/**
* @return the maxWidth of the formatted comment (including markers and margin)
*/
public int getMaxWidth() {
return maxWidth;
}
/**
* @return the minEmptyTrailing
*/
public int getMinEmptyTrailing() {
return minEmptyTrailing;
}
/**
* @return true if an inline comment should be retained on a single line
*/
public boolean isRetainInline() {
return retainInline;
}
}
public static class CommentText {
private CharSequence trailingContainerText;
private List<CharSequence> lines;
public CommentText(List<CharSequence> lines, CharSequence trailingContainerText) {
this.lines = lines;
this.trailingContainerText = trailingContainerText;
}
public List<CharSequence> getLines() {
return lines;
}
}
public CommentProcessor() {
}
protected TextFlow emit(CommentText commentText, ICommentContainerInformation out,
CommentFormattingOptions options, IFormattingContext formattingContext) {
final ICommentFormatterAdvice advice = options.advice;
final String lineSeparator = formattingContext.getLineSeparatorInformation().getLineSeparator();
final String endToken = out.getEndToken();
List<CharSequence> lines = commentText.lines;
TextFlow flow = new TextFlow(formattingContext);
// StringBuilder builder = new StringBuilder();
int indentSize = out.getLeftPosition();
int leftMarginSize = out.getLeftMargin();
if(Alignment.right == out.getMarkerColumnAlignment())
indentSize += out.getMarkerColumnWidth() - out.getRepeatingToken().length();
CharSequence indent = new CharSequences.Spaces(indentSize);
CharSequence leftMargin = new CharSequences.Spaces(leftMarginSize);
ensureTrailingLines(lines, options);
try {
// always process first line even if it is also the last
int limit = Math.max(1, lines.size() - 1);
boolean singleLine = lines.size() == 1;
for(int i = 0; i < limit; i++) {
// comment container
if(i == 0)
flow.append(CharSequences.spaces(out.getLeftPosition())).append(out.getStartToken());
else
flow.append(indent).append(out.getRepeatingToken());
CharSequence s = lines.get(i);
if(s.length() > 0) {
boolean hasBannerLength = s.length() > 4;
boolean alignSpecialLeft = advice.getAlignSpecialLinesLeft();
// Homogeneous lines should not have a leftMargin e.g. '#---' '********' unless advice says so
// anything starting with letter or digit, or that is not homogeneous has a leftMargin
if(Character.isLetterOrDigit(s.charAt(0)) //
||
!(CharSequences.isHomogeneous(s) && (hasBannerLength || alignSpecialLeft)))
flow.append(leftMargin);
flow.append(s);
// // Homogeneous lines should not have a leftMargin e.g. '#---' '********' unless advice says so
// // anything starting with letter or digit, or that is not homogeneous has a leftMargin
// if(Character.isLetterOrDigit(s.charAt(0)) || !advice.getAlignSpecialLinesLeft() ||
// !(CharSequences.isHomogeneous(s)))
// flow.append(leftMargin);
// flow.append(s);
}
if(!singleLine)
flow.append(lineSeparator);
}
// process last line
if(singleLine) {
// last line is the same as the first
if(endToken.length() > 0)
flow.append(" "); // space before end token (if one will be output)
}
else {
CharSequence s = lines.get(limit);
flow.append(indent);
if(s.length() > 0 || out.isSLStyle()) {
flow.append(out.getRepeatingToken());
if(s.length() > 0) {
if(Character.isLetterOrDigit(s.charAt(0)) || !CharSequences.isHomogeneous(s))
flow.append(leftMargin);
flow.append(s);
if(!out.isSLStyle())
flow.append(" "); // a ML comment may be followed by something
}
}
}
if(endToken.length() > 0)
flow.append(out.getEndToken());
// finally append trailing stuff
if(commentText.trailingContainerText.length() > 0)
flow.append(commentText.trailingContainerText);
}
catch(IOException e) {
// can't happen here, since the TextFlow uses a StringBuilder
// TODO: Actually - this is wrong, the API is open
}
return flow;
}
/**
* Surgically modify given list to conform to min/max trailing empty lines.
*
* @param lines
*/
private void ensureTrailingLines(List<CharSequence> lines, CommentFormattingOptions options) {
if(lines.size() == 1 && options.isRetainInline())
return; // do nothing for inline/same-line comments
int nbrEmpty = numberOfTrailingEmptyLines(lines);
int minEmptyTrailing = options.getMinEmptyTrailing();
int maxEmptyTrailing = options.getMaxEmptyTrailing();
// ensure min number of empty lines
while(nbrEmpty < minEmptyTrailing) {
lines.add("");
nbrEmpty++;
}
// ensure max
while(nbrEmpty > maxEmptyTrailing) {
lines.remove(lines.size() - 1);
nbrEmpty--;
}
}
public List<CharSequence> foldLine(CharSequence s, int width, ICommentFormatterAdvice advice) {
ArrayList<CharSequence> result = Lists.newArrayList();
int originalIndentation = CharSequences.indexOfNonWhitespace(s, 0);
if(originalIndentation == -1) {
result.add(""); // protect against all whitespace (should not happen).
}
else if(width <= 0 || s.length() < width) {
result.add(s);
}
else {
CharSequence template = protectedRegionsTemplate(s, advice);
while(s != null && s.length() > width) {
// chop of first part
// int end = CharSequences.lastIndexOfWhitespace(s, width);
int end = CharSequences.lastIndexOfWhitespace(template, width);
if(end < 0) {
// could not make first part of string comply with width, make it as short as possible
// end = CharSequences.indexOfWhitespace(s, width);
end = CharSequences.indexOfWhitespace(template, width);
}
if(end == -1 || end == s.length() - 1) {
// have to accept all of it
result.add(s);
s = null;
}
else {
CharSequence t = s.subSequence(0, end);
t = CharSequences.trim(t, 0, end);
result.add(t);
// adjust s and template
s = s.subSequence(end, s.length());
template = template.subSequence(end, template.length());
// if just spaces, or empty stuff, do not make it into a line
if(CharSequences.isEmpty(s)) {
s = null;
}
else {
s = CharSequences.trim(s, end, 0);
template = CharSequences.trim(template, end, 0);
// indent the hacked off part to same position as original
// (note that spaces function returns empty sequence if count < 0)
s = CharSequences.concatenate(CharSequences.spaces(originalIndentation), s);
template = CharSequences.concatenate(CharSequences.spaces(originalIndentation), template);
}
}
}
// add the conforming trailing part (if there was one)
if(s != null)
result.add(s);
}
return result;
}
public void foldLines(List<CharSequence> lines, ICommentFormatterAdvice advice, int width) {
for(int i = 0; i < lines.size(); i++) {
CharSequence s = lines.get(i);
if(s.length() > width) {
// shorten if banner, else fold
if(isBanner(s)) {
switch(advice.getBannerAdvice()) {
case Truncate:
lines.set(i, s.subSequence(0, width + 1));
break;
case Fold:
lines.set(i, s.subSequence(0, width + 1));
lines.add(i + 1, s.subSequence(width, s.length()));
break;
case NoWrap:
// keep it
break;
}
}
else {
List<CharSequence> folded = foldLine(s, width, advice);
lines.set(i, folded.get(0));
lines.addAll(i + 1, folded.subList(1, folded.size()));
i += folded.size() - 1; // skip lines that are already folded
}
}
}
}
public TextFlow formatComment(CharSequence s, ICommentContainerInformation in, ICommentContainerInformation out,
CommentFormattingOptions options, IFormattingContext context) {
String lineSeparator = context.getLineSeparatorInformation().getLineSeparator();
return formatComment(separateCommentFromContainer(s, out, lineSeparator), out, options, context);
}
public TextFlow formatComment(CommentText commentText, ICommentContainerInformation out,
CommentFormattingOptions options, IFormattingContext context) {
foldLines(
commentText.lines, options.advice, options.getMaxWidth() - out.getMarkerColumnWidth() - out.getLeftMargin());
TextFlow result = emit(commentText, out, options, context);
return result;
}
/**
* Returns true if the line has length 5 or longer and {@link #isHomogeneous(CharSequence)} and the sequence
* of characters is not a letter or digit. This is intended
* to answer true for lines that can be truncated instead of wrapped when they exceed the width.
* It also enables extending such lines to the max allowed width.
*
* The number 5 is selected since certain comment processors (RDoc is one) use '---' and '+++' and similar instructions
* to indicate processing instructions - and such should never be extended.
*
* @param s
* @return
*/
protected boolean isBanner(CharSequence s) {
return s.length() > 4 && !Character.isLetterOrDigit(s.charAt(0)) && CharSequences.isHomogeneous(s);
}
protected int numberOfTrailingEmptyLines(List<CharSequence> lines) {
int count = 0;
for(int i = lines.size() - 1; i >= 0; i--) {
if(lines.get(i).length() == 0)
count++;
else
break;
}
return count;
}
private CharSequence protectedRegionsTemplate(CharSequence s, ICommentFormatterAdvice advice) {
if(!advice.isDoubleDollarVerbatim())
return s;
boolean inProtectedArea = false;
StringBuilder builder = new StringBuilder(s.length());
for(int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if(inProtectedArea && c != '$')
builder.append("x");
else {
if(c == '$') {
if(inProtectedArea)
inProtectedArea = false;
else {
if(CharSequences.indexOf(s, "$", i + 1) > 0)
inProtectedArea = true;
}
}
builder.append(c);
}
}
return builder;
}
/**
* Separates the comment text from its surrounding container. The result is a sequence of trimmed text lines.
* The text does not contain any of the comment start/repeat/end tokens, and the text is relative to
* the comment's natural margin.
*
* @return {@link CommentText} with trimmed lines and any trailing text after endToken
*/
public CommentText separateCommentFromContainer(CharSequence s, ICommentContainerInformation in,
String lineSeparator) {
// separate the comment between start-end (if any) from any trailing stuff
final String endToken = in.getEndToken();
List<CharSequence> lines = null;
CharSequence trailingText = "";
if(in.isSLStyle()) {
lines = CharSequences.split(s, lineSeparator, false);
trailingText = lineSeparator;
}
else if(endToken.length() > 0) {
// ML with end token, and optional trailing text.
List<CharSequence> bodyAndTrailing = CharSequences.split(s, endToken);
s = bodyAndTrailing.get(0);
trailingText = bodyAndTrailing.get(1);
lines = CharSequences.split(s, lineSeparator, true);
}
else {
// ML without end token included in text, no trailing
lines = CharSequences.split(s, lineSeparator, false);
}
// lineSeparators are removed at this point
// parse and trim first line (removes startToken as well)
lines.set(0, trim(lines.get(0), in.getStartToken(), in));
for(int i = 1; i < lines.size(); i++)
lines.set(i, trim(lines.get(i), in.getRepeatingToken(), in));
return new CommentText(lines, trailingText);
}
/**
* Special trim left that is aware of a) the hanging indent position b) the position of the repeating sequence.
* Will trim to hanging, or to the position after the repeating if everything left of repeating is space.
* <p>
* <b>Example</b><br/>
* Given hanging indent is 2, and '_' denotes space, all of these are trimmed to <code>"a "</code>
* <ul>
* <li><code>"_*a "</code></li>
* <li><code>"__a "</code></li>
* <li><code>"_a "</code></li>
* <li><code>"a "</code></li>
* <li><code>"_______*a " -> "a "</code></li>
* </ul>
* </p>
*
* @param s
* @return
*/
protected CharSequence trim(CharSequence s, String expectedToken, ICommentContainerInformation in) {
if(s.length() < 1)
return s;
int start = 0;
int hangsOn = in.getLeftPosition() + in.getMarkerColumnWidth();
// find first non ws char
int limit = Math.min(hangsOn, s.length());
for(; start < limit; start++) {
if(!Character.isWhitespace(s.charAt(start)))
break;
}
// String repeating = in.getRepeatingToken();
int expectedTokenLength = expectedToken.length();
int repeatIdx = CharSequences.indexOf(s, expectedToken, start);
HAS_REPEAT: if(repeatIdx != -1) {
for(int i = start; i < repeatIdx; i++)
if(!Character.isWhitespace(s.charAt(i)))
break HAS_REPEAT;
// start is first char after repeating
// e.g. 4 in " *_"
start = repeatIdx + expectedTokenLength;
}
limit = s.length();
return CharSequences.trim(s.subSequence(start, limit), in.getLeftMargin(), limit);
}
}