/**
* Copyright (c) 2012 Cloudsmith Inc. and other contributors, as listed below.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Cloudsmith
*
*/
package org.cloudsmith.xtext.serializer;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.cloudsmith.xtext.dommodel.formatter.comments.ICommentConfiguration;
import org.cloudsmith.xtext.dommodel.formatter.comments.ICommentConfiguration.CommentType;
import org.cloudsmith.xtext.textflow.CharSequences;
import org.eclipse.emf.common.util.TreeIterator;
import org.eclipse.emf.ecore.EObject;
import org.eclipse.xtext.Keyword;
import org.eclipse.xtext.grammaranalysis.impl.GrammarElementTitleSwitch;
import org.eclipse.xtext.nodemodel.ICompositeNode;
import org.eclipse.xtext.nodemodel.ILeafNode;
import org.eclipse.xtext.nodemodel.INode;
import org.eclipse.xtext.parsetree.reconstr.impl.NodeIterator;
import org.eclipse.xtext.parsetree.reconstr.impl.TokenUtil;
import org.eclipse.xtext.util.Pair;
import org.eclipse.xtext.util.Triple;
import org.eclipse.xtext.util.Tuples;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.inject.Inject;
import com.google.inject.Provider;
/**
* <p>
* A Comment associator that does the following:
* <ul>
* <li>Scans the given input INode for semantic positions and comment sequences</li>
* <li>Makes lexical default associations based on comment type, position and linebreaks</li>
* <li>Refines the decisions based on association with grammar punctuation.</li>
* <li>Decodes left / right associations, as well as associations to left -1 position (for element before punctuation). Supporting right + 1 is left
* as an exercise.</li>
* <li>Transforms the decisions into a series of {@code Predicate<Triple<INode, INode, INode>>} instances that expect to be applied to the nodes
* 'preceding', 'from', 'to' as described by {@link ICommentReconcilement}.</li>
* <li>Produces an {@link ICommentReconcilement} based on the predicates</li>
* </ul>
* </p>
* <p>
* The left/right associations of a comment sequence CS is done with these rules:
* <ul>
* <li>A CS that ends with a SL comment associates to the left, unless the CS only has whitespace on its left.</li>
* <li>A CS that ends with a NL (for grammars where comment terminal contains trailing WS/NL) is handled as a CS ending with SL.</li>
* <li>All other CS associate to the right</li>
* <li>A left association to a <i>list separator</i> associates to the element preceding the separator</li>
* <li>A right association to a <i>right punctuation</i> is changed to a left association</li>
* </ul>
* </p>
* <p>
* The implementation is generic, but makes assumptions about the grammar:
* <ul>
* <li>All comma keywords are interpreted as <i>list separators</i> and cause left association to skip to the preceding element.</li>
* <li>A set of right punctuation keywords <code>,;)}]</code> cause a right association to be changed to left.</li>
* </ul>
* </p>
* <p>
* A specialized implementation can easily override these. Customization can also easily be made by overriding the method
* {@link #computeLanguageSpecificAssociations(CommentSemanticSequence)}, which is called last, thus allowing any association already made to be
* changed.
* </p>
*/
public class CommentAssociator {
/**
 * Common base for the two kinds of entries held by a {@link CommentSemanticSequence}: a run of
 * comments ({@link Comments}) or a semantic location ({@link Semantic}: EObject and first/last INodes).
 */
public abstract static class CommentAssociationEntry {
    /**
     * @return true if this entry describes comments, false if it describes a semantic location
     */
    abstract boolean isComments();

    /**
     * @return the opposite of {@link #isComments()}
     */
    boolean isSemantic() {
        if(isComments())
            return false;
        return true;
    }
}
/**
 * A run of comment nodes together with the association decided for it: a direction (left or right),
 * a "first on line" flag, and a skip count (how many semantic positions to step over in the chosen
 * direction).
 * <p>
 * A new instance is right associated with a skip count of 0.
 * </p>
 * TODO: Can be simplified and made to check allowed invariants as left(0), left(1), right(0) are the only
 * combinations allowed. Note that supporting bigger skip counts, or a right skip, means that the hidden
 * token sequencer needs to maintain a larger peep hole.
 */
public static class Comments extends CommentAssociationEntry {
    List<INode> commentNodes;

    boolean rightAssociation = true;

    boolean firstOnLine;

    int skipCount;

    /**
     * @param commentNodes
     *            the comment nodes of this run; the list is copied, so later changes to the given
     *            list do not affect this instance
     */
    public Comments(List<INode> commentNodes) {
        this.commentNodes = Lists.newArrayList(commentNodes);
    }

    /**
     * @return the list of comment nodes held by this instance (the internal list, not a copy)
     */
    public List<INode> getComments() {
        return this.commentNodes;
    }

    public int getSkipCount() {
        return this.skipCount;
    }

    @Override
    public boolean isComments() {
        return true;
    }

    public boolean isEmpty() {
        return this.commentNodes.isEmpty();
    }

    public boolean isFirstOnLine() {
        return this.firstOnLine;
    }

    public boolean isLeft() {
        return !this.rightAssociation;
    }

    public boolean isRight() {
        return this.rightAssociation;
    }

    public void setDirectionLeft() {
        this.rightAssociation = false;
    }

    public void setDirectionRight() {
        this.rightAssociation = true;
    }

    /**
     * Sets the "first on line" flag.
     *
     * @return this instance, to allow chaining
     */
    public Comments setFirstOnLine(boolean first) {
        this.firstOnLine = first;
        return this;
    }

    public void setSkipCount(int skipCount) {
        this.skipCount = skipCount;
    }
}
/**
 * An ordered list of {@link CommentAssociationEntry} (comment runs interleaved with semantic positions),
 * with the housekeeping and lookup logic needed to build and query it.
 */
public static class CommentSemanticSequence implements Iterable<CommentAssociationEntry> {
    private List<CommentAssociationEntry> sequence = Lists.newArrayList();

    /**
     * Appends the given comment nodes as one or two {@link Comments} entries.
     * <p>
     * An empty list is ignored. Otherwise the column of every subsequent comment is compared with the
     * column of the <i>first</i> comment; at the first comment that starts further to the left, the run
     * is split in two and the second part is flagged as "first on line". At most one split is made.
     * </p>
     *
     * @param comments
     *            the comment nodes to add (copied by the created entries)
     */
    public void addComments(List<INode> comments) {
        // empty comment sequences are simply ignored to reduce work later
        if(comments.isEmpty())
            return;

        final int total = comments.size();
        final int firstColumn = posOnLine(comments.get(0));

        // look for the first comment that starts to the left of the first comment of the run
        int splitAt = -1;
        for(int idx = 1; idx < total && splitAt < 0; idx++)
            if(posOnLine(comments.get(idx)) < firstColumn)
                splitAt = idx;

        if(splitAt < 0) {
            sequence.add(new Comments(comments));
            return;
        }
        sequence.add(new Comments(comments.subList(0, splitAt)));
        Comments tail = new Comments(comments.subList(splitAt, total));
        sequence.add(tail.setFirstOnLine(true));
    }

    /**
     * Appends a semantic position, unless the last entry already is a semantic position with the
     * same (identical) semantic object and first node.
     * <p>
     * Such a duplicate typically occurs at the start, as reverse scanning is needed to find a start with
     * semantic information - that point may differ from the given start, but is typically the same.
     * </p>
     */
    public void addSemantic(EObject semantic, INode first, INode last) {
        CommentAssociationEntry tail = sequence.isEmpty()
                ? null
                : sequence.get(sequence.size() - 1);
        if(tail != null && tail.isSemantic()) {
            Semantic previous = (Semantic) tail;
            boolean duplicate = previous.getFirstNode() == first && previous.getSemantic() == semantic;
            if(duplicate)
                return;
        }
        sequence.add(new Semantic(semantic, first, last));
    }

    /**
     * @return an unmodifiable view of the entries
     */
    public List<CommentAssociationEntry> getEntryList() {
        return Collections.unmodifiableList(sequence);
    }

    /**
     * Finds a semantic position after the entry at the given index.
     *
     * @param commentPos
     *            index of the entry to start after
     * @param skipCount
     *            number of semantic positions to step over before returning one (0 = the nearest)
     * @return the semantic position found, or null if the sequence ends first
     */
    public Semantic getFollowingSemantic(int commentPos, int skipCount) {
        int remaining = skipCount;
        for(int idx = commentPos + 1; idx < sequence.size(); idx++) {
            CommentAssociationEntry candidate = sequence.get(idx);
            if(!candidate.isComments()) {
                if(remaining == 0)
                    return (Semantic) candidate;
                remaining--;
            }
        }
        return null;
    }

    /**
     * Finds a semantic position before the entry at the given index.
     *
     * @param commentPos
     *            index of the entry to start before
     * @param skipCount
     *            number of semantic positions to step over before returning one (0 = the nearest)
     * @return the semantic position found, or null if the start of the sequence is reached first
     */
    public Semantic getPrecedingSemantic(int commentPos, int skipCount) {
        int remaining = skipCount;
        for(int idx = commentPos - 1; idx >= 0; idx--) {
            CommentAssociationEntry candidate = sequence.get(idx);
            if(!candidate.isComments()) {
                if(remaining == 0)
                    return (Semantic) candidate;
                remaining--;
            }
        }
        return null;
    }

    /**
     * @return an iterator over the entries that does not support removal
     */
    @Override
    public Iterator<CommentAssociationEntry> iterator() {
        Iterator<CommentAssociationEntry> entries = sequence.iterator();
        return Iterators.unmodifiableIterator(entries);
    }
}
/**
 * A "semantic position": a semantic object (a "token owner") plus the two nodes "first" and "last"
 * that delimit it (the two are identical for leaf positions).
 */
public static class Semantic extends CommentAssociationEntry {
    private final EObject semantic;

    private final INode first;

    private final INode last;

    /**
     * @param semantic
     *            the semantic object of this position
     * @param first
     *            the first node of the position
     * @param last
     *            the last node of the position
     */
    public Semantic(EObject semantic, INode first, INode last) {
        this.first = first;
        this.last = last;
        this.semantic = semantic;
    }

    public INode getFirstNode() {
        return this.first;
    }

    public INode getLastNode() {
        return this.last;
    }

    public EObject getSemantic() {
        return this.semantic;
    }

    @Override
    public boolean isComments() {
        return false;
    }
}
/**
 * A useful method for debugging - prints one line per entry of the given sequence to {@code System.out}.
 * <p>
 * A comment entry is printed with its direction ({@code ->} for right, {@code <-} for left), its skip count,
 * a {@code <fol>} marker when flagged as first on line, and the text of each comment node (newlines shown
 * as {@code \n}). A semantic entry is printed with the simple class name of its semantic object and the
 * grammar element titles of its first and last nodes.
 * </p>
 * <p>
 * Semantic entries may carry a null semantic object, a null node, or a node without grammar element;
 * these are printed as placeholders instead of failing, since a debugging aid should not throw on the
 * very sequences one wants to inspect.
 * </p>
 *
 * @param sequence
 *            the sequence to print
 */
public static void dumpCommentAssociations(CommentSemanticSequence sequence) {
    StringBuilder builder = new StringBuilder();
    // one switch instance is enough for all entries
    GrammarElementTitleSwitch titles = new GrammarElementTitleSwitch();
    for(CommentAssociationEntry e : sequence) {
        if(e.isComments()) {
            Comments c = (Comments) e;
            builder.append("Comments(").append(c.isRight()
                    ? "->"
                    : "<-").append(" ").append(c.getSkipCount());
            if(c.isFirstOnLine())
                builder.append(" <fol>");
            builder.append("): ");
            for(INode n : c.getComments())
                builder.append(n.getText().replace("\n", "\\n")).append(",");
            builder.append("\n");
        }
        else {
            Semantic semantic = (Semantic) e;
            EObject owner = semantic.getSemantic();
            builder.append("Semantic: ");
            builder.append(owner == null
                    ? "null"
                    : owner.getClass().getSimpleName()).append(", ");
            builder.append(describeGrammarElement(titles, semantic.getFirstNode()));
            builder.append(", ");
            builder.append(describeGrammarElement(titles, semantic.getLastNode()));
            builder.append("\n");
        }
    }
    System.out.print(builder.toString());
}

/**
 * Produces the title of the grammar element of the given node for debug output, or a placeholder
 * when the node or its grammar element is missing.
 */
private static String describeGrammarElement(GrammarElementTitleSwitch titles, INode node) {
    if(node == null)
        return "<no node>";
    EObject grammarElement = node.getGrammarElement();
    if(grammarElement == null)
        return "<no grammar element>";
    return String.valueOf(titles.doSwitch(grammarElement));
}
/**
 * Tells whether the given node starts at the first non whitespace position of its line, judged on
 * the text of the node's root node and the node's total offset.
 *
 * @param n
 *            the node to check
 * @return true if the first non whitespace position on the node's line equals the node's total offset
 * @throws IllegalArgumentException
 *             if the given node is null
 */
protected static boolean isFirstOnLine(INode n) {
    if(n == null)
        throw new IllegalArgumentException("given node is null");

    final String text = n.getRootNode().getText();
    final int nodeStart = n.getTotalOffset();
    // the line starts right after the closest preceding newline (never before position 0)
    final int precedingNewline = CharSequences.lastIndexOf(text, "\n", nodeStart - 1);
    final int lineStart = Math.max(0, precedingNewline + 1);
    final int firstNonWhitespace = CharSequences.indexOfNonWhitespace(text, lineStart);
    return firstNonWhitespace == nodeStart;
}
/**
 * Computes the position of the given node on its line, as the distance between the node's total
 * offset and the start of its line in the text of the node's root node.
 *
 * @param n
 *            the node to measure
 * @return the number of characters between the start of the line and the node's total offset
 * @throws IllegalArgumentException
 *             if the given node is null
 */
protected static int posOnLine(INode n) {
    if(n == null)
        throw new IllegalArgumentException("given node is null");

    final String text = n.getRootNode().getText();
    final int nodeStart = n.getTotalOffset();
    // the line starts right after the closest preceding newline (never before position 0)
    final int precedingNewline = CharSequences.lastIndexOf(text, "\n", nodeStart - 1);
    final int lineStart = Math.max(0, precedingNewline + 1);
    return nodeStart - lineStart;
}
/** Answers questions about nodes/tokens (is token, is comment, token owner). */
protected TokenUtil tokenUtil;

/** Used to classify comment nodes (e.g. as single line comments). */
protected ICommentConfiguration<CommentType> commentConfiguration;

/**
 * Creates a CommentAssociator that uses the given TokenUtil to answer questions about nodes/tokens,
 * and an ICommentConfiguration for more detailed information about comments.
 * <p>
 * The configuration is obtained from the given provider once, at construction.
 * </p>
 *
 * @param tokenUtil
 *            the token utility to use
 * @param commentConfigurationProvider
 *            the provider asked (once) for the comment configuration
 */
@Inject
public CommentAssociator(TokenUtil tokenUtil,
        Provider<ICommentConfiguration<CommentType>> commentConfigurationProvider) {
    final ICommentConfiguration<CommentType> configuration = commentConfigurationProvider.get();
    this.commentConfiguration = configuration;
    this.tokenUtil = tokenUtil;
}
/**
 * Associates comments in the given {@link ICompositeNode} with positions in a serialization sequence.
 * <p>
 * The work is done in steps: a sequence of comment runs and semantic positions is created, default
 * lexical left/right associations are computed, these are refined for punctuation and then by the
 * language specific hook, and finally the result is encoded as predicates that drive the returned
 * {@link ICommentReconcilement}.
 * </p>
 *
 * @param node
 *            the node for which a reconcilement is wanted
 * @return an ICommentReconcilement that can be used to reconcile comments in a serialization sequence.
 */
public ICommentReconcilement associateComments(ICompositeNode node) {
    final CommentSemanticSequence sequence = createSequence(node, commentConfiguration);

    // default lexical left/right association of each comment sequence
    computeLeftAssociations(sequence);
    // semantic modifications (association to punctuation), then the language specific refinement
    computePunctuationAssociations(sequence);
    computeLanguageSpecificAssociations(sequence);
    // (dumpCommentAssociations(sequence) can be called here when debugging)

    // encode as ICommentReconcilement
    final List<Pair<Predicate<Triple<INode, INode, INode>>, List<INode>>> predicates = toPredicates(sequence);

    // remember which comment run each comment node belongs to
    final Map<INode, Comments> commentNodes = Maps.newHashMap();
    for(CommentAssociationEntry entry : sequence.getEntryList()) {
        if(!entry.isComments())
            continue;
        Comments owner = (Comments) entry;
        for(INode commentNode : owner.getComments())
            commentNodes.put(commentNode, owner);
    }

    return new ICommentReconcilement() {
        @Override
        public List<INode> commentNodesFor(INode preceding, INode last, INode current) {
            Triple<INode, INode, INode> input = Tuples.create(preceding, last, current);
            List<INode> matched = Lists.newArrayList();
            for(Pair<Predicate<Triple<INode, INode, INode>>, List<INode>> candidate : predicates) {
                if(!candidate.getFirst().apply(input))
                    continue;
                matched.addAll(candidate.getSecond());
            }
            return matched;
        }

        @Override
        public String getWhitespaceBetween(INode prevCommentNode, INode node) {
            // a line break is only produced in front of the first comment of a "first on line" run
            if(prevCommentNode != null)
                return "";
            Comments owner = commentNodes.get(node);
            return owner != null && owner.isFirstOnLine()
                    ? "\n"
                    : "";
        }

        @Override
        public boolean isReconciledCommentNode(INode node) {
            return commentNodes.containsKey(node);
        }
    };
}
/**
 * Extension point for language specific refinement of the associations. This implementation does nothing;
 * it is intended that a language specific implementation overrides it to further refine the result.
 * <p>
 * {@link #associateComments(ICompositeNode)} calls this after the lexical association and the punctuation
 * association have been computed, so an override may change any association already made.
 * </p>
 *
 * @param sequence
 *            the sequence, already processed for lexical association and punctuation
 */
protected void computeLanguageSpecificAssociations(CommentSemanticSequence sequence) {
// intentionally empty - hook for subclasses
}
/**
 * Comments are by default associated with what follows (right association). This method assigns left
 * association to those comment sequences that end the line without also starting it.
 * <p>
 * A sequence is changed to left when its first comment node is not first on its line, and its last
 * comment node is either classified as a single line comment or has a text ending with a newline.
 * </p>
 *
 * @param sequence
 *            - the sequence to modify
 */
protected void computeLeftAssociations(CommentSemanticSequence sequence) {
    for(CommentAssociationEntry entry : sequence) {
        if(!entry.isComments())
            continue;

        Comments comments = (Comments) entry;
        List<INode> nodes = comments.getComments();
        if(nodes.isEmpty())
            continue;

        // a sequence that starts its line keeps the default (right) association
        if(isFirstOnLine(nodes.get(0)))
            continue;

        INode trailing = nodes.get(nodes.size() - 1);
        // SL comments by definition end the line (even if they lexically do not contain a NL in some grammar).
        // Other types of comments may end with NL, and are then treated the same way.
        boolean endsLine = commentConfiguration.classify(trailing) == CommentType.SingleLine ||
                trailing.getText().endsWith("\n");
        if(endsLine)
            comments.setDirectionLeft();
    }
}
/**
* <p>
* This implementation associates comment sequences that are left associative with the +1 preceding semantic element if the immediately preceding
* semantic element is a <i>list separator</i> (see {@link #isListSeparator(Semantic)}, and modifies a right associative sequence to left, if the
* immediately following semantic element is a right punctuation as determined by {@link #isRightPunctuation(Semantic)}.
* </p>
* <p>
* This means that given:
*
* <pre>
* foo(a, b /* 1 */)
* </pre>
*
* will get an association between the comment 1 and 'b' (as opposed to 1 and the closing ')').
* </p>
*
* @param sequence
*/
protected void computePunctuationAssociations(CommentSemanticSequence sequence) {
List<CommentAssociationEntry> entries = sequence.getEntryList();
for(int i = 0; i < entries.size(); i++) {
CommentAssociationEntry e = entries.get(i);
if(e.isComments()) {
Comments c = (Comments) e;
if(c.isLeft()) {
if(isListSeparator(sequence.getPrecedingSemantic(i, 0)))
c.setSkipCount(1); // skip the list separator
}
else {
// a right association to punctuation is changed to left
if(isRightPunctuation(sequence.getFollowingSemantic(i, 0)))
c.setDirectionLeft();
}
}
}
}
/**
 * Creates a CommentSemanticSequence for the root node. This sequence is a flattened list of
 * semantic positions and comment sequences. All comment sequences are associated with what follows (by default).
 * <p>
 * The scan first moves backwards from the root node to the closest preceding token that has a token owner, and
 * records it as the starting semantic position (if one is found). It then moves forward: comment nodes are
 * collected, and for each token the collected comments (plus any comment leafs found in front of the token's
 * first non hidden leaf) are added as a comment entry followed by a semantic position for the token. Comments
 * remaining after the scan are added followed by a closing semantic position.
 * </p>
 *
 * @param rootNode
 *            the node to scan
 * @param commentConfiguration
 *            not used by this implementation
 * @return the created sequence
 */
protected CommentSemanticSequence createSequence(ICompositeNode rootNode,
ICommentConfiguration<CommentType> commentConfiguration) {
CommentSemanticSequence sequence = new CommentSemanticSequence();
// comments seen since the last semantic position
List<INode> currentComments = Lists.newArrayList();
NodeIterator nodeIterator = new NodeIterator(rootNode);
// rewind to previous token with token owner
while(nodeIterator.hasPrevious()) {
INode node = nodeIterator.previous();
if(tokenUtil.isToken(node)) {
EObject prevEObject = tokenUtil.getTokenOwner(node);
if(prevEObject != null) {
// starting location
sequence.addSemantic(prevEObject, node, getLastLeaf(node));
break;
}
}
}
// the node visited last by the forward scan (stays null if there is nothing to scan)
INode node = null;
while(nodeIterator.hasNext()) {
node = nodeIterator.next();
// collect comments...
if(tokenUtil.isCommentNode(node)) {
currentComments.add(node);
continue;
}
// skip uninteresting...
if(!tokenUtil.isToken(node))
continue;
// looking at something possibly containing leading comments
// NOTE(review): nonHidden stays null if the token has no non hidden leaf - the semantic position
// is then added with a null first node; confirm that this cannot happen for a token.
ILeafNode nonHidden = null;
for(ILeafNode leaf : node.getLeafNodes()) {
if(!leaf.isHidden()) {
nonHidden = leaf;
break;
}
else if(tokenUtil.isCommentNode(leaf)) {
currentComments.add(leaf);
}
// else it is whitespace... which is ignored
}
// no need to search inside node, since its leading comments and first non hidden are now known.
nodeIterator.prune();
// add comment record (the list is copied by the sequence, so it is safe to clear it afterwards)
sequence.addComments(currentComments);
currentComments.clear();
// add the comments sequence breaking location
// NOTE(review): the token owner is not null-checked here (unlike in the rewind loop above)
sequence.addSemantic(tokenUtil.getTokenOwner(node), nonHidden, getLastLeaf(node));
if(node.getOffset() > rootNode.getOffset() + rootNode.getLength()) {
// found next EObject outside rootNode
break;
}
}
// deal with trailing comments
if(!currentComments.isEmpty()) {
sequence.addComments(currentComments);
// here, node is the node visited last by the scan above (which may be a comment node)
EObject last = tokenUtil.getTokenOwner(node);
if(last == null)
// fall back to the first directly associated semantic element found in the root node (may be null)
last = getEObjectForRemainingComments(rootNode);
sequence.addSemantic(last, node, getLastLeaf(node));
}
return sequence;
}
/**
 * Finds the semantic element to use for comments remaining at the end of a scan: the semantic element
 * of the first node, in tree iteration order of the given root node, that has a direct semantic element.
 *
 * @param rootNode
 *            the node whose tree is searched
 * @return the semantic element found, or null if no visited node has a direct semantic element
 */
private EObject getEObjectForRemainingComments(ICompositeNode rootNode) {
    for(TreeIterator<INode> nodes = rootNode.getAsTreeIterable().iterator(); nodes.hasNext();) {
        INode candidate = nodes.next();
        if(!candidate.hasDirectSemanticElement())
            continue;
        return candidate.getSemanticElement();
    }
    return null;
}
/**
 * Finds the first non hidden leaf of the given node.
 *
 * @param node
 *            the node whose leafs are searched
 * @return the first leaf that is not hidden, or the given node itself if it has no such leaf
 */
private INode getFirstLeaf(INode node) {
    for(ILeafNode candidate : node.getLeafNodes()) {
        if(candidate.isHidden())
            continue;
        return candidate;
    }
    return node;
}
/**
 * Descends along the last child of each composite node, starting at the given node, until a node
 * that is not a composite is reached.
 *
 * @param node
 *            the node to start at (may be null)
 * @return the node reached; this is the given node itself if it is not a composite, and null if the
 *         given node is null or a composite without a last child is encountered
 */
private INode getLastLeaf(INode node) {
    INode current = node;
    while(current instanceof ICompositeNode) {
        ICompositeNode composite = (ICompositeNode) current;
        current = composite.getLastChild();
    }
    return current;
}
/**
 * Produces a predicate for reconciliation of a left associated comment sequence.
 * <p>
 * The predicate only looks at the 'from' (second) node of its input; 'preceding' (first) and 'to' (third)
 * are ignored. It matches when the token owner of 'from' is the semantic object of the given position,
 * and 'from' has the same grammar element as the last node of the given position.
 * </p>
 *
 * @param semantic
 *            the semantic position the comments are associated with; it is dereferenced each time the
 *            predicate is applied and must not be null
 * @return the predicate
 */
protected Predicate<Triple<INode, INode, INode>> getPredicateLeft(final Semantic semantic) {
    return new Predicate<Triple<INode, INode, INode>>() {
        @Override
        public boolean apply(Triple<INode, INode, INode> o) {
            final INode from = o.getSecond();
            return tokenUtil.getTokenOwner(from) == semantic.getSemantic() &&
                    from.getGrammarElement() == semantic.getLastNode().getGrammarElement();
        }
    };
}
/**
 * Produces a predicate for reconciliation of a left-skip-one associated comment sequence, i.e. for the
 * situation: {@code <semantic>} (preceding/first), {@code <punctuation>} (from/second), {@code <comment>},
 * {@code <unknown>} (to/third).
 * <p>
 * The predicate matches when the token owner of 'preceding' is the semantic object of the given position,
 * 'preceding' has the same grammar element as the last node of the given position, and 'from' is a list
 * separator as determined by {@link #isListSeparator(INode)}. The 'to' (third) node is ignored.
 * </p>
 *
 * @param semantic
 *            the semantic position the comments are associated with; it is dereferenced each time the
 *            predicate is applied and must not be null
 * @param punctuation
 *            the position of the skipped punctuation; not used by this implementation
 * @return the predicate
 */
protected Predicate<Triple<INode, INode, INode>> getPredicateLeftSkipOne(final Semantic semantic,
        final Semantic punctuation) {
    return new Predicate<Triple<INode, INode, INode>>() {
        @Override
        public boolean apply(Triple<INode, INode, INode> o) {
            final INode preceding = o.getFirst();
            final INode from = o.getSecond();
            return tokenUtil.getTokenOwner(preceding) == semantic.getSemantic() &&
                    preceding.getGrammarElement() == semantic.getLastNode().getGrammarElement() &&
                    isListSeparator(from);
        }
    };
}
/**
 * Produces a predicate for reconciliation of a right associated comment sequence.
 * <p>
 * The predicate only looks at the 'to' (third) node of its input; 'preceding' (first) and 'from' (second)
 * are ignored. It matches when the token owner of 'to' is the semantic object of the given position, and
 * the first non hidden leaf of 'to' has the same grammar element as the first node of the given position.
 * </p>
 *
 * @param semantic
 *            the semantic position the comments are associated with; it is dereferenced each time the
 *            predicate is applied and must not be null
 * @return the predicate
 */
protected Predicate<Triple<INode, INode, INode>> getPredicateRight(final Semantic semantic) {
    return new Predicate<Triple<INode, INode, INode>>() {
        @Override
        public boolean apply(Triple<INode, INode, INode> o) {
            final INode to = o.getThird();
            return tokenUtil.getTokenOwner(to) == semantic.getSemantic() &&
                    getFirstLeaf(to).getGrammarElement() == semantic.getFirstNode().getGrammarElement();
        }
    };
}
/**
 * This implementation returns true for a node whose grammar element is a Keyword and whose text is ','.
 * A specialized implementation may do more elaborate checks.
 *
 * @param node
 *            the node to check (may be null)
 * @return true if the node is a list separator, false otherwise (including for a null node)
 */
protected boolean isListSeparator(INode node) {
    if(node == null)
        return false;
    EObject grammarElement = node.getGrammarElement();
    if(!(grammarElement instanceof Keyword))
        return false; // also covers a missing (null) grammar element
    return ",".equals(node.getText());
}
/**
 * Tells whether the given semantic position is a list separator, by applying
 * {@link #isListSeparator(INode)} to its first node. A specialized implementation may do more elaborate checks.
 *
 * @param semantic
 *            the semantic position to check (may be null)
 * @return true if the position is a list separator, false otherwise (including for a null position)
 */
protected boolean isListSeparator(Semantic semantic) {
    // only the first node is checked: first and last are the same in the case where this is true
    return semantic != null && isListSeparator(semantic.getFirstNode());
}
/**
 * This implementation returns true for a Keyword being exactly one of the characters ',' ';' '}' ']' ')'.
 * A specialized implementation may do more elaborate checks.
 * <p>
 * Only single character keywords qualify; an empty text, or a multi character keyword that happens to be
 * a run of these characters (such as "])"), is not a right punctuation.
 * </p>
 *
 * @param semantic
 *            the semantic position to check (may be null)
 * @return true if the first node of the position is a right punctuation keyword, false otherwise
 *         (including for a null position, a null node, or a node without grammar element)
 */
protected boolean isRightPunctuation(Semantic semantic) {
    if(semantic == null)
        return false;
    INode node = semantic.getFirstNode(); // first and last are the same when the rest is true
    if(node == null)
        return false;
    if(!(node.getGrammarElement() instanceof Keyword))
        return false; // also covers a missing (null) grammar element
    String text = node.getText();
    // compare one character: String.contains would also accept "" and substrings such as "])"
    return text != null && text.length() == 1 && ",;}])".indexOf(text.charAt(0)) >= 0;
}
/**
 * Encodes the associations of the given sequence as a list of (predicate, comment nodes) pairs, one pair
 * per comment entry, in sequence order.
 * <p>
 * A right associated entry gets a predicate from {@link #getPredicateRight(Semantic)} for the following
 * semantic position. A left associated entry gets a predicate from {@link #getPredicateLeft(Semantic)} for
 * the preceding semantic position when its skip count is 0, and from
 * {@link #getPredicateLeftSkipOne(Semantic, Semantic)} when its skip count is 1.
 * </p>
 *
 * @param sequence
 *            the sequence to encode
 * @return the list of predicates paired with the comment nodes they select
 * @throws UnsupportedOperationException
 *             for a right associated entry with a skip count above 0, and for a left associated entry
 *             with a skip count other than 0 or 1
 */
protected List<Pair<Predicate<Triple<INode, INode, INode>>, List<INode>>> toPredicates(
        CommentSemanticSequence sequence) {
    final List<CommentAssociationEntry> entries = sequence.getEntryList();
    final List<Pair<Predicate<Triple<INode, INode, INode>>, List<INode>>> result = Lists.newArrayList();

    for(int pos = 0; pos < entries.size(); pos++) {
        CommentAssociationEntry entry = entries.get(pos);
        if(!entry.isComments())
            continue;

        Comments comments = (Comments) entry;
        Predicate<Triple<INode, INode, INode>> predicate;
        if(comments.isRight()) {
            if(comments.getSkipCount() > 0)
                throw new UnsupportedOperationException("right associated comment and skip count not supported");
            predicate = getPredicateRight(sequence.getFollowingSemantic(pos, 0));
        }
        else {
            switch(comments.getSkipCount()) {
                case 0:
                    predicate = getPredicateLeft(sequence.getPrecedingSemantic(pos, 0));
                    break;
                case 1:
                    predicate = getPredicateLeftSkipOne(
                        sequence.getPrecedingSemantic(pos, 1), sequence.getPrecedingSemantic(pos, 0));
                    break;
                default:
                    throw new UnsupportedOperationException(
                        "left associated comment and skip count > 1 not supported");
            }
        }
        result.add(Tuples.create(predicate, comments.getComments()));
    }
    return result;
}
}