package com.aptana.rdt.internal.parser.warnings;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.Platform;
import org.jruby.ast.Node;
import org.rubypeople.rdt.core.IRubyProject;
import org.rubypeople.rdt.core.compiler.BuildContext;
import org.rubypeople.rdt.core.compiler.CategorizedProblem;
import org.rubypeople.rdt.core.compiler.CompilationParticipant;
import org.rubypeople.rdt.internal.core.parser.InOrderVisitor;
import org.rubypeople.rdt.internal.core.parser.Warning;
import com.aptana.rdt.AptanaRDTPlugin;
import com.aptana.rdt.IProblem;
/**
* Traverse ASTs. If they have a large enough mass (# of children), it generates as hash based on the subtree and
* attaches the node object to the hash.
*
* @author cwilliams
*/
/**
 * Flay-style duplicate code detection. Traverses the ASTs of every file in a
 * batch build; each subtree whose mass (total node count) reaches a
 * configurable threshold is hashed by its structural s-expression, and
 * subtrees sharing a hash are reported as duplicate-code warnings on their
 * files.
 *
 * @author cwilliams
 */
public class FlayClone extends CompilationParticipant
{

	/** Fallback minimum subtree mass when no preference is set. */
	private static final int DEFAULT_THRESHOLD = 20;

	/** Minimum subtree mass for a subtree to be considered at all. */
	private int massThreshold = DEFAULT_THRESHOLD;

	/** Structural hash -> all subtrees sharing that structure. Rebuilt per batch build. */
	private HashMap<Integer, Set<Node>> hashes;

	private boolean doFuzzy; // TODO Get from some user pref

	/** Total duplication score for the current batch (lower is better). */
	private int total;

	/** Structural hash -> whether every subtree in the group has an identical s-expression. */
	private HashMap<Integer, Boolean> identical;

	/** Structural hash -> weighted score contributed by that duplicate group. */
	private HashMap<Integer, Integer> masses;

	@Override
	public void buildStarting(BuildContext[] files, boolean isBatch, IProgressMonitor monitor)
	{
		// Clone detection is cross-file, so it only makes sense on a full batch build.
		if (!isBatch)
			return;
		if (files == null || files.length == 0)
			return;
		// Reset ALL per-build state so repeated builds don't accumulate stale results.
		hashes = new HashMap<Integer, Set<Node>>();
		identical = new HashMap<Integer, Boolean>();
		masses = new HashMap<Integer, Integer>();
		total = 0; // FIX: was never reset, so the score leaked across successive builds
		massThreshold = getMassThreshold();
		for (BuildContext buildContext : files)
		{
			if (buildContext == null || buildContext.getAST() == null)
				continue;
			buildContext.getAST().accept(new Visitor());
		}
		if (doFuzzy)
		{
			processFuzzySimilarities();
		}
		analyze(files);
	}

	/**
	 * Reads the minimum subtree mass from the preference service, falling back
	 * to {@link #DEFAULT_THRESHOLD}.
	 */
	private int getMassThreshold()
	{
		return Platform.getPreferencesService().getInt(AptanaRDTPlugin.PLUGIN_ID,
				AptanaRDTPlugin.DUPLICATE_CODE_MASS_THRESHOLD, DEFAULT_THRESHOLD, null);
	}

	/**
	 * Scores every surviving duplicate group (flay scoring: group mass times
	 * copy count, multiplied again when the copies are identical) and records a
	 * problem on each file involved in a group above the threshold.
	 *
	 * @param files all files in the batch, used to map nodes back to their build context
	 */
	private void analyze(BuildContext[] files)
	{
		prune();
		for (Map.Entry<Integer, Set<Node>> entry : hashes.entrySet())
		{
			Integer hash = entry.getKey();
			Collection<Node> nodes = entry.getValue();
			Node first = nodes.iterator().next();
			boolean isIdentical = true;
			for (Node node : nodes)
			{
				if (!structurallyEqual(node, first)) // FIXME Have to check values of things (like names of
				// vars/types/methods, values of literals, etc)
				{
					isIdentical = false;
					break;
				}
			}
			identical.put(hash, isIdentical);
			int mass = mass(first) * nodes.size();
			if (isIdentical)
				mass *= nodes.size(); // identical copies are weighted more heavily than merely similar ones
			masses.put(hash, mass);
			total += mass;
		}
		// For any masses above the threshold, generate warnings for the related nodes.
		Map<BuildContext, List<CategorizedProblem>> contextsToProblems = new HashMap<BuildContext, List<CategorizedProblem>>();
		for (Map.Entry<Integer, Integer> entry : masses.entrySet())
		{
			if (entry.getValue() <= massThreshold) // FIXME Should probably be a different #!
				continue;
			Set<Node> nodes = hashes.get(entry.getKey());
			// FIX: only claim "Identical" when the group really is; structural-only
			// matches are reported as "Similar" instead.
			boolean isIdentical = Boolean.TRUE.equals(identical.get(entry.getKey()));
			String prefix = isIdentical ? "Identical code structure with: " : "Similar code structure with: ";
			for (Node node : nodes)
			{
				BuildContext context = findContext(files, node);
				if (context == null)
					continue;
				CategorizedProblem problem = new Warning(node.getPosition(), prefix
						+ otherNodesPositions(node, nodes), IProblem.DuplicateCodeStructure);
				List<CategorizedProblem> problems = contextsToProblems.get(context);
				if (problems == null)
				{
					problems = new ArrayList<CategorizedProblem>();
					contextsToProblems.put(context, problems);
				}
				problems.add(problem);
			}
		}
		for (Map.Entry<BuildContext, List<CategorizedProblem>> entry : contextsToProblems.entrySet())
		{
			List<CategorizedProblem> problems = entry.getValue();
			entry.getKey().recordNewProblems(problems.toArray(new CategorizedProblem[problems.size()]));
		}
	}

	/**
	 * Builds a comma-separated list of the source positions of every node in
	 * the duplicate group other than {@code node} itself.
	 */
	private String otherNodesPositions(Node node, Collection<Node> nodes)
	{
		StringBuilder builder = new StringBuilder();
		for (Node other : nodes)
		{
			if (other.getPosition().equals(node.getPosition()))
				continue; // skip the node being reported on
			builder.append(other.getPosition().toString()).append(", ");
		}
		if (builder.length() > 0)
		{
			builder.delete(builder.length() - 2, builder.length()); // drop trailing ", "
		}
		// FIX: removed leftover debug print ("WTF?!") that fired on empty groups
		return builder.toString();
	}

	/**
	 * Finds the build context whose file path matches the file recorded in the
	 * node's source position.
	 *
	 * @return the matching context, or {@code null} if none matches
	 */
	private BuildContext findContext(BuildContext[] files, Node node)
	{
		String fileName = node.getPosition().getFile();
		for (BuildContext buildContext : files)
		{
			if (buildContext == null)
				continue; // buildStarting tolerates null entries; be consistent here
			IPath path = buildContext.getFile().getFullPath();
			if (path.toPortableString().equals(fileName))
				return buildContext;
		}
		return null;
	}

	/**
	 * Structural equality: two subtrees match when their s-expressions (node
	 * type skeletons) are equal. Names and literal values are NOT compared.
	 */
	private boolean structurallyEqual(Node node, Node first)
	{
		return generateSexp(node).equals(generateSexp(first));
	}

	/**
	 * Prunes trees that aren't duplicated at all (groups of one), then removes
	 * every sub-tree hash of the surviving groups so only the largest enclosing
	 * match is reported.
	 */
	private void prune()
	{
		// FIX: use a Set — the contains() check below was O(n) on a List
		Set<Integer> toRemove = new HashSet<Integer>();
		for (Map.Entry<Integer, Set<Node>> entry : hashes.entrySet())
		{
			if (entry.getValue().size() == 1)
				toRemove.add(entry.getKey());
		}
		for (Integer hash : toRemove)
		{
			hashes.remove(hash);
		}
		toRemove.clear();
		// Prune all sub-hashes so we show the largest match. Iterate a copy since
		// we mutate hashes while walking.
		Map<Integer, Set<Node>> hashesCopy = new HashMap<Integer, Set<Node>>(hashes);
		for (Map.Entry<Integer, Set<Node>> entry : hashesCopy.entrySet())
		{
			if (toRemove.contains(entry.getKey()))
				continue; // already pruned as a sub-tree of an earlier group
			for (Node node : entry.getValue())
			{
				for (Integer h : allSubHashes(node))
				{
					toRemove.add(h); // so we can shortcut our iteration of the copy
					hashes.remove(h);
				}
			}
		}
	}

	/**
	 * Collects the fuzzy hash of every proper sub-tree of {@code node}
	 * (the node itself is excluded).
	 */
	private Collection<Integer> allSubHashes(final Node node)
	{
		final Set<Integer> subHashes = new HashSet<Integer>();
		InOrderVisitor visitor = new InOrderVisitor()
		{
			@Override
			protected Object handleNode(Node visited)
			{
				// FIX: null guard — mass() proves the visitor can deliver null nodes
				if (visited != null && !visited.equals(node))
					subHashes.add(fuzzyHash(visited));
				return super.handleNode(visited);
			}
		};
		node.accept(visitor);
		return subHashes;
	}

	private void processFuzzySimilarities()
	{
		// TODO Auto-generated method stub
	}

	/**
	 * Mass of a subtree: the total number of non-null nodes it contains,
	 * including the root itself.
	 */
	private int mass(Node node)
	{
		final int[] size = new int[] { 0 };
		node.accept(new InOrderVisitor()
		{
			@Override
			protected Object handleNode(Node visited)
			{
				if (visited != null)
					size[0]++;
				return super.handleNode(visited);
			}
		});
		return size[0];
	}

	/**
	 * Structural ("fuzzy") hash of a subtree, derived from its s-expression.
	 * Two subtrees with the same node-type skeleton hash identically regardless
	 * of names or literal values.
	 */
	private int fuzzyHash(Node node)
	{
		return generateSexp(node).hashCode();
	}

	/**
	 * Generates a bracketed s-expression of node class simple names for the
	 * subtree, e.g. {@code [IfNode[CallNode][NewlineNode]]}. Only node types
	 * are captured, so the result is value-insensitive by design.
	 */
	private String generateSexp(Node node)
	{
		final StringBuilder builder = new StringBuilder();
		node.accept(new InOrderVisitor()
		{
			@Override
			public Object acceptNode(Node node)
			{
				builder.append("[");
				if (node != null)
				{
					builder.append(node.getClass().getSimpleName());
				}
				Object ret = super.acceptNode(node);
				builder.append("]");
				return ret;
			}
		});
		return builder.toString();
	}

	@Override
	public boolean isActive(IRubyProject project)
	{
		// TODO Allow changing this per-project?
		return Platform.getPreferencesService().getBoolean(AptanaRDTPlugin.PLUGIN_ID,
				AptanaRDTPlugin.DUPLICATE_CODE_CHECK_ENABLED, true, null);
	}

	/**
	 * Registers every subtree of sufficient mass under its structural hash,
	 * skipping subtrees already seen at the same source position.
	 */
	private class Visitor extends InOrderVisitor
	{
		@Override
		protected Object handleNode(Node visited)
		{
			// FIX: null guard — mass(null) would NPE on node.accept(...)
			if (visited != null && mass(visited) >= massThreshold)
			{
				Integer hash = fuzzyHash(visited);
				Set<Node> nodes = hashes.get(hash);
				if (nodes == null)
				{
					nodes = new HashSet<Node>();
					hashes.put(hash, nodes);
				}
				// Avoid adding duplicates by checking that positions are different
				for (Node node : nodes)
				{
					if (node.getPosition().equals(visited.getPosition()))
					{
						return super.handleNode(visited);
					}
				}
				nodes.add(visited);
			}
			return super.handleNode(visited);
		}
	}
}