package ch.unibe.scg.cc; import java.io.Serializable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import javax.inject.Inject; import ch.unibe.scg.cc.Protos.Clone; import ch.unibe.scg.cc.Protos.CloneOrBuilder; import ch.unibe.scg.cc.Protos.Snippet; import ch.unibe.scg.cc.Protos.Snippet.Builder; import com.google.common.collect.ComparisonChain; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; /** * Given a list of matches, extracts all fully expanded clones. * * <p> * Popular snippets are treated special. We enforce earlier in the pipeline that * all functions that contain popular snippets also appear at least once in a * match's {@link Clone#getThisSnippet()} as an input to * {@link #expandClones(Iterable)}. */ class CloneExpander implements Serializable { // TODO: Needs work to load maps on deserialization. ByteBuffer is not serializable. Neither is snippet. private static final long serialVersionUID = 1L; private static final int MAX_GAP = 10; // TODO: this should not be a constant here. // Instead, look at the snippets, they should contain their length. /** Maps from functions hashes to all of their popular snippet, and from snippets to functions. */ private final PopularSnippetMaps popularSnippetMaps; private final Comparator<Clone> cloneComparator = new CloneComparator(); @Inject CloneExpander(PopularSnippetMaps popularSnippetMaps) { this.popularSnippetMaps = popularSnippetMaps; } static class CloneComparator implements Comparator<Clone>, Serializable { private static final long serialVersionUID = 1L; @Override public int compare(Clone o1, Clone o2) { return ComparisonChain .start() .compare( o1.getThatSnippet().getFunction().asReadOnlyByteBuffer(), o2.getThatSnippet().getFunction().asReadOnlyByteBuffer()) .compare( o1.getThisSnippet().getPosition(), o2.getThisSnippet().getPosition()) .result(); } } /** * Stitch together the matches into Clones. * * @param matches * sorted first by * {@code Clone.getThatSnippet().getFunction()}, * second by * {@code Clone.getThisSnippet().getPosition()}. * {@code Clone.getThisSnippet.getFunction()} must * be constant for the entire list. * @return The matches, stitched together. */ Collection<Clone> expandClones(Iterable<Clone> matches) { matches = ImmutableList.copyOf(matches); // Give us a chance to walk thru twice. if (Iterables.isEmpty(matches)) { return Collections.emptyList(); } Clone first = matches.iterator().next(); for (Clone match : matches) { assert match.getThisSnippet().getFunction().equals(first.getThisSnippet().getFunction()); } final ImmutableList.Builder<Clone> clones = ImmutableList.builder(); final LinkedList<Clone> unprocessedMatches = Lists.newLinkedList(matches); weaveInPopularSnippets(unprocessedMatches); while (!unprocessedMatches.isEmpty()) { final Iterator<Clone> iter = unprocessedMatches.iterator(); Clone last = iter.next(); iter.remove(); Clone.Builder clone = initializeClone(last); while (iter.hasNext()) { final Clone cur = iter.next(); if (!cur.getThatSnippet().getFunction().equals(last.getThatSnippet().getFunction())) { clones.add(finalizeClone(clone)); iter.remove(); clone = initializeClone(cur); last = cur; continue; } if (Math.abs(last.getThisSnippet().getPosition() - cur.getThisSnippet().getPosition()) <= MAX_GAP) { if (Math.abs(last.getThatSnippet().getPosition() - cur.getThatSnippet().getPosition()) <= MAX_GAP) { iter.remove(); // Note that clones are sorted by thisLength, not thatLength. clone.getThisSnippetBuilder().setLength(cur.getThisSnippet().getPosition() - clone.getThisSnippetBuilder().getPosition() + 1); // Move position. Builder thatBuilder = clone.getThatSnippetBuilder(); thatBuilder.setPosition(Math.min(thatBuilder.getPosition(), cur.getThatSnippet().getPosition())); thatBuilder.setLength(Math.max(thatBuilder.getLength(), cur.getThatSnippet().getPosition() - thatBuilder.getPosition() + 1)); } } else { break; } last = cur; } clones.add(finalizeClone(clone)); } return clones.build(); } /** The unprocessed matches still lack the popular rows. Weave them in here. */ private void weaveInPopularSnippets(LinkedList<Clone> unprocessedMatches) { if (unprocessedMatches.isEmpty()) { return; } final ByteBuffer thisFunction = unprocessedMatches.get(0).getThisSnippet().getFunction() .asReadOnlyByteBuffer(); if (!popularSnippetMaps.getFunction2PopularSnippets().containsKey(thisFunction)) { // Nothing to weave in. return; } List<Clone> toBeWeavedIns = new ArrayList<>(); for (Snippet thisLocation : popularSnippetMaps.getFunction2PopularSnippets().get(thisFunction)) { assert popularSnippetMaps.getSnippet2PopularSnippets().containsKey(thisLocation.getHash().asReadOnlyByteBuffer()); for (Snippet thatLocation : popularSnippetMaps.getSnippet2PopularSnippets().get(thisLocation.getHash() .asReadOnlyByteBuffer())) { // The following three lines *must* match the test in // MakeFunction2RoughClones.java // The idea is that only clone a to b should be detected, not b // to a. if (thisFunction.compareTo(thatLocation.getFunction().asReadOnlyByteBuffer()) >= 0) { continue; } toBeWeavedIns.add(Clone.newBuilder().setThisSnippet(thisLocation) .setThatSnippet(thatLocation).build()); } } Collections.sort(toBeWeavedIns, cloneComparator); final ListIterator<Clone> target = unprocessedMatches.listIterator(); Clone cur = target.next(); for (Clone toBeWeavedIn : toBeWeavedIns) { // Forward until we leave the right function or position is too big. while (target.hasNext() && cloneComparator.compare(toBeWeavedIn, cur) > 0) { cur = target.next(); } // We're one step too far now, so walk back left, unless the // insertion point is the last element if (cloneComparator.compare(toBeWeavedIn, cur) <= 0) { target.previous(); } target.add(toBeWeavedIn); } } private Clone finalizeClone(final Clone.Builder clone) { clone.getThisSnippetBuilder().setLength(clone.getThisSnippet().getLength() + Populator.MINIMUM_LINES - 1); clone.getThatSnippetBuilder().setLength(clone.getThatSnippet().getLength() + Populator.MINIMUM_LINES - 1); return clone.build(); } private Clone.Builder initializeClone(CloneOrBuilder firstMatch) { final Clone.Builder clone = Clone.newBuilder(); Builder thisBuilder = clone.getThisSnippetBuilder(); Builder thatBuilder = clone.getThatSnippetBuilder(); thisBuilder.setFunction(firstMatch.getThisSnippet().getFunction()); thatBuilder.setFunction(firstMatch.getThatSnippet().getFunction()); thisBuilder.setPosition(firstMatch.getThisSnippet().getPosition()); thatBuilder.setPosition(firstMatch.getThatSnippet().getPosition()); thisBuilder.setLength(1); thatBuilder.setLength(1); return clone; } }