package ch.unibe.scg.cc;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.junit.Test;
import ch.unibe.scg.cc.Protos.Clone;
import ch.unibe.scg.cc.Protos.Snippet;
import ch.unibe.scg.cc.javaFrontend.JavaType1ReplacerFactory;
import ch.unibe.scg.cc.lines.StringOfLines;
import ch.unibe.scg.cc.lines.StringOfLinesFactory;
import ch.unibe.scg.cells.InMemoryStorage;
import ch.unibe.scg.cells.LocalCounterModule;
import ch.unibe.scg.cells.LocalExecutionModule;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMultimap;
import com.google.inject.Guice;
import com.google.protobuf.ByteString;
// @formatter:off
/**
* Test {@link CloneExpander}.
* @author Niko Schwarz
*/
@SuppressWarnings("javadoc")
public final class SnippetSimilarTest {
final Comparator<Clone> cloneComparator = new CloneExpander.CloneComparator();
final Comparator<Snippet> snippetLocationComparator = new SnippetLocationComparator();
static class SnippetLocationComparator implements Comparator<Snippet>, Serializable {
private static final long serialVersionUID = 1L;
@Override public int compare(Snippet o1, Snippet o2) {
return o1.getHash().asReadOnlyByteBuffer().compareTo(
o2.getHash().asReadOnlyByteBuffer());
}
}
@Test
public void testAreSimilar() throws CannotBeHashedException {
// Not fully refactored, so we easily add println statements
// to make an example for the paper.
final ReplacerNormalizer n1 = new ReplacerNormalizer(new JavaType1ReplacerFactory().get());
final ReplacerNormalizer n2 = new ReplacerNormalizer(new Type2ReplacerFactory().get());
final ShingleHasher ss = Guice.createInjector(new CCModule(new InMemoryStorage(), new LocalCounterModule()),
new LocalExecutionModule()).getInstance(ShingleHasher.class);
final StringBuilder s1 = snippet1();
final StringBuilder s2 = snippet2();
final StringBuilder s3 = snippet3();
// type-1 normalization
n1.normalize(s1);
n1.normalize(s2);
n1.normalize(s3);
// type-2 normalization
n2.normalize(s1);
n2.normalize(s2);
n2.normalize(s3);
final StringOfLinesFactory solFactory = StringOfLinesFactory.getInstance();
List<List<Snippet>> table = ImmutableList.of(
allSnippets(solFactory.make(s1.toString(), '\n'), ss, new byte[] { 1 }),
allSnippets(solFactory.make(s2.toString(), '\n'), ss, new byte[] { 2 }),
allSnippets(solFactory.make(s3.toString(), '\n'), ss, new byte[] { 3 }));
table = filterCollisions(table);
final List<List<Clone>> matches = new ArrayList<>();
for (int i = 0; i < table.size(); i++) {
matches.add(new ArrayList<Clone>());
// only look at j > i to avoid outputting the same match twice.
for (int j = i + 1; j < table.size(); j++) {
for (final Snippet e : table.get(i)) {
final int pos = Collections.binarySearch(table.get(j), e, snippetLocationComparator);
if (pos >= 0) {
matches.get(i).add(Clone.newBuilder()
.setThisSnippet(e)
.setThatSnippet(table.get(j).get(pos))
.build());
}
}
}
Collections.sort(matches.get(matches.size() - 1), cloneComparator);
}
assertThat(matches.size(), is(3));
assertThat(printString(matches.get(0)),
is("(1-2|5$bb3e@3, 1-2|13$20a4@12, 1-2|14$6f45@13, 1-3|13$20a4@9, 1-3|14$6f45@10, )"));
assertThat(printString(matches.get(1)), is("(2-3|0$9721@0, 2-3|7$a037@4, 2-3|8$fc06@5, " +
"2-3|9$03d8@6, 2-3|10$4fe1@8, 2-3|11$4fe1@8, 2-3|12$20a4@9, 2-3|13$6f45@10, " +
"2-3|14$bfea@11, 2-3|15$abb0@12, 2-3|16$d0af@13, 2-3|17$c751@14, )"));
assertThat(printString(matches.get(2)), is("()"));
// Extract matches
final CloneExpander expanderWithoutLongRows = new CloneExpander(
new PopularSnippetMaps(null) {
final private static long serialVersionUID = 1L;
@Override public ImmutableMultimap<ByteBuffer, Snippet> getFunction2PopularSnippets() {
return ImmutableMultimap.of();
}
@Override public ImmutableMultimap<ByteBuffer, Snippet> getSnippet2PopularSnippets() {
return ImmutableMultimap.of();
}
});
Collection<Clone> builtClones = expanderWithoutLongRows.expandClones(matches.get(0));
assertThat(builtClones.toString(),
is("[thisSnippet {\n function: \"\\001\"\n position: 5\n length: 14\n}\n" +
"thatSnippet {\n function: \"\\002\"\n position: 3\n length: 15\n}\n, " +
"thisSnippet {\n function: \"\\001\"\n position: 13\n length: 6\n}\n" +
"thatSnippet {\n function: \"\\003\"\n position: 9\n length: 6\n}\n]"));
builtClones = expanderWithoutLongRows.expandClones(matches.get(1));
assertThat(builtClones.toString(),
is("[thisSnippet {\n function: \"\\002\"\n position: 0\n length: 22\n}\n" +
"thatSnippet {\n function: \"\\003\"\n position: 0\n length: 19\n}\n]"));
builtClones = expanderWithoutLongRows.expandClones(matches.get(2));
assertThat(builtClones.toString(), is("[]"));
}
/** Filter to keep only snippets that collide. */
private List<List<Snippet>> filterCollisions(List<List<Snippet>> table) {
final List<Snippet> filtering = new ArrayList<>();
for (final List<Snippet> subTable : table) {
filtering.addAll(subTable);
}
Collections.sort(filtering, new Comparator<Snippet>() {
@Override public int compare(Snippet o1, Snippet o2) {
return o1.getHash().asReadOnlyByteBuffer().compareTo(
o2.getHash().asReadOnlyByteBuffer());
}});
table = ImmutableList.<List<Snippet>>of(
new ArrayList<Snippet>(), new ArrayList<Snippet>(), new ArrayList<Snippet>());
Snippet last = filtering.get(0);
Snippet next = filtering.get(1);
if (last.getHash().asReadOnlyByteBuffer().equals(next.getHash().asReadOnlyByteBuffer())) {
table.get(last.getFunction().asReadOnlyByteBuffer().get(0) - 1).add(last);
}
for (int i = 1; i < filtering.size() - 1; i++) {
final Snippet cur = next;
next = filtering.get(i + 1);
if (cur.getHash().asReadOnlyByteBuffer().equals(last.getHash().asReadOnlyByteBuffer())
|| cur.getHash().asReadOnlyByteBuffer().equals(next.getHash().asReadOnlyByteBuffer())) {
table.get(cur.getFunction().asReadOnlyByteBuffer().get(0) - 1).add(cur);
}
last = cur;
}
final Snippet cur = filtering.get(filtering.size() - 1);
if (cur.getHash().asReadOnlyByteBuffer().equals(last.getHash().asReadOnlyByteBuffer())) {
table.get(cur.getFunction().asReadOnlyByteBuffer().get(0) - 1).add(cur);
}
assertThat(filtering.size(), is(51));
assertThat(table.get(0).size() + table.get(1).size() + table.get(2).size(), is(32));
for (final List<Snippet> subTable : table) {
Collections.sort(subTable, snippetLocationComparator);
}
return table;
}
List<Snippet> allSnippets(StringOfLines sol, Hasher hasher, byte[] function)
throws CannotBeHashedException {
final List<Snippet> ret = new ArrayList<>();
for (int frameStart = 0; frameStart + Populator.MINIMUM_LINES
< sol.getNumberOfLines(); frameStart++) {
final String value = sol.getLines(frameStart, Populator.MINIMUM_LINES);
final ByteBuffer newHash = ByteBuffer.wrap(hasher.hash(value));
ret.add(Snippet.newBuilder()
.setHash(ByteString.copyFrom(newHash))
.setFunction(ByteString.copyFrom(function))
.setPosition(frameStart)
.build());
}
Collections.sort(ret, snippetLocationComparator);
return ImmutableList.copyOf(ret); // compresses ret.
}
StringBuilder snippet1() {
return new StringBuilder(" public static int log10(int x, RoundingMode mode) {\n"
+ " checkPositive(\"x\", x);\n"
+ " int logFloor = log10Floor(x);\n"
+ " int floorPow = POWERS_OF_10[logFloor];\n"
+ " int result = -1;\n"
+ " switch (mode) {\n"
+ " case UNNECESSARY:\n"
+ " checkRoundingUnnecessary(x == floorPow);\n"
+ " // fall through\n"
+ " case DOWN:\n"
+ " result = logFloor;\n"
+ " case CEILING:\n"
+ " case UP:\n"
+ " result = (x == floorPow) ? logFloor : logFloor - 1;\n"
+ " case HALF_DOWN:\n"
+ " case HALF_UP:\n"
+ " case HALF_EVEN:\n"
+ " // sqrt(10) is irrational, so log10(x) - logFloor is never exactly\n"
+ " // 0.5\n"
+ " result = (x <= HALF_POWERS_OF_10[logFloor]) ? logFloor : logFloor - 1;\n"
+ " }\n"
+ " return result;\n"
+ " }");
}
StringBuilder snippet2() {
return new StringBuilder(" public static int log10(int x, RoundingMode mode) {\n"
+ " int logFloor = log10Floor(x);\n"
+ " int floorPow = powers_Of_10[logFloor];\n"
+ " switch (mode) {\n"
+ " case UNNECESSARY:\n"
+ " checkRoundingUnnecessary(x == floorPow);\n"
+ " // fall through\n"
+ " case FLOOR:\n"
+ " case DOWN:\n"
+ " return logFloor;\n"
+ " case CEILING:\n"
+ " case UP:\n"
+ " return (x == floorPow) ? logFloor : logFloor + 1;\n"
+ " case HALF_DOWN:\n"
+ " case HALF_UP:\n"
+ " case HALF_EVEN:\n"
+ " // sqrt(10) is irrational, so log10(x) - logFloor is never exactly\n"
+ " // 0.5\n"
+ " return (x <= half_Powers_Of_10[logFloor]) ? logFloor : logFloor + 1;\n"
+ " default:\n"
+ " throw new AssertionError();\n"
+ " }\n"
+ " }");
}
StringBuilder snippet3() {
return new StringBuilder(" public static int log10(int x, RoundingMode mode) {\n"
+ " int logFloor = log10Floor(x);\n"
+ " int floorPow = powers_Of_10[logFloor];\n"
+ " switch (mode) {\n"
+ " case FLOOR:\n"
+ " case DOWN:\n"
+ " return logFloor;\n"
+ " case CEILING:\n"
+ " case UP:\n"
+ " return (x == floorPow) ? logFloor : logFloor + 1;\n"
+ " case HALF_DOWN:\n"
+ " case HALF_UP:\n"
+ " case HALF_EVEN:\n"
+ " // sqrt(10) is irrational, so log10(x) - logFloor is never exactly\n"
+ " // 0.5\n"
+ " return (x <= half_Powers_Of_10[logFloor]) ? logFloor : logFloor + 1;\n"
+ " default:\n"
+ " throw new AssertionError();\n"
+ " }\n"
+ " }");
}
private String printString(Iterable<Clone> matches) {
final StringBuilder ret = new StringBuilder("(");
for (final Clone e : matches) {
final ByteBuffer hash = e.getThisSnippet().getHash().asReadOnlyByteBuffer();
ret.append(String.format("%d-%d|%d$%02x%02x@%d, ",
e.getThisSnippet().getFunction().asReadOnlyByteBuffer().get(0),
e.getThatSnippet().getFunction().asReadOnlyByteBuffer().get(0),
e.getThisSnippet().getPosition(), hash.get(0), hash.get(1),
e.getThatSnippet().getPosition()));
}
ret.append(')');
return ret.toString();
}
}