package ch.unibe.scg.cc;

import static com.google.common.io.BaseEncoding.base16;
import static java.lang.annotation.ElementType.FIELD;
import static java.lang.annotation.ElementType.METHOD;
import static java.lang.annotation.ElementType.PARAMETER;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;

import java.io.IOException;
import java.io.InputStream;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import javax.inject.Qualifier;

import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;

import ch.unibe.scg.cc.Annotations.Populator;
import ch.unibe.scg.cc.Protos.CloneType;
import ch.unibe.scg.cc.Protos.CodeFile;
import ch.unibe.scg.cc.Protos.Function;
import ch.unibe.scg.cc.Protos.GitRepo;
import ch.unibe.scg.cc.Protos.Project;
import ch.unibe.scg.cc.Protos.Snippet;
import ch.unibe.scg.cc.Protos.Version;
import ch.unibe.scg.cells.AdapterOneShotIterable;
import ch.unibe.scg.cells.CellsModule;
import ch.unibe.scg.cells.InMemoryStorage;
import ch.unibe.scg.cells.LocalCounterModule;
import ch.unibe.scg.cells.LocalExecutionModule;
import ch.unibe.scg.cells.Sink;
import ch.unibe.scg.cells.Source;
import ch.unibe.scg.cells.TableModule;

import com.google.common.collect.Iterables;
import com.google.common.io.ByteStreams;
import com.google.inject.Binder;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.TypeLiteral;
import com.google.protobuf.ByteString;

/**
 * Tests for {@link GitPopulator}: zipped git repositories from the test resources are
 * fed through the populator, and the contents of the resulting tables are checked.
 */
@SuppressWarnings("javadoc")
public final class GitPopulatorTest {
	private static final String TESTREPO = "testrepo.zip";
	/** Resource name of the zipped repository used by the {@code testPaperExample*} tests. */
	private static final String PAPER_EXAMPLE = "paperExample.zip";

	/** Checks that the project name is extracted from two differently shaped pack file paths. */
	@Test
	public void testProjnameRegex() throws IOException {
		try (GitPopulator gitWalker = new GitPopulator(null, null, null, null, null)) {
			String fullPathString = "har://hdfs-haddock.unibe.ch/projects/testdata.har"
					+ "/apfel/.git/objects/pack/pack-b017c4f4e226868d8ccf4782b53dd56b5187738f.pack";
			String projName = gitWalker.extractProjectName(fullPathString);
			assertThat(projName, is("apfel"));

			fullPathString = "har://hdfs-haddock.unibe.ch/projects/dataset.har/dataset/sensei/objects/pack/pack-a33a3daca1573e82c6fbbc95846a47be4690bbe4.pack";
			projName = gitWalker.extractProjectName(fullPathString);
			assertThat(projName, is("sensei"));
		}
	}

	/**
	 * Populates from {@link #TESTREPO} and checks every table that was written:
	 * project, version, code file, function, snippet, function string, and the
	 * snippet-to-function table written through the {@link TestSink}.
	 */
	@Test
	public void testPopulate() throws IOException, InterruptedException {
		Injector i = walkRepo(parseZippedGit(TESTREPO));

		try (Source<Project> projectPartitions = i.getInstance(
					Key.get(new TypeLiteral<Source<Project>>() {}, Populator.class));
				Source<Str<Function>> functionStringPartitions = i.getInstance(
					Key.get(new TypeLiteral<Source<Str<Function>>>() {}, Populator.class));
				Source<Snippet> snippet2FuncsPartitions = i.getInstance(
					Key.get(new TypeLiteral<Source<Snippet>>() {}, TestSink.class));
				Source<Version> versionPartitions = i.getInstance(
					Key.get(new TypeLiteral<Source<Version>>() {}, Populator.class));
				Source<CodeFile> filePartitions = i.getInstance(
					Key.get(new TypeLiteral<Source<CodeFile>>() {}, Populator.class));
				Source<Function> functionPartitions = i.getInstance(
					Key.get(new TypeLiteral<Source<Function>>() {}, Populator.class));
				Source<Snippet> snippetPartitions = i.getInstance(
					Key.get(new TypeLiteral<Source<Snippet>>() {}, Populator.class))) {
			assertThat(Iterables.size(projectPartitions), is(1));
			Iterable<Project> projects = Iterables.getOnlyElement(projectPartitions);
			assertThat(Iterables.size(projects), is(1));
			Project p = Iterables.getOnlyElement(projects);
			assertThat(p.getName(), is("testrepo.zip"));

			assertThat(Iterables.size(versionPartitions), is(1));
			Iterable<Version> versions = Iterables.getOnlyElement(versionPartitions);
			assertThat(Iterables.size(versions), is(1));
			Version v = Iterables.getOnlyElement(versions);
			assertThat(v.getName(), is("master"));
			assertThat(v.getProject(), is(p.getHash()));

			assertThat(Iterables.size(filePartitions), is(1));
			Iterable<CodeFile> files = Iterables.getOnlyElement(filePartitions);
			assertThat(Iterables.size(files), is(1));
			CodeFile cf = Iterables.getOnlyElement(files);
			assertThat(cf.getPath(), is("GitTablePopulatorTest.java"));
			assertThat(cf.getVersion(), is(v.getHash()));
			assertThat(cf.getContents().indexOf("package ch.unibe.scg.cc.mappers;"), is(0));

			assertThat(Iterables.size(functionPartitions), is(1));
			Iterable<Function> functions = Iterables.getOnlyElement(functionPartitions);
			assertThat(Iterables.size(functions), is(1));
			Function fn = Iterables.getOnlyElement(functions);
			assertThat(fn.getCodeFile(), is(cf.getHash()));
			assertThat(fn.getBaseLine(), is(10));
			assertThat(fn.getContents().indexOf("public void testProjname"), is(1));

			assertThat(Iterables.size(snippetPartitions), is(1)); // means we have only one function
			Iterable<Snippet> snippets = Iterables.getOnlyElement(snippetPartitions);
			assertThat(Iterables.size(snippets), is(24));
			Snippet s0 = Iterables.get(snippets, 0);
			Snippet s1 = Iterables.get(snippets, 1);
			Snippet s7 = Iterables.get(snippets, 7);

			assertThat(s0.getFunction(), is(fn.getHash()));
			assertThat(s0.getLength(), is(5));
			assertThat(s0.getPosition(), is(0));
			assertThat(s1.getPosition(), is(1));
			assertThat(s1.getFunction(), is(fn.getHash()));
			assertThat(s7.getPosition(), is(7));
			assertThat(s7.getFunction(), is(fn.getHash()));

			// Check FunctionString
			Iterable<Str<Function>> functionStringRow = Iterables.getOnlyElement(functionStringPartitions);
			Str<Function> functionString = Iterables.getOnlyElement(functionStringRow);
			assertThat(functionString.contents.indexOf("public void testProjnameRegex"), is(1));

			assertThat(Iterables.size(snippet2FuncsPartitions), is(24 - 2)); // 2 collisions
			Iterable<Snippet> snippets2Funcs = Iterables.get(snippet2FuncsPartitions, 0);
			assertThat(Iterables.size(snippets2Funcs), is(1));
		}
	}

	/**
	 * Populates from the paper example and checks the function table: 15 functions with
	 * 9 distinct hashes, spread over 3 partitions, each of which holds functions of
	 * exactly one code file that no other partition uses.
	 */
	@Test
	public void testPaperExampleFile2Function() throws IOException, InterruptedException {
		Injector i = walkRepo(GitPopulatorTest.parseZippedGit(PAPER_EXAMPLE));

		try (Source<Function> decodedPartitions = i.getInstance(
				Key.get(new TypeLiteral<Source<Function>>() {}, Populator.class))) {
			Iterable<Function> funs = Iterables.concat(decodedPartitions);
			assertThat(Iterables.size(funs), is(15));

			Set<ByteString> funHashes = new HashSet<>();
			for (Function fun : funs) {
				funHashes.add(fun.getHash());
			}
			assertThat(funHashes.size(), is(9));

			Set<String> fileHashes = new HashSet<>();
			for (Iterable<Function> partition : decodedPartitions) {
				String cur = base16().encode(Iterables.get(partition, 0).getCodeFile().toByteArray());
				// No two partitions may belong to the same file.
				Assert.assertFalse(cur + fileHashes, fileHashes.contains(cur));
				fileHashes.add(cur);

				// Check that every partition shares the same file.
				for (Function f : partition) {
					assertThat(base16().encode(f.getCodeFile().toByteArray()), is(cur));
				}
			}
			assertThat(fileHashes.size(), is(3));
		}
	}

	/**
	 * Populates from the paper example and checks the snippet-to-function table: its
	 * partition count, and the size and position distance of the partition whose first
	 * snippet hash starts with the bytes {@code 03 D8}.
	 */
	@Test
	public void testPaperExampleSnippet2Functions() throws IOException, InterruptedException {
		Injector i = walkRepo(GitPopulatorTest.parseZippedGit(PAPER_EXAMPLE));

		try (Source<Snippet> snippet2Function = i.getInstance(
				Key.get(new TypeLiteral<Source<Snippet>>() {}, TestSink.class))) {
			assertThat(Iterables.size(snippet2Function), is(145));

			// Numbers are taken from paper. See table II.
			final byte[] prefix03D8 = {0x03, (byte) 0xd8};

			Iterable<Snippet> partition03D8 = null;
			for (Iterable<Snippet> s2fPartition : snippet2Function) {
				byte[] firstHash = Iterables.get(s2fPartition, 0).getHash().toByteArray();
				if (Bytes.startsWith(firstHash, prefix03D8)) {
					partition03D8 = s2fPartition;
					break;
				}
			}

			assertNotNull(partition03D8);
			assertThat(Iterables.size(partition03D8), is(2));

			int actualDistance = Math.abs(Iterables.get(partition03D8, 0).getPosition()
					- Iterables.get(partition03D8, 1).getPosition());
			assertThat(actualDistance, is(3));
		}
	}

	/**
	 * Populates from the paper example and checks the function-to-snippets table: its
	 * partition count, and the hashes of the {@link CloneType#GAPPED} snippets of the
	 * function whose hash starts with {@code AAA0}.
	 */
	@Test
	public void testPaperExampleFunction2Snippets() throws IOException, InterruptedException {
		Injector i = walkRepo(GitPopulatorTest.parseZippedGit(PAPER_EXAMPLE));

		try (Source<Snippet> function2snippetsPartitions = i.getInstance(
				Key.get(new TypeLiteral<Source<Snippet>>() {}, Populator.class))) {
			// Num partitions is the number of functions. As per populator test, that's 9.
			assertThat(Iterables.size(function2snippetsPartitions), is(9));

			// We'll examine this row further in Function2RoughClones
			Iterable<Snippet> aaa0 = null;
			for (Iterable<Snippet> row : function2snippetsPartitions) {
				if (base16().encode(Iterables.get(row, 0).getFunction().toByteArray()).startsWith("AAA0")) {
					aaa0 = row;
					break;
				}
			}
			assert aaa0 != null; // Null analysis insists ...
			assertNotNull(aaa0);

			List<String> snippetHashes = new ArrayList<>();
			for (Snippet s : aaa0) {
				if (s.getCloneType() == CloneType.GAPPED) {
					snippetHashes.add(base16().encode(s.getHash().toByteArray()));
				}
			}

			// These snippets were only partially checked.
			assertThat(snippetHashes, is(d618SnippetHashes()));
		}
	}

	/**
	 * @return the base16-encoded snippet hashes, in order, that
	 *         {@link #testPaperExampleFunction2Snippets()} expects to find.
	 */
	static Collection<String> d618SnippetHashes() {
		return Arrays.asList(
				"58BA4690385740B2C9F8FCBF890A1ECF3BDC17C4",
				"0FA256C80C3AF5E1AC1FE54F5F0AF85D8752F474",
				"0FA256C80C3AF5E1AC1FE54F5F0AF85D8752F474",
				"A9BBB1B13ECC261749436CAF9DC5DC20E9C2F68B",
				"98DB4D210584D3033A1E26785A6721C609B54D14",
				"BB3E14556ABA10796131584A07979C50470B4DBA",
				"38C303121D329190DA79CE955F0E790569D168D3",
				"598571B72AE83C72E299F3747B9C025848C45014",
				"301729FB42E326C3CE1130994C16BD4C9DF14A79",
				"A4A8B82E4ABE99EBF67D12A1FF190B61FF6E6520",
				"5F72E12E161EF9991A85572864F5FBE6C3DF72EB",
				"9C628251AE1C7A39F3265D1AACA3630B69DA3655",
				"1474378C2B8FDE56C8A835AFD8F7DFB46F1E59DC",
				"20A4E2A38A590E7D29978E7A3FF308EE15B6DC63",
				"6F4533745BDB2D84648440CE9D2826DECAEA72EE",
				"278629562C3404A50795A832EE4E81722319D775",
				"48C31A2277EF29216311E8FC7366A7ACE9F3A59B",
				"48C31A2277EF29216311E8FC7366A7ACE9F3A59B",
				"24D6FB97266FFFCC409DD4F57CDC938EE6423C5F");
	}

	/** @return a {@link TableModule} whose {@code configure} binds nothing. */
	private static TableModule nullModule() {
		return new TableModule() {
			@Override
			public void configure(Binder binder) {
				// Do nothing.
			}
		};
	}

	/**
	 * Creates an injector with in-memory storage and an extra {@link TestSink} snippet
	 * table, then runs {@link GitPopulator#map} on {@code repo} with that table's sink.
	 *
	 * @param repo the repository to populate from
	 * @return the injector, from which the populated tables can be obtained
	 */
	private static Injector walkRepo(GitRepo repo) throws IOException, InterruptedException {
		Injector i = Guice.createInjector(
				new CCModule(new InMemoryStorage(), new LocalCounterModule()),
				new LocalExecutionModule(),
				new CellsModule() {
					@Override
					protected void configure() {
						installTable(TestSink.class,
								new TypeLiteral<Snippet>() {},
								Snippet2FunctionsCodec.class,
								new InMemoryStorage(),
								nullModule());
					}
				});

		try (GitPopulator gitPopulator = i.getInstance(GitPopulator.class);
				Sink<Snippet> snippetSink = i.getInstance(
						Key.get(new TypeLiteral<Sink<Snippet>>() {}, TestSink.class))) {
			gitPopulator.map(repo, new AdapterOneShotIterable<>(Arrays.asList(repo)), snippetSink);
		}
		return i;
	}

	/** Binding annotation for the snippet table that the tests install themselves. */
	@Qualifier
	@Target({ FIELD, PARAMETER, METHOD })
	@Retention(RUNTIME)
	private static @interface TestSink {}

	/**
	 * Builds a {@link GitRepo} from a zipped git repository on the test classpath.
	 *
	 * <p>The pack file is the first zip entry whose name ends in {@code .pack}; the
	 * packed refs are the first entry whose name ends in {@code packed-refs}. If the
	 * zip has no such entry, the corresponding field is set to empty bytes.
	 *
	 * @param pathToZip resource name of the zip, resolved relative to this class; it is
	 *        also used as the project name
	 * @return the repository message holding the project name, pack file, and packed refs
	 * @throws IOException if the resource does not exist or cannot be read
	 */
	static GitRepo parseZippedGit(String pathToZip) throws IOException {
		try (ZipInputStream packFile = openZipAtEntry(pathToZip, ".pack");
				ZipInputStream packedRefs = openZipAtEntry(pathToZip, "packed-refs")) {
			return GitRepo.newBuilder()
					.setProjectName(pathToZip)
					.setPackFile(ByteString.copyFrom(ByteStreams.toByteArray(packFile)))
					.setPackRefs(ByteString.copyFrom(ByteStreams.toByteArray(packedRefs)))
					.build();
		}
	}

	/**
	 * Opens the zip resource and advances it to the first entry whose name ends in
	 * {@code suffix}. If there is no such entry, the returned stream is at its end.
	 *
	 * @throws IOException if the resource does not exist or the zip cannot be read
	 */
	private static ZipInputStream openZipAtEntry(String pathToZip, String suffix) throws IOException {
		InputStream resource = GitPopulatorTest.class.getResourceAsStream(pathToZip);
		if (resource == null) {
			// Fail here with the resource name instead of a later "Stream closed".
			throw new IOException("Test resource not found on classpath: " + pathToZip);
		}

		ZipInputStream zip = new ZipInputStream(resource);
		try {
			for (ZipEntry entry; (entry = zip.getNextEntry()) != null;) {
				if (entry.getName().endsWith(suffix)) {
					break;
				}
			}
		} catch (IOException | RuntimeException e) {
			// The caller never receives the stream on failure, so release it here.
			try {
				zip.close();
			} catch (IOException closeFailure) {
				e.addSuppressed(closeFailure);
			}
			throw e;
		}
		return zip;
	}
}