package ch.unibe.scg.cc;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.inject.Inject;
import ch.unibe.scg.cc.Protos.Clone;
import ch.unibe.scg.cc.Protos.CloneGroup;
import ch.unibe.scg.cc.Protos.CodeFile;
import ch.unibe.scg.cc.Protos.Function;
import ch.unibe.scg.cc.Protos.Occurrence;
import ch.unibe.scg.cc.Protos.Project;
import ch.unibe.scg.cc.Protos.Version;
import ch.unibe.scg.cc.lines.StringOfLinesFactory;
import ch.unibe.scg.cells.LookupTable;
import ch.unibe.scg.cells.OfflineMapper;
import ch.unibe.scg.cells.Sink;
import ch.unibe.scg.cells.Source;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimap;
import com.google.common.io.BaseEncoding;
import com.google.common.io.Closer;
import com.google.protobuf.ByteString;
class CloneGroupClusterer implements OfflineMapper<Clone, CloneGroup> {
final private LookupTable<CodeFile> filesTab;
final private LookupTable<Version> versionsTab;
final private LookupTable<Project> projectsTab;
final private LookupTable<Function> functionsTab;
final private LookupTable<Str<Function>> funStringsTab;
final private StringOfLinesFactory stringOfLinesFactory;
final private Map<ByteString, Collection<Occurrence>> functionOccurrences = new HashMap<>();
@Inject
CloneGroupClusterer(@Annotations.Populator LookupTable<CodeFile> filesTab,
@Annotations.Populator LookupTable<Version> versionsTab,
@Annotations.Populator LookupTable<Project> projectsTab,
@Annotations.Populator LookupTable<Function> functionsTab,
@Annotations.Populator LookupTable<Str<Function>> funStringsTab,
StringOfLinesFactory stringOfLinesFactory) {
this.filesTab = filesTab;
this.versionsTab = versionsTab;
this.projectsTab = projectsTab;
this.functionsTab = functionsTab;
this.funStringsTab = funStringsTab;
this.stringOfLinesFactory = stringOfLinesFactory;
}
@Override
public void map(Source<Clone> in, Sink<CloneGroup> out) throws IOException, InterruptedException {
Multimap<ByteString, Clone> hashToClone = HashMultimap.create();
for (Iterable<Clone> row : in) {
for (Clone c : row) {
hashToClone.put(c.getThisSnippet().getFunction(), c);
hashToClone.put(c.getThatSnippet().getFunction(), c);
}
}
while (!hashToClone.isEmpty()) {
ByteString start = hashToClone.entries().iterator().next().getKey();
String cloneGroupText = extractText(hashToClone.get(start).iterator().next());
Set<Occurrence> occurrences = new HashSet<>();
for (Collection<Clone> cs = hashToClone.get(start); !cs.isEmpty(); cs = hashToClone.get(start)) {
Clone c = cs.iterator().next();
remove(hashToClone, c);
extractConnectedComponent(hashToClone, c, occurrences);
}
out.write(CloneGroup.newBuilder()
.addAllOccurrences(occurrences)
.setText(cloneGroupText)
.build());
}
}
private String extractText(Clone anyClone) throws IOException {
String functionString = Iterables
.getOnlyElement(funStringsTab.readRow(anyClone.getThisSnippet().getFunction())).contents;
return stringOfLinesFactory.make(functionString, '\n').getLines(anyClone.getThisSnippet().getPosition(),
anyClone.getThatSnippet().getLength());
}
private void extractConnectedComponent(Multimap<ByteString, Clone> hashToClone, Clone c, Set<Occurrence> out)
throws IOException {
out.addAll(findOccurrences(c.getThatSnippet().getFunction()));
out.addAll(findOccurrences(c.getThisSnippet().getFunction()));
Collection<Clone> left = hashToClone.get(c.getThisSnippet().getFunction());
for (Clone leftClone : left) {
remove(hashToClone, leftClone);
extractConnectedComponent(hashToClone, leftClone, out);
}
Collection<Clone> right = hashToClone.get(c.getThatSnippet().getFunction());
for (Clone rightClone : right) {
remove(hashToClone, rightClone);
extractConnectedComponent(hashToClone, rightClone, out);
}
}
private void remove(Multimap<ByteString, Clone> hashToClone, Clone leftClone) {
boolean removedSomething = false;
ByteString thisFun = leftClone.getThisSnippet().getFunction();
if (hashToClone.containsEntry(thisFun, leftClone)) {
hashToClone.remove(thisFun, leftClone);
removedSomething = true;
}
ByteString thatFun = leftClone.getThatSnippet().getFunction();
if (hashToClone.containsEntry(thatFun, leftClone)) {
hashToClone.remove(thatFun, leftClone);
removedSomething = true;
}
assert removedSomething;
}
/** @return all occurrences of {@code functionKey} */
private Collection<Occurrence> findOccurrences(ByteString functionKey) throws IOException {
if (!functionOccurrences.containsKey(functionKey)) {
ImmutableList.Builder<Occurrence> ret = ImmutableList.builder();
Iterable<Function> funs = readColumn(functionsTab, functionKey, "functions");
for (Function fun : funs) {
Iterable<CodeFile> files = readColumn(filesTab, fun.getCodeFile(), "files");
for (CodeFile file : files) {
Iterable<Version> versions = readColumn(versionsTab, file.getVersion(), "versions");
for (Version version : versions) {
Iterable<Project> projects = readColumn(projectsTab, version.getProject(), "projects");
for (Project project : projects) {
ret.add(Occurrence.newBuilder().setFunction(fun).setCodeFile(file).setVersion(version)
.setProject(project).build());
}
}
}
}
functionOccurrences.put(functionKey, ret.build());
}
return functionOccurrences.get(functionKey);
}
private <T> Iterable<T> readColumn(LookupTable<T> tab, ByteString hash, String table) throws IOException {
Iterable<T> ret = tab.readColumn(hash);
if (Iterables.isEmpty(ret)) {
throw new RuntimeException("Found no " + table + " for hash "
+ BaseEncoding.base16().encode(hash.toByteArray()).substring(0, 6));
}
return ret;
}
@Override
public void close() throws IOException {
try(Closer closer = Closer.create()) {
closer.register(filesTab);
closer.register(versionsTab);
closer.register(projectsTab);
closer.register(functionsTab);
closer.register(funStringsTab);
}
}
}