package ch.unibe.scg.cc;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import ch.unibe.scg.cc.Protos.Clone;
import ch.unibe.scg.cc.Protos.GitRepo;
import ch.unibe.scg.cells.hadoop.HBaseStorage;
import ch.unibe.scg.cells.hadoop.HadoopCounterModule;
import ch.unibe.scg.cells.hadoop.HadoopPipeline;
import ch.unibe.scg.cells.hadoop.Table;
import ch.unibe.scg.cells.hadoop.TableAdmin;
import ch.unibe.scg.cells.hadoop.UnibeModule;

import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.protobuf.ByteString;

/**
 * Entry point that runs the clone detector on the cluster.
 *
 * <p>It wires the application with Guice, optionally drops and recreates the
 * working tables, and then runs a {@link HadoopPipeline} that reads
 * {@link GitRepo} inputs from HDFS and writes {@link Clone} results into the
 * output table.
 */
public final class Main {
	/**
	 * Name of the table that receives the pipeline output.
	 * NOTE(review): the spelling is kept verbatim because it is the runtime table name.
	 */
	private static final String OUT_TABLE = "ClonsNov";

	/** First command-line argument that requests dropping and recreating all tables. */
	private static final String RECREATE_TABLES_FLAG = "--recreateTables";

	/** Location of the archived repositories that feed the pipeline. */
	private static final String INPUT_PATH = "har://hdfs-haddock.unibe.ch:/projects/dataset-141.har/repos/";

	/**
	 * Runs the clone detection pipeline in the cluster.
	 *
	 * @param args command-line arguments; when the first one equals
	 *        {@code --recreateTables}, every table is deleted and created again
	 *        before the pipeline starts.
	 * @throws IOException propagated unchanged from the calls made here.
	 * @throws InterruptedException propagated unchanged from the calls made here.
	 */
	public static void main(String[] args) throws IOException, InterruptedException {
		Injector injector = Guice.createInjector(
				new UnibeModule(),
				new CCModule(new HBaseStorage(), new HadoopCounterModule()));

		Configuration configuration = injector.getInstance(Configuration.class);
		TableAdmin tableAdmin = injector.getInstance(TableAdmin.class);

		List<String> tableNames = Arrays.asList(
				"Snippets",
				"Functions",
				"CodeFiles",
				"Versions",
				"Projects",
				"FunctionStrings",
				"PopularSnippets",
				OUT_TABLE);
		ByteString family = ByteString.copyFromUtf8("f");

		boolean recreateRequested = args.length > 0 && args[0].equals(RECREATE_TABLES_FLAG);
		if (recreateRequested) {
			recreateTables(tableAdmin, tableNames, family);
		}

		// The output table is closed automatically once the run has finished.
		try (Table<Clone> outTable = tableAdmin.existing(OUT_TABLE, family)) {
			Path input = new Path(INPUT_PATH);
			HadoopPipeline<GitRepo, Clone> pipeline =
					HadoopPipeline.fromHDFSToTable(configuration, GitInputFormat.class, input, outTable);

			PipelineRunner pipelineRunner = injector.getInstance(PipelineRunner.class);
			pipelineRunner.run(pipeline);
		}
	}

	/** Deletes and then creates each named table in list order, using the given column family. */
	private static void recreateTables(TableAdmin tableAdmin, List<String> tableNames, ByteString family)
			throws IOException, InterruptedException {
		for (String name : tableNames) {
			tableAdmin.deleteTable(name);
			tableAdmin.createTable(name, family);
		}
	}
}