package ch.unibe.scg.cells.benchmarks;

import java.io.IOException;
import java.text.NumberFormat;

import ch.unibe.scg.cells.Cells;
import ch.unibe.scg.cells.InMemoryPipeline;
import ch.unibe.scg.cells.LocalExecutionModule;
import ch.unibe.scg.cells.benchmarks.CellsInMemoryWordCountBenchmark.FileContent;
import ch.unibe.scg.cells.benchmarks.CellsInMemoryWordCountBenchmark.FileContentCodec;

import com.google.common.collect.Iterables;
import com.google.inject.Guice;

/**
 * A cells job for training a distributed svm in memory. Map phase distributes data to svms,
 * reduce phase trains a set of svms on a subset of data.
 *
 * <p>The job itself is delegated to {@link CellsHadoopSVMBenchmark#run}; this class only builds
 * an in-memory pipeline around it and measures wall-clock time over {@link #TIMES} runs.
 */
public final class CellsInMemorySVMBenchmark {
	/** Number of timed repetitions of the whole job. */
	private static final int TIMES = 5;

	/** Input directory used when no path is passed on the command line. */
	private static final String DEFAULT_INPUT = "benchmarks/svmdata";

	/**
	 * Launches the cells job {@link #TIMES} times, printing the duration of each run in seconds,
	 * followed by a separator line and the minimum duration.
	 *
	 * @param args optional; if present, {@code args[0]} is the input path. Otherwise
	 *            {@code "benchmarks/svmdata"} is used.
	 * @throws IOException if thrown while reading the input, running, or closing the pipeline
	 * @throws InterruptedException if thrown by the pipeline run
	 */
	public static void main(String[] args) throws IOException, InterruptedException {
		// Honor the documented command-line parameter; fall back to the historical default.
		String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

		double[] timings = new double[TIMES];
		NumberFormat f = NumberFormat.getInstance();
		f.setMaximumFractionDigits(2);

		for (int i = 0; i < TIMES; i++) {
			// The timed region includes reading the input from disk and building the pipeline.
			long startTime = System.nanoTime();
			try (InMemoryPipeline<FileContent, String> pipe
					= Guice.createInjector(new LocalExecutionModule())
							.getInstance(InMemoryPipeline.Builder.class)
							.make(Cells.shard(Cells.encode(
									CellsInMemoryWordCountBenchmark.readFilesFromDisk(input),
									new FileContentCodec())))) {
				CellsHadoopSVMBenchmark.run(pipe);

				// Walk the complete efflux and fold its size into a value that is observed below.
				// NOTE(review): presumably this forces the output to be materialized and keeps the
				// work from being optimized away -- confirm against InMemoryPipeline semantics.
				int dummy = 0;
				for (Iterable<String> wcs : pipe.lastEfflux()) {
					dummy += Iterables.size(wcs);
				}
				if (dummy == 0) {
					System.out.println();
				}

				timings[i] = (System.nanoTime() - startTime) / 1_000_000_000.0;
				System.out.println(f.format(timings[i]));
			}
		}

		System.out.println("--------------");
		System.out.printf("min: %s%n", f.format(CellsInMemoryWordCountBenchmark.min(timings)));
	}
}