package org.epics.archiverappliance.zipfs; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.LinkedList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.Future; import org.apache.log4j.Logger; import org.junit.After; import org.junit.Before; import org.junit.Test; import com.google.common.base.Ascii; import edu.stanford.slac.archiverappliance.PB.utils.LineEscaper; /** * Test ways to optimize parallel fetch * @author mshankar * */ public class ZipFetchTest { private static Logger logger = Logger.getLogger(ZipFetchTest.class.getName()); private static final int NUM_DAYS = 365; private static final int DATA_PER_DAY = 1024*1024; private static final int SAMPLE_SIZE = NUM_DAYS*DATA_PER_DAY; private static final int LINE_SIZE = 32; private byte[] data = new byte[SAMPLE_SIZE]; @Before public void setUp() throws Exception { byte[] line = new byte[LINE_SIZE]; for(int i = 0; i < LINE_SIZE-1; i++) { line[i] = Ascii.SPACE; } line[LINE_SIZE-1] = LineEscaper.NEWLINE_CHAR; for(int l = 0; l < SAMPLE_SIZE/LINE_SIZE; l++) { System.arraycopy(line, 0, data, l*LINE_SIZE, line.length); } } @After public void tearDown() throws Exception { } private static int countLinesInDay(byte[] data, int day) throws IOException { int dayStartsAt = day*DATA_PER_DAY; int lineCount = 0; byte[] input = new byte[LINE_SIZE]; byte[] output = new byte[LINE_SIZE]; for(int l = 0; l < DATA_PER_DAY/LINE_SIZE; l++) { int linei = 0; System.arraycopy(data, dayStartsAt + l*LINE_SIZE, input, 0, input.length); for(int i = 0; i < input.length; i++) { byte b = input[i]; if(b == LineEscaper.ESCAPE_CHAR) { i++; if(i >= input.length) { throw new RuntimeException("Index " + i + " is greater then input array length " + input.length); } b = input[i]; switch(b) { case LineEscaper.ESCAPE_ESCAPE_CHAR: output[linei++] = LineEscaper.ESCAPE_CHAR;break; case LineEscaper.NEWLINE_ESCAPE_CHAR: output[linei++] = LineEscaper.NEWLINE_CHAR;break; case LineEscaper.CARRIAGERETURN_ESCAPE_CHAR: output[linei++] = LineEscaper.CARRIAGERETURN_CHAR;break; default: output[linei++] = b;break; } } else { output[linei++] = b; } } lineCount++; } return lineCount; } @Test public void compareSpeedup() throws Exception { long serial = testSerialFetch(); long parallel = testParallelFetch(); logger.info("Speedup as a % " + (parallel*100.0/serial)); } private long testSerialFetch() throws Exception { long st0 = System.currentTimeMillis(); int totallinecount = 0; for(int day = 0; day < NUM_DAYS; day++) { totallinecount += countLinesInDay(data, day); } long st1 = System.currentTimeMillis(); logger.info("Time taken for serial decoding " + (st1-st0) + "(ms) yielding " + totallinecount + " lines"); return st1-st0; } private static class ParallelFetch implements Callable<Integer> { private byte[] data; private int day; ParallelFetch(byte[] data, int day) { this.data = data; this.day = day; } @Override public Integer call() { try { return countLinesInDay(data, day); } catch(IOException ex) { logger.error(ex); } return 0; } } private long testParallelFetch() throws Exception { ForkJoinPool forkJoinPool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()); logger.info("The parallelism in the pool is " + forkJoinPool.getParallelism()); long st0 = System.currentTimeMillis(); List<Future<Integer>> futures = new LinkedList<Future<Integer>>(); for(int day = 0; day < NUM_DAYS; day++) { futures.add(forkJoinPool.submit(new ParallelFetch(data, day))); } long firstTaskFinish = 0; int totallinecount = 0; for(Future<Integer> future : futures) { int numLines = future.get(); if(firstTaskFinish == 0) { firstTaskFinish = System.currentTimeMillis(); } assertTrue("Day had no lines ", numLines > 1); totallinecount += numLines; } long st1 = System.currentTimeMillis(); logger.info("Time taken for parallel decoding " + (st1-st0) + "(ms) yielding " + totallinecount + " lines. The first task took " + (firstTaskFinish-st0)); forkJoinPool.shutdown(); return st1-st0; } }