package test.db.FastTableScanning; /* * Usage: * java -Xmx2G FastTableScanning * * This code was written by D. Lemire, see http://lemire.me/. * It was the basis of a blog post : "For your in-memory databases, do you really need an index?" * http://lemire.me/blog/archives/2010/12/20/for-your-in-memory-databases-do-you-really-need-an-index/ * * It is in the public domain. (No copyright.) * * */ import java.io.BufferedOutputStream; import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Random; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.atomic.AtomicInteger; public class FastTableScanning implements Iterable<int[]> { File backfile; public int c, N; public List<int[]> memoryversion; private FastTableScanning(int myN, int myc, int cardinality) throws IOException { backfile = File.createTempFile("PersistentNormalizedTable", "bin"); backfile.deleteOnExit();// this defeats the purpose of "persistence" but // will do for our limited purposes N = myN; c = myc; Random rand = new Random(); System.out.println("Creating on disk a table with " + c + " columns and " + N + " rows... (this can take some time)"); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream( new FileOutputStream(backfile))); for (int row = 0; row < N; ++row) { for (int column = 0; column < c; ++column) dos.writeInt(rand.nextInt(cardinality)); } dos.close(); System.out .println("Done! Wrote " + N * c * 4 / (1024 * 1024.0) + " MB"); } public static void main(String[] args) throws IOException, InterruptedException, BrokenBarrierException { FastTableScanning fs = new FastTableScanning(1000000, 10, 100); System.out .println("Now I'm going to repeatedly scan the data for slices using a memory-mapped file: "); for (int value = 0; value < 10; ++value) fs.computeCardinalityOfSlice(4, value); fs.loadInRam(); System.out .println("Now I'm going to repeatedly scan the data for slices using an in-memory array: "); for (int value = 0; value < 10; ++value) fs.computeCardinalityOfSlice(4, value); } public void loadInRam() { System.out .println("Loading the entire table in RAM. You may need to adjust the flags on your JVM to make this work without OutOfMemoryError (hint: -Xmx2G may work)."); memoryversion = new ArrayList<int[]>(); for (int[] row : this) { memoryversion.add(Arrays.copyOf(row, row.length)); } System.out.println("Done!"); } public int computeCardinalityOfSlice(final int filtervalue, final int filtercolumn) throws InterruptedException, BrokenBarrierException { long startTime = System.currentTimeMillis(); final int numberofthreads = Runtime.getRuntime().availableProcessors(); final CyclicBarrier cb = new CyclicBarrier(numberofthreads + 1); final AtomicInteger counter = new AtomicInteger(0); for (int t = 0; t < numberofthreads; ++t) { final int ft = t; Thread r = new Thread() { @Override public void run() { try { if (memoryversion != null) processRAMPartition(counter, filtervalue, filtercolumn, ft, numberofthreads); else processDiskPartition(counter, filtervalue, filtercolumn, ft, numberofthreads); } catch (IOException e) { e.printStackTrace(); } try { cb.await(); } catch (InterruptedException e) { e.printStackTrace(); } catch (BrokenBarrierException e) { e.printStackTrace(); } } }; r.start(); } cb.await(); long endTime = System.currentTimeMillis(); System.out.println("Completed scan in " + (endTime - startTime) / 1000.0 + " s "); System.out.println("** The cardinality of the slice is " + counter + " rows."); return counter.intValue(); } // this is a silly example, modify to fit your needs // as it is, it computes the cardinality of a slice private void processDiskPartition(AtomicInteger counter, int filtervalue, int filtercolumn, final int whichthread, final int howmanythreads) throws IOException { final int startindex = startIndex(whichthread, howmanythreads); final int endindex = endIndex(whichthread, howmanythreads); Iterator<int[]> j = iteratorFromTo(startindex, endindex); while (j.hasNext()) { int[] row = j.next(); if (row[filtercolumn] == filtervalue) { counter.incrementAndGet(); } } } // this is a silly example, modify to fit your needs // as it is, it computes the cardinality of a slice private void processRAMPartition(AtomicInteger counter, int filtervalue, int filtercolumn, final int whichthread, final int howmanythreads) throws IOException { final int startindex = startIndex(whichthread, howmanythreads); final int endindex = endIndex(whichthread, howmanythreads); Iterator<int[]> j = memoryversion.subList(startindex, endindex) .iterator(); while (j.hasNext()) { int[] row = j.next(); if (row[filtercolumn] == filtervalue) { counter.incrementAndGet(); } } } private int startIndex(final int whichthread, final int howmanythreads) { return N / howmanythreads * whichthread; } private int endIndex(final int whichthread, final int howmanythreads) { return whichthread + 1 == howmanythreads ? N : N / howmanythreads * (whichthread + 1); } public Iterator<int[]> iterator() { try { FileChannel roChannel = new RandomAccessFile(backfile, "r") .getChannel(); ByteBuffer readonlybuffer = roChannel.map( FileChannel.MapMode.READ_ONLY, 0, c * N * 4); final IntBuffer ib = readonlybuffer.asIntBuffer(); final int[] array = new int[c]; final int endrowid = N; return new Iterator<int[]>() { int counter = 0; @Override public boolean hasNext() { if (counter++ < endrowid) { ib.get(array); return true; } return false; } @Override public int[] next() { return array; } @Override public void remove() { throw new RuntimeException("not implemented"); } }; } catch (IOException e1) { e1.printStackTrace(); } return null; } public Iterator<int[]> iteratorFromTo(final int beginrowid, final int endrowid) throws IOException { FileChannel roChannel = new RandomAccessFile(backfile, "r") .getChannel(); ByteBuffer readonlybuffer = roChannel.map( FileChannel.MapMode.READ_ONLY, c * beginrowid * 4, c * (endrowid - beginrowid) * 4); final IntBuffer ib = readonlybuffer.asIntBuffer(); final int[] array = new int[c]; return new Iterator<int[]>() { int counter = beginrowid; @Override public boolean hasNext() { if (counter++ < endrowid) { ib.get(array); return true; } return false; } @Override public int[] next() { return array; } @Override public void remove() { throw new RuntimeException("not implemented"); } }; } }