// ChunkSpeedTest.java example
//
// Explorer
// h2o-3-master
package water.fvec;

import org.junit.BeforeClass;
import org.junit.Test;
import water.*;
import water.util.Log;
import water.util.PrettyPrint;

public class ChunkSpeedTest extends TestUtil {
  /**
   * One-time class setup: calls {@code stall_till_cloudsize(1)} before any test runs
   * (presumably waits until a single-node cloud is available; see TestUtil to confirm).
   */
  @BeforeClass
  public static void setup() {
    stall_till_cloudsize(1);
  }

  // Number of columns in the benchmark data set; one Chunk is built per column.
  final int cols = 100;
  // Number of rows held by every column.
  final int rows = 100000;
  // Number of passes each benchmark makes over the whole data set.
  final int rep = 10;
  // Uncompressed reference data, indexed as raw[column][row].
  final double[][] raw = new double[cols][rows];
  // One Chunk per column; starts out empty (all null) and is populated by run().
  Chunk[] chunks = new Chunk[cols];


  /**
   * Benchmark driver. Fills {@code raw} from {@link #get(int, int)}, compresses each column
   * into {@code chunks} (logging the resulting Chunk class), logs the configuration and then
   * runs every enabled benchmark five times in a row.
   */
  @Test
  public void run() {
    // Generate the uncompressed values, one column at a time.
    for (int col = 0; col < cols; col++) {
      for (int row = 0; row < rows; row++) {
        raw[col][row] = get(col, row);
      }
    }

    // Compress every column and report which Chunk implementation was picked.
    for (int col = 0; col < cols; col++) {
      chunks[col] = new NewChunk(raw[col]).compress();
      Log.info("Column " + col + " compressed into: " + chunks[col].getClass().toString());
    }

    Log.info("COLS: " + cols);
    Log.info("ROWS: " + rows);
    Log.info("REPS: " + rep);

    // Each benchmark is invoked this many times back to back.
    final int rounds = 5;

    for (int n = 0; n < rounds; n++) {
      raw();
    }
    for (int n = 0; n < rounds; n++) {
      chunks();
    }
    for (int n = 0; n < rounds; n++) {
      chunks_bulk();
    }
    for (int n = 0; n < rounds; n++) {
      chunks_part();
    }
    for (int n = 0; n < rounds; n++) {
      chunks_visitor();
    }
    for (int n = 0; n < rounds; n++) {
      chunksInline();
    }

    // Disabled benchmarks, kept for reference (each was also run `rounds` times, in this order):
    //   mrtask(false);
    //   rollups(false);
    //   Log.info("Now doing funny stuff.\n\n");
    //   mrtask(true);
    //   rollups(true);
    //   chunksInverted();
    //   rawInverted();
  }

  /**
   * Produces the synthetic value stored at row {@code i} of column {@code j}.
   * The column index modulo 4 selects one of four value patterns, so consecutive columns
   * cycle through the patterns.
   *
   * @param j column index
   * @param i row index
   * @return the value for that cell
   */
  double get(int j, int i) {
    // Alternative column mixes used for experiments:
    //   switch (j%1+0)  -> just do 1 byte chunks
    //   switch (j%1+1)  -> just do 2 byte chunks
    //   switch (j % 2)  -> just do 1/2 byte chunks
    switch (j % 4) { // cycle through all four value patterns below
      case 0:
        return i % 200; // C1NChunk - 1 byte integer
      case 1:
        return i % 500; // C2Chunk - 2 byte integer
      case 2:
        // Widen to double BEFORE multiplying: the int product i*Integer.MAX_VALUE silently
        // wraps around for every i >= 2, which produced small/negative values instead of
        // the intended large multiples of Integer.MAX_VALUE.
        return (double) i * Integer.MAX_VALUE;
      case 3:
        return i == 17 ? 1 : 0; // CX0Chunk - sparse
      default:
        // Only reachable for a negative column index (j % 4 < 0).
        throw H2O.unimpl();
    }
  }

  /**
   * Baseline benchmark: sums every element of the plain {@code raw} arrays, one column at a
   * time, for {@code rep} passes, then logs the sum, the data size and the elapsed time.
   * The clock is started at pass {@code rep/10}; earlier passes run untimed (presumably as
   * warm-up).
   */
  void raw()
  {
    long start = 0;
    double sum = 0;
    for (int r = 0; r < rep; ++r) {
      if (r == rep / 10) {
        start = System.currentTimeMillis();
      }
      for (int j = 0; j < cols; ++j) {
        for (int i = 0; i < rows; ++i) {
          sum += raw[j][i];
        }
      }
    }
    long done = System.currentTimeMillis();
    Log.info("Sum: " + sum);
    // 8 bytes per double. Multiply in long arithmetic so that larger rows/cols settings
    // cannot overflow int before the value reaches PrettyPrint.bytes.
    Log.info("Data size: " + PrettyPrint.bytes(8L * rows * cols));
    Log.info("Time for RAW double[]: " + PrettyPrint.msecs(done - start, true));
    Log.info("");
  }

  /**
   * Same as the plain-array baseline but with the loops swapped: for each row it visits
   * every column of {@code raw}. Runs {@code rep} passes, then logs the sum, the data size
   * and the elapsed time. The clock is started at pass {@code rep/10}; earlier passes run
   * untimed (presumably as warm-up).
   */
  void rawInverted()
  {
    long start = 0;
    double sum = 0;
    for (int r = 0; r < rep; ++r) {
      if (r == rep / 10) {
        start = System.currentTimeMillis();
      }
      for (int i = 0; i < rows; ++i) {
        for (int j = 0; j < cols; ++j) {
          sum += raw[j][i];
        }
      }
    }
    long done = System.currentTimeMillis();
    Log.info("Sum: " + sum);
    // 8 bytes per double. Multiply in long arithmetic so that larger rows/cols settings
    // cannot overflow int before the value reaches PrettyPrint.bytes.
    Log.info("Data size: " + PrettyPrint.bytes(8L * rows * cols));
    Log.info("Time for INVERTED RAW double[]: " + PrettyPrint.msecs(done - start, true));
    Log.info("");
  }

  /**
   * Sums the first {@code rows} elements of a chunk, fetching each one with {@code atd}.
   *
   * @param c chunk to read
   * @return sum of the values read
   */
  double walkChunk(final Chunk c) {
    double total = 0;
    for (int row = 0; row < rows; row++) {
      total += c.atd(row);
    }
    return total;
  }
  /**
   * Sums a chunk through a single bulk read: calls {@code getDoubles(vals, 0, c._len)} and
   * then adds up the first {@code rows} entries of {@code vals}.
   *
   * @param c    chunk to read
   * @param vals caller-supplied buffer handed to {@code getDoubles}; must hold at least
   *             {@code rows} entries
   * @return sum of the buffered values
   */
  double walkChunkBulk(final Chunk c, double [] vals) {
    c.getDoubles(vals, 0, c._len);
    double total = 0;
    for (int row = 0; row < rows; row++) {
      total += vals[row];
    }
    return total;
  }

  /**
   * Sums a chunk in buffer-sized pieces: repeatedly calls {@code getDoubles} for the next
   * range of at most {@code vals.length} rows and adds up the entries returned for that range.
   *
   * @param c    chunk to read
   * @param vals reusable buffer; its length determines the piece size
   * @return sum over all pieces
   */
  double walkChunkParts(final Chunk c, double [] vals) {
    double total = 0;
    int begin = 0;
    while (begin != c._len) {
      // Clamp the last piece to the end of the chunk.
      final int end = Math.min(c._len, begin + vals.length);
      c.getDoubles(vals, begin, end);
      final int count = end - begin;
      for (int k = 0; k < count; k++) {
        total += vals[k];
      }
      begin = end;
    }
    return total;
  }



  /** Sums all columns by calling {@link #walkChunk(Chunk)} on each chunk in turn. */
  double loop() {
    double total = 0;
    for (int col = 0; col < cols; col++) {
      total += walkChunk(chunks[col]);
    }
    return total;
  }

  /**
   * Sums all columns via {@code walkChunkBulk}, sharing one buffer that is sized to the
   * length of the first chunk.
   */
  double loop_bulk() {
    final double[] buffer = new double[chunks[0]._len];
    double total = 0;
    for (int col = 0; col < cols; col++) {
      total += walkChunkBulk(chunks[col], buffer);
    }
    return total;
  }

  /**
   * Visitor that accumulates the sum of every value handed to it in {@code sum};
   * runs of zeros are ignored because they do not change the sum.
   */
  private static class ChunkSum extends ChunkVisitor {
    double sum;

    public void addZeros(int n) {
      // Nothing to add for zeros.
    }

    public void addValue(double d) {
      sum += d;
    }

    public void addValue(long l) {
      sum += l;
    }

    public void addValue(int i) {
      sum += i;
    }
  }
  /**
   * Sums all columns by passing one shared {@code ChunkSum} visitor to
   * {@code processRows(visitor, 0, len())} on every chunk.
   */
  double loop_visitor() {
    final ChunkSum visitor = new ChunkSum();
    for (int col = 0; col < cols; col++) {
      final Chunk chunk = chunks[col];
      chunk.processRows(visitor, 0, chunk.len());
    }
    return visitor.sum;
  }
  /** Sums all columns via {@code walkChunkParts}, reusing a single 16-element buffer. */
  double loop_parts() {
    final double[] buffer = new double[16];
    double total = 0;
    for (int col = 0; col < cols; col++) {
      total += walkChunkParts(chunks[col], buffer);
    }
    return total;
  }

  /**
   * Benchmark that reads every cell with {@code chunks[col].atd(row)} directly inside the
   * nested loops (no helper method), one column at a time, for {@code rep} passes. Logs the
   * sum, the total {@code byteSize()} of all chunks and the elapsed time; the clock starts
   * at pass {@code rep/10}.
   */
  void chunksInline() {
    double total = 0;
    long begin = 0;
    for (int pass = 0; pass < rep; pass++) {
      if (pass == rep / 10) {
        begin = System.currentTimeMillis();
      }
      for (int col = 0; col < cols; col++) {
        for (int row = 0; row < rows; row++) {
          total += chunks[col].atd(row);
        }
      }
    }
    final long end = System.currentTimeMillis();
    Log.info("Sum: " + total);

    // Add up the compressed size of all columns.
    long bytes = 0;
    for (int col = 0; col < cols; col++) {
      bytes += chunks[col].byteSize();
    }
    Log.info("Data size: " + PrettyPrint.bytes(bytes));
    Log.info("Time for INLINE chunks atd(): " + PrettyPrint.msecs(end - begin, true));
    Log.info("");
  }

  /**
   * Benchmark for per-element {@code atd} access through helper methods: calls
   * {@link #loop()} {@code rep} times. Logs the sum, the total {@code byteSize()} of all
   * chunks and the elapsed time; the clock starts at pass {@code rep/10}.
   */
  void chunks() {
    double total = 0;
    long begin = 0;
    for (int pass = 0; pass < rep; pass++) {
      if (pass == rep / 10) {
        begin = System.currentTimeMillis();
      }
      total += loop();
    }
    final long end = System.currentTimeMillis();
    Log.info("Sum: " + total);

    // Add up the compressed size of all columns.
    long bytes = 0;
    for (int col = 0; col < cols; col++) {
      bytes += chunks[col].byteSize();
    }
    Log.info("Data size: " + PrettyPrint.bytes(bytes));
    Log.info("Time for METHODS chunks atd(): " + PrettyPrint.msecs(end - begin, true));
    Log.info("");
  }

  /**
   * Benchmark for whole-chunk {@code getDoubles} access: calls {@link #loop_bulk()}
   * {@code rep} times. Logs the sum, the total {@code byteSize()} of all chunks and the
   * elapsed time; the clock starts at pass {@code rep/10}.
   */
  void chunks_bulk() {
    double total = 0;
    long begin = 0;
    for (int pass = 0; pass < rep; pass++) {
      if (pass == rep / 10) {
        begin = System.currentTimeMillis();
      }
      total += loop_bulk();
    }
    final long end = System.currentTimeMillis();
    Log.info("Sum: " + total);

    // Add up the compressed size of all columns.
    long bytes = 0;
    for (int col = 0; col < cols; col++) {
      bytes += chunks[col].byteSize();
    }
    Log.info("Data size: " + PrettyPrint.bytes(bytes));
    Log.info("Time for METHODS chunks getDoubles(): " + PrettyPrint.msecs(end - begin, true));
    Log.info("");
  }
  /**
   * Benchmark for piecewise {@code getDoubles} access: calls {@link #loop_parts()}
   * {@code rep} times. Logs the sum, the total {@code byteSize()} of all chunks and the
   * elapsed time; the clock starts at pass {@code rep/10}.
   */
  void chunks_part() {
    double total = 0;
    long begin = 0;
    for (int pass = 0; pass < rep; pass++) {
      if (pass == rep / 10) {
        begin = System.currentTimeMillis();
      }
      total += loop_parts();
    }
    final long end = System.currentTimeMillis();
    Log.info("Sum: " + total);

    // Add up the compressed size of all columns.
    long bytes = 0;
    for (int col = 0; col < cols; col++) {
      bytes += chunks[col].byteSize();
    }
    Log.info("Data size: " + PrettyPrint.bytes(bytes));
    Log.info("Time for METHODS chunks PARTS(): " + PrettyPrint.msecs(end - begin, true));
    Log.info("");
  }
  /**
   * Benchmark for visitor-based access: calls {@link #loop_visitor()} {@code rep} times.
   * Logs the sum, the total {@code byteSize()} of all chunks and the elapsed time; the
   * clock starts at pass {@code rep/10}.
   */
  void chunks_visitor() {
    double total = 0;
    long begin = 0;
    for (int pass = 0; pass < rep; pass++) {
      if (pass == rep / 10) {
        begin = System.currentTimeMillis();
      }
      total += loop_visitor();
    }
    final long end = System.currentTimeMillis();
    Log.info("Sum: " + total);

    // Add up the compressed size of all columns.
    long bytes = 0;
    for (int col = 0; col < cols; col++) {
      bytes += chunks[col].byteSize();
    }
    Log.info("Data size: " + PrettyPrint.bytes(bytes));
    Log.info("Time for METHODS chunks Visitor(): " + PrettyPrint.msecs(end - begin, true));
    Log.info("");
  }
  /**
   * Benchmark that reads every cell with {@code chunks[col].atd(row)} with the loops
   * swapped: for each row it visits every column. Runs {@code rep} passes and logs the sum,
   * the total {@code byteSize()} of all chunks and the elapsed time; the clock starts at
   * pass {@code rep/10}.
   */
  void chunksInverted() {
    double total = 0;
    long begin = 0;
    for (int pass = 0; pass < rep; pass++) {
      if (pass == rep / 10) {
        begin = System.currentTimeMillis();
      }
      for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
          total += chunks[col].atd(row);
        }
      }
    }
    final long end = System.currentTimeMillis();
    Log.info("Sum: " + total);

    // Add up the compressed size of all columns.
    long bytes = 0;
    for (int col = 0; col < cols; col++) {
      bytes += chunks[col].byteSize();
    }
    Log.info("Data size: " + PrettyPrint.bytes(bytes));
    Log.info("Time for INVERTED INLINE chunks atd(): " + PrettyPrint.msecs(end - begin, true));
    Log.info("");
  }

  /**
   * Task whose {@code map} overwrites every cell of the chunks it receives with the
   * corresponding {@code raw[col][row]} value of the enclosing test instance.
   * NOTE(review): this is a non-static inner class, so it keeps a reference to the
   * enclosing ChunkSpeedTest in order to read {@code raw}.
   */
  class FillTask extends MRTask<FillTask> {
    @Override
    public void map(Chunk[] cs) {
      for (int c = 0; c < cs.length; c++) {
        final Chunk target = cs[c];
        // The row count is taken from the first chunk, as all chunks are walked in lockstep.
        for (int r = 0; r < cs[0]._len; r++) {
          target.set(r, raw[c][r]);
        }
      }
    }
  }

  /**
   * Task that adds up every cell it sees: {@code map} accumulates the values of the chunks
   * it receives into {@code _sum}, and {@code reduce} adds another task's partial sum.
   */
  static class SumTask extends MRTask<SumTask> {
    double _sum;

    @Override
    public void map(Chunk[] cs) {
      for (int c = 0; c < cs.length; c++) {
        final Chunk source = cs[c];
        // The row count is taken from the first chunk.
        for (int r = 0; r < cs[0]._len; r++) {
          _sum += source.atd(r);
        }
      }
    }

    @Override
    public void reduce(SumTask other) {
      _sum += other._sum;
    }
  }

  /**
   * Benchmark that sums the data through {@code SumTask} run over a Frame, {@code rep} times.
   * Logs the sum, the frame's {@code byteSize()} and the elapsed time; the clock starts at
   * pass {@code rep/10}.
   *
   * @param parallel when true the frame is built from constant vectors and then filled by
   *                 {@code FillTask}; when false each vector is created directly from the
   *                 matching {@code raw} column
   */
  void mrtask(boolean parallel)
  {
    long start = 0;
    double sum = 0;
    Frame fr = new Frame();
    // try/finally so the frame is deleted even when building, filling or summing throws.
    try {
      for (int i = 0; i < cols; ++i) {
        if (parallel) {
          fr.add("C" + i, Vec.makeCon(0, rows)); //multi-chunk (based on #cores)
        } else {
          fr.add("C" + i, Vec.makeVec(raw[i], Vec.newKey())); //directly fill from raw double array (1 chunk)
        }
      }
      if (parallel) {
        new FillTask().doAll(fr);
      }

      for (int r = 0; r < rep; ++r) {
        if (r == rep / 10) {
          start = System.currentTimeMillis();
        }
        sum += new SumTask().doAll(fr)._sum;
      }
      long done = System.currentTimeMillis();
      Log.info("Sum: " + sum);
      long siz = fr.byteSize();
      Log.info("Data size: " + PrettyPrint.bytes(siz));
      Log.info("Time for " + (parallel ? "PARALLEL":"SERIAL") + " MRTask: " + PrettyPrint.msecs(done - start, true));
      Log.info("");
    } finally {
      fr.delete();
    }
  }

  /**
   * Benchmark for rollup statistics: for {@code rep} passes and for every column, removes
   * the vector's rollup-stats key from the DKV and then calls {@code mean()} on the vector.
   * Logs the frame's {@code byteSize()} and the elapsed time (all passes are timed here).
   *
   * @param parallel forwarded as the third argument of {@code Vec.makeCon} and used to pick
   *                 the PARALLEL/SERIAL label in the log output
   */
  void rollups(boolean parallel)
  {
    Frame fr = new Frame();
    // Disabled sanity check on an all-NaN vector, kept for reference:
    //    Vec v = Vec.makeCon(Double.NaN, rows);
    //    Log.info(v.mean());
    //    Log.info(v.sigma());
    //    Log.info(v.min());
    //    Log.info(v.max());
    //    Log.info(v.length());
    //    Log.info(v.nzCnt());
    //    Log.info(v.naCnt());
    //    v.remove();

    // try/finally so the frame is removed even when filling or the rollups throw.
    try {
      for (int i = 0; i < cols; ++i) {
        fr.add("C" + i, Vec.makeCon(0, rows, parallel)); //multi-chunk (based on #cores)
      }
      new FillTask().doAll(fr);

      long start = System.currentTimeMillis();
      for (int r = 0; r < rep; ++r) {
        for (int i = 0; i < cols; ++i) {
          // Drop the rollup-stats entry first, then ask for the mean again.
          DKV.remove(fr.vec(i).rollupStatsKey());
          fr.vec(i).mean();
        }
      }
      long done = System.currentTimeMillis();
      long siz = fr.byteSize();
      Log.info("Data size: " + PrettyPrint.bytes(siz));
      Log.info("Time for " + (parallel ? "PARALLEL":"SERIAL") + " Rollups: " + PrettyPrint.msecs(done - start, true));
      Log.info("");
    } finally {
      fr.remove();
    }
  }

  /**
   * Stand-alone entry point: performs the class setup and then runs the benchmark once.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    setup();
    final ChunkSpeedTest benchmark = new ChunkSpeedTest();
    benchmark.run();
  }
}