AstGroupSorted.java example

Explorer
h2o-3-master
/*
package water.rapids.ast.prims.mungers;

import water.fvec.*;
import water.*;
import water.rapids.RadixCount;
import water.rapids.assignG;
import water.util.ArrayUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;


// Work-in-progress radix-based grouping of the rows of a Frame by a key column.
// NOTE: this whole file is currently wrapped in a single block comment, so the class is
// disabled. Use only line comments inside it; a block-comment terminator here would end
// the enclosing comment early.
public class AstGroupSorted {
   // 2^31 bytes > java max (2^31-1), so 2^30 / 8 bytes per long.   TO DO - how to make global?
  // MAXVECLONG = 2^27 (number of longs in one batch), MAXVECBYTE = 2^30 (number of bytes in one batch).
  // NOTE(review): both constants are commented out but are still referenced in sort() below,
  // so they have to be restored (or defined elsewhere) before this class can compile.
  //private static final int MAXVECLONG = 134217728;
  //private static final int MAXVECBYTE = 1073741824;

  // Groups the rows of groupCols.
  //
  // As written, the method immediately returns the placeholder {{1,2,3}}; every statement
  // after that return is unreachable (javac rejects unreachable statements, so either the
  // stub or the remaining body has to go before this is re-enabled).
  //
  // The remaining body, if it were reached, proceeds in these phases:
  //   1. RadixCount over column 0 to obtain counts[][][] (indexed [b][chunk][bucket] below).
  //   2. Turn the per-chunk counts into running offsets across chunks.
  //   3. Allocate o[][][] (long) and x[][][] (byte) for each of the 256 buckets.
  //   4. MoveByFirstByte to fill o and x.
  //   5. One dradix task per non-empty bucket, filling groups[] and nGroup[].
  //   6. One assignG task per non-empty bucket, filling g, which is returned.
  //
  // groupCols: frame holding the key column; must have exactly one column, otherwise
  //            H2O.unimpl() is thrown.
  // returns:   g, allocated in batches so that the batch lengths add up to
  //            groupCols.numRows(); presumably the group index of each row - confirm
  //            against assignG.
  long[][] sort(Frame groupCols) {

    //return (new RadixOrder(groupCols, ArrayUtils.seq(0,groupCols.numCols()-1))._groupIndex);   // TO DO: won't work yet as needs 2nd group step
    // Placeholder result; makes everything below unreachable.
    return (new long[][] {{1,2,3}});
    // a vector


    System.out.println("Calling RadixCount ...");
    long t0 = System.nanoTime();
    long t00 = t0;  // NOTE(review): never read again in this method
    int nChunks = groupCols.anyVec().nChunks();

    if( groupCols.numCols() != 1 )  throw H2O.unimpl(); // Only looking at column 0 for now
    // counts is indexed below as counts[b][chunk][h] with b in 0..7 and h in 0..255;
    // presumably one 256-bin histogram per key byte per chunk - confirm against RadixCount.
    long counts[][][] = new RadixCount(nChunks).doAll(groupCols.vec(0))._counts;
    System.out.println("Time of RadixCount: " + (System.nanoTime() - t0) / 1e9); t0 = System.nanoTime();
    // for (int c=0; c<5; c++) { System.out.print("First 10 for chunk "+c+" byte 0: "); for (int i=0; i<10; i++) System.out.print(counts[0][c][i] + " "); System.out.print("\n"); }

    // Total number of entries per bucket, summed over all chunks, for index 5 of counts.
    // This must be computed before the loop below overwrites counts with running offsets.
    long totalHist[] = new long[256];
    for (int c=0; c<nChunks; c++) {
      for (int h=0; h<256; h++) {
        totalHist[h] += counts[5][c][h];   // TO DO: hard coded 5 here
      }
    }

    // Replace each count by the sum of the counts of the preceding chunks (exclusive running
    // sum over c), separately for every (b, h) pair.
    for (int b=0; b<8; b++) {
      for (int h=0; h<256; h++) {
        long rollSum = 0;
        for (int c = 0; c < nChunks; c++) {
          long tmp = counts[b][c][h];
          counts[b][c][h] = rollSum;
          rollSum += tmp;
        }
      }
    }
    // Any radix skipping needs to be detected with a loop over node results to ensure no use of those bits on any node.
    System.out.println("Time to cumulate counts: " + (System.nanoTime() - t0) / 1e9); t0 = System.nanoTime();

    // TO DO:  by this stage we know now the width of byte field we need.  So allocate it tight up to MAXVEC
    // TO DO: reduce to 5 if we're only passed the first column
    int keySize = 7;
    // Both arrays are indexed [bucket][batch][element].
    long o[][][] = new long[256][][];
    byte x[][][] = new byte[256][][];  // for each bucket,  there might be > 2^31 bytes, so an extra dimension for that

    for (int c=0; c<256; c++) {
      if (totalHist[c] == 0) continue;  // empty bucket: o[c] and x[c] stay null
      int d;  // declared outside the loop below so it still holds nbatch when the remainder batch is allocated
      // With keySize = 7, Math.max(keySize,8) is 8, so nbatch and rem are derived from 8 * totalHist[c].
      int nbatch = (int)(totalHist[c] * Math.max(keySize,8) / MAXVECBYTE);   // TO DO. can't be 2^31 because 2^31-1 was limit. If we use 2^30, instead of /, can we do >> for speed?
      int rem = (int)(totalHist[c] * Math.max(keySize,8) % MAXVECBYTE);
      assert nbatch==0;  // in the case of 20m rows, we should always be well within a batch size
      // The Math.max ensures that batches are aligned, even for wide keys.  For efficiency inside insert() above so it doesn't have to cross boundaries.
      o[c] = new long[nbatch + (rem>0?1:0)][];
      x[c] = new byte[nbatch + (rem>0?1:0)][];
      assert nbatch==0;
      for (d=0; d<nbatch; d++) {
        o[c][d] = new long[MAXVECLONG];
        // TO DO?: use MemoryManager.malloc8()
        x[c][d] = new byte[MAXVECBYTE];
      }
      // NOTE(review): rem was computed from totalHist[c] * 8, yet it is used here as the number
      // of longs in o and, times keySize, as the number of bytes in x. That looks like it
      // allocates about 8x more than one entry per row would need - confirm the intent.
      if (rem>0) {
        o[c][d] = new long[rem];
        x[c][d] = new byte[rem * keySize];
      }
    }
    System.out.println("Time to allocate o[][] and x[][]: " + (System.nanoTime() - t0) / 1e9); t0 = System.nanoTime();
    // NOT TO DO: we do need the full allocation of x[] and o[].  We need o[]
    // anyway.  x[] will be as dense as possible.
    // o is the full ordering vector of the right size
    // x is the byte key aligned with o
    // o AND x are what bmerge() needs. Pushing x to each node as well as o avoids inter-node comms.

    // feasibly, that we could move by byte 5 and then skip the next byte.  Too
    // complex case though and rare so simplify
    // The literal 5 matches the hard-coded index used for totalHist above.
    // NOTE(review): MoveByFirstByte is not declared or imported in the visible part of this file.
    new MoveByFirstByte(5, o, x, counts, keySize).doAll(groupCols);  
    System.out.println("Time to MoveByFirstByte: " + (System.nanoTime() - t0) / 1e9); t0 = System.nanoTime();

    // Add check that this first split is reasonable.  e.g. if it were just 2,
    // it definitely would not be enough.  90 is enough though.  Need to fill
    // L2 with pages.  
    // for counted completer 0:255
    long groups[][] = new long[256][];  //  at most MAXVEC groups per radix, currently
    long nGroup[] = new long[257];   // one extra to make undo of cumulate easier
    // Submit one dradix task per non-empty bucket and wait for all of them.
    // NOTE(review): dradix is not declared or imported in the visible part of this file;
    // presumably it fills groups[i] and nGroup[i] for bucket i - confirm.
    Futures fs = new Futures();
    for (int i=0; i<256; i++) {
      if (totalHist[i] > 0)
        fs.add(H2O.submitTask(new dradix(groups, nGroup, i, x[i], o[i], totalHist[i], keySize)));
    }
    fs.blockForPending();
    // Convert nGroup into an exclusive running sum; nGroups ends up as the grand total.
    // Afterwards nGroup[i+1]-nGroup[i] gives back the original value of entry i (used below).
    long nGroups = 0;
    for (int i = 0; i < 257; i++) {
      long tmp = nGroup[i];
      nGroup[i] = nGroups;
      nGroups += tmp;
    }
    System.out.println("Time to recursive radix: " + (System.nanoTime() - t0) / 1e9 ); t0 = System.nanoTime();
    System.out.println("Total groups found: " + nGroups);

    // We now have o and x that bmerge() needs

    long nrow = groupCols.numRows();

    // Allocate g as nrow/MAXVECLONG full batches plus one final batch of nrow % MAXVECLONG
    // elements (zero-length when nrow is an exact multiple of MAXVECLONG).
    long g[][] = new long[(int)(1 + nrow / MAXVECLONG)][];
    int c;  // kept outside the loop: after it, c indexes the final (remainder) batch
    for (c=0; c<nrow/MAXVECLONG; c++) {
      g[c] = new long[MAXVECLONG];
    }
    g[c] = new long[(int)(nrow % MAXVECLONG)];
    // Submit one assignG task per non-empty bucket, passing that bucket's count
    // (nGroup[i+1]-nGroup[i]) and starting offset (nGroup[i]), then wait for all of them.
    fs = new Futures();
    for (int i=0; i<256; i++) {
      if (totalHist[i] > 0)
        fs.add(H2O.submitTask(new assignG(g, groups[i], nGroup[i+1]-nGroup[i], nGroup[i], o[i])));
      // reuse the x vector we allocated before to store the group numbers.  i.e. a perfect and ordered hash, stored alongside table
    }
    fs.blockForPending();
    System.out.println("Time to assign group index (length nrows): " + (System.nanoTime() - t0) / 1e9 ); t0 = System.nanoTime();
    return g;

  }
}
*/