DMatrix.java example

Explorer
h2o-3-master
package hex;

import water.*;
import water.H2O.FJWThr;
import water.H2O.H2OCallback;
import water.H2O.H2OCountedCompleter;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.NewChunk.Value;
import water.fvec.Vec;
import water.util.ArrayUtils;
import water.util.Log;
import water.util.StringUtils;

import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Created by tomasnykodym on 11/13/14.
 *
 * Distributed matrix operations such as (sparse) multiplication and transpose.
 */
public class DMatrix  {

  /**
   * Transpose the Frame as if it was a matrix (i.e. rows become coumns).
   * Must be all numeric, currently will fail if there are too many rows ( >= ~.5M).
   * Result will be put into a new Vectro Group and will be balanced so that each vec will have
   * (4*num cpus in the cluster) chunks.
   *
   * @param src
   * @return
   */
  public static Frame transpose(Frame src){
    if(src.numRows() != (int)src.numRows())
      throw H2O.unimpl();
    int nchunks = Math.max(1,src.numCols()/10000);
    long [] espc = new long[nchunks+1];
    int rpc = (src.numCols() / nchunks);
    int rem = (src.numCols() % nchunks);
    Arrays.fill(espc, rpc);
    for (int i = 0; i < rem; ++i) ++espc[i];
    long sum = 0;
    for (int i = 0; i < espc.length; ++i) {
      long s = espc[i];
      espc[i] = sum;
      sum += s;
    }
    Key key = Vec.newKey();
    int rowLayout = Vec.ESPC.rowLayout(key,espc);
    return transpose(src, new Frame(new Vec(key,rowLayout).makeZeros((int)src.numRows())));
  }

  /**
   * Transpose the Frame as if it was a matrix (rows <-> columns).
   * Must be all numeric, will fail if there are too many rows ( >= ~.5M).
   *
   * Result is made to be compatible (i.e. the same vector group and chunking) with the target frame.
   *
   * @param src
   * @return
   */
  public static Frame transpose(Frame src, Frame tgt){
    if(src.numRows() != tgt.numCols() || src.numCols() != tgt.numRows())
      throw new IllegalArgumentException("dimension do not match!");
    for(Vec v:src.vecs()) {
      if (v.isCategorical())
        throw new IllegalArgumentException("transpose can only be applied to all-numeric frames (representing a matrix)");
      if(v.length() > 1000000)
        throw new IllegalArgumentException("too many rows, transpose only works for frames with < 1M rows.");
    }
    new TransposeTsk(tgt).doAll(src);
    return tgt;
  }

  /**
   * (MR)Task performing the matrix transpose.
   * It is to be applied to the source frame.
   * Target frame must be created up front (e.g. via Vec.makeZeros() call)
   * and passed in as an argument.
   *
   * Task will utilize sparsity and will preserve compression if possible
   * (compression may differ because of switching from column compressed to row-compressed form)
   */
  public static class TransposeTsk extends MRTask<TransposeTsk> {
    final Frame _tgt; // Target dataset, should be created up front, e.g. via Vec.makeZeros(n) call.
    public TransposeTsk(Frame tgt){ _tgt = tgt;}
    public void map(final Chunk[] chks) {
      final Frame tgt = _tgt;
      final long [] espc = tgt.anyVec().espc();
      final int colStart = (int)chks[0].start();
      for (int i = 0; i < espc.length - 1; ++i) {
        final int fi = i;
        final NewChunk[] tgtChunks = new NewChunk[chks[0]._len];
        for (int j = 0; j < tgtChunks.length; ++j)
          tgtChunks[j] = new NewChunk(tgt.vec(j + colStart), fi);
        for (int c = ((int) espc[fi]); c < (int) espc[fi + 1]; ++c) {
          Chunk nc = chks[c];
          if(nc.isSparseZero()) {
            for (int k = nc.nextNZ(-1); k < nc._len; k = nc.nextNZ(k)) {
              tgtChunks[k].addZeros((int) (c - espc[fi]) - tgtChunks[k]._len);
              nc.extractRows(tgtChunks[k], k);
            }
          } else
            for(int k = 0; k < nc._len; k++) {
              tgtChunks[k].addZeros((int) (c - espc[fi]) - tgtChunks[k]._len);
              nc.extractRows(tgtChunks[k], k);
            }
        }
        for (int j = 0; j < tgtChunks.length; ++j) { // finalize the target chunks and close them
          final int fj = j;
          tgtChunks[fj].addZeros((int) (espc[fi + 1] - espc[fi]) - tgtChunks[fj]._len);
          tgtChunks[fj].close(_fs);
          tgtChunks[fj] = null;
        }
      }
    }
  }


  /**
   * Info about matrix multiplication currently in progress.
   *
   * Contains runtime and (already computed)chunks stats
   *
   */
  public static class MatrixMulStats extends Iced {
    public final Key jobKey;
    public final long chunksTotal;
    public final long _startTime;
    public long lastUpdateAt;
    public long chunksDone;
    public long size;
    public int [] chunkTypes = new int[0];
    public long [] chunkCnts = new long[0];

    public MatrixMulStats(long n, Key jobKey){chunksTotal = n; _startTime = System.currentTimeMillis(); this.jobKey = jobKey;}

    public float progress(){ return (float)((double)chunksDone/chunksTotal);}
  }

  public static Frame mmul(Frame x, Frame y) {
    MatrixMulTsk t = new MatrixMulTsk(null,null,x,y);
    if(Thread.currentThread() instanceof FJWThr)
      t.fork().join();
    else
      H2O.submitTask(t).join();
    return t._z;
  }

  public static class MatrixMulTsk extends H2OCountedCompleter {
    final transient Frame _x;
    Frame _y;
    Frame _z;
    final Key _progressKey;
    AtomicInteger _cntr;
    public MatrixMulTsk(H2OCountedCompleter cmp, Key progressKey, Frame x, Frame y) {
      super(cmp);
      if(x.numCols() != y.numRows())
        throw new IllegalArgumentException("dimensions do not match! x.numcols = " + x.numCols() + ", y.numRows = " + y.numRows());
      _x = x;
      _y = y;
      _progressKey = progressKey;
    }

    @Override
    public void compute2() {
      _z = new Frame(_x.anyVec().makeZeros(_y.numCols()));
      int total_cores = H2O.CLOUD.size()*H2O.NUMCPUS;
      int chunksPerCol = _y.anyVec().nChunks();
      int maxP = 256*total_cores/chunksPerCol;
      Log.info("maxP = " + maxP);
      _cntr = new AtomicInteger(maxP-1);
      addToPendingCount(2*_y.numCols()-1);
      for(int i = 0; i < Math.min(_y.numCols(),maxP); ++i)
        forkVecTask(i);
    }

    private void forkVecTask(final int i) {
      new GetNonZerosTsk(new H2OCallback<GetNonZerosTsk>(this) {
        @Override
        public void callback(GetNonZerosTsk gnz) {
          new VecTsk(new Callback(), _progressKey, gnz._vals).dfork(ArrayUtils.append(_x.vecs(gnz._idxs), _z.vec(i)));
        }
      }).dfork(_y.vec(i));
    }
    private class Callback extends H2OCallback{
      public Callback(){super(MatrixMulTsk.this);}
      @Override
      public void callback(H2OCountedCompleter h2OCountedCompleter) {
        int i = _cntr.incrementAndGet();
        if(i < _y.numCols())
          forkVecTask(i);
      }
    }
  }
  static int cnt = 0;
  // to be invoked from R expression



  private static class GetNonZerosTsk extends MRTask<GetNonZerosTsk>{
    final int _maxsz;
    int     [] _idxs;
    double  [] _vals;
    public GetNonZerosTsk(H2OCountedCompleter cmp){super(cmp);_maxsz = 10000000;}
    public GetNonZerosTsk(H2OCountedCompleter cmp, int maxsz){super(cmp); _maxsz = maxsz;}

    @Override public void map(Chunk c){
      int istart = (int)c.start();
      assert (c.start() + c._len) == (istart + c._len);
      final int n = c.sparseLenZero();
      _idxs = MemoryManager.malloc4(n);
      _vals = MemoryManager.malloc8d(n);
      int j = 0;
      for(int i = c.nextNZ(-1); i < c._len; i = c.nextNZ(i),++j) {
        _idxs[j] = i + istart;
        _vals[j] = c.atd(i);
      }
      assert j == n;
      if(_idxs.length > _maxsz)
        throw new RuntimeException("too many nonzeros! found at least " + _idxs.length + " nonzeros.");
    }

    @Override public void reduce(GetNonZerosTsk gnz){
      if(_idxs.length + gnz._idxs.length > _maxsz)
        throw new RuntimeException("too many nonzeros! found at least " + (_idxs.length + gnz._idxs.length) + " nonzeros.");
      int [] idxs = MemoryManager.malloc4(_idxs.length + gnz._idxs.length);
      double [] vals = MemoryManager.malloc8d(_vals.length + gnz._vals.length);
      ArrayUtils.sortedMerge(_idxs,_vals,gnz._idxs,gnz._vals,idxs,vals);
      _idxs = idxs;
      _vals = vals;
    }
  }
  // compute single vec of the output in matrix multiply
  private static class VecTsk extends MRTask<VecTsk> {
    double [] _y;
    Key _progressKey;
    public VecTsk(H2OCountedCompleter cmp, Key progressKey, double [] y){
      super(cmp);
      _progressKey = progressKey;
      _y = y;
    }

    @Override public void setupLocal(){_fr.lastVec().preWriting();}
    @Override public void map(Chunk [] chks) {
      Chunk zChunk = chks[chks.length-1];
      double [] res = MemoryManager.malloc8d(chks[0]._len);
      for(int i = 0; i < _y.length; ++i) {
        final double yVal = _y[i];
        final Chunk xChunk = chks[i];
        for (int k = xChunk.nextNZ(-1); k < res.length; k = xChunk.nextNZ(k))
          try { res[k] += yVal * xChunk.atd(k);} catch(Throwable t) {
            t.printStackTrace();
            throw new RuntimeException(t);
          }
      }
      Chunk modChunk = new NewChunk(res).setSparseRatio(2).compress();
      if(_progressKey != null)
        new UpdateProgress(modChunk.getBytes().length,modChunk.frozenType()).fork(_progressKey);
      DKV.put(zChunk.vec().chunkKey(zChunk.cidx()),modChunk,_fs);
    }
    @Override public void closeLocal(){
      _y = null; // drop inputs
      _progressKey = null;
    }
  }

  private static class UpdateProgress extends TAtomic<MatrixMulStats> {
    final int _chunkSz;
    final int _chunkType;

    public UpdateProgress(int sz, int type) {
      _chunkSz = sz;
      _chunkType = type;
    }

    @Override
    public MatrixMulStats atomic(MatrixMulStats old) {
      old.chunkCnts = old.chunkCnts.clone();
      int j = -1;
      for(int i = 0; i < old.chunkTypes.length; ++i) {
        if(_chunkType == old.chunkTypes[i]) {
          j = i;
          break;
        }
      }
      if(j == -1) {
        old.chunkTypes = Arrays.copyOf(old.chunkTypes,old.chunkTypes.length+1);
        old.chunkCnts = Arrays.copyOf(old.chunkCnts,old.chunkCnts.length+1);
        old.chunkTypes[old.chunkTypes.length-1] = _chunkType;
        j = old.chunkTypes.length-1;
      }
      old.chunksDone++;
      old.chunkCnts[j]++;
      old.lastUpdateAt = System.currentTimeMillis();
      old.size += _chunkSz;
      return old;
    }
  }
}