package hex.gbm;

import hex.ConfusionMatrix;
import hex.VarImp;
import hex.VarImp.VarImpRI;
import hex.gbm.DTree.DecidedNode;
import hex.gbm.DTree.LeafNode;
import hex.gbm.DTree.Split;
import hex.gbm.DTree.TreeModel.TreeStats;
import hex.gbm.DTree.UndecidedNode;
import water.*;
import water.api.*;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.util.*;
import water.util.Log.Tag.Sys;

import java.util.Arrays;

import static water.util.ModelUtils.getPrediction;
import static water.util.Utils.div;

// Gradient Boosted Trees
//
// Based on "Elements of Statistical Learning, Second Edition", page 387
public class GBM extends SharedTreeModelBuilder<GBM.GBMModel> {
  static final int API_WEAVER = 1; // This file has auto-gen'd doc & json fields
  static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.

  @API(help = "Distribution for computing loss function. AUTO selects gaussian for continuous and multinomial for categorical response", filter = Default.class, json=true, importance=ParamImportance.CRITICAL)
  public Family family = Family.AUTO;

  @API(help = "Learning rate, from 0. to 1.0", filter = Default.class, dmin=0, dmax=1, json=true, importance=ParamImportance.SECONDARY)
  public double learn_rate = 0.1;

  @API(help = "Grid search parallelism", filter = Default.class, lmax = 4, gridable=false, importance=ParamImportance.SECONDARY)
  public int grid_parallelism = 1;

  @API(help = "Seed for the random number generator - only for balancing classes (autogenerated)", filter = Default.class)
  long seed = -1; // To follow R semantics, each call of GBM with imbalance should provide a different seed. -1 means the seed is autogenerated.

  @API(help = "Perform group splitting on categorical columns", filter=Default.class)
  public boolean group_split = true;

  /** Distribution functions */
  // Note: AUTO will select gaussian for continuous, and multinomial for categorical response
  // TODO: Replace with drop-down that displays different distributions depending on cont/cat response
  public enum Family { AUTO, bernoulli }

  /** Sum of variable empirical improvement in squared-error. The value is not scaled! */
  private transient float[/*nfeatures*/] _improvPerVar;
  public static class GBMModel extends DTree.TreeModel {
    static final int API_WEAVER = 1; // This file has auto-gen'd doc & json fields
    static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.

    @API(help = "Model parameters", json = true)
    final private GBM parameters;
    @Override public final GBM get_params() { return parameters; }
    @Override public final Request2 job() { return get_params(); }

    @API(help = "Learning rate, from 0. to 1.0")
    final double learn_rate;
    @API(help = "Distribution for computing loss function. AUTO selects gaussian for continuous and multinomial for categorical response")
    final Family family;
    @API(help = "Initially predicted value (for zero trees)")
    double initialPrediction;

    public GBMModel(GBM job, Key key, Key dataKey, Key testKey, String names[], String domains[][], String[] cmDomain, int ntrees, int max_depth, int min_rows, int nbins, double learn_rate, Family family, int num_folds, float[] priorClassDist, float[] classDist) {
      super(key,dataKey,testKey,names,domains,cmDomain,ntrees,max_depth,min_rows,nbins,num_folds,priorClassDist,classDist);
      this.parameters = Job.hygiene((GBM) job.clone());
      this.learn_rate = learn_rate;
      this.family = family;
    }
    private GBMModel(GBMModel prior, DTree[] trees, double err, ConfusionMatrix cm, TreeStats tstats) {
      super(prior, trees, err, cm, tstats);
      this.parameters = prior.parameters;
      this.learn_rate = prior.learn_rate;
      this.family = prior.family;
      this.initialPrediction = prior.initialPrediction;
    }
    private GBMModel(GBMModel prior, DTree[] trees, TreeStats tstats) {
      super(prior, trees, tstats);
      this.parameters = prior.parameters;
      this.learn_rate = prior.learn_rate;
      this.family = prior.family;
      this.initialPrediction = prior.initialPrediction;
    }
    private GBMModel(GBMModel prior, double err, ConfusionMatrix cm, VarImp varimp, AUCData validAUC) {
      super(prior, err, cm, varimp, validAUC);
      this.parameters = prior.parameters;
      this.learn_rate = prior.learn_rate;
      this.family = prior.family;
      this.initialPrediction = prior.initialPrediction;
    }
    private GBMModel(GBMModel prior, Key[][] treeKeys, double[] errs, ConfusionMatrix[] cms, TreeStats tstats, VarImp varimp, AUCData validAUC) {
      super(prior, treeKeys, errs, cms, tstats, varimp, validAUC);
      this.parameters = prior.parameters;
      this.learn_rate = prior.learn_rate;
      this.family = prior.family;
    }

    @Override protected TreeModelType getTreeModelType() { return TreeModelType.GBM; }

    @Override protected float[] score0(double[] data, float[] preds) {
      float[] p = super.score0(data, preds); // These are f_k(x) in Algorithm 10.4
      if( family == Family.bernoulli ) {
        double fx = p[1] + initialPrediction;
        p[2] = 1.0f/(float)(1f+Math.exp(-fx));
        p[1] = 1f-p[2];
        p[0] = getPrediction(p, data);
        return p;
      }
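      // Worked instance of the logistic link above (illustrative numbers only):
      // with initialPrediction = log(0.3/0.7) ~ -0.847 and a summed tree score
      // p[1] = 0, fx = -0.847, so p[2] = 1/(1+exp(0.847)) ~ 0.30 and
      // p[1] ~ 0.70 - i.e. before any trees correct it, scoring reproduces
      // the prior class frequencies.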
      if( nclasses() > 1 ) { // classification
        // Because we call Math.exp, we have to be numerically stable or else
        // we get Infinities, and then shortly NaN's.  Rescale the data so the
        // largest value is +/-1 and the other values are smaller.
        // See notes here: http://www.hongliangjie.com/2011/01/07/logsum/
        float maxval=Float.NEGATIVE_INFINITY;
        float dsum=0;
        if( nclasses()==2 ) p[2] = -p[1];
        // Find a max
        for( int k=1; k<p.length; k++ ) maxval = Math.max(maxval,p[k]);
        assert !Float.isInfinite(maxval) : "Something is wrong with GBM trees since returned prediction is " + Arrays.toString(p);
        for( int k=1; k<p.length; k++ ) dsum += (p[k]=(float)Math.exp(p[k]-maxval));
        div(p,dsum);
        p[0] = getPrediction(p, data);
      } else { // regression
        // Prediction starts from the mean response, and adds predicted residuals
        preds[0] += initialPrediction;
      }
      return p;
    }

    @Override protected void generateModelDescription(StringBuilder sb) {
      DocGen.HTML.paragraph(sb,"Learn rate: "+learn_rate);
    }

    @Override protected void toJavaUnifyPreds(SB bodyCtxSB) {
      if( family == Family.bernoulli ) {
        bodyCtxSB.i().p("// Compute Probabilities for Bernoulli 0-1 classifier").nl();
        bodyCtxSB.i().p("double fx = preds[1] + "+initialPrediction+";").nl();
        bodyCtxSB.i().p("preds[2] = 1.0f/(float)(1.0f+Math.exp(-fx));").nl();
        bodyCtxSB.i().p("preds[1] = 1.0f-preds[2];").nl();
      } else if( isClassifier() ) {
        bodyCtxSB.i().p("// Compute Probabilities for classifier (scale via http://www.hongliangjie.com/2011/01/07/logsum/)").nl();
        bodyCtxSB.i().p("float dsum = 0, maxval = Float.NEGATIVE_INFINITY;").nl();
        if( nclasses()==2 ) {
          bodyCtxSB.i().p("preds[2] = -preds[1];").nl();
        }
        bodyCtxSB.i().p("for(int i=1; i<preds.length; i++) maxval = Math.max(maxval, preds[i]);").nl();
        bodyCtxSB.i().p("for(int i=1; i<preds.length; i++) dsum += (preds[i]=(float) Math.exp(preds[i] - maxval));").nl();
        bodyCtxSB.i().p("for(int i=1; i<preds.length; i++) preds[i] = preds[i] / dsum;").nl();
      } else {
        bodyCtxSB.i().p("// Compute Regression").nl();
        bodyCtxSB.i().p("preds[1] += "+initialPrediction+";").nl();
      }
    }
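    // A minimal sketch (hypothetical helper, not referenced by the model) of
    // the rescaling trick used in score0 and toJavaUnifyPreds above:
    // subtracting the per-row max before calling Math.exp keeps every
    // exponential finite, and the common factor exp(-max) cancels once each
    // term is divided by the sum, so the resulting distribution is unchanged.
    // E.g. raw scores {100, 101} overflow exp() directly, but after the shift
    // they become exp(-1)/(exp(-1)+exp(0)) ~ 0.269 and 0.731.
    private static void stableSoftmax(float[] ps) {
      float max = Float.NEGATIVE_INFINITY;
      for( float p : ps ) max = Math.max(max, p); // find the largest score
      float sum = 0;
      for( int i = 0; i < ps.length; i++ )        // shift, exponentiate, accumulate
        sum += (ps[i] = (float) Math.exp(ps[i] - max));
      for( int i = 0; i < ps.length; i++ )        // normalize to probabilities
        ps[i] /= sum;
    }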
    @Override protected void setCrossValidationError(ValidatedJob job, double cv_error, water.api.ConfusionMatrix cm, AUCData auc, HitRatio hr) {
      GBMModel gbmm = ((GBM)job).makeModel(this, cv_error, cm.cm == null ? null : new ConfusionMatrix(cm.cm, cms[0].nclasses()), this.varimp, auc);
      gbmm._have_cv_results = true;
      DKV.put(this._key, gbmm); // overwrite this model
    }
  }

  public Frame score( Frame fr ) { return ((GBMModel)UKV.get(dest())).score(fr); }

  @Override protected Log.Tag.Sys logTag() { return Sys.GBM__; }

  @Override protected GBMModel makeModel(Key outputKey, Key dataKey, Key testKey, int ntrees, String[] names, String[][] domains, String[] cmDomain, float[] priorClassDist, float[] classDist) {
    return new GBMModel(this, outputKey, dataKey, validation==null?null:testKey, names, domains, cmDomain, ntrees, max_depth, min_rows, nbins, learn_rate, family, n_folds, priorClassDist, classDist);
  }
  @Override protected GBMModel makeModel( GBMModel model, double err, ConfusionMatrix cm, VarImp varimp, AUCData validAUC) {
    return new GBMModel(model, err, cm, varimp, validAUC);
  }
  @Override protected GBMModel makeModel(GBMModel model, DTree[] ktrees, TreeStats tstats) {
    return new GBMModel(model, ktrees, tstats);
  }

  public GBM() { description = "Distributed GBM"; importance = true; }

  @Override protected GBMModel updateModel(GBMModel additionModel, GBMModel checkpoint, boolean overwriteCheckpoint) {
    // Do not forget to clone the trees in case we are not going to overwrite the checkpoint
    Key[][] treeKeys = null;
    if (!overwriteCheckpoint) throw H2O.unimpl("Cloning of tree models is not implemented yet!");
    else treeKeys = checkpoint.treeKeys;
    return new GBMModel(additionModel, treeKeys, checkpoint.errs, checkpoint.cms, checkpoint.treeStats, checkpoint.varimp, checkpoint.validAUC);
  }

  /** Return the query link to this page */
  public static String link(Key k, String content) {
    RString rs = new RString("<a href='/2/GBM.query?source=%$key'>%content</a>");
    rs.replace("key", k.toString());
    rs.replace("content", content);
    return rs.toString();
  }

  @Override protected void execImpl() {
    try {
      logStart();
      buildModel(seed);
      if (n_folds > 0) CrossValUtils.crossValidate(this);
    } finally {
      remove(); // Remove the Job
      state = UKV.<Job>get(self()).state;
      new TAtomic<GBMModel>() {
        @Override public GBMModel atomic(GBMModel m) {
          if (m != null) m.get_params().state = state;
          return m;
        }
      }.invoke(dest());
    }
  }

  @Override public int gridParallelism() { return grid_parallelism; }

  @Override protected Response redirect() { return GBMProgressPage.redirect(this, self(), dest()); }

  @Override protected void initAlgo( GBMModel initialModel) {
    // Initialize gbm-specific data structures
    if (importance) _improvPerVar = new float[initialModel.nfeatures()];
    // assert (family != Family.bernoulli) || (_nclass == 2) : "Bernoulli requires the response to be a 2-class categorical";
    if( family == Family.bernoulli && _nclass != 2 )
      throw new IllegalArgumentException("Bernoulli requires the response to be a 2-class categorical");
  }

  @Override protected void initWorkFrame(GBMModel initialModel, Frame fr) {
    // Tag out rows missing the response column
    new ExcludeNAResponse().doAll(fr);
    // Initial value is mean(y)
    final double mean = (float) fr.vec(initialModel.responseName()).mean();
    // Initialize the working response based on the given loss function
    if( _nclass == 1 ) { /* regression */
      initialModel.initialPrediction = mean; // Regression initially predicts the response mean
      new MRTask2() {
        @Override public void map(Chunk[] chks) {
          Chunk tr = chk_tree(chks, 0); // there is only one tree for regression
          for (int i=0; i<tr._len; i++) tr.set0(i, mean);
        }
      }.doAll(fr);
    } else if( family == Family.bernoulli ) {
      // Initial value is log( mean(y)/(1-mean(y)) )
      final float init = (float) Math.log(mean/(1.0f-mean));
      initialModel.initialPrediction = init;
      new MRTask2() {
        @Override public void map(Chunk[] chks) {
          Chunk tr = chk_tree(chks, 0); // only the tree for y = 0 is used
          for (int i=0; i<tr._len; i++) tr.set0(i, init);
        }
      }.doAll(fr);
    } else { /* multinomial */
      /* Preserve 0s in working columns */
    }
    // Update tree fields based on the checkpoint
    if (checkpoint!=null) {
      Timer t = new Timer();
      new ResidualsCollector(_ncols, _nclass, initialModel.treeKeys).doAll(fr);
      Log.info(logTag(), "Reconstructing tree residual stats from checkpointed model took " + t);
    }
  }
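  // Hedged sketch (hypothetical helper, not part of the training path) of the
  // bernoulli starting value computed in initWorkFrame above: the mean
  // response is mapped to log-odds so that the logistic link in score0 maps
  // it straight back.  E.g. mean(y) = 0.25 gives log(0.25/0.75) ~ -1.0986,
  // and 1/(1+exp(1.0986)) ~ 0.25 recovers the mean.
  private static float initialLogOdds(double meanResponse) {
    return (float) Math.log(meanResponse / (1.0 - meanResponse));
  }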
  // ==========================================================================
  // Compute a GBM tree.
  //
  // Start by splitting all the data according to some criteria (minimize
  // variance at the leaves).  Record on each row which split it goes to, and
  // assign a split number to it (for next pass).  On *this* pass, use the
  // split-number to build a per-split histogram, with a per-histogram-bucket
  // variance.
  @Override protected GBMModel buildModel( GBMModel model, final Frame fr, String names[], String domains[][], Timer t_build ) {
    // Build trees until we hit the limit
    int tid;
    DTree[] ktrees = null; // Trees
    TreeStats tstats = model.treeStats!=null ? model.treeStats : new TreeStats();
    for( tid=0; tid<ntrees; tid++) {
      // During the first iteration the model contains 0 trees, then 1 tree, ...
      // BUT if validation is not specified, the model does not participate in
      // scoring; on-the-fly computed data are used instead.
      if (tid!=0 || checkpoint==null) { // do not make an initial scoring if the model already exists
        model = doScoring(model, fr, ktrees, tid, tstats, false, false, false);
      }
      // ESL2, page 387
      // Step 2a: Compute prediction (prob distribution) from prior tree results:
      //   Work <== f(Tree)
      new ComputeProb().doAll(fr);

      // ESL2, page 387
      // Step 2b i: Compute residuals from the prediction (probability distribution)
      //   Work <== f(Work)
      new ComputeRes().doAll(fr);

      // ESL2, page 387, Step 2b ii, iii, iv
      Timer kb_timer = new Timer();
      ktrees = buildNextKTrees(fr);
      Log.info(Sys.GBM__, (tid+1) + ". tree was built in " + kb_timer.toString());
      if( !Job.isRunning(self()) ) break; // If canceled during building, do not bulkscore

      // Check latest predictions
      tstats.updateBy(ktrees);
    }
    // Final scoring (skip if the job was cancelled)
    if (Job.isRunning(self())) {
      model = doScoring(model, fr, ktrees, tid, tstats, true, false, false);
    }
    return model;
  }

  // --------------------------------------------------------------------------
  // Tag out rows missing the response column
  class ExcludeNAResponse extends MRTask2<ExcludeNAResponse> {
    @Override public void map( Chunk chks[] ) {
      Chunk ys = chk_resp(chks);
      for( int row=0; row<ys._len; row++ )
        if( ys.isNA0(row) )
          for( int t=0; t<_nclass; t++ )
            chk_nids(chks,t).set0(row,-1);
    }
  }
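  // Hedged sketch of the per-row bookkeeping used by the MRTasks below
  // (column access goes through the chk_* helpers; the exact layout lives in
  // SharedTreeModelBuilder): for each class k, a 'tree' column holds the
  // summed tree scores f_k(x), a 'work' column holds scratch
  // probabilities/residuals, and a 'nids' column holds the row's current
  // node-id in the k-th tree (-1, as set by ExcludeNAResponse above, marks a
  // missing response).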
  // --------------------------------------------------------------------------
  // Compute Prediction from prior tree results.
  // Classification (multinomial): Probability distribution of log-likelihoods
  //   Prob_k = exp(Work_k)/sum_all_K exp(Work_k)
  // Classification (bernoulli): Probability of y = 1 given the logit link function
  //   Prob_0 = 1/(1 + exp(Work)), Prob_1 = 1/(1 + exp(-Work))
  // Regression: Just prior tree results
  // Work <== f(Tree)
  class ComputeProb extends MRTask2<ComputeProb> {
    @Override public void map( Chunk chks[] ) {
      Chunk ys = chk_resp(chks);
      if( family == Family.bernoulli ) {
        Chunk tr = chk_tree(chks,0);
        Chunk wk = chk_work(chks,0);
        for( int row = 0; row < ys._len; row++)
          // wk.set0(row, 1.0f/(1f+Math.exp(-tr.at0(row))) ); // Prob_1
          wk.set0(row, 1.0f/(1f+Math.exp(tr.at0(row))) );     // Prob_0
      } else if( _nclass > 1 ) { // Classification
        float fs[] = new float[_nclass+1];
        for( int row=0; row<ys._len; row++ ) {
          float sum = score1(chks,fs,row);
          if( Float.isInfinite(sum) ) // Overflow (happens for constant responses)
            for( int k=0; k<_nclass; k++ )
              chk_work(chks,k).set0(row,Float.isInfinite(fs[k+1])?1.0f:0.0f);
          else
            for( int k=0; k<_nclass; k++ ) // Save as a probability distribution
              chk_work(chks,k).set0(row,fs[k+1]/sum);
        }
      } else { // Regression
        Chunk tr = chk_tree(chks,0); // Prior tree sums
        Chunk wk = chk_work(chks,0); // Predictions
        for( int row=0; row<ys._len; row++ )
          wk.set0(row,(float)tr.at0(row));
      }
    }
  }
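  // Illustrative check of the bernoulli identity used above: storing
  // Prob_0 = 1/(1+exp(f)) is the same as 1 - Prob_1, since
  // 1 - 1/(1+exp(-f)) = exp(-f)/(1+exp(-f)) = 1/(exp(f)+1).  For f = 0 both
  // probabilities are 0.5; for f = 2, Prob_1 ~ 0.881 and Prob_0 ~ 0.119.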
  // Read the 'tree' columns, do model-specific math and put the results in the
  // fs[] array, and return the sum.  Dividing any fs[] element by the sum
  // turns the results into a probability distribution.
  @Override protected float score1( Chunk chks[], float fs[/*nclass*/], int row ) {
    if( family == Family.bernoulli ) {
      fs[1] = 1.0f/(float)(1f+Math.exp(chk_tree(chks,0).at0(row)));
      fs[2] = 1f-fs[1];
      return fs[1]+fs[2];
    }
    if( _nclass == 1 ) // Regression?
      return fs[0]=(float)chk_tree(chks,0).at0(row);
    if( _nclass == 2 ) { // The Boolean Optimization
      // This optimization assumes the 2nd tree of a 2-class system is the
      // inverse of the first.  Fill in the missing tree
      fs[1] = (float)Math.exp(chk_tree(chks,0).at0(row));
      fs[2] = 1.0f/fs[1]; // exp(-d) == 1/exp(d)
      return fs[1]+fs[2];
    }
    float sum=0;
    for( int k=0; k<_nclass; k++ ) // Sum across likelihoods
      sum += (fs[k+1]=(float)Math.exp(chk_tree(chks,k).at0(row)));
    return sum;
  }

  // --------------------------------------------------------------------------
  // Compute Residuals from Actuals
  // Work <== f(Work)
  class ComputeRes extends MRTask2<ComputeRes> {
    @Override public void map( Chunk chks[] ) {
      Chunk ys = chk_resp(chks);
      if( family == Family.bernoulli ) {
        for(int row = 0; row < ys._len; row++) {
          if( ys.isNA0(row) ) continue;
          int y = (int)ys.at80(row); // zero-based response variable
          Chunk wk = chk_work(chks,0);
          // wk.set0(row, y-(float)wk.at0(row)); // wk.at0(row) is Prob_1
          wk.set0(row, y-1f+(float)wk.at0(row)); // wk.at0(row) is Prob_0
        }
      } else if( _nclass > 1 ) { // Classification
        for( int row=0; row<ys._len; row++ ) {
          if( ys.isNA0(row) ) continue;
          int y = (int)ys.at80(row); // zero-based response variable
          // Actual is '1' for class 'y' and '0' for all other classes
          for( int k=0; k<_nclass; k++ ) {
            if( _distribution[k] != 0 ) {
              Chunk wk = chk_work(chks,k);
              wk.set0(row, (y==k?1f:0f)-(float)wk.at0(row) );
            }
          }
        }
      } else { // Regression
        Chunk wk = chk_work(chks,0); // Prediction==>Residuals
        for( int row=0; row<ys._len; row++ )
          wk.set0(row, (float)(ys.at0(row)-wk.at0(row)) );
      }
    }
  }
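  // A minimal sketch (hypothetical helper, not used by ComputeRes) of the
  // multinomial pseudo-residual computed above: the "actual" is a one-hot
  // indicator for the observed class, so for y = 2 with current class
  // probabilities {0.2, 0.3, 0.5} the per-class residuals are
  // {-0.2, -0.3, +0.5}.
  private static float pseudoResidual(int y, int k, float prob) {
    return (y == k ? 1f : 0f) - prob; // indicator minus predicted probability
  }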
  // --------------------------------------------------------------------------
  // Build the next k-trees, which try to correct the residual error from
  // the prior trees.  From ESL2, page 387.  Step 2b ii, iii.
  private DTree[] buildNextKTrees(Frame fr) {
    // We're going to build K (nclass) trees - each focused on correcting
    // errors for a single class.
    final DTree[] ktrees = new DTree[_nclass];

    // Initial set of histograms.  All trees; one leaf per tree (the root
    // leaf); all columns
    DHistogram hcs[][][] = new DHistogram[_nclass][1/*just root leaf*/][_ncols];

    // Adjust nbins for the top-levels
    int adj_nbins = Math.max((1<<(10-0)),nbins);

    for( int k=0; k<_nclass; k++ ) {
      // Initially setup as-if an empty-split had just happened
      if( _distribution == null || _distribution[k] != 0 ) {
        // The Boolean Optimization
        // This optimization assumes the 2nd tree of a 2-class system is the
        // inverse of the first.  This is false for DRF (and true for GBM) -
        // DRF picks a random different set of columns for the 2nd tree.
        if( k==1 && _nclass==2 ) continue;
        ktrees[k] = new DTree(fr._names,_ncols,(char)nbins,(char)_nclass,min_rows);
        new GBMUndecidedNode(ktrees[k],-1,DHistogram.initialHist(fr,_ncols,adj_nbins,hcs[k][0],min_rows,group_split,false) ); // The "root" node
      }
    }
    int[] leafs = new int[_nclass]; // Define a "working set" of leaf splits, from here to tree._len

    // ----
    // ESL2, page 387.  Step 2b ii.
    // One Big Loop till the ktrees are of proper depth.
    // Adds a layer to the trees each pass.
    int depth=0;
    for( ; depth<max_depth; depth++ ) {
      if( !Job.isRunning(self()) ) return null;

      hcs = buildLayer(fr, ktrees, leafs, hcs, false, false);

      // If we did not make any new splits, then the tree is split-to-death
      if( hcs == null ) break;
    }

    // Each tree bottomed-out in a DecidedNode; go 1 more level and insert
    // LeafNodes to hold predictions.
    for( int k=0; k<_nclass; k++ ) {
      DTree tree = ktrees[k];
      if( tree == null ) continue;
      int leaf = leafs[k] = tree.len();
      for( int nid=0; nid<leaf; nid++ ) {
        if( tree.node(nid) instanceof DecidedNode ) {
          DecidedNode dn = tree.decided(nid);
          for( int i=0; i<dn._nids.length; i++ ) {
            int cnid = dn._nids[i];
            if( cnid == -1 || // Bottomed out (predictors or responses known constant)
                tree.node(cnid) instanceof UndecidedNode || // Or chopped off for depth
                (tree.node(cnid) instanceof DecidedNode &&  // Or not possible to split
                 ((DecidedNode)tree.node(cnid))._split.col()==-1) )
              dn._nids[i] = new GBMLeafNode(tree,nid).nid(); // Mark a leaf here
          }
          // Handle the trivial non-splitting tree
          if( nid==0 && dn._split.col() == -1 )
            new GBMLeafNode(tree,-1,0);
        }
      }
    } // -- k-trees are done

    // ----
    // ESL2, page 387.  Step 2b iii.  Compute the gammas, and store them back
    // into the tree leaves.  Includes the learn_rate.
    // For classification (bernoulli):
    //   gamma_i = sum res_i / sum p_i*(1 - p_i) where p_i = y_i - res_i
    // For classification (multinomial):
    //   gamma_i_k = (nclass-1)/nclass * (sum res_i / sum (|res_i|*(1-|res_i|)))
    // For regression (gaussian):
    //   gamma_i = sum res_i / count(res_i)
    GammaPass gp = new GammaPass(ktrees,leafs).doAll(fr);
    double m1class = _nclass > 1 && family != Family.bernoulli ? (double)(_nclass-1)/_nclass : 1.0; // (K-1)/K for multinomial
    for( int k=0; k<_nclass; k++ ) {
      final DTree tree = ktrees[k];
      if( tree == null ) continue;
      for( int i=0; i<tree._len-leafs[k]; i++ ) {
        double g = gp._gss[k][i] == 0 // Constant response?
          ? (gp._rss[k][i]==0?0:1000) // Cap the (exponential) learn, instead of dealing with Inf
          : learn_rate*m1class*gp._rss[k][i]/gp._gss[k][i];
        assert !Double.isNaN(g);
        ((LeafNode)tree.node(leafs[k]+i))._pred = g;
      }
    }

    // ----
    // ESL2, page 387.  Step 2b iv.  Cache the sum of all the trees, plus the
    // new tree, in the 'tree' columns.  Also, zap the NIDs for the next pass.
    // Tree <== f(Tree)
    // Nids <== 0
    new MRTask2() {
      @Override public void map( Chunk chks[] ) {
        // For all tree/klasses
        for( int k=0; k<_nclass; k++ ) {
          final DTree tree = ktrees[k];
          if( tree == null ) continue;
          final Chunk nids = chk_nids(chks,k);
          final Chunk ct   = chk_tree(chks,k);
          for( int row=0; row<nids._len; row++ ) {
            int nid = (int)nids.at80(row);
            if( nid < 0 ) continue;
            // The prediction stored in the Leaf is cut to float to be deterministic
            // when reconstructing <tree_klazz> fields from the tree prediction
            ct.set0(row, (float)(ct.at0(row) + (float) ((LeafNode)tree.node(nid))._pred));
            nids.set0(row,0);
          }
        }
      }
    }.doAll(fr);

    // Collect leaf stats
    for (int i=0; i<ktrees.length; i++)
      if( ktrees[i] != null )
        ktrees[i].leaves = ktrees[i].len() - leafs[i];

    // DEBUG: Print the generated K trees
    // printGenerateTrees(ktrees);

    return ktrees;
  }
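  // Worked instance of the gamma step above (illustrative numbers only): for
  // a regression leaf holding residuals {2.0, -1.0, 5.0}, the GammaPass sums
  // give rs = 6.0 and gs = 3.0 (one count per row), so the raw gamma is
  // 6.0/3.0 = 2.0 and the stored leaf prediction is learn_rate * 2.0.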
  // ---
  // ESL2, page 387.  Step 2b iii.
  // Nids <== f(Nids)
  private class GammaPass extends MRTask2<GammaPass> {
    final DTree _trees[]; // Read-only, shared (except at the histograms in the Nodes)
    final int   _leafs[]; // Number of active leaves (per tree)
    // Per leaf: sum(res);
    double _rss[/*tree/klass*/][/*tree-relative node-id*/];
    // Per leaf: multinomial: sum(|res|*(1-|res|)), gaussian: sum(1), bernoulli: sum((y-res)*(1-y+res))
    double _gss[/*tree/klass*/][/*tree-relative node-id*/];
    GammaPass(DTree trees[], int leafs[]) { _leafs=leafs; _trees=trees; }
    @Override public void map( Chunk[] chks ) {
      _gss = new double[_nclass][];
      _rss = new double[_nclass][];
      final Chunk resp = chk_resp(chks); // Response for this frame
      // For all tree/klasses
      for( int k=0; k<_nclass; k++ ) {
        final DTree tree = _trees[k];
        final int   leaf = _leafs[k];
        if( tree == null ) continue; // Empty class is ignored
        // A leaf-biased array of all active Tree leaves.
        final double gs[] = _gss[k] = new double[tree._len-leaf];
        final double rs[] = _rss[k] = new double[tree._len-leaf];
        final Chunk nids = chk_nids(chks,k); // Node-ids  for this tree/class
        final Chunk ress = chk_work(chks,k); // Residuals for this tree/class
        // If we have all constant responses, then we do not split even the
        // root and the residuals should be zero.
        if( tree.root() instanceof LeafNode ) continue;
        for( int row=0; row<nids._len; row++ ) { // For all rows
          int nid = (int)nids.at80(row);         // Get Node to decide from
          if( nid < 0 ) continue;                // Missing response
          if( tree.node(nid) instanceof UndecidedNode ) // If we bottomed out the tree
            nid = tree.node(nid)._pid;                  // Then take the parent's decision
          DecidedNode dn = tree.decided(nid);           // Must have a decision point
          if( dn._split._col == -1 )                    // Unable to decide?
            dn = tree.decided(nid = dn._pid);           // Then take the parent's decision
          int leafnid = dn.ns(chks,row); // Decide down to a leafnode
          assert leaf <= leafnid && leafnid < tree._len;
          assert tree.node(leafnid) instanceof LeafNode;
          // Note: I can see which leaf/region I end up in, but I do not care for
          // the prediction presented by the tree.  For GBM, we compute the
          // sum-of-residuals (and sum/abs/mult residuals) for all rows in the
          // leaf, and get our prediction from that.
          nids.set0(row,leafnid);
          assert !ress.isNA0(row);
          // Compute the numerator (rs) and denominator (gs) of gamma
          double res = ress.at0(row);
          double ares = Math.abs(res);
          if( family == Family.bernoulli ) {
            double prob = resp.at0(row) - res;
            gs[leafnid-leaf] += prob*(1-prob);
          } else
            gs[leafnid-leaf] += _nclass > 1 ? ares*(1-ares) : 1;
          rs[leafnid-leaf] += res;
        }
      }
    }
    @Override public void reduce( GammaPass gp ) {
      Utils.add(_gss,gp._gss);
      Utils.add(_rss,gp._rss);
    }
  }

  @Override protected DecidedNode makeDecided( UndecidedNode udn, DHistogram hs[] ) {
    return new GBMDecidedNode(udn,hs);
  }
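  // Hedged numeric check of the bernoulli denominator in GammaPass above:
  // since residuals are res = y - p, the predicted probability is recovered
  // as p = y - res, and p*(1-p) is the usual Newton-step weight.  E.g. y = 1,
  // res = 0.3 gives p = 0.7 and a per-row contribution of 0.21 to gs.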
  // ---
  // GBM DTree decision node: same as the normal DecidedNode, but
  // specifies a decision algorithm given complete histograms on all
  // columns.  GBM algo: find the lowest error amongst *all* columns.
  static class GBMDecidedNode extends DecidedNode {
    GBMDecidedNode( UndecidedNode n, DHistogram[] hs ) { super(n,hs); }
    @Override public UndecidedNode makeUndecidedNode(DHistogram[] hs ) {
      return new GBMUndecidedNode(_tree,_nid,hs);
    }

    // Find the column with the best split (lowest score).  Unlike RF, GBM
    // scores on all columns and selects splits on all columns.
    @Override public DTree.Split bestCol( UndecidedNode u, DHistogram[] hs ) {
      DTree.Split best = new DTree.Split(-1,-1,null,(byte)0,Double.MAX_VALUE,Double.MAX_VALUE,0L,0L,0,0);
      if( hs == null ) return best;
      for( int i=0; i<hs.length; i++ ) {
        if( hs[i]==null || hs[i].nbins() <= 1 ) continue;
        DTree.Split s = hs[i].scoreMSE(i);
        if( s == null ) continue;
        if( best == null || s.se() < best.se() ) best = s;
        if( s.se() <= 0 ) break; // No point in looking further!
      }
      return best;
    }
  }

  // ---
  // GBM DTree undecided node: same as the normal UndecidedNode, but specifies
  // a list of columns to score on now, and then decide over later.
  // GBM algo: use all columns
  static class GBMUndecidedNode extends UndecidedNode {
    GBMUndecidedNode( DTree tree, int pid, DHistogram hs[] ) { super(tree,pid,hs); }
    // Select the columns to 'score' in the following pass over the data.
    // In GBM, we use all columns (as opposed to RF, which uses a random subset).
    @Override public int[] scoreCols( DHistogram[] hs ) { return null; }
  }

  // ---
  static class GBMLeafNode extends LeafNode {
    GBMLeafNode( DTree tree, int pid ) { super(tree,pid); }
    GBMLeafNode( DTree tree, int pid, int nid ) { super(tree,pid,nid); }
    // Insert just the predictions: a single byte/short if we are predicting a
    // single class, or else the full distribution.
    @Override protected AutoBuffer compress(AutoBuffer ab) {
      assert !Double.isNaN(_pred);
      return ab.put4f((float)_pred);
    }
    @Override protected int size() { return 4; }
  }

  /** Compute relative variable importance for a GBM model.
   *
   * See formulas (45) and (35) in Friedman: "Greedy Function Approximation: A Gradient Boosting Machine".
   * The algorithm used here can also compute individual importance of features per output class.
   */
  @Override protected VarImp doVarImpCalc(GBMModel model, DTree[] ktrees, int tid, Frame validationFrame, boolean scale) {
    assert model.ntrees()-1-_ntreesFromCheckpoint == tid : "varimp computation expects a model with already serialized trees: tid="+tid;
    // Iterate over the k-trees
    for (DTree t : ktrees) {
      if (t!=null) {
        for (int n = 0; n < t.len()-t.leaves; n++)
          if (t.node(n) instanceof DecidedNode) { // it is a split node
            Split split = t.decided(n)._split;
            if (split.col()!=-1) // Skip impossible splits ~ leaves
              _improvPerVar[split.col()] += split.improvement(); // least-squares improvement
          }
      }
    }
    // Compute variable importance for all trees in the model
    float[] varimp = new float[model.nfeatures()];
    int ntreesTotal = model.ntrees() * model.nclasses();
    int maxVar = 0;
    for (int var=0; var<_improvPerVar.length; var++) {
      varimp[var] = _improvPerVar[var] / ntreesTotal;
      if (varimp[var] > varimp[maxVar]) maxVar = var;
    }
    // Scale varimp to 0..1 relative to the most important variable
    if (scale) {
      float maxVal = varimp[maxVar];
      for (int var=0; var<varimp.length; var++) varimp[var] /= maxVal;
    }
    return new VarImpRI(varimp);
  }
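  // A minimal sketch (hypothetical helper, mirroring the scaling above) of
  // relative importance: dividing by the largest raw importance makes the top
  // variable 1.0 and expresses the rest as fractions of it, e.g.
  // {12, 3, 6} -> {1.0, 0.25, 0.5}.
  private static float[] relativeImportance(float[] raw) {
    float max = 0;
    for( float v : raw ) max = Math.max(max, v); // largest raw importance
    float[] rel = raw.clone();
    if( max > 0 )
      for( int i = 0; i < rel.length; i++ ) rel[i] /= max;
    return rel;
  }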
  /**
   * Cross-validate a GBM model by building new models on N train/test holdout splits
   * @param splits Frames containing train/test splits
   * @param cv_preds Array of Frames to store the predictions for each cross-validation run
   * @param offsets Array to store the offsets of starting row indices for each cross-validation run
   * @param i Which fold of cross-validation to perform
   */
  @Override public void crossValidate(Frame[] splits, Frame[] cv_preds, long[] offsets, int i) {
    // Train a clone with slightly modified parameters (to account for cross-validation)
    GBM cv = (GBM) this.clone();
    cv.genericCrossValidation(splits, offsets, i);
    cv_preds[i] = ((GBMModel) UKV.get(cv.dest())).score(cv.validation);
  }
}