package hex; import hex.genmodel.GenModel; import water.MRTask; import water.Scope; import water.exceptions.H2OIllegalArgumentException; import water.fvec.Chunk; import water.fvec.Frame; import water.fvec.Vec; import water.util.ArrayUtils; import water.util.MathUtils; import java.util.Arrays; public class ModelMetricsBinomial extends ModelMetricsSupervised { public final AUC2 _auc; public final double _logloss; public final double _mean_per_class_error; public final GainsLift _gainsLift; public ModelMetricsBinomial(Model model, Frame frame, long nobs, double mse, String[] domain, double sigma, AUC2 auc, double logloss, GainsLift gainsLift) { super(model, frame, nobs, mse, domain, sigma); _auc = auc; _logloss = logloss; _gainsLift = gainsLift; _mean_per_class_error = cm() == null ? Double.NaN : cm().mean_per_class_error(); } public static ModelMetricsBinomial getFromDKV(Model model, Frame frame) { ModelMetrics mm = ModelMetrics.getFromDKV(model, frame); if( !(mm instanceof ModelMetricsBinomial) ) throw new H2OIllegalArgumentException("Expected to find a Binomial ModelMetrics for model: " + model._key.toString() + " and frame: " + frame._key.toString(), "Expected to find a ModelMetricsBinomial for model: " + model._key.toString() + " and frame: " + frame._key.toString() + " but found a: " + (mm == null ? null : mm.getClass())); return (ModelMetricsBinomial) mm; } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append(super.toString()); if (_auc != null) sb.append(" AUC: " + (float)_auc._auc + "\n"); sb.append(" logloss: " + (float)_logloss + "\n"); sb.append(" mean_per_class_error: " + (float)_mean_per_class_error + "\n"); sb.append(" default threshold: " + (_auc == null ? 0.5 : (float)_auc.defaultThreshold()) + "\n"); if (cm() != null) sb.append(" CM: " + cm().toASCII()); if (_gainsLift != null) sb.append(_gainsLift); return sb.toString(); } public double logloss() { return _logloss; } public double mean_per_class_error() { return _mean_per_class_error; } @Override public AUC2 auc_obj() { return _auc; } @Override public ConfusionMatrix cm() { if( _auc == null ) return null; double[][] cm = _auc.defaultCM(); return cm == null ? null : new ConfusionMatrix(cm, _domain); } public GainsLift gainsLift() { return _gainsLift; } // expose simple metrics criteria for sorting public double auc() { return auc_obj()._auc; } public double lift_top_group() { return gainsLift().response_rates[0] / gainsLift().avg_response_rate; } /** * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class) * @param targetClassProbs A Vec containing target class probabilities * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data) * @return ModelMetrics object */ static public ModelMetricsBinomial make(Vec targetClassProbs, Vec actualLabels) { return make(targetClassProbs,actualLabels,actualLabels.domain()); } /** * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class) * @param targetClassProbs A Vec containing target class probabilities * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data) * @param domain The two class labels (domain[0] is the non-target class, domain[1] is the target class, for which probabilities are given) * @return ModelMetrics object */ static public ModelMetricsBinomial make(Vec targetClassProbs, Vec actualLabels, String[] domain) { Scope.enter(); Vec _labels = actualLabels.toCategoricalVec(); if (domain==null) domain = _labels.domain(); if (_labels == null || targetClassProbs == null) throw new IllegalArgumentException("Missing actualLabels or predictedProbs for binomial metrics!"); if (!targetClassProbs.isNumeric()) throw new IllegalArgumentException("Predicted probabilities must be numeric per-class probabilities for binomial metrics."); if (targetClassProbs.min() < 0 || targetClassProbs.max() > 1) throw new IllegalArgumentException("Predicted probabilities must be between 0 and 1 for binomial metrics."); if (domain.length!=2) throw new IllegalArgumentException("Domain must have 2 class labels, but is " + Arrays.toString(domain) + " for binomial metrics."); _labels = _labels.adaptTo(domain); if (_labels.cardinality()!=2) throw new IllegalArgumentException("Adapted domain must have 2 class labels, but is " + Arrays.toString(_labels.domain()) + " for binomial metrics."); Frame predsLabel = new Frame(targetClassProbs); predsLabel.add("labels", _labels); MetricBuilderBinomial mb = new BinomialMetrics(_labels.domain()).doAll(predsLabel)._mb; _labels.remove(); Frame preds = new Frame(targetClassProbs); ModelMetricsBinomial mm = (ModelMetricsBinomial)mb.makeModelMetrics(null, predsLabel, null, preds); mm._description = "Computed on user-given predictions and labels, using F1-optimal threshold: " + mm.auc_obj().defaultThreshold() + "."; Scope.exit(); return mm; } // helper to build a ModelMetricsBinomial for a N-class problem from a Frame that contains N per-class probability columns, and the actual label as the (N+1)-th column private static class BinomialMetrics extends MRTask<BinomialMetrics> { public BinomialMetrics(String[] domain) { this.domain = domain; } String[] domain; public MetricBuilderBinomial _mb; @Override public void map(Chunk[] chks) { _mb = new MetricBuilderBinomial(domain); Chunk actuals = chks[1]; double [] ds = new double[3]; for (int i=0;i<chks[0]._len;++i) { ds[2] = chks[0].atd(i); //class 1 probs (user-given) ds[1] = 1-ds[2]; //class 0 probs ds[0] = GenModel.getPrediction(ds, null, ds, Double.NaN/*ignored - uses AUC's default threshold*/); //label _mb.perRow(ds, new float[]{actuals.at8(i)}, null); } } @Override public void reduce(BinomialMetrics mrt) { _mb.reduce(mrt._mb); } } public static class MetricBuilderBinomial<T extends MetricBuilderBinomial<T>> extends MetricBuilderSupervised<T> { protected double _logloss; protected AUC2.AUCBuilder _auc; public MetricBuilderBinomial( String[] domain ) { super(2,domain); _auc = new AUC2.AUCBuilder(AUC2.NBINS); } public double auc() {return new AUC2(_auc)._auc;} // Passed a float[] sized nclasses+1; ds[0] must be a prediction. ds[1...nclasses-1] must be a class // distribution; @Override public double[] perRow(double ds[], float[] yact, Model m) {return perRow(ds, yact, 1, 0, m);} @Override public double[] perRow(double ds[], float[] yact, double w, double o, Model m) { if( Float .isNaN(yact[0]) ) return ds; // No errors if actual is missing if(ArrayUtils.hasNaNs(ds)) return ds; // No errors if prediction has missing values (can happen for GLM) if(w == 0 || Double.isNaN(w)) return ds; final int iact = (int)yact[0]; if( iact != 0 && iact != 1 ) return ds; // The actual is effectively a NaN _count++; _wcount += w; _wY += w*iact; _wYY += w*iact*iact; // Compute error double err = iact+1 < ds.length ? 1-ds[iact+1] : 1; // Error: distance from predicting ycls as 1.0 _sumsqe += w*err*err; // Squared error assert !Double.isNaN(_sumsqe); // Compute log loss _logloss += w*MathUtils.logloss(err); _auc.perRow(ds[2],iact,w); return ds; // Flow coding } @Override public void reduce( T mb ) { super.reduce(mb); // sumseq, count _logloss += mb._logloss; _auc.reduce(mb._auc); } /** * Create a ModelMetrics for a given model and frame * @param m Model * @param f Frame * @param frameWithWeights Frame that contains extra columns such as weights * @param preds Optional predictions (can be null), only used to compute Gains/Lift table for binomial problems @return * @return */ @Override public ModelMetrics makeModelMetrics(Model m, Frame f, Frame frameWithWeights, Frame preds) { if (frameWithWeights ==null) frameWithWeights = f; double mse = Double.NaN; double logloss = Double.NaN; double sigma = Double.NaN; AUC2 auc = null; GainsLift gl = null; if (_wcount > 0) { sigma = weightedSigma(); mse = _sumsqe / _wcount; logloss = _logloss / _wcount; auc = new AUC2(_auc); gl = null; if (preds!=null) { Vec resp = m==null && f.vec(f.numCols()-1).isCategorical() ? f.vec(f.numCols()-1) //work-around for the case where we don't have a model, assume that the last column is the actual response : f.vec(m._parms._response_column); Vec weight = m==null?null : frameWithWeights.vec(m._parms._weights_column); if (resp != null) { try { gl = new GainsLift(preds.lastVec(), resp, weight); gl.exec(m != null ? m._output._job : null); } catch(Throwable t) {} } } } ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, sigma, auc, logloss, gl); if (m!=null) m.addModelMetrics(mm); return mm; } public String toString(){ if(_wcount == 0) return "empty, no rows"; return "auc = " + MathUtils.roundToNDigits(auc(),3) + ", logloss = " + _logloss / _wcount; } } }