package hex;
import hex.genmodel.utils.DistributionFamily;
import water.IcedUtils;
import water.MRTask;
import water.exceptions.H2OIllegalArgumentException;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.util.ArrayUtils;
import water.util.MathUtils;
public class ModelMetricsRegression extends ModelMetricsSupervised {
/** For all algos except GLM this is mean residual deviance. For GLM it's total residual deviance. */
public double residual_deviance() { return _mean_residual_deviance; }
/** @return the stored residual deviance; this is the same field that {@link #residual_deviance()} returns */
public double mean_residual_deviance() { return _mean_residual_deviance; }
/** Residual deviance exactly as handed to the constructor. */
public final double _mean_residual_deviance;
/** Mean absolute error exactly as handed to the constructor. */
public final double _mean_absolute_error;
/** @return the stored mean absolute error */
public double mae() { return _mean_absolute_error; }
/** Root mean squared log error exactly as handed to the constructor. */
public final double _root_mean_squared_log_error;
/** @return the stored root mean squared log error */
public double rmsle() { return _root_mean_squared_log_error; }
/**
 * Creates regression metrics from values that have already been computed.
 *
 * @param model                model the metrics belong to; forwarded to the superclass
 * @param frame                frame the metrics were computed on; forwarded to the superclass
 * @param nobs                 number of observations; forwarded to the superclass
 * @param mse                  mean squared error; forwarded to the superclass
 * @param sigma                forwarded to the superclass
 * @param mae                  mean absolute error
 * @param rmsle                root mean squared log error
 * @param meanResidualDeviance residual deviance (see {@link #residual_deviance()})
 */
public ModelMetricsRegression(Model model, Frame frame, long nobs, double mse, double sigma,
                              double mae, double rmsle, double meanResidualDeviance) {
  // The fifth superclass argument is always null for regression metrics.
  super(model, frame, nobs, mse, null, sigma);
  this._root_mean_squared_log_error = rmsle;
  this._mean_absolute_error = mae;
  this._mean_residual_deviance = meanResidualDeviance;
}
/**
 * Looks up the metrics for the given model/frame pair and returns them typed as regression metrics.
 *
 * @param model model whose metrics are requested
 * @param frame frame the metrics were computed on
 * @return the metrics found by {@code ModelMetrics.getFromDKV}, never {@code null}
 * @throws H2OIllegalArgumentException if the lookup yields {@code null} or metrics that are not a
 *         {@link ModelMetricsRegression}
 */
public static ModelMetricsRegression getFromDKV(Model model, Frame frame) {
  ModelMetrics mm = ModelMetrics.getFromDKV(model, frame);
  if (!(mm instanceof ModelMetricsRegression)) {
    // instanceof is also false for null, so mm must not be dereferenced unguarded while
    // building the message; otherwise a NullPointerException would mask the real problem.
    String found = (mm == null) ? "null" : String.valueOf(mm.getClass());
    throw new H2OIllegalArgumentException(
        "Expected to find a Regression ModelMetrics for model: " + model._key.toString() + " and frame: " + frame._key.toString(),
        "Expected to find a ModelMetricsRegression for model: " + model._key.toString() + " and frame: " + frame._key.toString() + " but found a: " + found);
  }
  return (ModelMetricsRegression) mm;
}
/** Returns the superclass summary followed by one line each for deviance, MAE and RMSLE. */
@Override
public String toString() {
  // Values are narrowed to float purely for a shorter printed representation.
  return new StringBuilder()
      .append(super.toString())
      .append(" mean residual deviance: ").append((float) _mean_residual_deviance).append("\n")
      .append(" mean absolute error: ").append((float) _mean_absolute_error).append("\n")
      .append(" root mean squared log error: ").append((float) _root_mean_squared_log_error).append("\n")
      .toString();
}
/**
 * Build a Regression ModelMetrics object from predicted and actual targets.
 *
 * @param predicted A Vec containing predicted values
 * @param actual A Vec containing the actual target values
 * @param family distribution family used for the metrics; {@code null} is treated as gaussian
 * @return ModelMetrics object
 * @throws IllegalArgumentException if either Vec is {@code null} or non-numeric, or if the family is
 *         quantile, tweedie or huber
 */
static public ModelMetricsRegression make(Vec predicted, Vec actual, DistributionFamily family) {
  if (predicted == null || actual == null) {
    throw new IllegalArgumentException("Missing actual or predicted targets for regression metrics!");
  }
  if (!predicted.isNumeric()) {
    throw new IllegalArgumentException("Predicted values must be numeric for regression metrics.");
  }
  if (!actual.isNumeric()) {
    throw new IllegalArgumentException("Actual values must be numeric for regression metrics.");
  }
  final boolean needsExtraParameters = family == DistributionFamily.quantile
      || family == DistributionFamily.tweedie
      || family == DistributionFamily.huber;
  if (needsExtraParameters) {
    throw new IllegalArgumentException("Unsupported distribution family, requires additional parameters which cannot be specified right now.");
  }

  // Two-column frame: predictions first, then the actual targets.
  Frame predsActual = new Frame(predicted);
  predsActual.add("actual", actual);

  RegressionMetrics finished = new RegressionMetrics(family).doAll(predsActual);
  MetricBuilderRegression mb = finished._mb;
  ModelMetricsRegression mm = (ModelMetricsRegression) mb.makeModelMetrics(null, predsActual, null, null);

  DistributionFamily reported = (family == null) ? DistributionFamily.gaussian : family;
  mm._description = "Computed on user-given predictions and targets, distribution: " + reported.toString() + ".";
  return mm;
}
/**
 * Helper task that accumulates regression metrics over a two-column Frame:
 * column 0 holds the predictions and column 1 holds the actual targets.
 */
private static class RegressionMetrics extends MRTask<RegressionMetrics> {
  /** Builder filled by {@link #map(Chunk[])} and merged by {@link #reduce(RegressionMetrics)}. */
  public MetricBuilderRegression _mb;
  /** Distribution handed to every builder created in {@code map}. */
  final Distribution _distribution;

  /** @param family distribution family; {@code null} falls back to gaussian */
  RegressionMetrics(DistributionFamily family) {
    _distribution = new Distribution(family == null ? DistributionFamily.gaussian : family);
  }

  /** Feeds every row of the chunk pair (prediction, actual) into a fresh metric builder. */
  @Override public void map(Chunk[] chks) {
    _mb = new MetricBuilderRegression(_distribution);
    Chunk preds = chks[0];
    Chunk actuals = chks[1];
    final int rows = preds._len;
    // Both one-element buffers are reused across rows; perRow only reads them.
    double[] ds = new double[1];
    float[] yact = new float[1];
    for (int i = 0; i < rows; ++i) {
      ds[0] = preds.atd(i);
      yact[0] = (float) actuals.atd(i);
      _mb.perRow(ds, yact, null);
    }
  }

  /** Folds the other task's builder into this one. */
  @Override public void reduce(RegressionMetrics mrt) { _mb.reduce(mrt._mb); }
}
public static class MetricBuilderRegression<T extends MetricBuilderRegression<T>> extends MetricBuilderSupervised<T> {
double _sumdeviance;
Distribution _dist;
double _abserror;
double _rmslerror;
public MetricBuilderRegression() {
super(1,null); //this will make _work = new float[2];
}
public MetricBuilderRegression(Distribution dist) {
super(1,null); //this will make _work = new float[2];
_dist=dist;
}
// ds[0] has the prediction and ds[1,..,N] is ignored
@Override public double[] perRow(double ds[], float[] yact, Model m) {return perRow(ds, yact, 1, 0, m);}
@Override public double[] perRow(double ds[], float[] yact, double w, double o, Model m) {
if( Float.isNaN(yact[0]) ) return ds; // No errors if actual is missing
if(ArrayUtils.hasNaNs(ds)) return ds; // No errors if prediction has missing values (can happen for GLM)
if(w == 0 || Double.isNaN(w)) return ds;
// Compute error
double err = yact[0] - ds[0]; // Error: distance from the actual
double err_msle = Math.pow(Math.log1p(ds[0]) - Math.log1p(yact[0]),2); //Squared log error
_sumsqe += w*err*err; // Squared error
_abserror += w*Math.abs(err);
_rmslerror += w*err_msle;
assert !Double.isNaN(_sumsqe);
if (m != null && m._parms._distribution != DistributionFamily.huber)
_sumdeviance += m.deviance(w, yact[0], ds[0]);
else if (_dist!=null)
_sumdeviance += _dist.deviance(w, yact[0], ds[0]);
_count++;
_wcount += w;
_wY += w*yact[0];
_wYY += w*yact[0]*yact[0];
return ds; // Flow coding
}
@Override public void reduce( T mb ) {
super.reduce(mb);
_sumdeviance += mb._sumdeviance;
_abserror += mb._abserror;
_rmslerror += mb._rmslerror;
}
// Having computed a MetricBuilder, this method fills in a ModelMetrics
public ModelMetrics makeModelMetrics(Model m, Frame f, Frame adaptedFrame, Frame preds) {
double mse = _sumsqe / _wcount;
double mae = _abserror/_wcount; //Mean Absolute Error
double rmsle = Math.sqrt(_rmslerror/_wcount); //Root Mean Squared Log Error
if (adaptedFrame ==null) adaptedFrame = f;
double meanResDeviance = 0;
if (m != null && m._parms._distribution == DistributionFamily.huber) {
assert(_sumdeviance==0); // should not yet be computed
if (preds != null) {
Vec actual = adaptedFrame.vec(m._parms._response_column);
Vec weight = adaptedFrame.vec(m._parms._weights_column);
//compute huber delta based on huber alpha quantile on absolute prediction error
double huberDelta = computeHuberDelta(actual, preds.anyVec(), weight, m._parms._huber_alpha);
// make a deep copy of the model's current distribution state (huber delta)
_dist = IcedUtils.deepCopy(m._dist);
_dist.setHuberDelta(huberDelta);
meanResDeviance = new MeanResidualDeviance(_dist, preds.anyVec(), actual, weight).exec().meanResidualDeviance;
}
} else {
meanResDeviance = _sumdeviance / _wcount; //mean residual deviance
}
ModelMetricsRegression mm = new ModelMetricsRegression(m, f, _count, mse, weightedSigma(), mae, rmsle, meanResDeviance);
if (m!=null) m.addModelMetrics(mm);
return mm;
}
}
/**
 * Computes the huber delta as the weighted {@code huberAlpha}-quantile of the absolute
 * prediction error {@code |preds - actual|}.
 *
 * @param actual     actual target values
 * @param preds      predicted values
 * @param weight     observation weights handed to the weighted-quantile computation
 * @param huberAlpha quantile level handed to the weighted-quantile computation
 * @return the value returned by {@code MathUtils.computeWeightedQuantile} for the absolute errors
 */
public static double computeHuberDelta(Vec actual, Vec preds, Vec weight, double huberAlpha) {
  // Materialize |preds - actual| as a temporary single-column Vec.
  // NOTE(review): the type code 3 is presumably the numeric Vec type (Vec.T_NUM) — confirm.
  Vec absdiff = new MRTask() {
    @Override
    public void map(Chunk[] cs, NewChunk[] nc) {
      for (int i = 0; i < cs[0].len(); ++i)
        nc[0].addNum(Math.abs(cs[0].atd(i) - cs[1].atd(i)));
    }
  }.doAll(1, (byte) 3, new Frame(new String[]{"preds", "actual"}, new Vec[]{preds, actual})).outputFrame().anyVec();
  try {
    // huber delta = huber-alpha quantile of the absolute prediction error
    return MathUtils.computeWeightedQuantile(weight, absdiff, huberAlpha);
  } finally {
    // Always release the temporary Vec, even when the quantile computation throws.
    absdiff.remove();
  }
}
}