package ai.h2o.automl;

import hex.*;
import water.*;
import water.api.schemas3.KeyV3;
import water.exceptions.H2OIllegalArgumentException;
import water.fvec.Frame;
import water.util.ArrayUtils;
import water.util.IcedHashMap;
import water.util.Log;
import water.util.TwoDimTable;

import java.util.*;

import static water.DKV.getGet;
import static water.Key.make;

/**
 * Utility to track all the models built for a given dataset type.
 * <p>
 * Note that if a new Leaderboard is made for the same project it'll
 * keep using the old model list, which allows us to run AutoML multiple
 * times and keep adding to the leaderboard.
 * <p>
 * The models are returned sorted by either an appropriate default metric
 * for the model category (auc, mean per class error, or mean residual deviance),
 * or by a metric that's set via #setMetricAndDirection.
 * <p>
 * TODO: make this robust against removal of models from the DKV.
 */
public class Leaderboard extends Keyed<Leaderboard> {
  /**
   * Identifier for models that should be grouped together in the leaderboard
   * (e.g., "airlines" and "iris").
   */
  private final String project;

  /**
   * List of models for this leaderboard, sorted by metric so that the best is first,
   * according to the standard metric for the given model type.
   * <p>
   * Updated inside addModels().
   */
  private Key<Model>[] models = new Key[0];

  /**
   * Test set ModelMetrics objects for the models.
   * <p>
   * Updated inside addModels().
   */
  private IcedHashMap<Key<ModelMetrics>, ModelMetrics> test_set_metrics = new IcedHashMap<>();

  /**
   * Sort metrics for the models in this leaderboard, in the same order as the models.
   * <p>
   * Updated inside addModels().
   */
  public double[] sort_metrics = new double[0];

  /**
   * Additional metrics for the models in this leaderboard, in the same order as the models:
   * rmse, mae, and rmsle for regression; logloss for binomial classification.
   * <p>
   * Updated inside addModels().
   */
  public double[] rmse = new double[0];
  public double[] mae = new double[0];
  public double[] rmsle = new double[0];
  public double[] logloss = new double[0];

  /**
   * Metric used to sort this leaderboard.
   */
  private String sort_metric;

  /**
   * Other metrics reported in the leaderboard (logloss for binomial; rmse, mae, and rmsle for regression).
   */
  private String[] other_metrics;

  /**
   * Metric direction used in the sort.
   */
  private boolean sort_decreasing;

  /**
   * UserFeedback object used to send, um, feedback to the, ah, user. :-)
   * Right now this is a "new leader" message.
   */
  private UserFeedback userFeedback;

  /**
   * Frame for which we return the metrics, by default.
   */
  private Frame testFrame;

  /** HIDEME! */
  private Leaderboard() {
    throw new UnsupportedOperationException("Do not call the default constructor Leaderboard().");
  }

  /**
   * Create a leaderboard for the given project, resuming from any earlier
   * leaderboard with the same project name that is still in the DKV.
   */
  public Leaderboard(String project, UserFeedback userFeedback, Frame testFrame) {
    this._key = make(idForProject(project));
    Leaderboard old = DKV.getGet(this._key);
    if (null != old) {
      // pick up where we left off
      // note that if subsequent runs use a different test frame the models are re-scored
      this.models = old.models;
      this.test_set_metrics = old.test_set_metrics;
      this.sort_metrics = old.sort_metrics;
    }
    this.project = project;
    this.userFeedback = userFeedback;
    this.testFrame = testFrame;
    DKV.put(this);
  }
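  // A minimal usage sketch (illustrative comment only, not part of the class):
  // assuming an existing UserFeedback instance `feedback` and a test Frame `test`,
  // AutoML creates one leaderboard per project and appends models as they train:
  //
  //   Leaderboard lb = new Leaderboard("airlines", feedback, test);
  //   lb.addModel(someModel);          // add a single model
  //   lb.addModels(gridModelKeys);     // e.g. all models from a Grid; duplicates are OK
  //   Model best = lb.getLeader();     // best model under the default sort metric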
  // satisfy typing for job return type...
  public static class LeaderboardKeyV3 extends KeyV3<Iced, LeaderboardKeyV3, Leaderboard> {
    public LeaderboardKeyV3() {
    }

    public LeaderboardKeyV3(Key<Leaderboard> key) {
      super(key);
    }
  }

  public static String idForProject(String project) {
    return "AutoML_Leaderboard_" + project;
  }

  public String getProject() {
    return project;
  }

  public void setMetricAndDirection(String metric, String[] otherMetrics, boolean sortDecreasing) {
    this.sort_metric = metric;
    this.other_metrics = otherMetrics;
    this.sort_decreasing = sortDecreasing;
    DKV.put(this);
  }

  public void setMetricAndDirection(String metric, boolean sortDecreasing) {
    this.sort_metric = metric;
    this.sort_decreasing = sortDecreasing;
    DKV.put(this);
  }

  public void setDefaultMetricAndDirection(Model m) {
    if (m._output.isBinomialClassifier())
      setMetricAndDirection("auc", new String[]{"logloss"}, true);
    else if (m._output.isClassifier())
      setMetricAndDirection("mean_per_class_error", false);
    else if (m._output.isSupervised())
      setMetricAndDirection("mean_residual_deviance", new String[]{"rmse", "mae", "rmsle"}, false);
  }
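  // For reference, the defaults chosen above, per model category:
  //
  //   binomial classification    -> sort by auc, descending; extra column: logloss
  //   multinomial classification -> sort by mean_per_class_error, ascending
  //   regression                 -> sort by mean_residual_deviance, ascending;
  //                                 extra columns: rmse, mae, rmsle
  //
  // Unsupervised models fall through without setting a sort metric.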
  /**
   * Add the given models to the leaderboard. Note that to make this easier to use from
   * Grid, which returns its models in random order, we allow the caller to add the same
   * model multiple times; duplicates are eliminated here.
   * @param newModels keys of the models to add
   */
  public final void addModels(final Key<Model>[] newModels) {
    if (null == this._key)
      throw new H2OIllegalArgumentException("Can't add models to a Leaderboard which isn't in the DKV.");

    if (this.sort_metric == null) {
      // lazily set to default for this model category
      setDefaultMetricAndDirection(newModels[0].get());
    }

    final Key<Model>[] newLeader = new Key[1]; // only set if there's a new leader

    new TAtomic<Leaderboard>() {
      @Override
      public final Leaderboard atomic(Leaderboard old) {
        if (old == null) old = new Leaderboard();

        final Key<Model>[] oldModels = old.models;
        final Key<Model> oldLeader = (oldModels == null || 0 == oldModels.length) ? null : oldModels[0];

        // eliminate duplicates
        Set<Key<Model>> uniques = new HashSet<>(oldModels.length + newModels.length);
        uniques.addAll(Arrays.asList(oldModels));
        uniques.addAll(Arrays.asList(newModels));
        old.models = uniques.toArray(new Key[0]);

        // TODO: remove from tatomic?
        // which models are really new? we need to call score on them
        Set<Key<Model>> reallyNewModels = new HashSet<>(uniques);
        reallyNewModels.removeAll(Arrays.asList(oldModels));

        // Try fetching ModelMetrics for *all* models, not just reallyNewModels,
        // because the testFrame might have changed.
        old.test_set_metrics = new IcedHashMap<>();
        for (Key<Model> aKey : old.models) {
          Model aModel = aKey.get();
          if (null == aModel) {
            userFeedback.warn(UserFeedbackEvent.Stage.ModelTraining,
                "Model in the leaderboard has unexpectedly been deleted from H2O: " + aKey);
            continue;
          }

          ModelMetrics mm = ModelMetrics.getFromDKV(aModel, testFrame);
          if (mm == null) {
            // Scoring the model on the test frame creates the ModelMetrics as a side effect.
            Frame preds = aModel.score(testFrame);
            mm = ModelMetrics.getFromDKV(aModel, testFrame);
            preds.remove(); // we only need the metrics; drop the temporary predictions frame
          }
          old.test_set_metrics.put(mm._key, mm);
        }

        // Sort by metric on the test set.
        // TODO: this sorts by the metrics in Model._output
        try {
          List<Key<Model>> modelsSorted =
              ModelMetrics.sortModelsByMetric(testFrame, sort_metric, sort_decreasing, Arrays.asList(old.models));
          old.models = modelsSorted.toArray(new Key[0]);
        } catch (H2OIllegalArgumentException e) {
          Log.warn("ModelMetrics.sortModelsByMetric failed: " + e);
          throw e;
        }

        // Fetch the models once and reuse them for all the metric lookups below.
        Model[] models = modelsForModelKeys(old.models, new Model[old.models.length]);
        old.sort_metrics = getSortMetrics(old.sort_metric, old.test_set_metrics, testFrame, models);
        if ("auc".equals(sort_metric)) { // binomial case
          old.logloss = getOtherMetrics("logloss", old.test_set_metrics, testFrame, models);
        } else if ("mean_residual_deviance".equals(sort_metric)) { // regression case
          old.rmse = getOtherMetrics("rmse", old.test_set_metrics, testFrame, models);
          old.mae = getOtherMetrics("mae", old.test_set_metrics, testFrame, models);
          old.rmsle = getOtherMetrics("rmsle", old.test_set_metrics, testFrame, models);
        }

        // If the leader has changed, remember it so we can notify the user
        // (outside the tatomic, since that can take a long time).
        if (oldLeader == null || !oldLeader.equals(old.models[0]))
          newLeader[0] = old.models[0];

        return old;
      } // atomic
    }.invoke(this._key);

    // We've updated the DKV but not this instance, so:
    Leaderboard updated = DKV.getGet(this._key);
    this.models = updated.models;
    this.test_set_metrics = updated.test_set_metrics;
    this.sort_metrics = updated.sort_metrics;
    if ("auc".equals(sort_metric)) { // binomial case
      this.logloss = updated.logloss;
    } else if ("mean_residual_deviance".equals(sort_metric)) { // regression case
      this.rmse = updated.rmse;
      this.mae = updated.mae;
      this.rmsle = updated.rmsle;
    }

    // always
    EckoClient.updateLeaderboard(this);
    if (null != newLeader[0]) {
      userFeedback.info(UserFeedbackEvent.Stage.ModelTraining, "New leader: " + newLeader[0]);
    }
  }

  public void addModel(final Key<Model> key) {
    Key<Model>[] keys = new Key[1];
    keys[0] = key;
    addModels(keys);
  }

  public void addModel(final Model model) {
    Key<Model>[] keys = new Key[1];
    keys[0] = model._key;
    addModels(keys);
  }

  private static Model[] modelsForModelKeys(Key<Model>[] modelKeys, Model[] models) {
    assert models.length >= modelKeys.length;
    int i = 0;
    for (Key<Model> modelKey : modelKeys)
      models[i++] = getGet(modelKey);
    return models;
  }

  /**
   * @return list of keys of models sorted by the default metric for the model category, fetched from the DKV
   */
  public Key<Model>[] getModelKeys() {
    return ((Leaderboard) DKV.getGet(this._key)).models;
  }

  /**
   * @return list of keys of models sorted by the given metric, fetched from the DKV
   */
  public Key<Model>[] modelKeys(String metric, boolean sortDecreasing) {
    Key<Model>[] models = getModelKeys();
    List<Key<Model>> newModelsSorted =
        ModelMetrics.sortModelsByMetric(metric, sortDecreasing, Arrays.asList(models));
    return newModelsSorted.toArray(new Key[0]);
  }

  /**
   * @return list of models sorted by the default metric for the model category
   */
  public Model[] getModels() {
    Key<Model>[] modelKeys = getModelKeys();
    if (modelKeys == null || 0 == modelKeys.length)
      return new Model[0];
    Model[] models = new Model[modelKeys.length];
    return modelsForModelKeys(modelKeys, models);
  }

  /**
   * @return list of models sorted by the given metric
   */
  public Model[] getModels(String metric, boolean sortDecreasing) {
    Key<Model>[] modelKeys = modelKeys(metric, sortDecreasing);
    if (modelKeys == null || 0 == modelKeys.length)
      return new Model[0];
    Model[] models = new Model[modelKeys.length];
    return modelsForModelKeys(modelKeys, models);
  }

  public Model getLeader() {
    Key<Model>[] modelKeys = getModelKeys();
    if (modelKeys == null || 0 == modelKeys.length)
      return null;
    return modelKeys[0].get();
  }
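  // Illustrative only: re-ranking an existing leaderboard by a metric other than
  // its default sort, e.g. by logloss (lower is better, so sortDecreasing = false):
  //
  //   Model[] byLogloss = leaderboard.getModels("logloss", false);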
  /*
  public long[] getTimestamps(Model[] models) {
    long[] timestamps = new long[models.length];
    int i = 0;
    for (Model m : models)
      timestamps[i++] = m._output._end_time;
    return timestamps;
  }
  */

  public double[] getSortMetrics() {
    return getSortMetrics(this.sort_metric, this.test_set_metrics, this.testFrame, this.getModels());
  }

  public static double[] getOtherMetrics(String other_metric, IcedHashMap<Key<ModelMetrics>, ModelMetrics> test_set_metrics, Frame testFrame, Model[] models) {
    double[] other_metrics = new double[models.length];
    int i = 0;
    for (Model m : models)
      other_metrics[i++] = ModelMetrics.getMetricFromModelMetric(
          test_set_metrics.get(ModelMetrics.buildKey(m, testFrame)), other_metric);
    return other_metrics;
  }

  public static double[] getSortMetrics(String sort_metric, IcedHashMap<Key<ModelMetrics>, ModelMetrics> test_set_metrics, Frame testFrame, Model[] models) {
    double[] sort_metrics = new double[models.length];
    int i = 0;
    for (Model m : models)
      sort_metrics[i++] = ModelMetrics.getMetricFromModelMetric(
          test_set_metrics.get(ModelMetrics.buildKey(m, testFrame)), sort_metric);
    return sort_metrics;
  }

  /**
   * Delete everything in the DKV that this points to. We currently need to be able to call
   * this after deleteWithChildren().
   */
  public void delete() {
    remove();
  }

  public void deleteWithChildren() {
    for (Model m : getModels())
      m.delete();
    delete();
  }

  public static double[] defaultMetricForModel(Model m) {
    ModelMetrics mm =
        m._output._cross_validation_metrics != null ? m._output._cross_validation_metrics :
        m._output._validation_metrics != null ? m._output._validation_metrics :
        m._output._training_metrics;
    return defaultMetricForModel(m, mm);
  }

  public static double[] defaultMetricForModel(Model m, ModelMetrics mm) {
    if (m._output.isBinomialClassifier()) {
      return new double[]{((ModelMetricsBinomial) mm).auc(), ((ModelMetricsBinomial) mm).logloss()};
    } else if (m._output.isClassifier()) {
      return new double[]{((ModelMetricsMultinomial) mm).mean_per_class_error()};
    } else if (m._output.isSupervised()) {
      return new double[]{((ModelMetricsRegression) mm).mean_residual_deviance(), mm.rmse(),
          ((ModelMetricsRegression) mm).mae(), ((ModelMetricsRegression) mm).rmsle()};
    }
    Log.warn("Failed to find metric for model: " + m);
    return new double[]{Double.NaN};
  }

  public static String[] defaultMetricNameForModel(Model m) {
    if (m._output.isBinomialClassifier()) {
      return new String[]{"auc", "logloss"};
    } else if (m._output.isClassifier()) {
      return new String[]{"mean per-class error"};
    } else if (m._output.isSupervised()) {
      return new String[]{"mean_residual_deviance", "rmse", "mae", "rmsle"};
    }
    return new String[]{"unknown"};
  }
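  // Layout of the arrays returned by defaultMetricForModel / defaultMetricNameForModel,
  // which are kept in sync per model category:
  //
  //   binomial:    { auc, logloss }
  //   multinomial: { mean per-class error }
  //   regression:  { mean_residual_deviance, rmse, mae, rmsle }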
  public String rankTsv() {
    String fieldSeparator = "\t";
    String lineSeparator = "\n";

    StringBuilder sb = new StringBuilder();
    // sb.append("Rank").append(fieldSeparator).append("Error").append(lineSeparator);
    sb.append("Error").append(lineSeparator);

    Model[] models = getModels();
    for (int i = models.length - 1; i >= 0; i--) {
      // TODO: allow the metric to be passed in. Note that this assumes the validation (or training) frame is the same.
      Model m = models[i];
      sb.append(Arrays.toString(defaultMetricForModel(m)));
      sb.append(lineSeparator);
    }
    return sb.toString();
  }

  public String timeTsv() {
    String fieldSeparator = "\t";
    String lineSeparator = "\n";

    StringBuilder sb = new StringBuilder();
    // sb.append("Time").append(fieldSeparator).append("Error").append(lineSeparator);
    sb.append("Error").append(lineSeparator);

    Model[] models = getModels();
    for (int i = models.length - 1; i >= 0; i--) {
      // TODO: allow the metric to be passed in. Note that this assumes the validation (or training) frame is the same.
      Model m = models[i];
      // sb.append(timestampFormat.format(m._output._end_time));
      // sb.append(fieldSeparator);
      sb.append(Arrays.toString(defaultMetricForModel(m)));
      sb.append(lineSeparator);
    }
    return sb.toString();
  }

  protected static final String[] colHeaders(String metric, String[] other_metric) {
    // return new String[] {"model ID", "timestamp", metric};
    return ArrayUtils.append(new String[]{"model_id", metric}, other_metric);
  }

  protected static final String[] colHeadersMult(String metric) {
    // return new String[] {"model ID", "timestamp", metric};
    return new String[]{"model_id", metric};
  }

  protected static final String[] colTypesMultinomial = {"string", "string"};
  protected static final String[] colFormatsMultinomial = {"%s", "%s"};
  protected static final String[] colTypesBinomial = {"string", "string", "string"};
  protected static final String[] colFormatsBinomial = {"%s", "%s", "%s"};
  protected static final String[] colTypesRegression = {"string", "string", "string", "string", "string"};
  protected static final String[] colFormatsRegression = {"%s", "%s", "%s", "%s", "%s"};

  public static final TwoDimTable makeTwoDimTable(String tableHeader, String sort_metric, String[] other_metric, int length) {
    String[] rowHeaders = new String[length];
    for (int i = 0; i < length; i++) rowHeaders[i] = "" + i;

    if ("mean_per_class_error".equals(sort_metric)) { // multinomial
      return new TwoDimTable(tableHeader,
          "models sorted in order of " + sort_metric + ", best first",
          rowHeaders,
          Leaderboard.colHeadersMult(sort_metric),
          Leaderboard.colTypesMultinomial,
          Leaderboard.colFormatsMultinomial,
          "#");
    } else if ("auc".equals(sort_metric)) { // binomial
      return new TwoDimTable(tableHeader,
          "models sorted in order of " + sort_metric + ", best first",
          rowHeaders,
          Leaderboard.colHeaders(sort_metric, other_metric),
          Leaderboard.colTypesBinomial,
          Leaderboard.colFormatsBinomial,
          "#");
    } else { // regression
      return new TwoDimTable(tableHeader,
          "models sorted in order of " + sort_metric + ", best first",
          rowHeaders,
          Leaderboard.colHeaders(sort_metric, other_metric),
          Leaderboard.colTypesRegression,
          Leaderboard.colFormatsRegression,
          "#");
    }
  }

  // public void addTwoDimTableRow(TwoDimTable table, int row, String[] modelIDs, long[] timestamps, double[] errors) {
  public void addTwoDimTableRowMultinomial(TwoDimTable table, int row, String[] modelIDs, double[] errors) {
    int col = 0;
    table.set(row, col++, modelIDs[row]);
    // table.set(row, col++, timestampFormat.format(new Date(timestamps[row])));
    table.set(row, col++, String.format("%.6f", errors[row]));
  }

  public void addTwoDimTableRowBinomial(TwoDimTable table, int row, String[] modelIDs, double[] errors, double[] otherErrors) {
    int col = 0;
    table.set(row, col++, modelIDs[row]);
    // table.set(row, col++, timestampFormat.format(new Date(timestamps[row])));
    table.set(row, col++, String.format("%.6f", errors[row]));
    table.set(row, col++, String.format("%.6f", otherErrors[row]));
  }
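  // Rough shape of the rendered table for a binomial leaderboard (the model ids and
  // metric values below are hypothetical):
  //
  //   model_id          auc       logloss
  //   GBM_model_0       0.789000  0.456000
  //   DRF_model_1       0.771000  0.482000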
  public void addTwoDimTableRowRegression(TwoDimTable table, int row, String[] modelIDs, double[] errors, double[] rmse, double[] mae, double[] rmsle) {
    int col = 0;
    table.set(row, col++, modelIDs[row]);
    // table.set(row, col++, timestampFormat.format(new Date(timestamps[row])));
    table.set(row, col++, String.format("%.6f", errors[row]));
    table.set(row, col++, String.format("%.6f", rmse[row]));
    table.set(row, col++, String.format("%.6f", mae[row]));
    table.set(row, col++, String.format("%.6f", rmsle[row]));
  }

  public TwoDimTable toTwoDimTable() {
    return toTwoDimTable("Leaderboard for project: " + project, false);
  }

  public TwoDimTable toTwoDimTable(String tableHeader, boolean leftJustifyModelIds) {
    Model[] models = this.getModels();
    // long[] timestamps = getTimestamps(models);
    String[] modelIDsFormatted = new String[models.length];

    TwoDimTable table = makeTwoDimTable(tableHeader, sort_metric, other_metrics, models.length);

    // %-s doesn't work in TwoDimTable.toString(), so fake it here:
    int maxModelIdLen = -1;
    for (Model m : models)
      maxModelIdLen = Math.max(maxModelIdLen, m._key.toString().length());
    for (int i = 0; i < models.length; i++)
      if (leftJustifyModelIds) {
        // pad on the right out to the length of the longest model id
        modelIDsFormatted[i] = String.format("%-" + maxModelIdLen + "s", models[i]._key.toString());
      } else {
        modelIDsFormatted[i] = models[i]._key.toString();
      }

    for (int i = 0; i < models.length; i++) {
      // addTwoDimTableRow(table, i, modelIDsFormatted, timestamps, sort_metrics);
      if ("mean_per_class_error".equals(sort_metric)) { // multinomial case
        addTwoDimTableRowMultinomial(table, i, modelIDsFormatted, sort_metrics);
      } else if ("auc".equals(sort_metric)) { // binomial case
        addTwoDimTableRowBinomial(table, i, modelIDsFormatted, sort_metrics, logloss);
      } else { // regression
        addTwoDimTableRowRegression(table, i, modelIDsFormatted, sort_metrics, rmse, mae, rmsle);
      }
    }
    return table;
  }

  // private static final SimpleDateFormat timestampFormat = new SimpleDateFormat("HH:mm:ss.SSS");

  // public static String toString(String project, Model[] models, String fieldSeparator, String lineSeparator, boolean includeTitle, boolean includeHeader, boolean includeTimestamp) {
  public static String toString(String project, Model[] models, String fieldSeparator, String lineSeparator, boolean includeTitle, boolean includeHeader) {
    StringBuilder sb = new StringBuilder();
    if (includeTitle) {
      sb.append("Leaderboard for project \"")
          .append(project)
          .append("\": ");

      if (models.length == 0) {
        sb.append("<empty>");
        return sb.toString();
      }
      sb.append(lineSeparator);
    }

    boolean printedHeader = false;
    for (Model m : models) {
      // TODO: allow the metric to be passed in. Note that this assumes the validation (or training) frame is the same.
      if (includeHeader && !printedHeader) {
        sb.append("model_id");
        sb.append(fieldSeparator);
        sb.append(Arrays.toString(defaultMetricNameForModel(m)));
        /*
        if (includeTimestamp) {
          sb.append(fieldSeparator);
          sb.append("timestamp");
        }
        */
        sb.append(lineSeparator);
        printedHeader = true;
      }

      sb.append(m._key.toString());
      sb.append(fieldSeparator);
      sb.append(Arrays.toString(defaultMetricForModel(m)));
      /*
      if (includeTimestamp) {
        sb.append(fieldSeparator);
        sb.append(timestampFormat.format(m._output._end_time));
      }
      */
      sb.append(lineSeparator);
    }
    return sb.toString();
  }

  public String toString(String fieldSeparator, String lineSeparator) {
    // return toString(project, getModels(), fieldSeparator, lineSeparator, true, true, false);
    return toString(project, getModels(), fieldSeparator, lineSeparator, true, true);
  }

  @Override
  public String toString() {
    return toString(" ; ", " | ");
  }
}