ModelParametersSchemaV3.java example

Explorer
h2o-3-master
package water.api.schemas3;

import hex.genmodel.utils.DistributionFamily;
import hex.Model;
import hex.ScoreKeeper;
import water.*;
import water.api.API;
import water.api.schemas3.KeyV3.FrameKeyV3;
import water.api.schemas3.KeyV3.ModelKeyV3;
import water.fvec.Frame;
import water.util.PojoUtils;

import java.lang.reflect.Field;
import java.util.*;

/**
 * An instance of a ModelParameters schema contains the Model build parameters (e.g., K and max_iterations for KMeans).
 * NOTE: use subclasses, not this class directly.  It is not abstract only so that we can instantiate it to generate metadata
 * for it for the metadata API.
 */
public class ModelParametersSchemaV3<P extends Model.Parameters, S extends ModelParametersSchemaV3<P, S>>
    extends SchemaV3<P, S> {
  ////////////////////////////////////////
  // NOTE:
  // Parameters must be ordered for the UI
  ////////////////////////////////////////
  public String[] fields() {
    try { return (String[]) getClass().getField("fields").get(getClass()); }
    catch (Exception e) { throw H2O.fail("Caught exception from accessing the schema field list", e);  }
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // CAREFUL: This class has its own JSON serializer.  If you add a field here you probably also want to add it to the serializer!
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  // Parameters common to all models:
  @API(level = API.Level.critical, direction = API.Direction.INOUT,
      help="Destination id for this model; auto-generated if not specified.")
  public ModelKeyV3 model_id;

  @API(level = API.Level.critical, direction = API.Direction.INOUT,
      help = "Id of the training data frame (Not required, to allow initial validation of model parameters).")
  public FrameKeyV3 training_frame;

  @API(level = API.Level.critical, direction = API.Direction.INOUT, gridable = true,
      help = "Id of the validation data frame.")
  public FrameKeyV3 validation_frame;

  @API(level = API.Level.critical, direction = API.Direction.INOUT,
      help = "Number of folds for N-fold cross-validation (0 to disable or >= 2).")
  public int nfolds;

  @API(level = API.Level.expert, direction = API.Direction.INOUT,
      help = "Whether to keep the predictions of the cross-validation models.")
  public boolean keep_cross_validation_predictions;

  @API(level = API.Level.expert, direction = API.Direction.INOUT,
      help = "Whether to keep the cross-validation fold assignment.")
  public boolean keep_cross_validation_fold_assignment;

  @API(help="Allow parallel training of cross-validation models", direction=API.Direction.INOUT, level = API.Level.expert)
  public boolean parallelize_cross_validation;

  @API(help = "Distribution function", values = { "AUTO", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber" }, level = API.Level.secondary, gridable = true)
  public DistributionFamily distribution;

  @API(level = API.Level.secondary, direction = API.Direction.INPUT, gridable = true,
          help = "Tweedie power for Tweedie regression, must be between 1 and 2.")
  public double tweedie_power;

  @API(level = API.Level.secondary, direction = API.Direction.INPUT, gridable = true,
          help = "Desired quantile for Quantile regression, must be between 0 and 1.")
  public double quantile_alpha;


  @API(help = "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1).",
          level = API.Level.secondary, direction = API.Direction.INPUT, gridable = true)
  public double huber_alpha;

  @API(level = API.Level.critical, direction = API.Direction.INOUT, gridable = true,
      is_member_of_frames = {"training_frame", "validation_frame"},
      is_mutually_exclusive_with = {"ignored_columns"},
      help = "Response variable column.")
  public FrameV3.ColSpecifierV3 response_column;

  @API(level = API.Level.secondary, direction = API.Direction.INOUT, gridable = true,
      is_member_of_frames = {"training_frame", "validation_frame"},
      is_mutually_exclusive_with = {"ignored_columns", "response_column"},
      help = "Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it" +
          " from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice." +
          " Negative weights are not allowed.")
  public FrameV3.ColSpecifierV3 weights_column;

  @API(level = API.Level.secondary, direction = API.Direction.INOUT, gridable = true,
      is_member_of_frames = {"training_frame", "validation_frame"},
      is_mutually_exclusive_with = {"ignored_columns","response_column", "weights_column"},
      help = "Offset column. This will be added to the combination of columns before applying the link function.")
  public FrameV3.ColSpecifierV3 offset_column;

  @API(level = API.Level.secondary, direction = API.Direction.INOUT, gridable = true,
      is_member_of_frames = {"training_frame"},
      is_mutually_exclusive_with = {"ignored_columns", "response_column", "weights_column", "offset_column"},
      help = "Column with cross-validation fold index assignment per observation.")
  public FrameV3.ColSpecifierV3 fold_column;

  @API(level = API.Level.secondary, direction = API.Direction.INOUT, gridable = true,
      values = {"AUTO", "Random", "Modulo", "Stratified"},
      help = "Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will " +
          "stratify the folds based on the response variable, for classification problems.")
  public Model.Parameters.FoldAssignmentScheme fold_assignment;

  @API(level = API.Level.secondary, direction = API.Direction.INOUT, gridable = true,
          values = {"AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse"},
          help = "Encoding scheme for categorical features")
  public Model.Parameters.CategoricalEncodingScheme categorical_encoding;

  @API(level = API.Level.critical, direction = API.Direction.INOUT,
      is_member_of_frames = {"training_frame", "validation_frame"},
      help = "Names of columns to ignore for training.")
  public String[] ignored_columns;

  @API(level = API.Level.critical, direction = API.Direction.INOUT,
      help = "Ignore constant columns.")
  public boolean ignore_const_cols;

  @API(level = API.Level.secondary, direction = API.Direction.INOUT,
      help = "Whether to score during each iteration of model training.")
  public boolean score_each_iteration;

  /**
   * A model key associated with a previously trained
   * model. This option allows users to build a new model as a
   * continuation of a previously generated model (e.g., by a grid search).
   */
  @API(level = API.Level.secondary, direction=API.Direction.INOUT,
      help = "Model checkpoint to resume training with.")
  public ModelKeyV3 checkpoint;

  /**
   * Early stopping based on convergence of stopping_metric.
   * Stop if simple moving average of length k of the stopping_metric does not improve (by stopping_tolerance) for k=stopping_rounds scoring events."
   * Can only trigger after at least 2k scoring events. Use 0 to disable.
   */
  @API(help = "Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable)", level = API.Level.secondary, direction=API.Direction.INOUT, gridable = true)
  public int stopping_rounds;

  @API(help = "Maximum allowed runtime in seconds for model training. Use 0 to disable.", level = API.Level.secondary, direction=API.Direction.INOUT, gridable = true)
  public double max_runtime_secs;

  /**
   * Metric to use for convergence checking, only for _stopping_rounds > 0
   */
  @API(help = "Metric to use for early stopping (AUTO: logloss for classification, deviance for regression)", values = {"AUTO", "deviance", "logloss", "MSE", "RMSE","MAE","RMSLE", "AUC", "lift_top_group", "misclassification", "mean_per_class_error"}, level = API.Level.secondary, direction=API.Direction.INOUT, gridable = true)
  public ScoreKeeper.StoppingMetric stopping_metric;

  @API(help = "Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this much)", level = API.Level.secondary, direction=API.Direction.INOUT, gridable = true)
  public double stopping_tolerance;

  protected static String[] append_field_arrays(String[] first, String[] second) {
    String[] appended = new String[first.length + second.length];
    System.arraycopy(first, 0, appended, 0, first.length);
    System.arraycopy(second, 0, appended, first.length, second.length);
    return appended;
  }

  public S fillFromImpl(P impl) {
    PojoUtils.copyProperties(this, impl, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES);

    if (impl._train != null) {
      Value v = DKV.get(impl._train);
      if (v != null) {
        training_frame = new FrameKeyV3(((Frame) v.get())._key);
      }
    }

    if (impl._valid != null) {
      Value v = DKV.get(impl._valid);
      if (v != null) {
        validation_frame = new FrameKeyV3(((Frame) v.get())._key);
      }
    }

    return (S)this;
  }

  public P fillImpl(P impl) {
    super.fillImpl(impl);

    impl._train = (this.training_frame == null) ? null : Key.<Frame>make(this.training_frame.name);
    impl._valid = (this.validation_frame == null) ? null : Key.<Frame>make(this.validation_frame.name);
    impl._max_runtime_secs = nfolds > 0 ? max_runtime_secs / (nfolds+1) : max_runtime_secs;

    return impl;
  }

  private static void compute_transitive_closure_of_is_mutually_exclusive(ModelParameterSchemaV3[] metadata) {
    // Form the transitive closure of the is_mutually_exclusive field lists by visiting
    // all fields and collecting the fields in a Map of Sets.  Then pass over them a second
    // time setting the full lists.
    Map<String, Set<String>> field_exclusivity_groups = new HashMap<>();
    for (ModelParameterSchemaV3 param : metadata) {
      String name = param.name;

      // Turn param.is_mutually_exclusive_with into a List which we will walk over twice
      List<String> me = new ArrayList<String>();
      me.add(name);
      // Note: this can happen if this field doesn't have an @API annotation, in which case we got an earlier WARN
      if (param.is_mutually_exclusive_with != null) me.addAll(Arrays.asList(param.is_mutually_exclusive_with));

      // Make a new Set which contains ourselves, fields we have already been connected to,
      // and fields *they* have already been connected to.
      Set<String> new_set = new HashSet<>();
      for (String s : me) {
        // Were we mentioned by a previous field?
        if (field_exclusivity_groups.containsKey(s))
          new_set.addAll(field_exclusivity_groups.get(s));
        else
          new_set.add(s);
      }

      // Now point all the fields in our Set to the Set.
      for (String s : me) {
        field_exclusivity_groups.put(s, new_set);
      }
    }

    // Now walk over all the fields and create new comprehensive is_mutually_exclusive arrays, not containing self.
    for (ModelParameterSchemaV3 param : metadata) {
      String name = param.name;
      Set<String> me = field_exclusivity_groups.get(name);
      Set<String> not_me = new HashSet<>(me);
      not_me.remove(name);
      param.is_mutually_exclusive_with = not_me.toArray(new String[not_me.size()]);
    }
  }

  /**
   * Write the parameters, including their metadata, into an AutoBuffer.  Used by
   * ModelBuilderSchema#writeJSON_impl and ModelSchemaV3#writeJSON_impl.
   */
  public static AutoBuffer writeParametersJSON(AutoBuffer ab, ModelParametersSchemaV3 parameters, ModelParametersSchemaV3 default_parameters) {
    String[] fields = parameters.fields();

    // Build ModelParameterSchemaV2 objects for each field, and the call writeJSON on the array
    ModelParameterSchemaV3[] metadata = new ModelParameterSchemaV3[fields.length];

    String field_name = null;
    try {
      for (int i = 0; i < fields.length; i++) {
        field_name = fields[i];
        Field f = parameters.getClass().getField(field_name);

        // TODO: cache a default parameters schema
        ModelParameterSchemaV3 schema = new ModelParameterSchemaV3(parameters, default_parameters, f);
        metadata[i] = schema;
      }
    } catch (NoSuchFieldException e) {
      throw new RuntimeException("Caught exception accessing field: " + field_name + " for schema object: " + parameters + ": " + e.toString());
    }

    compute_transitive_closure_of_is_mutually_exclusive(metadata);

    ab.putJSONA("parameters", metadata);
    return ab;
  }
}