package ai.h2o.automl; import hex.ScoreKeeper; import hex.grid.HyperSpaceSearchCriteria; import hex.schemas.GridSearchSchema; import water.Iced; import water.Key; import water.api.schemas3.ImportFilesV3; import water.api.schemas3.JobV3; import water.fvec.Frame; import water.parser.ParseSetup; /** * Parameters which specify the build (or extension) of an AutoML build job. */ public class AutoMLBuildSpec extends Iced { /** * Default constructor provides the default behavior. */ public AutoMLBuildSpec() { this.input_spec = new AutoMLInput(); this.build_control = new AutoMLBuildControl(); // Note: no defaults for input_spec! this.feature_engineering = new AutoMLFeatureEngineering(); this.build_models = new AutoMLBuildModels (); this.ensemble_parameters = new AutoMLEnsembleParameters(); } /** * The specification of overall build parameters for the AutoML process. */ static final public class AutoMLBuildControl extends Iced { public AutoMLBuildControl() { stopping_criteria = new HyperSpaceSearchCriteria.RandomDiscreteValueSearchCriteria(); // reasonable defaults: stopping_criteria.set_max_runtime_secs(3600); stopping_criteria.set_stopping_rounds(3); stopping_criteria.set_stopping_tolerance(0.001); stopping_criteria.set_stopping_metric(ScoreKeeper.StoppingMetric.AUTO); } /** * Identifier for models that should be grouped together in the leaderboard * (e.g., "airlines" and "iris"). If the user doesn't set it we use the basename * of the training file name. */ public String project = null; public String loss = "AUTO"; // TODO: plumb through public HyperSpaceSearchCriteria.RandomDiscreteValueSearchCriteria stopping_criteria; } /** * The specification of the datasets to be used for the AutoML process. * The user can specify a directory path, a file path (including HDFS, s3 or the like), * or the ID of an already-parsed Frame in the H2O cluster. Paths are processed * as usual in H2O. */ static final public class AutoMLInput extends Iced { public ImportFilesV3.ImportFiles training_path; public ImportFilesV3.ImportFiles validation_path; public ImportFilesV3.ImportFiles test_path; public ParseSetup parse_setup; // @API(help="auxiliary relational datasets", direction=API.Direction.INPUT) // public String[] datasets_to_join; public Key<Frame> training_frame; public Key<Frame> validation_frame; public Key<Frame> test_frame; public String response_column; public String[] ignored_columns; } /** * The specification of automatic feature engineering to be used for the AutoML process. */ static final public class AutoMLFeatureEngineering extends Iced { public boolean try_mutations = false; } /** * The specification of the parameters for building models for a single algo (e.g., GBM), including base model parameters and hyperparameter search. */ static final public class AutoMLBuildModels extends Iced { public AutoML.algo[] exclude_algos; public GridSearchSchema[] model_searches; } /** * The specification of ensemble-building to be used for the AutoML process, if any. If this object is null, do not build ensembles. */ static final public class AutoMLEnsembleParameters extends Iced { } public AutoMLBuildControl build_control; public AutoMLInput input_spec; public AutoMLFeatureEngineering feature_engineering; public AutoMLBuildSpec.AutoMLBuildModels build_models; public AutoMLEnsembleParameters ensemble_parameters; // output public JobV3 job; private transient String project_cached = null; public String project() { if (null != project_cached) return project_cached; // allow the user to override: if (null != build_control.project) { project_cached = build_control.project; return project_cached; } String specified = input_spec.training_path != null ? input_spec.training_path.path : input_spec.training_frame.toString(); String[] path = specified.split("/"); project_cached = path[path.length - 1] .replace(".hex", "") .replace(".CSV", "") .replace(".XLS", "") .replace(".XSLX", "") .replace(".SVMLight", "") .replace(".ARFF", "") .replace(".ORC", "") .replace(".csv", "") .replace(".xls", "") .replace(".xslx", "") .replace(".svmlight", "") .replace(".arff", "") .replace(".orc", ""); project_cached = "automl_" + project_cached; return project_cached; } }