package hex.glm; import hex.FrameSplitter; import hex.ModelMetricsBinomialGLM; import hex.ModelMetricsBinomialGLM.ModelMetricsMultinomialGLM; import hex.ModelMetricsMultinomial; import hex.deeplearning.DeepLearningModel; import hex.glm.GLMModel.GLMParameters; import hex.glm.GLMModel.GLMParameters.Family; import hex.glm.GLMModel.GLMParameters.Solver; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import water.*; import water.fvec.*; import water.util.FrameUtils; import java.util.Arrays; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; /** * Created by tomasnykodym on 10/28/15. */ public class GLMBasicTestMultinomial extends TestUtil { static Frame _covtype; static Frame _train; static Frame _test; @BeforeClass public static void setup() { stall_till_cloudsize(1); _covtype = parse_test_file("smalldata/covtype/covtype.20k.data"); _covtype.replace(_covtype.numCols()-1,_covtype.lastVec().toCategoricalVec()).remove(); Key[] keys = new Key[]{Key.make("train"),Key.make("test")}; H2O.submitTask(new FrameSplitter(_covtype, new double[]{.8},keys,null)).join(); _train = DKV.getGet(keys[0]); _test = DKV.getGet(keys[1]); } @AfterClass public static void cleanUp() { if(_covtype != null) _covtype.delete(); if(_train != null) _train.delete(); if(_test != null) _test.delete(); } @Test public void testCovtypeNoIntercept(){ GLMParameters params = new GLMParameters(Family.multinomial); GLMModel model = null; Frame preds = null; Vec weights = _covtype.anyVec().makeCon(1); Key k = Key.<Frame>make("cov_with_weights"); Frame f = new Frame(k,_covtype.names(),_covtype.vecs()); f.add("weights",weights); DKV.put(f); try { params._response_column = "C55"; params._train = k; params._valid = _covtype._key; params._objective_epsilon = 1e-6; params._beta_epsilon = 1e-4; params._weights_column = "weights"; params._missing_values_handling = DeepLearningModel.DeepLearningParameters.MissingValuesHandling.Skip; params._intercept = false; double[] alpha = new double[]{0,.5,.1}; Solver s = Solver.L_BFGS; System.out.println("solver = " + s); params._solver = s; params._max_iterations = 5000; for (int i = 0; i < alpha.length; ++i) { params._alpha = new double[]{alpha[i]}; // params._lambda[0] = lambda[i]; model = new GLM(params).trainModel().get(); System.out.println(model.coefficients()); // Assert.assertEquals(0,model.coefficients().get("Intercept"),0); double [][] bs = model._output.getNormBetaMultinomial(); for(double [] b:bs) Assert.assertEquals(0,b[b.length-1],0); System.out.println(model._output._model_summary); System.out.println(model._output._training_metrics); System.out.println(model._output._validation_metrics); preds = model.score(_covtype); ModelMetricsMultinomialGLM mmTrain = (ModelMetricsMultinomialGLM) hex.ModelMetricsMultinomial.getFromDKV(model, _covtype); assertTrue(model._output._training_metrics.equals(mmTrain)); model.delete(); model = null; preds.delete(); preds = null; } } finally{ weights.remove(); DKV.remove(k); if(model != null)model.delete(); if(preds != null)preds.delete(); } } @Test public void testCovtypeBasic(){ GLMParameters params = new GLMParameters(Family.multinomial); GLMModel model = null; Frame preds = null; Vec weights = _covtype.anyVec().makeCon(1); Key k = Key.<Frame>make("cov_with_weights"); Frame f = new Frame(k,_covtype.names(),_covtype.vecs()); f.add("weights",weights); DKV.put(f); try { params._response_column = "C55"; params._train = k; params._valid = _covtype._key; params._lambda = new double[]{4.881e-05}; params._alpha = new double[]{1}; params._objective_epsilon = 1e-6; params._beta_epsilon = 1e-4; params._weights_column = "weights"; params._missing_values_handling = DeepLearningModel.DeepLearningParameters.MissingValuesHandling.Skip; double[] alpha = new double[]{1}; double[] expected_deviance = new double[]{25499.76}; double[] lambda = new double[]{2.544750e-05}; for (Solver s : new Solver[]{Solver.IRLSM, Solver.COORDINATE_DESCENT, Solver.L_BFGS}) { System.out.println("solver = " + s); params._solver = s; params._max_iterations = params._solver == Solver.L_BFGS?300:10; for (int i = 0; i < alpha.length; ++i) { params._alpha[0] = alpha[i]; params._lambda[0] = lambda[i]; model = new GLM(params).trainModel().get(); System.out.println(model._output._model_summary); System.out.println(model._output._training_metrics); System.out.println(model._output._validation_metrics); assertTrue(model._output._training_metrics.equals(model._output._validation_metrics)); assertTrue(((ModelMetricsMultinomialGLM) model._output._training_metrics)._resDev <= expected_deviance[i] * 1.1); preds = model.score(_covtype); ModelMetricsMultinomialGLM mmTrain = (ModelMetricsMultinomialGLM) hex.ModelMetricsMultinomial.getFromDKV(model, _covtype); assertTrue(model._output._training_metrics.equals(mmTrain)); model.delete(); model = null; preds.delete(); preds = null; } } } finally{ weights.remove(); DKV.remove(k); if(model != null)model.delete(); if(preds != null)preds.delete(); } } @Test public void testCovtypeMinActivePredictors(){ GLMParameters params = new GLMParameters(Family.multinomial); GLMModel model = null; Frame preds = null; try { params._response_column = "C55"; params._train = _covtype._key; params._valid = _covtype._key; params._lambda = new double[]{4.881e-05}; params._alpha = new double[]{1}; params._objective_epsilon = 1e-6; params._beta_epsilon = 1e-4; params._max_active_predictors = 50; params._max_iterations = 10; double[] alpha = new double[]{.99}; double expected_deviance = 33000; double[] lambda = new double[]{2.544750e-05}; Solver s = Solver.COORDINATE_DESCENT; System.out.println("solver = " + s); params._solver = s; model = new GLM(params).trainModel().get(); System.out.println(model._output._model_summary); System.out.println(model._output._training_metrics); System.out.println(model._output._validation_metrics); System.out.println("rank = " + model._output.rank() + ", max active preds = " + params._max_active_predictors); assertTrue(model._output.rank() < params._max_active_predictors + model._output.nclasses()); assertTrue(model._output._training_metrics.equals(model._output._validation_metrics)); assertTrue(((ModelMetricsMultinomialGLM) model._output._training_metrics)._resDev <= expected_deviance * 1.1); preds = model.score(_covtype); ModelMetricsMultinomialGLM mmTrain = (ModelMetricsMultinomialGLM) hex.ModelMetricsMultinomial.getFromDKV(model, _covtype); assertTrue(model._output._training_metrics.equals(mmTrain)); model.delete(); model = null; preds.delete(); preds = null; } finally{ if(model != null)model.delete(); if(preds != null)preds.delete(); } } @Test public void testCovtypeLS(){ GLMParameters params = new GLMParameters(Family.multinomial); GLMModel model = null; Frame preds = null; try { double expected_deviance = 33000; params._nlambdas = 3; params._response_column = "C55"; params._train = _covtype._key; params._valid = _covtype._key; params._alpha = new double[]{.99}; params._objective_epsilon = 1e-6; params._beta_epsilon = 1e-4; params._max_active_predictors = 50; params._max_iterations = 500; params._solver = Solver.AUTO; params._lambda_search = true; model = new GLM(params).trainModel().get(); System.out.println(model._output._training_metrics); System.out.println(model._output._validation_metrics); assertTrue(model._output._training_metrics.equals(model._output._validation_metrics)); preds = model.score(_covtype); ModelMetricsMultinomialGLM mmTrain = (ModelMetricsMultinomialGLM) hex.ModelMetricsMultinomial.getFromDKV(model, _covtype); assertTrue(model._output._training_metrics.equals(mmTrain)); assertTrue(((ModelMetricsMultinomialGLM) model._output._training_metrics)._resDev <= expected_deviance); System.out.println(model._output._model_summary); model.delete(); model = null; preds.delete(); preds = null; } finally{ if(model != null)model.delete(); if(preds != null)preds.delete(); } } @Test public void testCovtypeNAs(){ GLMParameters params = new GLMParameters(Family.multinomial); GLMModel model = null; Frame preds = null; Frame covtype_subset = null, covtype_copy = null; try { double expected_deviance = 26000; covtype_copy = _covtype.deepCopy("covtype_copy"); DKV.put(covtype_copy); Vec.Writer w = covtype_copy.vec(54).open(); w.setNA(10); w.setNA(20); w.setNA(30); w.close(); covtype_subset = new Frame(Key.<Frame>make("covtype_subset"),new String[]{"C51","C52","C53","C54","C55"},covtype_copy.vecs(new int[]{50,51,52,53,54})); DKV.put(covtype_subset); // params._nlambdas = 3; params._response_column = "C55"; params._train = covtype_copy._key; params._valid = covtype_copy._key; params._alpha = new double[]{.99}; params._objective_epsilon = 1e-6; params._beta_epsilon = 1e-4; params._max_active_predictors = 50; params._max_iterations = 500; params._solver = Solver.L_BFGS; params._missing_values_handling = DeepLearningModel.DeepLearningParameters.MissingValuesHandling.Skip; // params._lambda_search = true; model = new GLM(params).trainModel().get(); assertEquals(covtype_copy.numRows()-3-1,model._nullDOF); System.out.println(model._output._training_metrics); System.out.println(model._output._validation_metrics); assertTrue(model._output._training_metrics.equals(model._output._validation_metrics)); preds = model.score(covtype_copy); ModelMetricsMultinomialGLM mmTrain = (ModelMetricsMultinomialGLM) hex.ModelMetricsMultinomial.getFromDKV(model, covtype_copy); assertTrue(model._output._training_metrics.equals(mmTrain)); assertTrue(((ModelMetricsMultinomialGLM) model._output._training_metrics)._resDev <= expected_deviance); System.out.println(model._output._model_summary); model.delete(); model = null; preds.delete(); preds = null; // now run the same on the subset params._train = covtype_subset._key; model = new GLM(params).trainModel().get(); assertEquals(covtype_copy.numRows()-3-1,model._nullDOF); System.out.println(model._output._training_metrics); System.out.println(model._output._validation_metrics); assertTrue(model._output._training_metrics.equals(model._output._validation_metrics)); preds = model.score(_covtype); System.out.println(model._output._model_summary); assertTrue(((ModelMetricsMultinomialGLM) model._output._training_metrics)._resDev <= 66000); model.delete(); model = null; preds.delete(); preds = null; } finally{ if(covtype_subset != null) covtype_subset.delete(); if(covtype_copy != null)covtype_copy.delete(); if(model != null)model.delete(); if(preds != null)preds.delete(); } } }