package hex.glrm; import hex.genmodel.algos.glrm.GlrmInitialization; import hex.genmodel.algos.glrm.GlrmLoss; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Set; import hex.DataInfo; import hex.Model; import hex.grid.Grid; import hex.grid.GridSearch; import water.Job; import water.Key; import water.TestUtil; import water.fvec.Frame; import water.util.ArrayUtils; public class GLRMGridTest extends TestUtil { @BeforeClass() public static void setup() { stall_till_cloudsize(1); } @Test public void testMultipleGridInvocation() { Grid<GLRMModel.GLRMParameters> grid = null; Frame fr = null; try { fr = parse_test_file("smalldata/iris/iris_wheader.csv"); // Hyper-space HashMap<String, Object[]> hyperParms = new HashMap<String, Object[]>() {{ put("_k", new Integer[] {2, 4}); // Search over this range of the init enum put("_transform", new DataInfo.TransformType[] { DataInfo.TransformType.NONE, DataInfo.TransformType.DEMEAN }); }}; // Name of used hyper parameters String[] hyperParamNames = hyperParms.keySet().toArray(new String[hyperParms.size()]); Arrays.sort(hyperParamNames); int hyperSpaceSize = ArrayUtils.crossProductSize(hyperParms); // Create default parameters GLRMModel.GLRMParameters params = new GLRMModel.GLRMParameters(); params._train = fr._key; params._seed = 4224L; params._loss = GlrmLoss.Absolute; params._init = GlrmInitialization.SVD; // // Fire off a grid search multiple times with same key and make sure // that results are same // final int ITER_CNT = 2; Key<Model>[][] modelKeys = new Key[ITER_CNT][]; Key<Grid> gridKey = Key.make("GLRM_grid_iris" + Key.rand()); for (int i = 0; i < ITER_CNT; i++) { Job<Grid> gs = GridSearch.startGridSearch(gridKey, params, hyperParms); grid = (Grid<GLRMModel.GLRMParameters>) gs.get(); modelKeys[i] = grid.getModelKeys(); // Make sure number of produced models match size of specified hyper space Assert.assertEquals("Size of grid should match to size of hyper space", hyperSpaceSize, grid.getModelCount() + grid.getFailureCount()); // // Make sure that names of used parameters match // String[] gridHyperNames = grid.getHyperNames(); Arrays.sort(gridHyperNames); Assert.assertArrayEquals("Hyper parameters names should match!", hyperParamNames, gridHyperNames); } Assert.assertArrayEquals("The model keys should be same between two iterations!", modelKeys[0], modelKeys[1]); } finally { if (fr != null) { fr.remove(); } if (grid != null) { grid.remove(); } } } @Test public void testGridAppend() { Grid<GLRMModel.GLRMParameters> grid = null; Frame fr = null; try { fr = parse_test_file("smalldata/iris/iris_wheader.csv"); // Hyper-space HashMap<String, Object[]> hyperParms = new HashMap<String, Object[]>() {{ put("_k", new Integer[] {2, 4}); // Search over this range of the init enum put("_transform", new DataInfo.TransformType[] { DataInfo.TransformType.NONE, DataInfo.TransformType.DEMEAN }); }}; // Name of used hyper parameters final String[] hyperParamNames1 = hyperParms.keySet().toArray(new String[hyperParms.size()]); Arrays.sort(hyperParamNames1); final int hyperSpaceSize1 = ArrayUtils.crossProductSize(hyperParms); // Create default parameters GLRMModel.GLRMParameters params = new GLRMModel.GLRMParameters(); params._train = fr._key; params._seed = 4224L; params._loss = GlrmLoss.Absolute; params._init = GlrmInitialization.SVD; // // Fire off a grid two times with same key and make sure // that final grid contains all models from both runs. // Key<Grid> gridKey = Key.make("GLRM_grid_iris" + Key.rand()); // 1st iteration final Job<Grid> gs1 = GridSearch.startGridSearch(gridKey, params, hyperParms); grid = (Grid<GLRMModel.GLRMParameters>) gs1.get(); // Make sure number of produced models match size of specified hyper space Assert.assertEquals("Size of grid should match to size of hyper space", hyperSpaceSize1, grid.getModelCount() + grid.getFailureCount()); // Make sure that names of used parameters match String[] gridHyperNames1 = grid.getHyperNames(); Arrays.sort(gridHyperNames1); Assert.assertArrayEquals("Hyper parameters names should match!", hyperParamNames1, gridHyperNames1); // 2nd iteration hyperParms.put("_k", new Integer[] { 3 }); final String[] hyperParamNames2 = hyperParms.keySet().toArray(new String[hyperParms.size()]); Arrays.sort(hyperParamNames2); final int hyperSpaceSize2 = ArrayUtils.crossProductSize(hyperParms); Assert.assertArrayEquals("Names of hyperspaces should be same!", hyperParamNames1, hyperParamNames2); final Job<Grid> gs2 = GridSearch.startGridSearch(gridKey, params, hyperParms); grid = (Grid<GLRMModel.GLRMParameters>) gs2.get(); // Make sure number of produced models match size of specified hyper space Assert.assertEquals("Size of grid should match to size of hyper space", hyperSpaceSize1 + hyperSpaceSize2, grid.getModelCount() + grid.getFailureCount()); // Make sure that names of used parameters match String[] gridHyperNames2 = grid.getHyperNames(); Arrays.sort(gridHyperNames2); Assert.assertArrayEquals("Hyper parameters names should match!", hyperParamNames2, gridHyperNames2); // Verify PUBDEV-2633 - unique names of models Set<String> modelNames = new HashSet<>(grid.getModelCount()); for (Key<Model> modelKey : grid.getModelKeys()) { modelNames.add(modelKey.toString()); } Assert.assertEquals("Model names should be unique!", grid.getModelCount(), modelNames.size()); } finally { if (fr != null) { fr.remove(); } if (grid != null) { grid.remove(); } } } }