package water.bindings.examples.retrofit; import water.bindings.H2oApi; import water.bindings.pojos.*; import java.io.IOException; import java.util.Arrays; public class ImportPatternExample { public static void importPatternExample(String url) throws IOException { H2oApi h2o = url != null ? new H2oApi(url) : new H2oApi(); //Set url if (url != null) { h2o.setUrl(url); } //Util var JobV3 job = null; //Init h2o session String sessionId = h2o.newSession().sessionKey; //Import and parse files based on regex pattern { //prostate dataset (Single file) String pattern = "prostate_0.*"; //Regex pattern of file to import ImportFilesV3 importBody = h2o.importFiles("../smalldata/junit/parse_folder", pattern); ParseSetupV3 parseSetupParams = new ParseSetupV3(); parseSetupParams.sourceFrames = H2oApi.stringArrayToKeyArray(importBody.destinationFrames, FrameKeyV3.class); parseSetupParams.checkHeader = 1; ParseSetupV3 parseSetupBody = h2o.guessParseSetup(parseSetupParams); ParseV3 parseParms = new ParseV3(); H2oApi.copyFields(parseParms, parseSetupBody); parseParms.destinationFrame = H2oApi.stringToFrameKey("prostate"); parseParms.blocking = true; ParseV3 parseBody = h2o.parse(parseParms); assert importBody.files.length == 1; String[] parsedFiles = new String[importBody.files.length]; for(int i = 0; i < importBody.files.length; i ++){ parsedFiles[i] = importBody.files[i].substring(importBody.files[0].lastIndexOf("/")+1); } String[] result = {"prostate_0.csv"}; assert parseBody.numberColumns == 9; assert parseBody.rows == 10; String[] colNames = {"ID", "CAPSULE", "AGE", "RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"}; assert Arrays.equals(parseBody.columnNames,colNames); } { //iris dataset (Single file) String pattern = "iris_.*_correct.*"; //Regex pattern of file to import ImportFilesV3 importBody = h2o.importFiles("../smalldata/iris", pattern); ParseSetupV3 parseSetupParams = new ParseSetupV3(); parseSetupParams.sourceFrames = H2oApi.stringArrayToKeyArray(importBody.destinationFrames, FrameKeyV3.class); parseSetupParams.checkHeader = 1; ParseSetupV3 parseSetupBody = h2o.guessParseSetup(parseSetupParams); ParseV3 parseParms = new ParseV3(); H2oApi.copyFields(parseParms, parseSetupBody); parseParms.destinationFrame = H2oApi.stringToFrameKey("iris"); parseParms.blocking = true; ParseV3 parseBody = h2o.parse(parseParms); assert importBody.files.length == 1; String[] parsedFiles = new String[importBody.files.length]; for(int i = 0; i < importBody.files.length; i ++){ parsedFiles[i] = importBody.files[i].substring(importBody.files[i].lastIndexOf("/")+1); } String[] result = {"iris_wheader_correct.csv"}; assert Arrays.equals(parsedFiles,result); assert parseBody.numberColumns == 5; assert parseBody.rows == 150; String[] colNames = {"sepal_length", "sepal_width", "petal_length", "petal_width", "species"}; assert Arrays.equals(parseBody.columnNames,colNames); } { //GBM datasets (Multiple files) String pattern = "50_.*"; //Regex pattern of files to import ImportFilesV3 importBody = h2o.importFiles("../smalldata/gbm_test", pattern); ParseSetupV3 parseSetupParams = new ParseSetupV3(); parseSetupParams.sourceFrames = H2oApi.stringArrayToKeyArray(importBody.destinationFrames, FrameKeyV3.class); parseSetupParams.checkHeader = 1; ParseSetupV3 parseSetupBody = h2o.guessParseSetup(parseSetupParams); ParseV3 parseParms = new ParseV3(); H2oApi.copyFields(parseParms, parseSetupBody); parseParms.destinationFrame = H2oApi.stringToFrameKey("50cat"); parseParms.blocking = true; ParseV3 parseBody = h2o.parse(parseParms); assert importBody.files.length == 2; String[] parsedFiles = new String[importBody.files.length]; for(int i = 0; i < importBody.files.length; i ++){ parsedFiles[i] = importBody.files[i].substring(importBody.files[i].lastIndexOf("/")+1); } String[] result = {"50_cattest_train.csv","50_cattest_test.csv"}; Arrays.sort(result); Arrays.sort(parsedFiles); assert Arrays.equals(parsedFiles,result); assert parseBody.numberColumns == 3; assert parseBody.rows == 5000; String[] colNames = {"x1","x2","y"}; assert Arrays.equals(parseBody.columnNames,colNames); } // STEP 99: end the session h2o.endSession(); } public static void importPatternExample() throws IOException { importPatternExample(null); } public static void main (String[] args) throws IOException { importPatternExample(); } }