package embedded; import java.io.*; import java.util.*; import java.util.Map.Entry; /** * A Sample Main Class which embeddeds the H2O Scoring Engine * * The scoring engine is a modular, seperable piece of H2O intended to be * embedded into a larger user application. It understands how to load and * score all H2O models, but cannot do modeling by itself. * * In the directory above this one (the 'embedded' package directory), * compile this file: * $ javac -cp ".;h2o.jar" embedded/ScoreMain.java * And Go! * $ java -cp ".;h2o.jar" embedded/ScoreMain * Sample App Init Code Goes Here; loading data to score * Loading model to score * [h2o] Warning: feature btr_port_nom used by the model is not in the provided feature list from the data * Initial score0(HashMap)=30.2953306 * Initial score (HashMap)=30.2953306 * Initial score (Arrays )=30.2953306 * Timing... * score(HashMap) 1000 in 2085678ns = 2085.678 nanosec/score * score(Arrays ) 1000 in 835487ns = 835.487 nanosec/score * score(HashMap) 1000000 in 442568040ns = 442.56804 nanosec/score * score(Arrays ) 1000000 in 198211951ns = 198.211951 nanosec/score * score(HashMap) 10000000 in 4311345978ns = 431.1345978 nanosec/score * score(Arrays ) 10000000 in 1942011556ns = 194.2011556 nanosec/score * score(HashMap) 100000000 in 43612466550ns = 436.1246655 nanosec/score * score(Arrays ) 100000000 in 19390716132ns = 193.90716132 nanosec/score * Sample App Shuts Down */ class ScoreRandom { // A JavaScript-like data row; K/V pairs indexed by a String fieldname, and // returning either a String or a subclass of Number (Double or Long) or a // Boolean. static HashMap<String, Comparable> ROW; // A *efficient* but less convenient representation of a data row. Features // are named as with the HashMap version, but we convert the feature String // into an index into these arrays; Strings go in the matching String array // and other values in the double array. Booleans go in the double array as // 0 or 1. Missing values are null in the Strings or NaNs in the doubles. // As long as the *set of features* does not change, then this mapping can be // invariant across many data rows. static String[] FEATURES; static String[] SS; static double[] DS; // A mapping from the dense columns desired by the model, to the above // feature list, computed by asking the model for a mapping (given a list of // features). Some features may be unused and won't appear in the mapping. // If the data row features list does not mention all the features the model // needs, then this map will contain a -1 for the missing feature index. static int[] MAP; public static void main(String[] args) throws Exception { // Prep the app; get data available sampleAppInit(args); // Load a PMML model System.out.println("Loading model to score"); InputStream fis = new BufferedInputStream(new FileInputStream("../../../../demo/SampleScorecard.pmml")); water.score.ScoreModel scm = water.parser.PMMLParser.parse(fis); // Pre-compute the row data into arrays. The expectation is that this // mapping is done early by the data producer, and a row of data is passed // about as a pair of SS/DS arrays. The mapping is invariant between the // data rows and the model (i.e., changing either the model or the data // layout requires computing a new mapping). FEATURES = ROW.keySet().toArray(new String[0]); SS = new String[FEATURES.length]; DS = new double[FEATURES.length]; for( int i=0; i<FEATURES.length; i++ ) { SS[i] = null; DS[i] = Double.NaN; Object o = ROW.get(FEATURES[i]); if( o == null ) ; else if( o instanceof String ) SS[i]=(String)o; else if( o instanceof Number ) DS[i]=((Number)o).doubleValue(); else if( o instanceof Boolean) DS[i]=((Boolean)o).booleanValue() ? 1 : 0; else throw new IllegalArgumentException("Unknown datatype "+o.getClass()); } // Given a feature list, map them to columns in the model MAP = scm.columnMapping(FEATURES); sampleAppDoesStuff(scm); System.out.println("Sample App Shuts Down"); System.exit(0); } public static void sampleAppInit(String[] args) throws Exception { System.out.println("Sample App Init Code Goes Here; loading data to score"); // Make data available. In this case, parse a simple text file and inject // pairs into a HashMap. File f = new File("../../demo/SampleData.txt"); String text = new Scanner( f ).useDelimiter("\\A").next(); ROW = new HashMap<String, Comparable>(); String[] toks = text.split("[,\\{\\}]"); for( String tok : toks ) { if( tok.length() > 0 ) { String pair[] = tok.split("[=:]"); if( pair.length==2 ) { String feature = trim(pair[0]); ROW.put(feature,asNum(trim(pair[1]))); } } } } static String trim( String x ) { return x.trim().replaceAll("\"",""); } static Comparable asNum( String x ) { if( "true" .equals(x) ) return new Boolean(true ); if( "false".equals(x) ) return new Boolean(false); try { return new Long (Long .valueOf(x)); } catch( NumberFormatException nfe ) { } try { return new Double (Double.valueOf(x)); } catch( NumberFormatException nfe ) { } return x; } public static void sampleAppDoesStuff(water.score.ScoreModel scm) { System.out.println("Initial score_interpreter(HashMap)="+ ((water.score.ScorecardModel)scm).score_interpreter(ROW)); System.out.println("Initial score (HashMap)="+scm.score(ROW)); System.out.println("Initial score (Arrays )="+scm.score(MAP,SS,DS)); for( int i=0; i<1000; i++ ) { loop1000(scm,true ); loop1000(scm,false); } try { Thread.sleep(1000); } catch( Exception e ) { } System.out.println("Timing..."); Random rand = new Random(); for( int i = 0; i < 1000; i++ ) { for(Entry<String, Comparable> entry : ROW.entrySet()) { if(entry.getValue() instanceof Double) entry.setValue(rand.nextDouble()); if(entry.getValue() instanceof String) entry.setValue("" + rand.nextDouble()); } timeTwo(scm,1000); // 1 million System.out.println(scm.score(ROW)); } } public static void timeTwo(water.score.ScoreModel scm, int iter ) { timeOne(scm,iter,true ); timeOne(scm,iter,false); } public static void timeOne(water.score.ScoreModel scm, int iter, boolean mapOrAry) { //long start = System.currentTimeMillis(); long start = System.nanoTime(); for( int i=0; i<iter; i++ ) loop1000(scm,mapOrAry); //long now = System.currentTimeMillis(); long now = System.nanoTime(); long delta = now-start; //double nanos = delta*1000.0*1000.0; double nanos = (double)delta; System.out.println("score("+(mapOrAry?"HashMap":"Arrays ")+") "+(iter*1000)+" in "+(long)nanos+"ns = "+(nanos/(iter*1000.0))+" nanosec/score"); } public static void loop1000(water.score.ScoreModel scm, boolean mapOrAry) { if( mapOrAry ) for( int i=0; i<1000; i++ ) scm.score(ROW); else for( int i=0; i<1000; i++ ) scm.score(MAP,SS,DS); } }