package water.score; import java.util.HashMap; import java.util.HashSet; import water.util.Log; import water.util.Log.Tag.Sys; /** * Embedded Scoring model */ public abstract class ScoreModel { public final String _name; public final String _colNames[]; // Column names ScoreModel( String name, String colNames[] ) { _name = name; _colNames = colNames; } // Convert an XML name to a java name protected static String xml2jname( String xml ) { // Convert pname to a valid java name StringBuilder nn = new StringBuilder(); char[] cs = xml.toCharArray(); if( !Character.isJavaIdentifierStart(cs[0]) ) nn.append('X'); for( char c : cs ) { if( !Character.isJavaIdentifierPart(c) ) { nn.append('_'); } else { nn.append(c); } } String jname = nn.toString(); return jname; } // The list of JIT'd classes, each a specific subclass of ScorecardModel // representing the optimized version of a particular set of scoring rules. final static HashSet<String> CLASS_NAMES = new HashSet<String>(); // Make a unique class name for jit'd subclasses of ScoreModel protected static String uniqueClassName(String name) { // Make a unique class name String cname = xml2jname(name); if( CLASS_NAMES.contains(cname) ) { int i=0; while( CLASS_NAMES.contains(cname+i) ) i++; cname = cname+i; } CLASS_NAMES.add(cname); return cname; } // A mapping from the dense columns desired by the model, to the above // feature list, computed by asking the model for a mapping (given a list of // features). Some features may be unused and won't appear in the mapping. // If the data row features list does not mention all the features the model // needs, then this map will contain a -1 for the missing feature index. public int[] columnMapping( String[] features ) { int[] map = new int[_colNames.length]; for( int i=0; i<_colNames.length; i++ ) { map[i] = -1; // Assume it is missing for( int j=0; j<features.length; j++ ) { if( _colNames[i].equals(features[j]) ) { if( map[i] != -1 ) throw new IllegalArgumentException("duplicate feature "+_colNames[i]); map[i] = j; } } if( map[i] == -1 ) Log.warn(Sys.SCORM,"Model feature "+_colNames[i]+" not in the provided feature list from the data"); } return map; } /** Score this model on the specified row of data, where the data is * specified as a collection of K/V pairs - Values are one of String or * Boolean or Number (or subclasses of Number) */ public abstract double score(final HashMap<String, Comparable> row ); /** Score this model on the specified row of data, where the data is * specified as the members of arrays. MAP is used to map between the SS/DS * columns and the columns desired by the Model; this map can be made by a * single call to columnMapping. SS/DS hold either String values (for * enum/categorical data) or a primitive double. This format exchanges a * HashMap lookup for a bare array access, and can be faster (perhaps much * faster) for models that are alread quick to score. */ public abstract double score(int[] MAP, String[] SS, double[] DS); }