package water.api;

import dontweave.gson.*;
import org.apache.commons.math3.util.Pair;
import water.*;
import water.api.Models.ModelSummary;
import water.fvec.Frame;

import java.util.*;

/**
 * REST request which lists Frames, optionally finds the Models that are compatible with them,
 * and optionally scores a single Frame with a single Model.
 *
 * <ul>
 *   <li>No {@code key}: summarize every Frame returned by {@link #fetchAll()}.</li>
 *   <li>{@code key} only: summarize that one Frame.</li>
 *   <li>{@code key} and {@code score_model}: return the ModelMetrics for that pair.</li>
 * </ul>
 */
public class Frames extends Request2 {

  ///////////////////////
  // Request2 boilerplate
  ///////////////////////
  static final int API_WEAVER=1; // This file has auto-gen'd doc & json fields
  static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.

  // This Request supports the HTML 'GET' command, and this is the help text
  // for GET.
  static final String DOC_GET = "Return the list of dataframes.";

  /**
   * Build an HTML anchor pointing at the Frames listing.
   *
   * @param k       not used by this implementation; the link always targets {@code /2/Frames}
   * @param content text (or markup) placed inside the anchor
   * @return the anchor markup
   */
  public static String link(Key k, String content){
    return "<a href='/2/Frames'>" + content + "</a>";
  }


  ////////////////
  // Query params:
  ////////////////
  @API(help="An existing H2O Frame key.", required=false, filter=Default.class)
  Frame key = null;

  @API(help="Find Models that are compatible with the Frame.", required=false, filter=Default.class)
  boolean find_compatible_models = false;

  @API(help="An existing H2O Model key to score with the Frame which is specified by the key parameter.", required=false, filter=Default.class)
  Model score_model = null;


  /////////////////
  // The Code (tm):
  /////////////////
  public static final Gson gson = new GsonBuilder().serializeSpecialFloatingPointValues().setPrettyPrinting().create();

  /**
   * Plain data holder describing one Frame; it is serialized to JSON with {@link #gson},
   * so the field names are part of the response format.
   */
  public static final class FrameSummary {
    public String id = null;
    public String key = null;
    public long creation_epoch_time_millis = -1;

    public String[] column_names = { };
    public Set<String> compatible_models = new HashSet<String>();
    public boolean is_raw_frame = true; // guilty until proven innocent
  }

  // TODO: refactor, since this is duplicated
  /**
   * Copy only the whitelisted members of a JSON object into a Map.
   *
   * @param unfiltered the JSON object to filter
   * @param whitelist  names of the members to keep
   * @return a Map holding the whitelisted members
   */
  @SuppressWarnings("rawtypes") // gson.fromJson(..., Map.class) can only hand back a raw Map
  private static Map whitelistJsonObject(JsonObject unfiltered, Set<String> whitelist) {
    // If we create a new JsonObject here and serialize it the key/value pairs are inside
    // a superfluous "members" object, so create a Map instead.
    JsonObject filtered = new JsonObject();

    Set<Map.Entry<String,JsonElement>> entries = unfiltered.entrySet();
    for (Map.Entry<String,JsonElement> entry : entries) {
      String key = entry.getKey();

      if (whitelist.contains(key))
        filtered.add(key, entry.getValue());
    }
    return gson.fromJson(gson.toJson(filtered), Map.class);
  }

  /**
   * Fetch all the Models so we can see if they are compatible with our Frame(s).
   *
   * @return a pair of (model key -> Model, model key -> set of the Model's column names).
   *         Both maps are empty when {@code find_compatible_models} is false.
   */
  private Pair<Map<String, Model>, Map<String, Set<String>>> fetchModels() {
    // Default to empty maps rather than null so callers never have to null-check.
    Map<String, Model> all_models = Collections.emptyMap();
    Map<String, Set<String>> all_models_cols = Collections.emptyMap();

    if (this.find_compatible_models) {
      // caches for this request
      all_models = (new Models()).fetchAll();
      all_models_cols = new TreeMap<String, Set<String>>();

      for (Map.Entry<String, Model> entry : all_models.entrySet()) {
        all_models_cols.put(entry.getKey(), new TreeSet<String>(Arrays.asList(entry.getValue()._names)));
      }
    }
    return new Pair<Map<String, Model>, Map<String, Set<String>>>(all_models, all_models_cols);
  }

  /**
   * Find the Models which can be applied to the given Frame.
   * A Model is considered compatible when the Frame contains all of the Model's columns and
   * {@code model.adapt(frame, false)} completes without throwing.
   *
   * @param frame           the Frame to test
   * @param all_models      model key -> Model; may be null, which yields an empty result
   * @param all_models_cols model key -> column names of that Model; may be null, which yields an empty result
   * @return the compatible Models, sorted by key
   */
  private static Map<String, Model> findCompatibleModels(Frame frame, Map<String, Model> all_models, Map<String, Set<String>> all_models_cols) {
    Map<String, Model> compatible_models = new TreeMap<String, Model>();

    // Reachable through the public generateFrameSummaries(), so tolerate missing caches.
    if (null == all_models || null == all_models_cols) {
      return compatible_models;
    }

    Set<String> frame_column_names = new HashSet<String>(Arrays.asList(frame._names));

    for (Map.Entry<String, Set<String>> entry : all_models_cols.entrySet()) {
      Set<String> model_cols = entry.getValue();

      if (frame_column_names.containsAll(model_cols)) {
        // See if adapt throws an exception or not.
        try {
          Model model = all_models.get(entry.getKey());
          Frame[] outputs = model.adapt(frame, false); // TODO: this does too much work; write canAdapt()
          // TODO: shouldn't we clean up the adapted vecs (outputs[0])?  But we can't delete() the frame as a whole. . .
          Frame trash = outputs[1];
          trash.delete();

          // A-Ok
          compatible_models.put(entry.getKey(), model);
        }
        catch (Exception e) {
          // Best effort: a failure here just means this Model is not reported as compatible.
          water.util.Log.debug("Model " + entry.getKey() + " is not compatible with the frame: " + e);
        }
      }
    }
    return compatible_models;
  }

  /**
   * Build a FrameSummary for each requested Frame.
   *
   * @param keys                   keys of the Frames to summarize, or null to summarize every entry of {@code frames}
   * @param frames                 frame key -> Frame
   * @param find_compatible_models whether to also fill in each summary's compatible Models
   * @param all_models             model key -> Model; only used when {@code find_compatible_models} is true
   * @param all_models_cols        model key -> column names; only used when {@code find_compatible_models} is true
   * @return frame key -> FrameSummary, sorted by key; keys with no Frame in {@code frames} are omitted
   */
  public static Map<String, FrameSummary> generateFrameSummaries(Set<String>keys, Map<String, Frame> frames, boolean find_compatible_models, Map<String, Model> all_models, Map<String, Set<String>> all_models_cols) {
    Map<String, FrameSummary> frameSummaries = new TreeMap<String, FrameSummary>();

    if (null == keys) {
      keys = frames.keySet();
    }

    for (String key : keys) {
      Frame frame = frames.get(key);
      if (null == frame) {
        // Nothing to summarize for this key; skip it instead of failing the whole request.
        continue;
      }

      FrameSummary summary = new FrameSummary();
      Frames.summarizeAndEnhanceFrame(summary, frame, find_compatible_models, all_models, all_models_cols);
      frameSummaries.put(key, summary);
    }

    return frameSummaries;
  }

  /**
   * Summarize fields in water.fvec.Frame.
   * Copies the identity, creation time, column names and raw-data flag of the Frame into the
   * summary and, if requested, the keys of the compatible Models.
   */
  private static void summarizeAndEnhanceFrame(FrameSummary summary, Frame frame, boolean find_compatible_models, Map<String, Model> all_models, Map<String, Set<String>> all_models_cols) {
    UniqueId unique_id = frame.getUniqueId();
    summary.id = unique_id.getId();
    summary.key = unique_id.getKey();
    summary.creation_epoch_time_millis = unique_id.getCreationEpochTimeMillis();

    summary.column_names = frame._names;
    summary.is_raw_frame = frame.isRawData();

    if (find_compatible_models) {
      Map<String, Model> compatible_models = findCompatibleModels(frame, all_models, all_models_cols);
      summary.compatible_models = compatible_models.keySet();
    }
  }

  /**
   * Fetch all Frames from the KV store.
   *
   * @return frame key -> Frame, as produced by the global key snapshot
   */
  protected static Map<String, Frame>fetchAll() {
    // Get all the fvec frame keys.
    return H2O.KeySnapshot.globalSnapshot().fetchAll(Frame.class);
  }

  /**
   * For one or more Frames from the KV store, summarize and enhance them and return a Response
   * containing a map of them.
   * The result has a "frames" member and, when {@code find_compatible_models} is set, a
   * "models" member summarizing every Model referenced by any of the Frames.
   */
  private Response serveOneOrAll(Map<String, Frame> framesMap) {
    // returns empty maps if !this.find_compatible_models
    Pair<Map<String, Model>, Map<String, Set<String>>> models_info = fetchModels();
    Map<String, Model> all_models = models_info.getFirst();
    Map<String, Set<String>> all_models_cols = models_info.getSecond();

    Map<String, FrameSummary> frameSummaries = Frames.generateFrameSummaries(null, framesMap, find_compatible_models, all_models, all_models_cols);

    Map<String, Object> resultsMap = new LinkedHashMap<String, Object>();
    resultsMap.put("frames", frameSummaries);

    // If find_compatible_models then include a map of the Model summaries.  Should we put this on a separate switch?
    if (this.find_compatible_models) {
      Set<String> all_referenced_models = new TreeSet<String>();

      for (Map.Entry<String, FrameSummary> entry: frameSummaries.entrySet()) {
        FrameSummary summary = entry.getValue();
        all_referenced_models.addAll(summary.compatible_models);
      }

      Map<String, ModelSummary> modelSummaries = Models.generateModelSummaries(all_referenced_models, all_models, false, null, null);
      resultsMap.put("models", modelSummaries);
    }

    // TODO: temporary hack to get things going
    String json = gson.toJson(resultsMap);

    JsonObject result = gson.fromJson(json, JsonElement.class).getAsJsonObject();
    return Response.done(result);
  }

  /**
   * Score a frame with the given model.
   * The ModelMetrics are looked up in the DKV first; on a miss they are computed and stored.
   *
   * @param frame       the Frame to score
   * @param score_model the Model to score it with
   * @return a Response whose "metrics" member is a one-element array holding the metrics JSON
   */
  protected static Response scoreOne(Frame frame, Model score_model) {
    water.ModelMetrics metrics = water.ModelMetrics.getFromDKV(score_model, frame);

    if (null == metrics) {
      // have to compute
      water.util.Log.debug("Cache miss: computing ModelMetrics. . .");
      long before = System.currentTimeMillis();
      Frame predictions = score_model.score(frame, true); // TODO: for now we're always calling adapt inside score
      long after = System.currentTimeMillis();

      ConfusionMatrix cm = new ConfusionMatrix(); // for regression this computes the MSE
      AUC auc = null;
      HitRatio hr = null;

      if (score_model.isClassifier()) {
        auc = new AUC();
        // hr = new HitRatio();
        score_model.calcError(frame, frame.vec(score_model.responseName()), predictions, predictions, "Prediction error:",
                              true, 20, cm, auc, hr);
      } else {
        score_model.calcError(frame, frame.vec(score_model.responseName()), predictions, predictions, "Prediction error:",
                              true, 20, cm, null, null);
      }

      // Now call AUC and ConfusionMatrix and maybe HitRatio
      metrics = new water.ModelMetrics(score_model.getUniqueId(),
                                       score_model.getModelCategory(),
                                       frame.getUniqueId(),
                                       after - before,
                                       after,
                                       (auc == null ? null : auc.data()),
                                       cm);

      // Put the metrics into the KV store
      metrics.putInDKV();
      // NOTE(review): predictions is never deleted here; confirm whether the scored frame needs explicit cleanup.
    } else {
      // it's already cached in the DKV
      water.util.Log.debug("using ModelMetrics from the cache. . .");
    }

    JsonObject metricsJson = metrics.toJSON();
    JsonArray metricsArray = new JsonArray();
    metricsArray.add(metricsJson);
    JsonObject result = new JsonObject();
    result.add("metrics", metricsArray);
    return Response.done(result);
  }

  /**
   * Dispatch the request: list all Frames, describe a single Frame, or score a single Frame.
   */
  @Override
  protected Response serve() {

    if (null == this.key) {
      return serveOneOrAll(fetchAll());
    }

    if (null != this.score_model) {
      // score it
      return scoreOne(this.key, this.score_model);
    }

    // just serve it
    Frame frame = this.key;
    Map<String, Frame> framesMap = new TreeMap<String, Frame>(); // Sort for pretty display and reliable ordering.
    framesMap.put(frame._key.toString(), frame);
    return serveOneOrAll(framesMap);
  } // serve()

} // class Frames