package water.api.schemas3; import water.DKV; import water.Futures; import water.Key; import water.MemoryManager; import water.api.API; import water.api.schemas3.KeyV3.FrameKeyV3; import water.fvec.ByteVec; import water.fvec.Chunk; import water.fvec.Frame; import water.fvec.Frame.VecSpecifier; import water.fvec.Vec; import water.parser.BufferedString; import water.util.ChunkSummary; import water.util.FrameUtils; import water.util.Log; import water.util.PrettyPrint; /** * All the details on a Frame. Note that inside ColV3 there are fields which won't be * populated if we don't compute rollups, e.g. via * the REST API endpoint /Frames/<frameid>/columns/<colname>/summary. */ public class FrameV3 extends FrameBaseV3<Frame, FrameV3> { // Input fields @API(help="Row offset to display",direction=API.Direction.INPUT) public long row_offset; @API(help="Number of rows to display",direction=API.Direction.INOUT) public int row_count; @API(help="Column offset to return", direction=API.Direction.INOUT) public int column_offset; @API(help="Number of columns to return", direction=API.Direction.INOUT) public int column_count; @API(help="Total number of columns in the Frame", direction=API.Direction.INOUT) public int total_column_count; // Output fields @API(help="checksum", direction=API.Direction.OUTPUT) public long checksum; @API(help="Number of rows in the Frame", direction=API.Direction.OUTPUT) public long rows; @API(help="Number of columns in the Frame", direction=API.Direction.OUTPUT) public long num_columns; @API(help="Default percentiles, from 0 to 1", direction=API.Direction.OUTPUT) public double[] default_percentiles; @API(help="Columns in the Frame", direction=API.Direction.OUTPUT) public ColV3[] columns; @API(help="Compatible models, if requested", direction=API.Direction.OUTPUT) public String[] compatible_models; @API(help="Chunk summary", direction=API.Direction.OUTPUT) public TwoDimTableV3 chunk_summary; @API(help="Distribution summary", direction=API.Direction.OUTPUT) public TwoDimTableV3 distribution_summary; public static class ColSpecifierV3 extends SchemaV3<VecSpecifier, ColSpecifierV3> { public ColSpecifierV3() { } public ColSpecifierV3(String column_name) { this.column_name = column_name; } @API(help="Name of the column", direction= API.Direction.INOUT) public String column_name; @API(help="List of fields which specify columns that must contain this column", direction= API.Direction.INOUT) public String[] is_member_of_frames; } public static class ColV3 extends SchemaV3<Vec, ColV3> { public ColV3() {} @API(help="label", direction=API.Direction.OUTPUT) public String label; @API(help="missing", direction=API.Direction.OUTPUT) public long missing_count; @API(help="zeros", direction=API.Direction.OUTPUT) public long zero_count; @API(help="positive infinities", direction=API.Direction.OUTPUT) public long positive_infinity_count; @API(help="negative infinities", direction=API.Direction.OUTPUT) public long negative_infinity_count; @API(help="mins", direction=API.Direction.OUTPUT) public double[] mins; @API(help="maxs", direction=API.Direction.OUTPUT) public double[] maxs; @API(help="mean", direction=API.Direction.OUTPUT) public double mean; @API(help="sigma", direction=API.Direction.OUTPUT) public double sigma; @API(help="datatype: {enum, string, int, real, time, uuid}", direction=API.Direction.OUTPUT) public String type; @API(help="domain; not-null for categorical columns only", direction=API.Direction.OUTPUT) public String[] domain; @API(help="cardinality of this column's domain; not-null for categorical columns only", direction=API.Direction.OUTPUT) public int domain_cardinality; @API(help="data", direction=API.Direction.OUTPUT) public double[] data; @API(help="string data", direction=API.Direction.OUTPUT) public String[] string_data; @API(help="decimal precision, -1 for all digits", direction=API.Direction.OUTPUT) public byte precision; @API(help="Histogram bins; null if not computed", direction=API.Direction.OUTPUT) public long[] histogram_bins; @API(help="Start of histogram bin zero", direction=API.Direction.OUTPUT) public double histogram_base; @API(help="Stride per bin", direction=API.Direction.OUTPUT) public double histogram_stride; @API(help="Percentile values, matching the default percentiles", direction=API.Direction.OUTPUT) public double[] percentiles; transient Vec _vec; ColV3(String name, Vec vec, long off, int len) { label=name; missing_count = vec.naCnt(); zero_count = vec.length() - vec.nzCnt() - missing_count; positive_infinity_count = vec.pinfs(); negative_infinity_count = vec.ninfs(); mins = vec.mins(); maxs = vec.maxs(); mean = vec.mean(); sigma = vec.sigma(); // Histogram data is only computed on-demand. By default here we do NOT // compute it, but will return any prior computed & cached histogram. histogram_bins = vec.lazy_bins(); histogram_base = histogram_bins ==null ? 0 : vec.base(); histogram_stride= histogram_bins ==null ? 0 : vec.stride(); percentiles = histogram_bins ==null ? null : vec.pctiles(); type = vec.isUUID()? "uuid" : vec.isString()? "string" : vec.isCategorical()? "enum" : vec.isTime()? "time" : vec.isInt() ? "int" : "real"; domain = vec.domain(); if (vec.isCategorical()) { domain_cardinality = domain.length; } else { domain_cardinality = 0; } len = (int)Math.min(len,vec.length()-off); if( vec.isUUID() ) { string_data = new String[len]; for (int i = 0; i < len; i++) string_data[i] = vec.isNA(off + i) ? null : PrettyPrint.UUID(vec.at16l(off + i), vec.at16h(off + i)); data = null; } else if ( vec.isString() ) { string_data = new String[len]; BufferedString tmpStr = new BufferedString(); for (int i = 0; i < len; i++) string_data[i] = vec.isNA(off + i) ? null : vec.atStr(tmpStr,off + i).toString(); data = null; } else { data = MemoryManager.malloc8d(len); for( int i=0; i<len; i++ ) data[i] = vec.at(off+i); string_data = null; } _vec = vec; // Better HTML display, not in the JSON if (len > 0) // len == 0 is presumed to be a header file precision = vec.chunkForRow(0).precision(); } public void clearBinsField() { this.histogram_bins = null; } } public FrameV3() { super(); } /* Key-only constructor, for the times we only want to return the key. */ public FrameV3(Key<Frame> frame_id) { this.frame_id = new FrameKeyV3(frame_id); } public FrameV3(Frame fr) { this(fr, 1, (int) fr.numRows(), 0, 0); // NOTE: possible row len truncation } public FrameV3(Frame f, long row_offset, int row_count) { this(f, row_offset, row_count, 0, 0); } public FrameV3(Frame f, long row_offset, int row_count, int column_offset, int column_count) { this.fillFromImpl(f, row_offset, row_count, column_offset, column_count); } @Override public FrameV3 fillFromImpl(Frame f) { return fillFromImpl(f, 1, (int)f.numRows(), 0, 0); } public FrameV3 fillFromImpl(Frame f, long row_offset, int row_count, int column_offset, int column_count) { if( row_count == 0 ) row_count = 100; // 100 rows by default if( column_count == 0 ) column_count = f.numCols() - column_offset; // full width by default row_count = (int) Math.min(row_count, row_offset + f.numRows()); column_count = Math.min(column_count, column_offset + f.numCols()); this.frame_id = new FrameKeyV3(f._key); this.checksum = f.checksum(); this.byte_size = f.byteSize(); this.row_offset = row_offset; this.rows = f.numRows(); this.num_columns = f.numCols(); this.row_count = row_count; this.total_column_count = f.numCols(); this.column_offset = column_offset; this.column_count = column_count; this.columns = new ColV3[column_count]; Vec[] vecs = f.vecs(); Futures fs = new Futures(); // Compute rollups in parallel as needed, by starting all of them and using // them when filling in the ColV3 Schemas. // NOTE: SKIP deleted Vecs! The columns entry will be null for deleted Vecs. for( int i = 0; i < column_count; i++ ) if (null == DKV.get(vecs[column_offset + i]._key)) Log.warn("For Frame: " + f._key + ", Vec number: " + (column_offset + i) + " (" + f.name((column_offset + i))+ ") is missing; not returning it."); else vecs[column_offset + i].startRollupStats(fs); for( int i = 0; i < column_count; i++ ) if (null == DKV.get(vecs[column_offset + i]._key)) Log.warn("For Frame: " + f._key + ", Vec number: " + (column_offset + i) + " (" + f.name((column_offset + i))+ ") is missing; not returning it."); else columns[i] = new ColV3(f._names[column_offset + i], vecs[column_offset + i], this.row_offset, this.row_count); fs.blockForPending(); this.is_text = f.numCols()==1 && vecs[0] instanceof ByteVec; this.default_percentiles = Vec.PERCENTILES; ChunkSummary cs = FrameUtils.chunkSummary(f); this.chunk_summary = new TwoDimTableV3(cs.toTwoDimTableChunkTypes()); this.distribution_summary = new TwoDimTableV3(cs.toTwoDimTableDistribution()); this._fr = f; return this; } public void clearBinsField() { for (ColV3 col: columns) if (col != null) col.clearBinsField(); } private abstract static class ColOp { abstract String op(ColV3 v); } private String rollUpStr(ColV3 c, double d) { return formatCell(c.domain!=null || "uuid".equals(c.type) || "string".equals(c.type) ? Double.NaN : d,null,c,4); } private String formatCell( double d, String str, ColV3 c, int precision ) { if (Double.isNaN(d)) return "-"; if (c.domain != null) return c.domain[(int) d]; if ("uuid".equals(c.type) || "string".equals(c.type)) { // UUID and String handling if (str == null) return "-"; return "<b style=\"font-family:monospace;\">" + str + "</b>"; } else { Chunk chk = c._vec.chunkForRow(row_offset); return PrettyPrint.number(chk, d, precision); } } }