package water.api; import hex.NFoldFrameExtractor; import water.*; import water.fvec.Frame; import water.util.Utils; public class NFoldFrameExtractPage extends Func { static final int API_WEAVER = 1; // This file has auto-gen'd doc & json fields static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code. @API(help = "Data frame", required = true, filter = Default.class) public Frame source; @API(help = "N-fold split", required = true, filter = Default.class, lmin=0) public int nfolds = 10; @API(help = "Split to extract", required = true, filter = Default.class, lmin=0) public int afold; @API(help = "Keys for each split partition.") public Key[] split_keys; @API(help = "Holds a number of rows per each output partition.") public long[] split_rows; @Override protected void init() throws IllegalArgumentException { super.init(); if (nfolds > source.numRows()) throw new IllegalArgumentException("Cannot provide more folds than number of rows in dataset!"); if (afold >= nfolds) throw new IllegalArgumentException("Request fold ("+afold+") is greater than number of folds ("+nfolds+")!"); } @Override protected void execImpl() { NFoldFrameExtractor extractor = new NFoldFrameExtractor(source, nfolds, afold, null, null); H2O.submitTask(extractor); Frame[] splits = extractor.getResult(); split_keys = new Key [splits.length]; split_rows = new long[splits.length]; long sum = 0; for(int i=0; i<splits.length; i++) { sum += splits[i].numRows(); split_keys[i] = splits[i]._key; split_rows[i] = splits[i].numRows(); } assert sum == source.numRows() : "Frame split produced wrong number of rows: nrows(source) != sum(nrows(splits))"; } @Override public boolean toHTML(StringBuilder sb) { int nsplits = split_keys.length; String [] headers = new String[nsplits+2]; headers[0] = ""; for(int i=0; i<nsplits; i++) headers[i+1] = "Split #"+i; headers[nsplits+1] = "Total"; DocGen.HTML.arrayHead(sb, headers); // Key table row sb.append("<tr><td>").append(DocGen.HTML.bold("Keys")).append("</td>"); for (int i=0; i<nsplits; i++) { Key k = split_keys[i]; sb.append("<td>").append(Inspect2.link(k)).append("</td>"); } sb.append("<td>").append(Inspect2.link(source._key)).append("</td>"); sb.append("</tr>"); // Number of rows row sb.append("<tr><td>").append(DocGen.HTML.bold("Rows")).append("</td>"); for (int i=0; i<nsplits; i++) { long r = split_rows[i]; sb.append("<td>").append(String.format("%,d", r)).append("</td>"); } sb.append("<td>").append(String.format("%,d", Utils.sum(split_rows))).append("</td>"); sb.append("</tr>"); DocGen.HTML.arrayTail(sb); return true; } }