package cookbook;
import water.*;
import water.fvec.Frame;
import water.exec.Flow;
import water.util.Utils.*;
import hex.Summary2.SummaryPerRow;
// Called from FlowTest, and is NOT a JUnit - so instances of this class will
// be lazily generated, so we do not need any TypeMap ID's before the H2O is up.
public class Cookbook2 {
  // Use a static method so all my anonymous inner classes do not carry a hidden
  // link to a FlowTest object.

  /**
   * Exercises the Flow fluent API over a parsed frame: a plain per-row sum,
   * a filtered sum, group-by sums, a filter+group-by combination (in both
   * orders), and per-group column summaries/percentiles.  Results are printed
   * to stdout.  The frame's key {@code k} is always removed from the KV store
   * when done, even if an exception is thrown.
   *
   * @param k  KV-store key of the parsed frame; removed in the finally block
   * @param fr frame expected to contain "cylinders" and "year" columns
   *           (e.g. the cars dataset)
   */
  public static void basicStatic( Key k, Frame fr ) {
    try {
      final int cyl_idx = fr.find("cylinders");
      final int year_idx = fr.find("year");
      //final int cyl_idx = fr.find("C54"); // Works great for finding covtype - average elevation by class
      //final int year_idx = fr.find("C0");

      // Simple whole-frame sum/mean of the "year" column.
      SumCol sumcols = fr.
        with(new SumCol(year_idx)).
        doit();
      System.out.println(sumcols._sum+"/"+sumcols._n+" = "+(sumcols._sum/sumcols._n));
      System.out.println();

      // Same computation, with the intermediate Flow stages held in locals.
      SumCol sumCol = new SumCol(year_idx);
      Flow.FlowPerRow<SumCol> flowPerRow = fr.with(sumCol);
      SumCol result = flowPerRow.doit();
      System.out.println("TOM: " + result._sum + " " + result._n);

      // Filtered sum: skip rows where cylinders==5.
      SumCol sumcols1 = fr.
        with(new Flow.Filter() { public boolean filter(double ds[]) { return ds[cyl_idx]!=5; } }).
        with(new SumCol(year_idx)).
        doit();
      System.out.println(sumcols1._sum+"/"+sumcols1._n+" = "+(sumcols1._sum/sumcols1._n));
      System.out.println();

      // Run all the rollups in parallel before doing summary
      Futures fs = new Futures();
      for( int i=0; i<fr.numCols(); i++ )
        fr.vecs()[i].rollupStats(fs);
      fs.blockForPending();

      // Group-by cylinders, sum of year per group.
      IcedHashMap<IcedLong,SumCol> sumcols2 = fr.
        with(new Flow.GroupBy() { public long groupId(double ds[]) { return (long)ds[cyl_idx];} }).
        with(new SumCol(year_idx)).
        doit();
      for( IcedLong gid : sumcols2.keySet() ) {
        SumCol sumcol = sumcols2.get(gid);
        System.out.println("Cyl="+gid._val+", "+sumcol._sum+"/"+sumcol._n+" = "+(sumcol._sum/sumcol._n));
      }
      System.out.println();

      // Same group-by, spelled out with a named GroupBy class and explicit
      // intermediate Flow stages.
      {
        System.out.println("TOM ----- START");
        class MyGroupBy extends Flow.GroupBy {
          public long groupId(double ds[]) { return (long)ds[cyl_idx];}
        }
        SumCol sumCol10 = new SumCol(year_idx);
        Flow.FlowGroupBy flowGroupBy = fr.with(new MyGroupBy());
        Flow.FlowGroupPerRow flowGroupPerRow = flowGroupBy.with(sumCol10);
        IcedHashMap<IcedLong,SumCol> hashMap = flowGroupPerRow.doit();
        for( IcedLong gid : hashMap.keySet() ) {
          // FIX: was sumcols2.get(gid) — a copy-paste slip that printed values
          // from the PREVIOUS group-by's map instead of this run's result
          // (and would NPE if the key sets ever differed).
          SumCol sumcol = hashMap.get(gid);
          System.out.println("Cyl="+gid._val+", "+sumcol._sum+"/"+sumcol._n+" = "+(sumcol._sum/sumcol._n));
        }
        System.out.println("TOM ----- END");
        System.out.println();
      }

      // Filter THEN group-by: rows with cylinders==5 never reach the grouper.
      IcedHashMap<IcedLong,SumCol> sumcols3 = fr.
        with(new Flow.Filter () { public boolean filter(double ds[]) { return ds[cyl_idx]!=5; } }).
        with(new Flow.GroupBy() { public long groupId ( double ds[]) { return (long)ds[cyl_idx];} }).
        with(new SumCol(year_idx)).
        doit();
      for( IcedLong gid : sumcols3.keySet() ) {
        SumCol sumcol = sumcols3.get(gid);
        System.out.println("Cyl="+gid._val+", "+sumcol._sum+"/"+sumcol._n+" = "+(sumcol._sum/sumcol._n));
      }
      System.out.println();

      // Group-by THEN filter: same net result here since the filter keys off
      // the same column as the grouper.
      IcedHashMap<IcedLong,SumCol> sumcols4 = fr.
        with(new Flow.GroupBy() { public long groupId(double ds[]) { return (long)ds[cyl_idx];} }).
        with(new Flow.Filter() { public boolean filter(double ds[]) { return ds[cyl_idx]!=5; } }).
        with(new SumCol(year_idx)).
        doit();
      for( IcedLong gid : sumcols4.keySet() ) {
        SumCol sumcol = sumcols4.get(gid);
        System.out.println("Cyl="+gid._val+", "+sumcol._sum+"/"+sumcol._n+" = "+(sumcol._sum/sumcol._n));
      }
      System.out.println();

      // Percentiles
      SummaryPerRow spr = fr.
        with(new SummaryPerRow(fr)).
        doit();
      spr.finishUp();
      System.out.println(spr);
      System.out.println();

      // Percentiles per-Group
      IcedHashMap<IcedLong,SummaryPerRow> sprs = fr.
        with(new Flow.GroupBy() { public long groupId(double ds[]) { return (long)ds[cyl_idx];} }).
        with(new SummaryPerRow(fr)).
        doit();
      for( IcedLong gid : sprs.keySet() ) {
        SummaryPerRow spr2 = sprs.get(gid);
        spr2.finishUp();
        System.out.println("Group ID="+gid._val);
        System.out.println(spr2);
        System.out.println();
      }
      System.out.println();
    } finally {
      UKV.remove(k);            // always free the frame's KV-store entry
    }
  }

  /**
   * Flow per-row reducer that accumulates the sum and count of a single
   * column, so callers can compute the column mean as {@code _sum/_n}.
   * NOTE(review): NAs are not skipped here — an NA in the column poisons
   * {@code _sum} with NaN; confirm whether upstream filtering guarantees
   * no NAs.
   */
  public static class SumCol extends Flow.PerRow<SumCol> {
    final int _col_idx;         // column being summed
    double _sum, _n;            // running sum and row count
    SumCol( int col_idx ) { _col_idx = col_idx; }
    @Override public void mapreduce( double ds[] ) { _sum += ds[_col_idx]; _n++; }
    @Override public void reduce( SumCol that ) { _sum += that._sum; _n += that._n; }
    @Override public SumCol make() { return new SumCol(_col_idx); }
  }
}