package cookbook; import java.io.File; import org.junit.Test; import water.Key; import water.MRTask2; import water.fvec.Chunk; import water.fvec.Frame; import water.fvec.NFSFileVec; import water.fvec.ParseDataset2; import water.fvec.Vec; import water.util.Log; /* * This example fills na's in a column with the column mean by traversing over each column in the map reduce call * and adding the new mean filled out columns to the existing frame */ public class FillNAsWithMeanDemo01 extends AbstractCookbook{ @Test public void frame_001() { //String fileName = "/Users/nidhimehta/h2o/smalldata/iris/iris.csv"; //String fileName = "/Users/nidhimehta/Desktop/data/covtype/covtrain_tit"; //String fileName = "/Users/nidhimehta/Desktop/iris_withNA.csv"; String fileName = "./cookbookData/iris_withNA.csv"; File file = new File(fileName); Key fkey = NFSFileVec.make(file); Key okey = Key.make("iris.hex"); Frame fr; fr = ParseDataset2.parse(okey, new Key[]{fkey}); int len = fr.numCols(); for(int i=0; i<len; i++){ Vec vv = fr.vec(i); Vec output = vv.makeZero(); // creating a new vector same as original vector filled with zeros FillNasWithMean lr1 = new FillNasWithMean(vv.mean()).doAll(vv, output);// map reduce call fr.add("FilledNa"+i,output ); // adding the vector to the original frame } Log.info("frame : " + fr); //logThisH2OInstanceWebBrowserAddress(); //sleepForever(); Frame.delete(okey); } public static class FillNasWithMean extends MRTask2<FillNasWithMean>{ final double _meanX; FillNasWithMean( double meanX ) { _meanX = meanX; } @Override public void map( Chunk xs, Chunk ns) { for( int l=0; l<xs._len; l++ ) { double X = xs.at0(l); if( xs.isNA0(l)) { X = _meanX; ns.set0(l,_meanX); } else{ ns.set0(l,X); } } } } }