package cookbook; import java.io.File; import org.junit.Test; import water.DKV; import water.Key; import water.MRTask2; import water.fvec.Chunk; import water.fvec.Frame; import water.fvec.NFSFileVec; import water.fvec.ParseDataset2; import water.fvec.Vec; import water.util.Log; /* * This example fills na's in a column with the column mean and creates new columns * and add the new mean filled out columns to the original data frame that is passed * to the map reduce call with the required placeholder columns */ public class FillNAsWithMeanDemo02 extends AbstractCookbook { @Test public void frame_001() { String fileName = "./cookbookData/iris_withNA.csv"; //String fileName = "/Users/nidhimehta/Desktop/iris_withNA.csv"; File file = new File(fileName); Key fkey = NFSFileVec.make(file); Key okey = Key.make("iris.hex"); Frame fr; fr = ParseDataset2.parse(okey, new Key[] { fkey }); Frame f = DKV.get(okey).get(); Log.info("frame : " + f); int len = f.numCols(); Vec vv[] = f.vecs(); double[] arrayofMeans = new double[len]; for (int i = 0; i < len; i++) arrayofMeans[i] = vv[i].mean(); // array of means to be passed as params to map reduce task Vec[] newVecs = vv[0].makeZeros(len); newVecs[4]._domain= vv[4]._domain; String[] newcolnames = {"1","2","3","4","5"}; Frame output = frame(newcolnames, newVecs); f.add(output, newcolnames); // the holder frame added to original frame FillNasWithMean lr1 = new FillNasWithMean(arrayofMeans).doAll(f); // map reduce call Log.info("frame : " + f); //logThisH2OInstanceWebBrowserAddress(); //sleepForever(); Frame.delete(okey); } public static class FillNasWithMean extends MRTask2<FillNasWithMean> { final double[] _meanX; FillNasWithMean(double[] meanX) { _meanX = meanX; } @Override public void map(Chunk[] xs) { for (int j = 0; j < xs.length/2; j++) { for (int l = 0; l < xs[j]._len; l++) { if (xs[j].isNA0(l)) { xs[j+xs.length/2].set0(l,_meanX[j]); // xs.set0(l, _meanX); // System.out.println("hello hello"); } else { xs[j+xs.length/2].set0(l,xs[j].at0(l)); } } } } } }