package cookbook;
import java.io.File;
import org.junit.Test;
import water.DKV;
import water.Futures;
import water.Key;
import water.MRTask2;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.NewChunk;
import water.fvec.ParseDataset2;
import water.fvec.Vec;
import water.util.Log;
/*
* This example fills na's in a column with the column mean and creates new columns
* by traversing over the original frame in the map reduce call
* and the new mean filled out columns are then added to a new data frame
*/
public class FillNAsWithMeanDemo03 extends AbstractCookbook {
@Test
public void frame_001() {
// String fileName = "/Users/nidhimehta/h2o/smalldata/iris/iris.csv";
//String fileName = "/Users/nidhimehta/Desktop/data/covtype/covtrain_tit";
//String fileName = "/Users/nidhimehta/Desktop/iris_withNA.csv";
String fileName = "./cookbookData/iris_withNA.csv";
File file = new File(fileName);
Key fkey = NFSFileVec.make(file);
Key okey = Key.make("iris.hex");
Frame fr;
fr = ParseDataset2.parse(okey, new Key[] { fkey });
Frame f = DKV.get(okey).get();
int len = f.numCols();
Vec vv[] = f.vecs();
double[] arrayofMeans = new double[len];
for (int i = 0; i < len; i++)
arrayofMeans[i] = vv[i].mean();
FillNasWithMean lr1 = new FillNasWithMean(arrayofMeans).doAll(len, f); // map reduce call
Key fk = Key.make(f._key.toString() + "_nas_replaced_with_mean");
Futures fs = new Futures();
Frame outputFrame = lr1.outputFrame(fk, f.names(), f.domains(),fs); //new frame
fs.blockForPending();
DKV.put(fk,outputFrame,fs); //puts the new frame in the KV store
fs.blockForPending();
Log.info(" new output frame : " + outputFrame);
//logThisH2OInstanceWebBrowserAddress();
//sleepForever();
Frame.delete(okey);
outputFrame.delete();
}
public static class FillNasWithMean extends MRTask2<FillNasWithMean> {
final double[] _meanX;
FillNasWithMean(double[] meanX) {
_meanX = meanX;
}
@Override
public void map(Chunk[] xs, NewChunk[] ns) {
for (int j = 0; j < xs.length; j++) {
for (int l = 0; l < xs[j]._len; l++) {
if (xs[j].isNA0(l)) {
ns[j].addNum(_meanX[j]);
} else {
ns[j].addNum(xs[j].at0(l));
}
}
}
}
}
}