package cookbook; import java.io.File; import java.util.Arrays; import org.junit.Test; import static org.junit.Assert.assertTrue; import water.Key; import water.MRTask2; import water.fvec.Chunk; import water.fvec.Frame; import water.fvec.NFSFileVec; import water.fvec.ParseDataset2; import water.fvec.Vec; import water.util.Log; public class VecDemo extends AbstractCookbook{ @Test public void Vec(){ String fileName = "./cookbookData/iris_withNA.csv"; File file = new File(fileName); Key fkey = NFSFileVec.make(file); Key okey = Key.make("iris.hex"); Frame fr; fr = ParseDataset2.parse(okey, new Key[]{fkey}); Vec vv = fr.vec(0); //accessing the first vector from the frame int loop_indx = 0; if(vv.length() > 4){ loop_indx = 3; } //READING AN ELEMENT FROM A VEC for(int i =0; i<loop_indx;i++){ long k =i; double elemnt_D = vv.at(k); // element at index k returns a double System.out.println("element at index " + k +" as double: "+elemnt_D); if(!Double.isNaN(elemnt_D)){ long elemnt_L = vv.at8(k); // element at index k returns an (rounded) int, throws if a value is missing System.out.println("element at index " + k +" as integer: "+elemnt_L); } } //TESTING WHETHER A VEC OF INTEGERS IS AN ENUM (AKA CATEGORICAL) OR NOT for(int i = 0; i<fr.numCols();i++){ Vec vvec = fr.vec(i); boolean b= vvec.isInt(); //check if int System.out.println("Is "+ i +" an integer column ?" +" "+b); if(b==true){ int cardinality = vvec.cardinality(); // check if enum if(cardinality !=-1){ System.out.println("The vector " + i +" is an enum with cardinality "+ cardinality+ " and domain names: "); //PRINTING THE LIST OF DOMAINS OF AN ENUM VEC (AKA LEVELS OF A CATEGORICAL VEC) for(int j = 0; j<cardinality; j++) System.out.println( vvec.domain(j) ); } } } //UPDATING AN ELEMENT OF A VEC /* This sets the value in a very slow way, because it takes the vector goes to the chunk that has * the row index, decompress it, updates the value and then compress it again */ for(int i = 0; i<loop_indx;i++){ long k = i; double d = 1.23; vv.set(k, d); // set element as double System.out.println("setting element at index " + k +" as double: "+vv.at(k)); float f = 1.23f; vv.set(k, f); // set element as float System.out.println("setting element at index " + k +" as float: "+vv.at(k)); long l = 12345678910L; vv.set(k, l); // set element as long System.out.println("setting element at index " + k +" as long: "+vv.at(k)); vv.setNA(k); // set element as na System.out.println("setting element at index " + k +" as NAN: "+vv.at(k)); } //UPDATING A VEC ELEMENT WITH AN ENUM VALUE THAT HAS NEVER BEEN USED BEFORE Vec vvenum = fr.vec(4); final String [] newDomain = new String[]{"x", "y", "z"}; vvenum.changeDomain(newDomain); System.out.println( "The changed domain names are: "); for(int i = 0; i<vvenum.cardinality(); i++) System.out.println( vvenum.domain(i) ); //fr.vec(4).changeDomain(newDomain); //ACCESSING VEC STATS THAT ARE COMPUTED AUTOMATICALLY (LIKE MIN, MAX) System.out.println("Min for vector 0: "+vv.min()); System.out.println( "Max for vector 0: "+vv.max()); System.out.println( "Mean for vector 0: "+vv.mean()); System.out.println( "Standard deviation for vector 0: "+vv.sigma()); System.out.println( "NA count for vector 0: "+vv.naCnt()); //logThisH2OInstanceWebBrowserAddress(); //sleepForever(); //CLEANING THE KV STORE OF ALL DATA Frame.delete(okey); } }