package water.fvec; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import water.*; import water.util.ArrayUtils; import water.util.FileUtils; import water.util.StringUtils; import java.io.File; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; public class FVecTest extends TestUtil { @BeforeClass public static void setup() { stall_till_cloudsize(1); } static final double EPSILON = 1e-6; public static Key makeByteVec(Key k, String... data) { byte [][] chunks = new byte[data.length][]; long [] espc = new long[data.length+1]; for(int i = 0; i < chunks.length; ++i){ chunks[i] = StringUtils.bytesOf(data[i]); espc[i+1] = espc[i] + data[i].length(); } Futures fs = new Futures(); Key key = Vec.newKey(); ByteVec bv = new ByteVec(key,Vec.ESPC.rowLayout(key,espc)); for(int i = 0; i < chunks.length; ++i){ Key chunkKey = bv.chunkKey(i); DKV.put(chunkKey, new Value(chunkKey,chunks[i].length,chunks[i],TypeMap.C1NCHUNK,Value.ICE),fs); } DKV.put(bv._key,bv,fs); Frame fr = new Frame(k,new String[]{"makeByteVec"},new Vec[]{bv}); DKV.put(k, fr, fs); fs.blockForPending(); return k; } /* Test that we actually fail on failures. :-) @Test public void testBlammo() { assertEquals(1, 2); } */ // ========================================================================== @Test public void testBasicCRUD() { // Make and insert a FileVec to the global store File file = FileUtils.locateFile("./smalldata/junit/cars.csv"); NFSFileVec nfs = NFSFileVec.make(file); int sum = ArrayUtils.sum(new ByteHisto().doAll(nfs)._x); assertEquals(file.length(),sum); nfs.remove(); } private static class ByteHisto extends MRTask<ByteHisto> { public int[] _x; // Count occurrences of bytes @Override public void map( Chunk bv ) { _x = new int[256]; // One-time set histogram array for( int i=0; i< bv._len; i++ ) _x[(int)bv.atd(i)]++; } // ADD together all results @Override public void reduce( ByteHisto bh ) { ArrayUtils.add(_x,bh._x); } } // ========================================================================== @Test public void testSet() { Frame fr = null; try { fr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip"); double[] mins =new double[fr.numCols()]; for (int i=0; i < mins.length; i++) mins[i] = fr.vecs()[i].min(); // Scribble into a freshly parsed frame new SetDoubleInt(mins).doAll(fr); } finally { if( fr != null ) fr.delete(); } } static class SetDoubleInt extends MRTask { final double _mins[]; public SetDoubleInt(double [] mins) {_mins = mins;} @Override public void map( Chunk chks[] ) { Chunk c=null; int i; for(i=0; i < chks.length; i++) { if( chks[i].getClass()==water.fvec.C2Chunk.class ) { c=chks[i]; break; } } Assert.assertNotNull("Expect to find a C2Chunk", c); assertTrue(c._vec.writable()); double d=_mins[i]; for(i=0; i< c._len; i++ ) { double e = c.atd(i); c.set(i, d); d=e; } } } // ========================================================================== // Test making a appendable vector from a plain vector @Test public void testNewVec() { // Make and insert a File8Vec to the global store NFSFileVec nfs = TestUtil.makeNfsFileVec("./smalldata/junit/cars.csv"); Vec res = new TestNewVec().doAll(new byte[]{Vec.T_NUM},nfs).outputFrame(new String[]{"v"},new String[][]{null}).anyVec(); assertEquals(nfs.at8(0)+1,res.at8(0)); assertEquals(nfs.at8(1)+1,res.at8(1)); assertEquals(nfs.at8(2)+1,res.at8(2)); nfs.remove(); res.remove(); } private static class TestNewVec extends MRTask<TestNewVec> { @Override public void map( Chunk in, NewChunk out ) { for( int i=0; i< in._len; i++ ) out.addNum( in.at8_abs(i)+(in.at8_abs(i) >= ' ' ? 1 : 0),0); } } // ========================================================================== @Test public void testParse2() { Frame fr = null; Vec vz = null; try { fr = parse_test_file("smalldata/junit/syn_2659x1049.csv.gz"); assertEquals(fr.numCols(),1050); // Count of columns assertEquals(fr.numRows(),2659); // Count of rows double[] sums = new Sum().doAll(fr)._sums; assertEquals(3949,sums[0],EPSILON); assertEquals(3986,sums[1],EPSILON); assertEquals(3993,sums[2],EPSILON); // Create a temp column of zeros Vec v0 = fr.vecs()[0]; Vec v1 = fr.vecs()[1]; vz = v0.makeZero(); // Add column 0 & 1 into the temp column new PairSum().doAll(vz,v0,v1); // Add the temp to frame // Now total the temp col fr.delete(); // Remove all other columns fr = new Frame(Key.<Frame>make(), new String[]{"tmp"}, new Vec[]{vz}); // Add just this one sums = new Sum().doAll(fr)._sums; assertEquals(3949+3986,sums[0],EPSILON); } finally { if( vz != null ) vz.remove(); if( fr != null ) fr.delete(); } } // Sum each column independently private static class Sum extends MRTask<Sum> { double _sums[]; @Override public void map( Chunk[] bvs ) { _sums = new double[bvs.length]; int len = bvs[0]._len; for( int i=0; i<len; i++ ) for( int j=0; j<bvs.length; j++ ) _sums[j] += bvs[j].atd(i); } @Override public void reduce( Sum mrt ) { ArrayUtils.add(_sums, mrt._sums); } } // Simple vector sum C=A+B private static class PairSum extends MRTask<Sum> { @Override public void map( Chunk out, Chunk in1, Chunk in2 ) { for( int i=0; i< out._len; i++ ) out.set(i, in1.at8(i) + in2.at8(i)); } } @Test public void testRollups() { // Frame fr = null; // try { Key rebalanced = Key.make("rebalanced"); Vec v = null; Frame fr = null; try { v = Vec.makeVec(new double[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, Vec.newKey()); Futures fs = new Futures(); assertEquals(0, v.min(), 0); assertEquals(9, v.max(), 0); assertEquals(4.5,v.mean(),1e-8); H2O.submitTask(new RebalanceDataSet(new Frame(v), rebalanced, 10)).join(); fr = DKV.get(rebalanced).get(); Vec v2 = fr.anyVec(); assertEquals(0, v2.min(), 0); assertEquals(9, v2.max(), 0); assertEquals(4.5, v.mean(), 1e-8); v2.set(5, -100); assertEquals(-100, v2.min(), 0); v2.set(5, 5); // make several rollups requests in parallel with and without histo and then get histo v2.startRollupStats(fs); v2.startRollupStats(fs); v2.startRollupStats(fs,true); assertEquals(0, v2.min(), 0); long [] bins = v2.bins(); assertEquals(10,bins.length); // TODO: should test percentiles? for(long l:bins) assertEquals(1,l); Vec.Writer w = v2.open(); try { v2.min(); assertTrue("should have thrown IAE since we're requesting rollups while changing the Vec (got Vec.Writer)",false); // fail - should've thrown } catch( IllegalArgumentException ie ) { // if on local node can get iae directly } catch( RuntimeException re ) { assertTrue(re.getCause() instanceof IllegalArgumentException); // expect to get IAE since we're requesting rollups while also changing the vec } w.close(fs); fs.blockForPending(); assertEquals(0,v2.min(),0); fr.delete(); v.remove(); fr = null; } finally { if( v != null)v.remove(); if(fr != null)fr.delete(); } } // The rollups only compute approximate quantiles, not exact. @Test public void test50pct() { Vec vec = null; try { double[] d = new double[]{0.812834256224, 1.56386606237, 3.12702210880, 3.68417563302, 5.51277746586}; vec = Vec.makeVec(d,Vec.newKey()); double pct[] = vec.pctiles(); double eps = (vec.max()-vec.min())/1e-3; Assert.assertEquals(pct[0],d[0],eps); // 0.01 Assert.assertEquals(pct[1],d[0],eps); // 0.1 Assert.assertEquals(pct[2],d[0],eps); // 0.25 Assert.assertEquals(pct[3],d[1],eps); // 1/3 Assert.assertEquals(pct[4],d[2],eps); // 0.5 Assert.assertEquals(pct[5],d[2],eps); // 2/3 Assert.assertEquals(pct[6],d[3],eps); // 0.75 Assert.assertEquals(pct[7],d[4],eps); // 0.9 Assert.assertEquals(pct[8],d[4],eps); // 0.99 vec.remove(); d = new double[]{490,492,494,496,498}; vec = Vec.makeVec(d,Vec.newKey()); pct = vec.pctiles(); eps = (vec.max()-vec.min())/1e-3; System.out.println(java.util.Arrays.toString(pct)); Assert.assertEquals(pct[0],d[0],eps); // 0.01 } finally { if( vec != null ) vec.remove(); } } }