package water.fvec; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import org.junit.*; import water.*; public class NewVectorTest extends TestUtil { static final double EPSILON = 1e-6; @BeforeClass public static void stall() { stall_till_cloudsize(3); } private void testImpl( long[] ls, int[] xs, Class C, boolean hasFloat ) { int [] id = new int[xs.length]; for(int i = 0; i < xs.length; ++i)id[i] = i; testImpl(ls,xs,id,C,hasFloat); } private void testImpl( long[] ls, int[] xs, int [] id, Class C, boolean hasFloat ) { AppendableVec av = new AppendableVec(Vec.newKey()); NewChunk nv = new NewChunk(av,0); nv._ls = ls; nv._id = id; nv._xs = xs; nv._len= nv._sparseLen = ls.length; Chunk bv = nv.compress(); Futures fs = new Futures(); bv._vec = av.close(fs); fs.blockForPending(); // Compression returns the expected compressed-type: assertTrue( "Found chunk class "+bv.getClass()+" but expected "+C, C.isInstance(bv) ); assertEquals( hasFloat, bv.hasFloat() ); // Also, we can decompress correctly for( int i=0; i<ls.length; i++ ) if(ls[i] == Long.MAX_VALUE && xs[i] == Integer.MIN_VALUE) assertTrue(bv.isNA0(i)); else assertEquals(ls[i]*PrettyPrint.pow10(xs[i]), bv.at0(i), bv.at0(i)*EPSILON); UKV.remove(av._key); } // Test that various collections of parsed numbers compress as expected. @Test public void testCompression() { // A simple no-compress testImpl(new long[] {120, 12,120}, new int [] { 0, 1, 0}, C0LChunk.class,false); // A simple no-compress testImpl(new long[] {122, 3,44}, new int [] { 0, 0, 0}, C1NChunk.class,false); // A simple compressed boolean vector testImpl(new long[] {1, 0, 1}, new int [] {0, 0, 0}, CBSChunk.class,false); testImpl(new long[] {0,128,255}, // 12.2, -3.0, 4.4 ==> 122e-1, -30e-1, 44e-1 new int [] {0,0,0}, C1NChunk.class, false); // Scaled-byte compression testImpl(new long[] {0,Long.MAX_VALUE,128,254}, // 12.2, -3.0, 4.4 ==> 122e-1, -30e-1, 44e-1 new int [] {0,Integer.MIN_VALUE,0,0}, C1Chunk.class, false); testImpl(new long[] {0,Long.MAX_VALUE,128,255}, // 12.2, -3.0, 4.4 ==> 122e-1, -30e-1, 44e-1 new int [] {0,Integer.MIN_VALUE,0,0}, C2Chunk.class, false); testImpl(new long[] {0,Long.MAX_VALUE,128,255, Short.MAX_VALUE-1}, // 12.2, -3.0, 4.4 ==> 122e-1, -30e-1, 44e-1 new int [] {0,Integer.MIN_VALUE,0,0,0}, C2Chunk.class, false); // Scaled-byte compression testImpl(new long[] {122,-3,44}, // 12.2, -3.0, 4.4 ==> 122e-1, -30e-1, 44e-1 new int [] { -1, 0,-1}, C1SChunk.class, true); // Positive-scale byte compression testImpl(new long[] {0,10,254}, // 1000, 2000, 3000 ==> 1e3, 2e3, 3e3 new int [] { 0, 0, 0}, C1NChunk.class,false); // Positive-scale byte compression testImpl(new long[] {1000,200,30}, // 1000, 2000, 3000 ==> 1e3, 2e3, 3e3 new int [] { 0, 1, 2}, C1SChunk.class,false); // A simple no-compress short testImpl(new long[] {1000,200,32767, -32767,32}, new int [] { 0, 1, 0, 0, 3}, C2Chunk.class,false); // Scaled-byte compression testImpl(new long[] {50100,50101,50123,49999}, // 50100, 50101, 50123, 49999 new int [] { 0, 0, 0, 0}, C1SChunk.class,false); // Scaled-byte compression testImpl(new long[] {51000,50101,50123,49999}, // 51000, 50101, 50123, 49999 new int [] { 0, 0, 0, 0}, C2SChunk.class,false); // Scaled-short compression testImpl(new long[] {501000,501001,50123,49999}, // 50100.0, 50100.1, 50123, 49999 new int [] { -1, -1, 0, 0}, C2SChunk.class, true); // Integers testImpl(new long[] {123456,2345678,34567890}, new int [] { 0, 0, 0}, C4Chunk.class,false); // // Floats testImpl(new long[] {1234,2345,314}, new int [] { -1, -5, -2}, C4SChunk.class, true); // Doubles testImpl(new long[] {1234,2345678,31415}, new int [] { 40, 10, -40}, C8DChunk.class, true); testImpl(new long[] {-581504,-477862,342349}, new int[] {-5,-18,-5}, C8DChunk.class,true); } // Testing writes to an existing Chunk causing inflation @Test public void testWrites() { Key key = Vec.newKey(); AppendableVec av = new AppendableVec(key); NewChunk nv = new NewChunk(av,0); nv._ls = new long[]{0,0,0,0}; // A 4-row chunk nv._xs = new int []{0,0,0,0}; nv._len= nv._sparseLen = nv._ls.length; long [] ls = nv._ls; nv.close(0,null); Futures fs = new Futures(); Vec vec = av.close(fs); fs.blockForPending(); assertEquals( nv._len, vec.length() ); // Compression returns the expected constant-compression-type: Chunk c0 = vec.chunkForChunkIdx(0); assertTrue( "Found chunk class "+c0.getClass()+" but expected C0LChunk", c0 instanceof C0LChunk ); assertEquals( false, c0.hasFloat() ); // Also, we can decompress correctly for( int i=0; i<ls.length; i++ ) assertEquals(0, c0.at0(i), c0.at0(i)*EPSILON); // Now write a zero into slot 0 vec.set(0,0); assertEquals(0,vec.at8(0)); Chunk c1 = vec.chunkForChunkIdx(0); assertTrue( "Found chunk class "+c1.getClass()+" but expected C0LChunk", c1 instanceof C0LChunk ); // Now write a one into slot 1; chunk should inflate into boolean vector. vec.set(1,1); assertEquals(1,vec.at8(1)); // Immediate visibility in current thread Chunk c2 = vec.chunkForChunkIdx(0); // Look again at the installed chunk assertTrue( "Found chunk class "+c2.getClass()+" but expected CBSChunk", c2 instanceof CBSChunk ); // Now write a two into slot 2; chunk should inflate into byte vector vec.set(2,2); assertEquals(2,vec.at8(2)); // Immediate visibility in current thread Chunk c3 = vec.chunkForChunkIdx(0); // Look again at the installed chunk assertTrue( "Found chunk class "+c3.getClass()+" but expected C1NChunk", c3 instanceof C1NChunk ); vec.set(3,3); assertEquals(3,vec.at8(3)); // Immediate visibility in current thread Chunk c4 = vec.chunkForChunkIdx(0); // Look again at the installed chunk assertTrue( "Found chunk class "+c4.getClass()+" but expected C1NChunk", c4 instanceof C1NChunk ); // Now doing the same for multiple writes, close() only at the end for better speed Vec.Writer vw = vec.open(); vw.set(1,4); vw.set(2,5); vw.set(3,6); vw.close(); //update chunks in DKV assertEquals(4,vec.at8(1)); assertEquals(5,vec.at8(2)); assertEquals(6,vec.at8(3)); UKV.remove(av._key); } }