package water.fvec;

import org.junit.*;

import java.util.Arrays;
import java.util.Random;
import java.util.TreeSet;

import water.TestUtil;
import water.util.UnsafeUtils;

/**
 * Tests for the sparse chunk types ({@link CXIChunk}, {@link CXFChunk}).
 *
 * <p>Each test builds a {@link NewChunk} from a plain {@code double[]}, compresses it and checks
 * that the resulting {@link Chunk} has the expected class and sparsity flags and that its
 * accessors ({@code atd}, {@code at8}, {@code isNA}, {@code nextNZ}, {@code getDoubles},
 * {@code getSparseDoubles}, {@code extractRows}) agree with the source array.
 *
 * Created by tomasnykodym on 3/28/14.
 */
public class SparseTest extends TestUtil {

  /** Seed shared by all helpers so that every run probes the same pseudo-random rows. */
  private static final long SEED = 54321;

  @BeforeClass()
  public static void setup() {
    stall_till_cloudsize(1);
  }

  /**
   * Checks element access on {@code c} against the expected values.
   *
   * @param c       chunk under test
   * @param vals    expected value of every row; {@code NaN} marks a missing value
   * @param nzs_ary indices of the stored elements (not used by this helper, kept for symmetry
   *                with the other helpers)
   */
  private static void test_at(Chunk c, double [] vals, int [] nzs_ary) {
    Random rnd = new Random(SEED);
    // test atd: sequential pass over every row ...
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], c.atd(i), 0);
    }
    // ... followed by the same number of random probes
    for (int i = 0; i < vals.length; ++i) {
      int j = rnd.nextInt(vals.length);
      Assert.assertEquals(vals[j], c.atd(j), 0);
    }
    // test at8
    for (int i = 0; i < vals.length; ++i) {
      if (Double.isNaN(vals[i])) {
        Assert.assertTrue(c.isNA(i));
        try {
          // A single call only: a second call inside the try could throw and thereby hide
          // the fact that the first one wrongly returned a value.
          c.at8(i);
          Assert.fail("should've thrown");
        } catch (RuntimeException expected) {
          // at8 on a missing value is required to throw; nothing more to check.
        }
      } else {
        Assert.assertFalse(c.isNA(i));
        // Exact long comparison; the (expected, actual, delta) overloads would coerce both
        // longs to floating point and could treat distinct large values as equal.
        Assert.assertEquals((long) vals[i], c.at8(i));
      }
    }
    // test random access
    for (int i = 0; i < vals.length; ++i) {
      int j = rnd.nextInt(vals.length);
      Assert.assertEquals(vals[j], c.atd(j), 0);
    }
  }

  /**
   * Checks {@code Chunk.nextNZ} against the sorted list of stored indices.
   *
   * @param c       chunk under test
   * @param len     number of rows in the chunk; the value expected from {@code nextNZ} once
   *                there is no further stored element
   * @param nzs_ary sorted (ascending) indices of the stored elements
   */
  private static void test_next_nz(Chunk c, int len, int [] nzs_ary) {
    Random rnd = new Random(SEED);
    // Walk the stored elements in order, starting before the first row.
    int x = -1;
    for (int i = 0; i < nzs_ary.length; ++i) {
      Assert.assertEquals(nzs_ary[i], c.nextNZ(x));
      x = nzs_ary[i];
    }
    Assert.assertEquals(len, c.nextNZ(x));
    // Random probes: find the first stored index strictly greater than j.
    for (int i = 0; i < nzs_ary.length; ++i) {
      int j = rnd.nextInt(len);
      int k = Arrays.binarySearch(nzs_ary, j);
      if (k < 0) {
        k = -k - 1;   // not stored: binarySearch encodes the insertion point
      } else {
        k = k + 1;    // stored: the successor is the next entry
      }
      // j may lie at or beyond the last stored index; then there is no successor and the
      // expectation is the chunk length, exactly as in the sequential walk above.
      int expected = k < nzs_ary.length ? nzs_ary[k] : len;
      Assert.assertEquals(expected, c.nextNZ(j));
    }
  }

  /**
   * Checks the bulk accessors {@code getDoubles} and, for sparse chunks,
   * {@code getSparseDoubles}.
   *
   * @param c        chunk under test
   * @param vals     expected value of every row; {@code NaN} marks a missing value
   * @param nzs_ary  sorted indices of the stored elements
   * @param isSparse whether the sparse accessor should be exercised as well
   */
  private static void test_get_doubles(Chunk c, double [] vals, int [] nzs_ary, boolean isSparse) {
    double [] x = new double[vals.length];
    double NA = Double.MAX_VALUE;
    c.getDoubles(x, 0, vals.length);
    Assert.assertArrayEquals(vals, x, 0);
    // Same extraction again, this time with an explicit replacement value for missing rows.
    Arrays.fill(x, 0);
    c.getDoubles(x, 0, vals.length, NA);
    for (int i = 0; i < x.length; ++i) {
      if (Double.isNaN(vals[i])) {
        Assert.assertEquals(NA, x[i], 0);
      } else {
        Assert.assertEquals(vals[i], x[i], 0);
      }
    }
    // test sparse doubles
    if (isSparse) {
      int[] ids = new int[x.length];
      int nzs = c.getSparseDoubles(x, ids);
      Assert.assertEquals(nzs_ary.length, nzs);
      Assert.assertArrayEquals(nzs_ary, Arrays.copyOf(ids, nzs));
      for (int i = 0; i < nzs; ++i) {
        Assert.assertEquals(vals[nzs_ary[i]], x[i], 0);
      }
    }
  }

  /**
   * Checks {@code extractRows} in three flavours: the full range, the fixed sub-range
   * [128, 512) and an explicit list of row ids. Each result is compressed and compared
   * byte-for-byte with a chunk built directly from {@code vals}.
   *
   * <p>Requires {@code vals.length >= 512} because of the fixed sub-range.
   *
   * @param c       chunk under test
   * @param vals    expected value of every row
   * @param nzs_ary sorted indices of the stored elements
   */
  private static void test_extract_rows(Chunk c, double [] vals, int [] nzs_ary) {
    // Full range: re-compressing the extracted rows must reproduce the chunk itself.
    NewChunk nc = new NewChunk(null, 0);
    c.extractRows(nc, 0, vals.length);
    Assert.assertEquals(vals.length, nc.len());
    Chunk c2 = nc.compress();
    Assert.assertArrayEquals(c.asBytes(), c2.asBytes());

    // Sub-range [128, 512).
    nc = new NewChunk(null, 0);
    c.extractRows(nc, 128, 512);
    NewChunk nc2 = new NewChunk(null, 0);
    for (int i = 128; i < 512; i++) {
      nc2.addNum(vals[i]);
    }
    c2 = nc.compress();
    Chunk c3 = nc2.compress();
    Assert.assertArrayEquals(c3.asBytes(), c2.asBytes());

    // Explicit ids: every 8th row merged (in ascending order, without duplicates) with the
    // stored indices that precede or coincide with it.
    int [] ids = new int[vals.length];
    int k = 0;
    int l = 0;
    for (int i = 0; i < ids.length; i += 8) {
      while (l < nzs_ary.length && nzs_ary[l] < i) {
        ids[k++] = nzs_ary[l++];
      }
      if (l < nzs_ary.length && nzs_ary[l] == i) {
        ids[k++] = nzs_ary[l++];
      } else {
        ids[k++] = i;
      }
    }
    ids = Arrays.copyOf(ids, k);
    nc = new NewChunk(null, 0);
    nc2 = new NewChunk(null, 0);
    c.extractRows(nc, ids);
    for (int i = 0; i < ids.length; i++) {
      nc2.addNum(vals[ids[i]]);
    }
    c2 = nc.compress();
    c3 = nc2.compress();
    Assert.assertArrayEquals(c2.asBytes(), c3.asBytes());
  }

  /**
   * Asserts the sparsity flags and sparse lengths of {@code c}.
   *
   * @param c       chunk under test
   * @param isNA    {@code true} if the chunk is expected to be NA-sparse, {@code false} if it
   *                is expected to be zero-sparse
   * @param nzCount expected number of stored elements in the expected sparse representation
   * @param len     expected sparse length for the other (non-applicable) representation
   */
  private static void assertSparseLayout(Chunk c, boolean isNA, int nzCount, int len) {
    if (isNA) {
      Assert.assertTrue(c.isSparseNA());
      Assert.assertFalse(c.isSparseZero());
      Assert.assertEquals(nzCount, c.sparseLenNA());
      Assert.assertEquals(len, c.sparseLenZero());
    } else {
      Assert.assertTrue(c.isSparseZero());
      Assert.assertFalse(c.isSparseNA());
      Assert.assertEquals(nzCount, c.sparseLenZero());
      Assert.assertEquals(len, c.sparseLenNA());
    }
  }

  /**
   * Builds a chunk consisting of {@code off} leading zeros followed by {@code vals}, compresses
   * it and checks class, sparsity flags and {@code nextNZ}. Only {@code nextNZ} is exercised
   * here; the value accessors are not.
   *
   * @param clz     class the compressed chunk must be an instance of
   * @param vals    row values appended after the leading zeros
   * @param nzs_ary sorted indices (relative to {@code vals}) of the stored elements; the caller's
   *                array is not modified
   * @param isNA    whether the chunk is expected to be NA-sparse rather than zero-sparse
   * @param off     number of zero rows to prepend
   * @return the compressed chunk
   */
  public static Chunk makeAndTestSparseChunk(Class<?> clz, double [] vals, int [] nzs_ary, boolean isNA, int off) {
    NewChunk nc = new NewChunk(null, 0);
    nc.addZeros(off);
    for (int i = 0; i < vals.length; ++i) {
      nc.addNum(vals[i]);
    }
    Chunk c = nc.compress();
    // Shift a private copy of the indices so they address rows of the padded chunk.
    nzs_ary = nzs_ary.clone();
    for (int i = 0; i < nzs_ary.length; ++i) {
      nzs_ary[i] += off;
    }
    Assert.assertTrue(clz.isInstance(c));
    assertSparseLayout(c, isNA, nzs_ary.length, vals.length + off);
    // just test nzs
    test_next_nz(c, vals.length + off, nzs_ary);
    return c;
  }

  /**
   * Same as {@link #makeAndTestSparseChunk(Class, double[], int[], boolean, boolean)} with
   * {@code isSparse == true}.
   */
  public static Chunk makeAndTestSparseChunk(Class<?> clz, double [] vals, int [] nzs_ary, boolean isNA) {
    return makeAndTestSparseChunk(clz, vals, nzs_ary, isNA, true);
  }

  /**
   * Builds a chunk from {@code vals}, compresses it and runs the full set of accessor checks.
   *
   * @param clz      class the compressed chunk must be an instance of
   * @param vals     row values
   * @param nzs_ary  sorted indices of the stored elements
   * @param isNA     whether the chunk is expected to be NA-sparse rather than zero-sparse;
   *                 only consulted when {@code isSparse} is set
   * @param isSparse whether the chunk is expected to be sparse; when {@code false} the sparsity
   *                 flags, {@code nextNZ} and {@code getSparseDoubles} are not checked
   * @return the compressed chunk
   */
  public static Chunk makeAndTestSparseChunk(Class<?> clz, double [] vals, int [] nzs_ary, boolean isNA, boolean isSparse) {
    NewChunk nc = new NewChunk(null, 0);
    for (int i = 0; i < vals.length; ++i) {
      nc.addNum(vals[i]);
    }
    Chunk c = nc.compress();
    Assert.assertTrue(clz.isInstance(c));
    if (isSparse) {
      assertSparseLayout(c, isNA, nzs_ary.length, vals.length);
    }
    test_at(c, vals, nzs_ary);
    if (isSparse) {
      test_next_nz(c, vals.length, nzs_ary);
    }
    test_extract_rows(c, vals, nzs_ary);
    test_get_doubles(c, vals, nzs_ary, isSparse);
    return c;
  }

  /**
   * Generates 1024-row columns with up to 96 stored elements at pseudo-random positions, in
   * zero-sparse and NA-sparse variants and for several value ranges, then checks the chunk
   * class and {@code _elem_sz} chosen by compression for each of them.
   *
   * <p>NOTE(review): {@code _elem_sz} is presumably the number of bytes per stored element
   * (row id plus value) -- confirm against CXIChunk/CXFChunk.
   */
  @Test
  public void doChunkTest() {
    final int n = 1024;
    stall_till_cloudsize(1);

    // Zero-sparse inputs: everything not explicitly set stays 0.
    double [] binary_vals   = new double[n];
    double [] valsZeroSmall = new double[n];
    double [] valsZero      = new double[n];
    double [] valsBig       = new double[n];
    double [] float_vals    = new double[n];
    double [] double_vals   = new double[n];

    // NA-sparse inputs: everything not explicitly set is NaN.
    double [] valsNA         = new double[n];
    double [] valsNASmall    = new double[n];
    double [] valsNABig      = new double[n];
    double [] float_vals_na  = new double[n];
    double [] double_vals_na = new double[n];
    Arrays.fill(float_vals_na, Double.NaN);
    Arrays.fill(double_vals_na, Double.NaN);
    Arrays.fill(valsNA, Double.NaN);
    Arrays.fill(valsNASmall, Double.NaN);
    Arrays.fill(valsNABig, Double.NaN);

    Random rnd = new Random(SEED);
    TreeSet<Integer> nzs = new TreeSet<>();
    for (int i = 0; i < 96; i++) {
      int x = rnd.nextInt(valsZero.length);
      if (nzs.add(x)) {
        // The order of the draws below determines the generated data; do not reorder.
        // Roughly 5% of the stored elements get the "other" special value (NaN in the
        // zero-sparse arrays, 0 in the NA-sparse arrays).
        binary_vals[x] = 1;
        valsNA[x] = rnd.nextDouble() < .95?rnd.nextInt():0;
        valsZero[x] = rnd.nextDouble() < .95?rnd.nextInt():Double.NaN;
        valsZeroSmall[x] = rnd.nextDouble() < .95?(rnd.nextInt(60000)-30000):Double.NaN;
        valsNASmall[x] = rnd.nextDouble() < .95?(rnd.nextInt(60000)-30000):0;
        valsBig[x] = rnd.nextDouble() < .95?((double)(long)(rnd.nextDouble()*Long.MAX_VALUE)):Double.NaN;
        valsNABig[x] = rnd.nextDouble() < .95?((double)(long)(rnd.nextDouble()*Long.MAX_VALUE)):0;
        float_vals[x] = rnd.nextDouble() < .95?rnd.nextFloat():Double.NaN;
        double_vals[x] = rnd.nextDouble() < .95?rnd.nextDouble():Double.NaN;
        float_vals_na[x] = rnd.nextDouble() < .95?rnd.nextFloat():0;
        double_vals_na[x] = rnd.nextDouble() < .95?rnd.nextDouble():0;
      }
    }

    // Sorted indices of the stored elements (TreeSet iterates in ascending order).
    int [] nzs_ary = new int[nzs.size()];
    int k = 0;
    for (Integer i : nzs) {
      nzs_ary[k++] = i;
    }

    CXIChunk binaryChunk = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, binary_vals, nzs_ary, false);
    Assert.assertEquals(2, binaryChunk._elem_sz);
    CXIChunk binaryChunkLong = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, binary_vals, nzs_ary, false, 1<<20);
    Assert.assertEquals(4, binaryChunkLong._elem_sz);

    SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsZero, nzs_ary, false);
    SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsNA, nzs_ary, true);

    CXIChunk smallZero = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsZeroSmall, nzs_ary, false);
    Assert.assertEquals(4, smallZero._elem_sz);
    CXIChunk smallZero2 = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsZeroSmall, nzs_ary, false, 60000);
    Assert.assertEquals(4, smallZero2._elem_sz);
    CXIChunk smallZeroLong = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsZeroSmall, nzs_ary, false, 1<<20);
    Assert.assertEquals(8, smallZeroLong._elem_sz);
    CXIChunk smallNA = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsNASmall, nzs_ary, true);
    Assert.assertEquals(4, smallNA._elem_sz);

    CXIChunk bigZero = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsBig, nzs_ary, false);
    Assert.assertEquals(12, bigZero._elem_sz);
    CXIChunk bigNAZero = (CXIChunk) SparseTest.makeAndTestSparseChunk(CXIChunk.class, valsNABig, nzs_ary, true);
    Assert.assertEquals(12, bigNAZero._elem_sz);

    CXFChunk floats = (CXFChunk) SparseTest.makeAndTestSparseChunk(CXFChunk.class, float_vals, nzs_ary, false);
    Assert.assertEquals(8, floats._elem_sz);
    CXFChunk doubles = (CXFChunk) SparseTest.makeAndTestSparseChunk(CXFChunk.class, double_vals, nzs_ary, false);
    Assert.assertEquals(12, doubles._elem_sz);
    CXFChunk floats_na = (CXFChunk) SparseTest.makeAndTestSparseChunk(CXFChunk.class, float_vals_na, nzs_ary, true);
    Assert.assertEquals(8, floats_na._elem_sz);
    CXFChunk doubles_na = (CXFChunk) SparseTest.makeAndTestSparseChunk(CXFChunk.class, double_vals_na, nzs_ary, true);
    Assert.assertEquals(12, doubles_na._elem_sz);
  }
}