package water.fvec;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import water.Futures;
import water.Scope;
import water.TestUtil;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeSet;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/** Test for CBSChunk implementation.
*
* The objective of the test is to verify the compression method, not the H2O environment.
*
* NOTE: The test attempts not to require H2O infrastructure to run.
* It tries to use Mockito (perhaps PowerMock in the future) to wrap
* expected results. In this case the expectation is a little bit misused,
* since it is used to avoid a DKV call.
* */
public class CBSChunkTest extends TestUtil {
/**
 * One-time class fixture: calls {@code stall_till_cloudsize(1)} before any test runs.
 * NOTE(review): presumably this waits until a cloud of size one is up - confirm in TestUtil.
 */
@BeforeClass
public static void setup() {
  stall_till_cloudsize(1);
}
/**
 * Builds a NewChunk from the given arrays, compresses it and verifies that the
 * result is a CBSChunk with the expected layout and that every value reads back.
 *
 * Entries of {@code ls} equal to {@code Long.MIN_VALUE} are marked as NA before compression.
 * The vec created for the chunk is removed in a {@code finally} block so that a failing
 * assertion does not leave it behind.
 *
 * @param ls      values to store; {@code Long.MIN_VALUE} marks an NA slot
 * @param xs      companion array handed to the NewChunk constructor together with {@code ls}
 * @param expBpv  expected value of {@code CBSChunk.bpv()}
 * @param expGap  expected value of {@code CBSChunk.gap()}
 * @param expClen expected payload size, i.e. {@code _mem.length - CBSChunk._OFF}
 * @param expNA   expected NA count reported by the NewChunk
 */
void testImpl(long[] ls, int[] xs, int expBpv, int expGap, int expClen, int expNA) {
  AppendableVec av = new AppendableVec(Vec.newKey(), Vec.T_NUM);
  Futures fs = new Futures();
  try {
    // Create a new chunk
    NewChunk nc = new NewChunk(av, 0, ls, xs, null, null);
    for (int i = 0; i < ls.length; ++i) {
      if (ls[i] == Long.MIN_VALUE) {
        nc.setNA_impl(i);
      }
    }
    nc.type(); // Compute rollups, including NA
    assertEquals(expNA, nc.naCnt());
    // Compress chunk
    Chunk cc = nc.compress();
    // JUnit assertion (not a Java `assert`) so the type check also runs without -ea.
    assertTrue("Found chunk class " + cc.getClass() + " but expected " + CBSChunk.class, CBSChunk.class.isInstance(cc));
    cc._vec = av.layout_and_close(fs);
    fs.blockForPending();
    assertEquals(nc._len, cc._len);
    CBSChunk cbs = (CBSChunk) cc;
    assertEquals(expBpv, cbs.bpv());
    assertEquals(expGap, cbs.gap());
    assertEquals(expClen, cc._mem.length - CBSChunk._OFF);
    // Also, we can decompress correctly
    for (int i = 0; i < ls.length; i++) {
      if (ls[i] != Long.MIN_VALUE) {
        assertEquals(ls[i], cc.at8(i));
      } else {
        assertTrue(cc.isNA(i));
      }
    }
  } finally {
    // materialize the vector (prerequisite to free the memory); runs even when an
    // assertion above failed, so the vec is always removed.
    Vec vv = av.layout_and_close(fs);
    fs.blockForPending();
    vv.remove();
  }
}
/**
 * Exercises in-place updates of a CBSChunk.
 *
 * Three phases: (1) a chunk built from 0/1/NaN data is overwritten with fresh 0/1/NaN
 * values, (2) a chunk built from 0/1 data is overwritten with fresh 0/1 values, and
 * (3) two NAs are written into that second chunk. After each phase the chunk contents
 * are compared with the expected array. A deep copy {@code c2} is updated through
 * {@code setNA_impl}/{@code set(int,long)} in parallel during phases (1) and (2).
 *
 * The scope is exited in a {@code finally} block so that a failing assertion does not
 * leave the scope entered.
 */
@Test
public void testSet() {
  Scope.enter();
  try {
    // with NAs
    double[] x = new double[]{0, 1, Double.NaN};
    double[] vals = new double[1024];
    Random rnd = new Random(54321);
    for (int i = 0; i < vals.length; ++i) {
      vals[i] = x[rnd.nextInt(3)];
    }
    Chunk c = Vec.makeVec(vals, Vec.VectorGroup.VG_LEN1.addVec()).chunkForChunkIdx(0);
    Chunk c2 = c.deepCopy();
    c2._vec = c._vec;
    assertTrue(c instanceof CBSChunk);
    for (int i = 0; i < vals.length; ++i) {
      assertEquals(vals[i], c.atd(i), 0);
      assertEquals(vals[i], c2.atd(i), 0);
    }
    // Overwrite every slot with a freshly drawn value (possibly NaN).
    for (int i = 0; i < vals.length; ++i) {
      vals[i] = x[rnd.nextInt(3)];
      c.set(i, vals[i]);
      if (Double.isNaN(vals[i])) {
        c2.setNA_impl(i);
      } else {
        c2.set(i, (long) vals[i]);
      }
    }
    for (int i = 0; i < vals.length; ++i) {
      assertEquals(vals[i], c.atd(i), 0);
      assertEquals(vals[i], c2.atd(i), 0);
    }
    // without NAs: only the first two entries of x (0 and 1) are drawn
    for (int i = 0; i < vals.length; ++i) {
      vals[i] = x[rnd.nextInt(2)];
    }
    c = Vec.makeVec(vals, Vec.VectorGroup.VG_LEN1.addVec()).chunkForChunkIdx(0);
    c2 = c.deepCopy();
    c2._vec = c._vec;
    assertTrue(c instanceof CBSChunk);
    for (int i = 0; i < vals.length; ++i) {
      assertEquals(vals[i], c.atd(i), 0);
    }
    for (int i = 0; i < vals.length; ++i) {
      vals[i] = x[rnd.nextInt(2)];
      c.set(i, vals[i]);
      c2.set(i, (long) vals[i]);
    }
    for (int i = 0; i < vals.length; ++i) {
      assertEquals(vals[i], c.atd(i), 0);
      assertEquals(vals[i], c2.at8(i), 0);
    }
    // set some NAs, once through setNA and once through set(.., NaN)
    int i = vals.length >> 2;
    int j = vals.length >> 1;
    c.setNA(i);
    c.set(j, Double.NaN);
    vals[j] = Double.NaN;
    vals[i] = Double.NaN;
    Assert.assertTrue(c.isNA(i));
    Assert.assertTrue(c.isNA(j));
    for (int k = 0; k < vals.length; ++k) {
      assertEquals(vals[k], c.atd(k), 0);
    }
  } finally {
    Scope.exit();
  }
}
/**
 * One-bit-per-value compression, used for data without NAs.
 * Each case passes (bpv, gap, payload length, NA count) expectations to testImpl;
 * the companion int array is all zeros in every case.
 */
@Test
public void test1BPV() {
  // Simple case only compressing into 4bits of one byte
  final long[] halfByte = {1, 0, 1, 1};
  testImpl(halfByte, new int[halfByte.length], 1, 4, 1, 0);

  // Filling whole byte
  final long[] fullByte = {1, 0, 0, 0, 1, 1, 1, 0};
  testImpl(fullByte, new int[fullByte.length], 1, 0, 1, 0);

  // Crossing the border of two bytes by 1bit
  final long[] byteAndOneBit = {1, 0, 0, 0, 1, 1, 1, 0, 1};
  testImpl(byteAndOneBit, new int[byteAndOneBit.length], 1, 7, 2, 0);
}
/**
 * Two-bits-per-value compression, used for data containing NAs.
 * NA slots are encoded in the input as {@code Long.MIN_VALUE}; the companion int
 * array is all zeros in every case.
 */
@Test
public void test2BPV() {
  final long NA = Long.MIN_VALUE;

  // Simple case only compressing 2*3bits into 1byte including 1 NA
  final long[] threeValues = {0, NA, 1};
  testImpl(threeValues, new int[threeValues.length], 2, 2, 1, 1);

  // Filling whole byte, one NA
  final long[] oneByte = {1, NA, 0, 1};
  testImpl(oneByte, new int[oneByte.length], 2, 0, 1, 1);

  // crossing the border of two bytes by 4bits, one NA
  final long[] byteAndHalf = {1, 0, NA, 1, 0, 0};
  testImpl(byteAndHalf, new int[byteAndHalf.length], 2, 4, 2, 1);

  // Two full bytes, 5 NAs
  final long[] twoBytes = {NA, NA, NA, 1, 0, NA, 1, NA};
  testImpl(twoBytes, new int[twoBytes.length], 2, 0, 2, 5);
}
/**
 * Round-trips a small 0/1 chunk with NAs: NewChunk -> compress -> extractRows into a
 * fresh NewChunk -> compress again, checking values and NA flags at every stage and
 * finally that both compressed chunks have identical {@code _mem}.
 *
 * The loop runs twice: with {@code l == 0} the data is the values followed by one NA,
 * with {@code l == 1} an additional NA is placed in front, shifting the values by one.
 */
@Test public void test_inflate_impl() {
  for (int l = 0; l < 2; ++l) {
    NewChunk nc = new NewChunk(null, 0);
    int[] vals = new int[]{0, 1, 0, 1, 0, 0, 1};
    if (l == 1) nc.addNA();
    for (int v : vals) nc.addNum(v);
    nc.addNA();
    int len = nc.len();

    // First compression
    Chunk cc = nc.compress();
    Assert.assertEquals(vals.length + 1 + l, cc._len);
    Assert.assertTrue(cc instanceof CBSChunk);
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(l + i));
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8_abs(l + i));
    Assert.assertTrue(cc.isNA(vals.length + l));
    Assert.assertTrue(cc.isNA_abs(vals.length + l));

    // Inflate back into a fresh NewChunk
    nc = new NewChunk(null, 0);
    cc.extractRows(nc, 0, len);
    Assert.assertEquals(vals.length + l + 1, nc._sparseLen);
    Assert.assertEquals(vals.length + l + 1, nc._len);
    if (l == 1) {
      Assert.assertTrue(nc.isNA(0));
      Assert.assertTrue(nc.isNA_abs(0));
    }
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.at8(l + i));
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.at8_abs(l + i));
    Assert.assertTrue(nc.isNA(vals.length + l));
    Assert.assertTrue(nc.isNA_abs(vals.length + l));

    // Bulk read must agree with element-wise access on the compressed chunk
    double[] densevals = new double[cc.len()];
    cc.getDoubles(densevals, 0, cc.len());
    for (int i = 0; i < densevals.length; ++i) {
      if (cc.isNA(i)) Assert.assertTrue(Double.isNaN(densevals[i]));
      else Assert.assertTrue(cc.at8(i) == (int) densevals[i]);
    }

    // Second compression: check the NEW chunk (cc2), not the original one
    Chunk cc2 = nc.compress();
    Assert.assertEquals(vals.length + 1 + l, cc2._len);
    Assert.assertTrue(cc2 instanceof CBSChunk);
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.at8(l + i));
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.at8_abs(l + i));
    Assert.assertTrue(cc2.isNA(vals.length + l));
    Assert.assertTrue(cc2.isNA_abs(vals.length + l));
    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
  }
}
/**
 * Writes 15 zero/one values into a single-chunk vec, marks three rows as NA through
 * {@code setNA_abs}, and checks that the NA pattern is visible on the chunk itself,
 * on the NewChunk produced by {@code extractRows}, and on the chunk obtained by
 * compressing that NewChunk again. Finally both compressed chunks must have identical
 * {@code _mem}.
 *
 * The vec is removed in a {@code finally} block so a failing assertion does not leave it behind.
 */
@Test public void test_setNA() {
  // Create a vec with one chunk with 15 elements, and set its numbers
  water.Key key = Vec.newKey();
  Vec vec = new Vec(key, Vec.ESPC.rowLayout(key, new long[]{0, 15})).makeZero();
  try {
    int[] vals = new int[]{0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1};
    Vec.Writer w = vec.open();
    for (int i = 0; i < vals.length; ++i) w.set(i, vals[i]);
    w.close();

    Chunk cc = vec.chunkForChunkIdx(0);
    // JUnit assertion (not a Java `assert`) so the type check also runs without -ea.
    Assert.assertTrue(cc instanceof CBSChunk);
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(i));
    for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8_abs(i));

    int[] NAs = new int[]{1, 5, 2};
    int[] notNAs = new int[]{0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14};
    for (int na : NAs) cc.setNA_abs(na);

    // NA pattern on the original chunk
    for (int na : NAs) Assert.assertTrue(cc.isNA(na));
    for (int na : NAs) Assert.assertTrue(cc.isNA_abs(na));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc.isNA_abs(notna));

    // NA pattern must survive inflation into a NewChunk
    NewChunk nc = new NewChunk(null, 0);
    cc.extractRows(nc, 0, (int) vec.length());
    Assert.assertEquals(vals.length, nc._sparseLen);
    Assert.assertEquals(vals.length, nc._len);
    for (int na : NAs) Assert.assertTrue(nc.isNA(na));
    for (int na : NAs) Assert.assertTrue(nc.isNA_abs(na));
    for (int notna : notNAs) Assert.assertTrue(!nc.isNA(notna));
    for (int notna : notNAs) Assert.assertTrue(!nc.isNA_abs(notna));

    // ... and re-compression of that NewChunk
    Chunk cc2 = nc.compress();
    Assert.assertEquals(vals.length, cc2._len);
    Assert.assertTrue(cc2 instanceof CBSChunk);
    for (int na : NAs) Assert.assertTrue(cc2.isNA(na));
    for (int na : NAs) Assert.assertTrue(cc2.isNA_abs(na));
    for (int notna : notNAs) Assert.assertTrue(!cc2.isNA(notna));
    for (int notna : notNAs) Assert.assertTrue(!cc2.isNA_abs(notna));
    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
  } finally {
    vec.remove();
  }
}
/**
 * Builds two 1024-element arrays whose non-zeros sit at the same randomly drawn
 * positions (fixed seed): one holding only ones, the other holding ones with an
 * occasional NaN. Both are handed to {@code SparseTest.makeAndTestSparseChunk}
 * together with the sorted non-zero positions, expecting a CBSChunk.
 */
@Test
public void testSparseAndVisitorInterface() {
  final int size = 1024;
  double[] vals = new double[size];
  double[] valsNA = new double[size];
  TreeSet<Integer> nzs = new TreeSet<>();
  Random rnd = new Random(54321);
  for (int draw = 0; draw < 512; draw++) {
    int pos = rnd.nextInt(vals.length);
    if (!nzs.add(pos)) {
      continue; // position already taken; no further random numbers are consumed
    }
    vals[pos] = 1;
    if (rnd.nextDouble() < .95) {
      valsNA[pos] = 1;
    } else {
      valsNA[pos] = Double.NaN;
    }
  }
  // Copy the sorted positions into a primitive array.
  int[] nzs_ary = new int[nzs.size()];
  Iterator<Integer> it = nzs.iterator();
  for (int slot = 0; slot < nzs_ary.length; slot++) {
    nzs_ary[slot] = it.next();
  }
  SparseTest.makeAndTestSparseChunk(CBSChunk.class, vals, nzs_ary, false, false);
  SparseTest.makeAndTestSparseChunk(CBSChunk.class, valsNA, nzs_ary, false, false);
}
}