package water.fvec;
import org.junit.Assert;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import water.Futures;
import water.TestUtil;
import water.UKV;
import java.util.Arrays;
import java.util.Iterator;
/**
 * Tests for the {@link CBSChunk} implementation.
 *
 * <p>The objective of the tests is to verify the compression method, not the H2O environment.
 *
 * <p>NOTE: The original intent was for this test not to require H2O infrastructure
 * (e.g. by mocking out DKV calls). As written, however, this file imports no mocking
 * library: {@link #stall()} calls {@code stall_till_cloudsize(1)} and
 * {@link #testImpl} creates and removes a vec key through {@link UKV}.
 */
public class CBSChunkTest extends TestUtil {

  /** Calls {@code stall_till_cloudsize(1)} once before the tests of this class run. */
  @BeforeClass public static void stall() { stall_till_cloudsize(1); }

  /**
   * Builds a {@link NewChunk} directly from the given raw arrays, compresses it and
   * verifies that the result is a {@link CBSChunk} with the expected layout and content.
   *
   * @param ls      values assigned to {@code NewChunk._ls}
   * @param xs      values assigned to {@code NewChunk._xs}; an entry of {@code 0} means the
   *                row is expected to decode to {@code ls[i]}, any other entry means the row
   *                is expected to be NA
   * @param expBpv  expected {@code CBSChunk._bpv}
   * @param expGap  expected {@code CBSChunk._gap}
   * @param expClen expected payload length, i.e. {@code _mem.length - CBSChunk.OFF}
   * @param expNA   expected {@code NewChunk._naCnt} after {@code type()} has been called
   */
  void testImpl(long[] ls, int[] xs, int expBpv, int expGap, int expClen, int expNA) {
    AppendableVec av = new AppendableVec(Vec.newKey());
    Futures fs = new Futures();
    Vec vv = av.close(fs);
    try {
      fs.blockForPending();
      // Create a new chunk
      NewChunk nc = new NewChunk(av, 0);
      nc._ls = ls;
      nc._xs = xs;
      nc._len = nc._sparseLen = ls.length;
      nc.type(); // Compute rollups, including NA
      assertEquals(expNA, nc._naCnt);
      // Compress chunk
      Chunk cc = nc.compress();
      // Check the type with a JUnit assertion (active regardless of -ea) before casting below.
      assertTrue( "Found chunk class "+cc.getClass()+" but expected " + CBSChunk.class, CBSChunk.class.isInstance(cc) );
      cc._vec = av.close(fs);
      fs.blockForPending();
      CBSChunk cbs = (CBSChunk) cc;
      assertEquals(nc._len, cc._len);
      assertEquals(expGap, cbs._gap);
      assertEquals(expBpv, cbs._bpv);
      assertEquals(expClen, cc._mem.length - CBSChunk.OFF);
      // Also, we can decompress correctly
      for( int i=0; i<ls.length; i++ ) {
        if( xs[i]==0 ) assertEquals(ls[i], cc.at80(i));
        else assertTrue(cc.isNA0(i));
      }
    } finally {
      // Always drop the vec key, even when an assertion above fails,
      // so a failing case does not leave the key behind for later tests.
      UKV.remove(vv._key);
    }
  }

  // Test one bit per value compression which is used
  // for data without NAs
  // NOTE(review): this test is disabled via @Ignore and no reason is given in this file.
  @Test @Ignore public void test1BPV() {
    // Simple case only compressing into 4bits of one byte
    testImpl(new long[] {0,0,0,1},
             new int [] {0,0,0,0},
             1, 4, 1, 0);
    // Filling whole byte
    testImpl(new long[] {1,0,0,0,1,1,1,0},
             new int [] {0,0,0,0,0,0,0,0},
             1, 0, 1, 0);
    // Crossing the border of two bytes by 1bit
    testImpl(new long[] {1,0,0,0,1,1,1,0, 1},
             new int [] {0,0,0,0,0,0,0,0, 0},
             1, 7, 2, 0);
  }

  // Test two bits per value compression,
  // used for data containing NAs
  @Test public void test2BPV() {
    // Simple case only compressing 3*2bits into 1byte including 1 NA
    testImpl(new long[] {0,Long.MAX_VALUE,   1},
             new int [] {0,Integer.MIN_VALUE,0},
             2, 2, 1, 1);
    // Filling whole byte, one NA
    testImpl(new long[] {1,Long.MAX_VALUE   ,0,1},
             new int [] {0,Integer.MIN_VALUE,0,0},
             2, 0, 1, 1);
    // crossing the border of two bytes by 4bits, one NA
    testImpl(new long[] {1,0,Long.MAX_VALUE,   1, 0,0},
             new int [] {0,0,Integer.MIN_VALUE,0, 0,0},
             2, 4, 2, 1);
    // Two full bytes, 5 NAs
    testImpl(new long[] {Long.MAX_VALUE,Long.MAX_VALUE,Long.MAX_VALUE,1, 0,Long.MAX_VALUE,1,Long.MAX_VALUE},
             new int [] {Integer.MIN_VALUE,Integer.MIN_VALUE,Integer.MIN_VALUE,0, 0,Integer.MIN_VALUE,0,Integer.MIN_VALUE},
             2, 0, 2, 5);
  }

  /**
   * Round-trip test: compress a chunk of 0/1 values with a trailing NA, inflate it into a
   * fresh {@link NewChunk}, compress that again and check the two compressed chunks carry
   * identical bytes. The loop runs twice; on the second pass ({@code l == 1}) an extra NA
   * is prepended so that all value rows are shifted by one.
   */
  @Test public void test_inflate_impl() {
    for (int l=0; l<2; ++l) {
      NewChunk nc = new NewChunk(null, 0);
      int[] vals = new int[]{0, 1, 0, 1, 0, 0, 1};
      final int total = vals.length + 1 + l; // values + trailing NA (+ leading NA when l == 1)
      final int naIdx = vals.length + l;     // row index of the trailing NA
      if (l==1) nc.addNA();
      for (int v : vals) nc.addNum(v);
      nc.addNA();

      // First compression: values and trailing NA must be readable from the CBSChunk.
      Chunk cc = nc.compress();
      Assert.assertEquals(total, cc.len());
      Assert.assertTrue(cc instanceof CBSChunk);
      for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at80(l+i));
      for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.at8(l+i));
      Assert.assertTrue(cc.isNA0(naIdx));
      Assert.assertTrue(cc.isNA(naIdx));

      // Inflate back into a fresh NewChunk and verify it holds the same rows.
      nc = new NewChunk(null, 0);
      cc.inflate_impl(nc);
      // NOTE(review): the result of this call is discarded; presumably it only exercises
      // values() on the inflated chunk - confirm before removing.
      nc.values(0, nc.len());
      Assert.assertEquals(total, nc.sparseLen());
      Assert.assertEquals(total, nc.len());
      Iterator<NewChunk.Value> it = nc.values(0, total);
      for (int i = 0; i < total; ++i) Assert.assertTrue(it.next().rowId0() == i);
      Assert.assertTrue(!it.hasNext());
      if (l==1) {
        Assert.assertTrue(nc.isNA0(0));
        Assert.assertTrue(nc.isNA(0));
      }
      for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.at80(l+i));
      for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.at8(l+i));
      Assert.assertTrue(nc.isNA0(naIdx));
      Assert.assertTrue(nc.isNA(naIdx));

      // Second compression: must again be a CBSChunk, byte-for-byte equal to the first one.
      Chunk cc2 = nc.compress();
      // Check the re-compressed chunk's own length (previously this re-checked cc by mistake).
      Assert.assertEquals(total, cc2.len());
      Assert.assertTrue(cc2 instanceof CBSChunk);
      for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.at80(l+i));
      for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.at8(l+i));
      Assert.assertTrue(cc2.isNA0(naIdx));
      Assert.assertTrue(cc2.isNA(naIdx));
      Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    }
  }
}