package water.fvec;
import static org.junit.Assert.*;
import java.io.File;
import org.junit.BeforeClass;
import org.junit.Test;
import water.*;
public class FVecTest extends TestUtil {
/** Tolerance handed to assertEquals when comparing floating-point column sums. */
static final double EPSILON = 1e-6;

/**
 * One-time class setup: invokes {@code stall_till_cloudsize(2)} before the tests run.
 * NOTE(review): presumably this waits until a 2-node cloud has formed - confirm in TestUtil.
 */
@BeforeClass
public static void stall() {
  stall_till_cloudsize(2);
}
/**
 * Convenience overload: builds a Key from {@code kname} with {@code Key.make}
 * and hands off to the Key-based overload.
 *
 * @param kname name from which the frame key is made
 * @param data  strings forwarded unchanged to the Key-based overload
 * @return whatever the Key-based overload returns
 */
public static Key makeByteVec(String kname, String... data) {
  final Key frameKey = Key.make(kname);
  return makeByteVec(frameKey, data);
}
/**
 * Builds a ByteVec with one chunk per input string, wraps it in a single-column
 * Frame named "makeByteVec", and stores the chunks, the vec and the frame in the DKV.
 * Blocks until all pending puts have completed.
 *
 * @param k    key under which the Frame is stored
 * @param data chunk contents; each string is converted with {@code getBytes()}
 *             and becomes one chunk
 * @return {@code k}
 */
public static Key makeByteVec(Key k, String... data) {
  byte[][] chunks = new byte[data.length][];
  // espc[i] is the running offset at which chunk i starts; espc[length] is the total.
  long[] espc = new long[data.length + 1];
  for( int i = 0; i < chunks.length; ++i ) {
    chunks[i] = data[i].getBytes();
    // Offsets must count the encoded bytes actually stored in the chunk.
    // String.length() counts UTF-16 chars and diverges for non-ASCII text.
    espc[i + 1] = espc[i] + chunks[i].length;
  }
  Futures fs = new Futures();
  ByteVec bv = new ByteVec(Vec.newKey(), espc);
  for( int i = 0; i < chunks.length; ++i ) {
    Key chunkKey = bv.chunkKey(i);
    DKV.put(chunkKey, new Value(chunkKey, chunks[i].length, chunks[i], TypeMap.C1NCHUNK, Value.ICE), fs);
  }
  DKV.put(bv._key, bv, fs);
  Frame fr = new Frame(k, new String[]{"makeByteVec"}, new Vec[]{bv});
  DKV.put(k, fr, fs);
  fs.blockForPending();
  return k;
}
// ==========================================================================
/**
 * Loads cars.csv as an NFSFileVec, histograms its bytes with {@link ByteHisto},
 * and checks that the histogram counts add up to the file length.
 */
@Test public void testBasicCRUD() {
  // Make and insert a FileVec to the global store
  File file = TestUtil.find_test_file("./smalldata/cars.csv");
  Key key = NFSFileVec.make(file);
  try {
    NFSFileVec nfs = DKV.get(key).get();
    int[] x = new ByteHisto().doAll(nfs)._x;
    int sum = 0;
    for( int i : x )
      sum += i;
    assertEquals(file.length(), sum);
  } finally {
    // Remove the key even when the assertion fails, so nothing is left in the store.
    UKV.remove(key);
  }
}
/** Task that tallies how often each value 0..255 occurs in the chunks it visits. */
public static class ByteHisto extends MRTask2<ByteHisto> {
  /** 256-slot histogram; allocated in map and merged in reduce. */
  public int[] _x;

  /** Allocates a fresh histogram and bumps the slot for each element of the chunk. */
  @Override public void map( Chunk bv ) {
    final int[] counts = _x = new int[256];
    int row = 0;
    while( row < bv._len ) {
      final int slot = (int) bv.at0(row);
      counts[slot]++;
      row++;
    }
  }

  /** Folds another task's histogram into this one by element-wise addition. */
  @Override public void reduce( ByteHisto bh ) {
    water.util.Utils.add(_x, bh._x);
  }
}
// ==========================================================================
/**
 * Parses the airlines sample into a frame keyed "air.hex" and runs
 * {@link SetDoubleInt} over it; the frame is deleted afterwards in all cases.
 */
@Test public void testSet() {
  final File zipped = TestUtil.find_test_file("./smalldata/airlines/allyears2k_headers.zip");
  final Key sourceKey = NFSFileVec.make(zipped);
  final Frame parsed = ParseDataset2.parse(Key.make("air.hex"), new Key[]{sourceKey});
  try {
    // Overwrite values inside the freshly parsed frame
    final SetDoubleInt scribbler = new SetDoubleInt();
    scribbler.doAll(parsed);
  } finally {
    parsed.delete();
  }
}
/**
 * Task that picks the first C2Chunk among its input chunks and rewrites it in
 * place: row 0 receives the vec's min, and every later row receives the value
 * the previous row held before being overwritten.
 */
static class SetDoubleInt extends MRTask2<SetDoubleInt> {
  @Override public void map( Chunk chks[] ) {
    // Locate the first chunk whose concrete class is exactly C2Chunk
    Chunk c = null;
    for( Chunk x : chks )
      if( x.getClass() == water.fvec.C2Chunk.class )
        { c = x; break; }
    assertNotNull("Expect to find a C2Chunk", c);
    assertTrue(c.writable());
    // d carries the value to write next: starts at the vec min, then trails one row behind
    double d = c._vec.min();
    for( int i = 0; i < c._len; i++ ) {
      double e = c.at0(i);   // remember the current value before overwriting it
      c.set0(i, d);
      d = e;
    }
  }
}
// ==========================================================================
// Test making an appendable vector from a plain vector
/**
 * Runs {@link TestNewVec} over cars.csv loaded as an NFSFileVec and checks that
 * the first three output values equal the corresponding input values plus one.
 */
@Test public void testNewVec() {
  // Make and insert an NFSFileVec to the global store
  File file = TestUtil.find_test_file("./smalldata/cars.csv");
  Key key = NFSFileVec.make(file);
  Vec res = null;
  try {
    NFSFileVec nfs = DKV.get(key).get();
    res = new TestNewVec().doAll(1, nfs).outputFrame(new String[]{"v"}, new String[][]{null}).anyVec();
    assertEquals(nfs.at8(0) + 1, res.at8(0));
    assertEquals(nfs.at8(1) + 1, res.at8(1));
    assertEquals(nfs.at8(2) + 1, res.at8(2));
  } finally {
    // Clean up even when an assertion fails, so no keys are left behind.
    UKV.remove(key);
    if( res != null ) UKV.remove(res._key);
  }
}
/**
 * Task that appends one output value per input row: the input value plus 1 when
 * it is at least ' ' (space), otherwise the input value unchanged.
 */
public static class TestNewVec extends MRTask2<TestNewVec> {
  @Override public void map( Chunk in, NewChunk out ) {
    for( int i = 0; i < in._len; i++ ) {
      // i runs over this chunk's own rows, so read with the chunk-relative
      // accessor, as the other tasks in this file do (at0/at80/set0).
      // NOTE(review): at8 appears to take a global row index - confirm in Chunk.
      long v = in.at80(i);
      out.append2(v + (v >= ' ' ? 1 : 0), 0);
    }
  }
}
// ==========================================================================
/**
 * Parses the synthetic 2659x1049 logreg file, checks its dimensions and the
 * sums of its first three columns, then fills a zero column with column 0 +
 * column 1 via {@link PairSum} and checks that column's sum.
 */
@Test public void testParse2() {
  File file = TestUtil.find_test_file("../smalldata/logreg/syn_2659x1049.csv");
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("syn.hex");
  Frame fr = ParseDataset2.parse(okey, new Key[]{fkey});
  Vec vz = null;
  try {
    // JUnit convention is (expected, actual); keeps failure messages truthful
    assertEquals(1050, fr.numCols()); // Count of columns
    assertEquals(2659, fr.numRows()); // Count of rows
    double[] sums = new Sum().doAll(fr)._sums;
    assertEquals(3949, sums[0], EPSILON);
    assertEquals(3986, sums[1], EPSILON);
    assertEquals(3993, sums[2], EPSILON);
    // Create a temp column of zeros
    Vec v0 = fr.vecs()[0];
    Vec v1 = fr.vecs()[1];
    vz = v0.makeZero();
    // Add column 0 & 1 into the temp column
    new PairSum().doAll(vz, v0, v1);
    // Drop the parsed columns, then total the temp column on its own
    fr.delete();            // Remove all other columns
    fr = new Frame(new String[]{"tmp"}, new Vec[]{vz}); // Add just this one
    sums = new Sum().doAll(fr)._sums;
    assertEquals(3949 + 3986, sums[0], EPSILON);
  } finally {
    if( vz != null ) UKV.remove(vz._key);
    fr.delete();
  }
}
/** Task that totals each column independently; {@code _sums[j]} holds column j's total. */
private static class Sum extends MRTask2<Sum> {
  double[] _sums;

  /**
   * Allocates one accumulator per chunk and adds up every row, taking the row
   * count from the first chunk.
   */
  @Override public void map( Chunk[] bvs ) {
    _sums = new double[bvs.length];
    final int nrows = bvs[0]._len;
    for( int row = 0; row < nrows; row++ ) {
      int col = 0;
      for( Chunk chk : bvs )
        _sums[col++] += chk.at0(row);
    }
  }

  /** Merges another task's totals into this one by element-wise addition. */
  @Override public void reduce( Sum mrt ) {
    assert _sums != null;
    assert mrt._sums != null;
    water.util.Utils.add(_sums, mrt._sums);
  }
}
// Simple vector sum C=A+B
/**
 * Task that writes, for every row of the output chunk, the sum of the two
 * input chunks' values at that row (read with {@code at80}).
 */
private static class PairSum extends MRTask2<PairSum> {
  // Parameterized by itself, like the other tasks in this file, rather than by Sum.
  @Override public void map( Chunk out, Chunk in1, Chunk in2 ) {
    for( int i = 0; i < out._len; i++ )
      out.set0(i, in1.at80(i) + in2.at80(i));
  }
}
// ==========================================================================
/**
 * Parses the 40k categoricals file and checks that it yields 40000 rows and
 * that the first column's domain has 40000 entries.
 */
@Test public void testLargeCats() {
  File file = TestUtil.find_test_file("./smalldata/categoricals/40k_categoricals.csv.gz");
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("cat.hex");
  Frame fr = ParseDataset2.parse(okey, new Key[]{fkey});
  UKV.remove(fkey);
  try {
    // JUnit convention is (expected, actual); keeps failure messages truthful
    assertEquals(40000, fr.numRows()); // Count of rows
    assertEquals(40000, fr.vecs()[0].domain().length);
  } finally {
    fr.delete();
  }
}
}