package water.fvec; import org.junit.*; import water.IcedUtils; import water.TestUtil; import water.parser.BufferedString; import java.util.Arrays; import java.util.List; import static org.junit.Assert.*; public class CStrChunkTest extends TestUtil { @BeforeClass() public static void setup() { stall_till_cloudsize(1); } @Test public void test_addStr() { for (int l=0; l<2; ++l) { NewChunk nc = new NewChunk(null, 0); BufferedString[] vals = new BufferedString[1000001]; for (int i = 0; i < vals.length; i++) { vals[i] = new BufferedString("Foo"+i); } if (l==1) nc.addNA(); for (BufferedString v : vals) nc.addStr(v); nc.addNA(); Chunk cc = nc.compress(); assertEquals(vals.length + 1 + l, cc._len); assertTrue(cc instanceof CStrChunk); if (l==1) assertTrue(cc.isNA(0)); if (l==1) assertTrue(cc.isNA_abs(0)); BufferedString tmpStr = new BufferedString(); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], cc.atStr(tmpStr, l + i)); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], cc.atStr_abs(tmpStr, l + i)); assertTrue(cc.isNA(vals.length + l)); assertTrue(cc.isNA_abs(vals.length + l)); Chunk cc2 = IcedUtils.deepCopy(cc); assertEquals(vals.length + 1 + l, cc2._len); assertTrue(cc2 instanceof CStrChunk); if (l==1) assertTrue(cc2.isNA(0)); if (l==1) assertTrue(cc2.isNA_abs(0)); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], cc2.atStr(tmpStr, l + i)); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i)); assertTrue(cc2.isNA(vals.length + l)); assertTrue(cc2.isNA_abs(vals.length + l)); nc = cc.extractRows(new NewChunk(null, 0),0,nc.len()); assertEquals(vals.length + 1 + l, nc.len()); if (l==1) assertTrue(nc.isNA(0)); if (l==1) assertTrue(nc.isNA_abs(0)); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], nc.atStr(tmpStr, l + i)); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], nc.atStr_abs(tmpStr, l + i)); assertTrue(nc.isNA(vals.length + l)); assertTrue(nc.isNA_abs(vals.length + l)); cc2 = nc.compress(); assertEquals(vals.length + 1 + l, cc._len); assertTrue(cc2 instanceof CStrChunk); if (l==1) assertTrue(cc2.isNA(0)); if (l==1) assertTrue(cc2.isNA_abs(0)); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], cc2.atStr(tmpStr, l + i)); for (int i = 0; i < vals.length; ++i) assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i)); assertTrue(cc2.isNA(vals.length + l)); assertTrue(cc2.isNA_abs(vals.length + l)); assertTrue(Arrays.equals(cc._mem, cc2._mem)); } } @Test public void test_writer(){ Frame frame = null; try { frame = parse_test_file("smalldata/junit/iris.csv"); //Create a label vector byte[] typeArr = {Vec.T_STR}; Vec labels = frame.lastVec().makeCons(1, 0, null, typeArr)[0]; Vec.Writer writer = labels.open(); int rowCnt = (int)frame.lastVec().length(); for (int r = 0; r < rowCnt; r++) // adding labels in reverse order writer.set(rowCnt-r-1, "Foo"+(r+1)); writer.close(); //Append label vector and spot check frame.add("Labels", labels); assertTrue("Failed to create a new String based label column", frame.lastVec().atStr(new BufferedString(), 42).compareTo(new BufferedString("Foo108"))==0); } finally { if (frame != null) frame.delete(); } } @Test public void test_sparse() { NewChunk nc = new NewChunk(null, 0); for( int i=0; i<100; i++ ) nc.addNA(); nc.addStr(new BufferedString("foo")); nc.addNA(); nc.addStr(new BufferedString("bar")); Chunk c = nc.compress(); assertTrue("first 100 entries are NA",c.isNA(0) && c.isNA(99)); assertTrue("Sparse string has values",c.atStr(new BufferedString(),100).sameString("foo")); assertTrue("NA",c.isNA(101)); final BufferedString bufferedString = c.atStr(new BufferedString(), 102); Assert.assertTrue("Sparse string has values: expected `bar`, got " + bufferedString, bufferedString.sameString("bar")); } @Test public void test_lstrip() { final List<String> content = Arrays.asList( " empty left", "empty right ", "some string", "", "mystring", " xxx ", "cray tweet"); TextChunk sut = new TextChunk(content); sut.lstrip(); assertEquals("empty left", sut.at(0)); assertEquals("empty right ", sut.at(1)); assertEquals("some string", sut.at(2)); assertEquals("", sut.at(3)); assertEquals("mystring", sut.at(4)); assertEquals("xxx ", sut.at(5)); assertEquals("cray tweet", sut.at(6)); } @Test public void test_rstrip() { TextChunk sut = new TextChunk(Arrays.asList("", " ", " empty left", "empty right ", "some string", "mystring", " xxx ", "cray tweet")); sut.rstrip(); assertEquals("", sut.at(0)); assertEquals("", sut.at(1)); assertEquals(" empty left", sut.at(2)); assertEquals("empty right", sut.at(3)); assertEquals("some string", sut.at(4)); assertEquals("mystring", sut.at(5)); assertEquals(" xxx", sut.at(6)); assertEquals("cray tweet", sut.at(7)); } @Test public void test_rstrip_was_failing() { TextChunk sut = new TextChunk(Arrays.asList("")); sut.rstrip(); assertEquals("", sut.at(0)); } class TextChunk { CStrChunk cc; TextChunk(Iterable<String> content) { NewChunk nc = newChunk(); for (String s : content) nc.addStr(s); updateFrom(nc); } private NewChunk newChunk() { return new NewChunk(null, 0); } private void updateFrom(NewChunk nc) { cc = (CStrChunk) nc.compress(); } String at(int i) { return cc.atStr(new BufferedString(), i).toString(); } void lstrip() { updateFrom(cc.asciiLStrip(newChunk(), " ")); } void rstrip() { updateFrom(cc.asciiRStrip(newChunk(), " \0")); } } }