package water.rapids.ast.prims.string; import org.junit.BeforeClass; import org.junit.Test; import water.TestUtil; import water.fvec.Frame; import water.fvec.TestFrameBuilder; import water.fvec.Vec; import water.rapids.Rapids; import water.rapids.vals.ValFrame; public class AstTokenizeTest extends TestUtil { @BeforeClass static public void setup() { stall_till_cloudsize(1); } @Test public void testTokenize() { Frame fr = makeTestFrame(); Vec expected = svec( "Foot", "on", "the", "pedal", "Never", "ever", "false", "metal", null, "Engine", "running", "hotter", "than", "a", "boiling", "kettle", "My", "job", "ain't", "a", "job", null, "It's", "a", "damn", "good", "time", "City", "to", "city", "I'm", "running", "my", "rhymes", null); Frame res = null; try { ValFrame val = (ValFrame) Rapids.exec("(tmp= py_1 (tokenize data \"\\\\s\"))"); res = val.getFrame(); Vec actual = res.anyVec(); assertStringVecEquals(expected, actual); } finally { fr.remove(); expected.remove(); if (res != null) res.remove(); } } private Frame makeTestFrame() { return new TestFrameBuilder() .withName("data") .withColNames("ColA", "ColB") .withVecTypes(Vec.T_STR, Vec.T_STR) .withDataForCol(0, ar("Foot on the pedal", "Engine running hotter than a boiling kettle", "It's a damn good time")) .withDataForCol(1, ar("Never ever false metal", "My job ain't a job", "City to city I'm running my rhymes")) .withChunkLayout(2, 1) .build(); } }