package water.rapids.ast.prims.string; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import water.TestUtil; import water.fvec.Frame; import water.fvec.TestFrameBuilder; import water.fvec.Vec; import water.rapids.Rapids; import water.rapids.Val; import java.util.Arrays; import static org.junit.Assert.*; @RunWith(Parameterized.class) public class AstGrepTest extends TestUtil { @Parameterized.Parameters(name= "{index}: {4}") public static Iterable<? extends Object> data() { return Arrays.asList( new Object[]{"[B-D]", 0, 1, 0, "insensitive,strings"}, new Object[]{"[b-d]", 1, 1, 0, "sensitive,strings"}, new Object[]{"[B-D]", 0, 2, 0, "insensitive,categoricals"}, new Object[]{"[b-d]", 1, 2, 0, "sensitive,categoricals"}, new Object[]{"[B-D]", 0, 1, 1, "insensitive,strings,invert"}, new Object[]{"[b-d]", 1, 1, 1, "sensitive,strings,invert"}, new Object[]{"[B-D]", 0, 2, 1, "insensitive,categoricals,invert"}, new Object[]{"[b-d]", 1, 2, 1, "sensitive,categoricals,invert"} ); } @BeforeClass static public void setup() { stall_till_cloudsize(1); } @Parameterized.Parameter( ) public String _regex; @Parameterized.Parameter(1) public int _ignoreCase; @Parameterized.Parameter(2) public int _col; @Parameterized.Parameter(3) public int _invert; @Parameterized.Parameter(4) public String _description; // not used @Test public void testGrep() throws Exception { final Frame data = makeTestFrame(); Frame output = null; try { String rapids = "(tmp= tst (grep (cols data [" + _col + "]) \"" + _regex + "\" " + _ignoreCase + " " + _invert + " 0))"; Val val = Rapids.exec(rapids); output = val.getFrame(); int length = (int) output.vec(0).length(); int lastPos = -1; for (int i = 0; i < length; i++) { int pos = (int) output.vec(0).at8(i); for (int j = lastPos + 1; j < pos; j++) { assertEquals(0L, data.vec(0).at8(j)); } assertEquals(1L, data.vec(0).at8(pos)); lastPos = pos; } } finally { data.delete(); if (output != null) { output.delete(); } } } @Test public void testGrep_outputLogical() throws Exception { final Frame data = makeTestFrame(); Frame output = null; try { String rapids = "(tmp= tst (grep (cols data [" + _col + "]) \"" + _regex + "\" " + _ignoreCase + " " + _invert + " 1))"; Val val = Rapids.exec(rapids); output = val.getFrame(); assertVecEquals(data.vec(0), output.vec(0), 0.0); } finally { data.delete(); if (output != null) { output.delete(); } } } private Frame makeTestFrame() { int len = 'Z' - 'A'; double numData[] = new double[len]; String[] strData = new String[len]; String[] catData = new String[len]; for (int i = 0; i < len; i++) { char c = (char) ('A' + i); numData[i] = ((c >= 'B' && c <= 'D' ? 1 : 0) + _invert) % 2; strData[i] = Character.toString(c); catData[i] = Character.toString(c); } return new TestFrameBuilder() .withName("data") .withColNames("Expected", "Str", "Cat") .withVecTypes(Vec.T_NUM, Vec.T_STR, Vec.T_CAT) .withDataForCol(0, numData) .withDataForCol(1, strData) .withDataForCol(2, catData) .withChunkLayout(10, 2, len - 12) .build(); } }