/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.beans.transform;
import static org.junit.Assert.assertArrayEquals;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.data.MockInputColumn;
import org.datacleaner.data.MockInputRow;
import org.datacleaner.test.MockOutputRowCollector;
import junit.framework.TestCase;
/**
 * Unit tests for {@link RegexParserTransformer}.
 *
 * <p>
 * Each test configures a transformer on a single mock string column named
 * "foobar", feeds it mock rows and checks the returned value arrays (and,
 * for the FIND_ALL mode, the rows pushed to the output row collector).
 */
public class RegexParserTransformerTest extends TestCase {

    /**
     * Checks output column naming and the parsed values for a pattern with
     * three capturing groups split over two alternatives. No mode is set
     * explicitly in this test.
     */
    public void testTransform() throws Exception {
        final MockInputColumn<String> inputColumn = newColumn();
        final RegexParserTransformer transformer = newTransformer(inputColumn, "(a+)(b+)|(c+)");

        final OutputColumns columns = transformer.getOutputColumns();
        final String[] expectedNames = { "foobar (matched part)", "foobar (group 1)", "foobar (group 2)",
                "foobar (group 3)" };
        assertEquals(4, columns.getColumnCount());
        for (int i = 0; i < expectedNames.length; i++) {
            assertEquals(expectedNames[i], columns.getColumnName(i));
        }

        // first alternative matches: groups 1 and 2 are filled, group 3 is null
        final String[] expectedFirstAlternative = { "aabb", "aa", "bb", null };
        assertArrayEquals(expectedFirstAlternative, parse(transformer, inputColumn, "aabb"));

        // second alternative matches: only group 3 is filled
        final String[] expectedSecondAlternative = { "cccc", null, null, "cccc" };
        assertArrayEquals(expectedSecondAlternative, parse(transformer, inputColumn, "cccc"));

        // nothing matches: every output value is null
        final String[] expectedNoMatch = { null, null, null, null };
        assertArrayEquals(expectedNoMatch, parse(transformer, inputColumn, "dddd"));
    }

    /**
     * Checks FIND_FIRST mode with a "width x height" style expression that is
     * embedded in surrounding text.
     */
    public void testExpressionForDimensions() throws Exception {
        final MockInputColumn<String> inputColumn = newColumn();
        final RegexParserTransformer transformer =
                newTransformer(inputColumn, "(\\d+\\,?\\d+?)(x|X)([0-9]+\\,?\\d+?)");
        transformer.mode = RegexParserTransformer.Mode.FIND_FIRST;

        final String lowerCaseResult = Arrays.toString(parse(transformer, inputColumn, "foo 12x34 bar"));
        assertEquals("[12x34, 12, x, 34]", lowerCaseResult);

        final String upperCaseResult = Arrays.toString(parse(transformer, inputColumn, "foo 12X34 bar"));
        assertEquals("[12X34, 12, X, 34]", upperCaseResult);

        final String commaResult = Arrays.toString(parse(transformer, inputColumn, "foo 1,2x3,4 bar"));
        assertEquals("[1,2x3,4, 1,2, x, 3,4]", commaResult);
    }

    /**
     * Checks FIND_ALL mode on a value containing three matches: the first
     * match is the return value of the transform call, the remaining two are
     * expected in the output row collector, in order.
     */
    public void testFindAllMultiMatch() throws Exception {
        final MockInputColumn<String> inputColumn = newColumn();
        final MockOutputRowCollector collector = new MockOutputRowCollector();
        final RegexParserTransformer transformer =
                newTransformer(inputColumn, "(\\d+\\,?\\d*)(x|X)(\\d+\\,?\\d*)");
        transformer.mode = RegexParserTransformer.Mode.FIND_ALL;
        transformer.outputRowCollector = collector;

        final String[] firstMatch = parse(transformer, inputColumn, "foo 12x34 bar 56x78 baz 9x10 ");
        assertEquals("[12x34, 12, x, 34]", Arrays.toString(firstMatch));

        final List<Object[]> collectedRows = collector.getOutput();
        assertEquals(2, collectedRows.size());
        assertEquals("[56x78, 56, x, 78]", Arrays.toString(collectedRows.get(0)));
        assertEquals("[9x10, 9, x, 10]", Arrays.toString(collectedRows.get(1)));
    }

    /**
     * Checks FIND_ALL mode on a value without any match: the transform call
     * yields an all-null array and nothing is pushed to the collector.
     */
    public void testFindAllNoMatch() throws Exception {
        final MockInputColumn<String> inputColumn = newColumn();
        final MockOutputRowCollector collector = new MockOutputRowCollector();
        final RegexParserTransformer transformer =
                newTransformer(inputColumn, "(\\d+\\,?\\d*)(x|X)(\\d+\\,?\\d*)");
        transformer.mode = RegexParserTransformer.Mode.FIND_ALL;
        transformer.outputRowCollector = collector;

        final String[] result = parse(transformer, inputColumn, "foo");
        assertEquals("[null, null, null, null]", Arrays.toString(result));

        final List<Object[]> collectedRows = collector.getOutput();
        assertEquals(0, collectedRows.size());
    }

    /** Creates the mock string input column shared by all tests. */
    private static MockInputColumn<String> newColumn() {
        return new MockInputColumn<String>("foobar", String.class);
    }

    /**
     * Creates a transformer bound to the given column and regular expression.
     * Mode and output row collector are deliberately left untouched so each
     * test sets only what it needs.
     */
    private static RegexParserTransformer newTransformer(final MockInputColumn<String> column,
            final String regex) {
        final RegexParserTransformer transformer = new RegexParserTransformer();
        transformer.column = column;
        transformer.pattern = Pattern.compile(regex);
        return transformer;
    }

    /** Runs the transformer on a single-column mock row holding the given value. */
    private static String[] parse(final RegexParserTransformer transformer, final MockInputColumn<String> column,
            final String value) throws Exception {
        return transformer.transform(new MockInputRow().put(column, value));
    }
}