/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.job.builder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Set;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.schema.MutableColumn;
import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.HasNameMapper;
import org.datacleaner.api.InputColumn;
import org.datacleaner.components.convert.ConvertToNumberTransformer;
import org.datacleaner.components.mock.TransformerMock;
import org.datacleaner.components.tablelookup.TableLookupTransformer;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.data.ConstantInputColumn;
import org.datacleaner.data.MockInputColumn;
import org.datacleaner.data.MutableInputColumn;
import org.datacleaner.descriptors.ConfiguredPropertyDescriptor;
import org.datacleaner.descriptors.Descriptors;
import org.datacleaner.descriptors.TransformerDescriptor;
import org.datacleaner.job.IdGenerator;
import org.datacleaner.job.PrefixedIdGenerator;
import org.datacleaner.util.InputColumnComparator;
import junit.framework.TestCase;
/**
 * Unit tests for {@link TransformerComponentBuilder}.
 *
 * Most tests run against an {@link AnalysisJobBuilder} that {@link #setUp()}
 * populates with two source columns: "fooInt" (INTEGER) and "fooStr" (VARCHAR).
 */
public class TransformerComponentBuilderTest extends TestCase {

    private DataCleanerConfiguration configuration;
    private AnalysisJobBuilder ajb;

    /**
     * Creates a fresh configuration and job builder for every test and
     * registers the "fooInt" and "fooStr" source columns on it.
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        configuration = new DataCleanerConfigurationImpl();
        ajb = new AnalysisJobBuilder(configuration);
        ajb.addSourceColumn(new MutableColumn("fooInt", ColumnType.INTEGER));
        ajb.addSourceColumn(new MutableColumn("fooStr", ColumnType.VARCHAR));
    }

    /**
     * Checks the names and sorted order of the output columns of two
     * transformers as their "Output column names" property and the column
     * names themselves are changed.
     */
    public void testGetOutputColumnsRetainingAndSorting() throws Exception {
        final TransformerComponentBuilder<?> tjb1 = ajb.addTransformer(TransformerMockForOutputColumnChanges.class);
        tjb1.addInputColumn(ajb.getSourceColumnByName("fooStr"));
        final TransformerComponentBuilder<?> tjb2 = ajb.addTransformer(TransformerMockForOutputColumnChanges.class);
        tjb2.addInputColumn(ajb.getSourceColumnByName("fooStr"));

        assertEquals("[foo, bar, foo, bar]", getSortedOutputColumns(tjb1, tjb2));

        // Column ordering is retained when the configuration changes the
        // output column names
        tjb1.setConfiguredProperty("Output column names", new String[] { "Hello", "There" });
        tjb2.setConfiguredProperty("Output column names", new String[] { "Big", "World" });
        assertEquals("[Hello, There, Big, World]", getSortedOutputColumns(tjb1, tjb2));

        // Column ordering is retained when the user changes the output
        // column names
        tjb1.getOutputColumns().get(0).setName("Howdy");
        tjb1.getOutputColumns().get(1).setName("There");
        tjb2.getOutputColumns().get(0).setName("Column3");
        tjb2.getOutputColumns().get(1).setName("Column4");
        assertEquals("[Howdy, There, Column3, Column4]", getSortedOutputColumns(tjb1, tjb2));

        // When the configuration changes the number of output columns, the
        // renamed first column ("Howdy") is expected to survive while the
        // remaining columns take the newly configured names
        tjb1.setConfiguredProperty("Output column names", new String[] { "Hello", "To", "You" });
        assertEquals("[Howdy, To, You, Column3, Column4]", getSortedOutputColumns(tjb1, tjb2));
    }

    /**
     * Collects the output columns of both builders, sorts them with an
     * {@link InputColumnComparator} and returns the resulting column names.
     *
     * @param tjb1 the builder whose output columns are added first
     * @param tjb2 the builder whose output columns are added second
     * @return the {@code toString()} of the sorted list of column names
     */
    private String getSortedOutputColumns(final TransformerComponentBuilder<?> tjb1,
            final TransformerComponentBuilder<?> tjb2) {
        final List<MutableInputColumn<?>> cols1 = tjb1.getOutputColumns();
        final List<MutableInputColumn<?>> cols2 = tjb2.getOutputColumns();

        final List<InputColumn<?>> list = new ArrayList<>();
        list.addAll(cols1);
        list.addAll(cols2);
        Collections.sort(list, new InputColumnComparator());

        final List<String> names = CollectionUtils.map(list, new HasNameMapper());
        return names.toString();
    }

    /**
     * Setting a String on the "Input" property must be rejected with an
     * {@link IllegalArgumentException} carrying the expected message.
     */
    public void testSetInvalidPropertyType() throws Exception {
        final TransformerComponentBuilder<TransformerMock> tjb = ajb.addTransformer(TransformerMock.class);
        try {
            tjb.setConfiguredProperty("Input", "hello");
            fail("Exception expected");
        } catch (final IllegalArgumentException e) {
            assertEquals("Invalid value type: java.lang.String, expected: org.datacleaner.api.InputColumn",
                    e.getMessage());
        }
    }

    /**
     * Walks a {@link TransformerMock} builder through unconfigured and
     * configured states and checks {@code isConfigured()} at each step,
     * including the exception thrown by {@code isConfigured(true)}.
     */
    public void testIsConfigured() throws Exception {
        final TransformerComponentBuilder<TransformerMock> tjb = ajb.addTransformer(TransformerMock.class);
        assertFalse(tjb.isConfigured());

        // an input column alone is not enough while "Some integer" is null
        tjb.setConfiguredProperty("Some integer", null);
        tjb.addInputColumn(ajb.getSourceColumns().get(1));
        assertFalse(tjb.isConfigured());

        try {
            tjb.isConfigured(true);
            // only reached when no exception was thrown
            fail("Exception expected");
        } catch (final UnconfiguredConfiguredPropertyException e) {
            assertEquals("Property 'Some integer' is not properly configured (TransformerComponentBuilder"
                    + "[transformer=Transformer mock,inputColumns=[MetaModelInputColumn[fooStr]]])", e.getMessage());
        }

        tjb.setConfiguredProperty("Some integer", 10);
        assertTrue(tjb.isConfigured());

        // removing the only input column makes the builder unconfigured again
        tjb.removeInputColumn(ajb.getSourceColumns().get(1));
        assertFalse(tjb.isConfigured());
    }

    /**
     * {@code clearInputColumns()} must remove all input columns from a
     * {@link TransformerMock} builder that holds two of them.
     */
    public void testClearInputColumnsArray() throws Exception {
        final TransformerComponentBuilder<TransformerMock> tjb = ajb.addTransformer(TransformerMock.class);
        tjb.addInputColumn(ajb.getSourceColumns().get(1));
        tjb.addInputColumn(new ConstantInputColumn("foo"));
        assertEquals(2, tjb.getInputColumns().size());

        tjb.clearInputColumns();
        assertEquals(0, tjb.getInputColumns().size());
    }

    /**
     * For {@link TableLookupTransformer} the single input property is expected
     * to be non-required and initially null; adding an input column must
     * populate it.
     */
    public void testAddNonRequiredColumn() throws Exception {
        final TransformerComponentBuilder<TableLookupTransformer> tjb =
                ajb.addTransformer(TableLookupTransformer.class);

        final Set<ConfiguredPropertyDescriptor> inputProperties =
                tjb.getDescriptor().getConfiguredPropertiesForInput(true);
        assertEquals(1, inputProperties.size());

        final ConfiguredPropertyDescriptor inputProperty = inputProperties.iterator().next();
        assertFalse(inputProperty.isRequired());
        assertNull(tjb.getConfiguredProperty(inputProperty));

        tjb.addInputColumn(ajb.getSourceColumns().get(0));
        assertNotNull(tjb.getConfiguredProperty(inputProperty));
    }

    /**
     * {@code clearInputColumns()} must remove the input column from a
     * {@code SingleInputColumnTransformer} builder.
     */
    public void testClearInputColumnsSingle() throws Exception {
        final TransformerComponentBuilder<SingleInputColumnTransformer> tjb =
                ajb.addTransformer(SingleInputColumnTransformer.class);
        tjb.addInputColumn(ajb.getSourceColumns().get(1));
        assertEquals(1, tjb.getInputColumns().size());

        tjb.clearInputColumns();
        assertEquals(0, tjb.getInputColumns().size());
    }

    /**
     * Checks how many of the two source columns the job builder reports as
     * available for different requested data types.
     */
    public void testGetAvailableInputColumns() throws Exception {
        assertEquals(2, ajb.getAvailableInputColumns(Object.class).size());
        assertEquals(2, ajb.getAvailableInputColumns((Class<?>) null).size());
        assertEquals(1, ajb.getAvailableInputColumns(String.class).size());
        assertEquals(0, ajb.getAvailableInputColumns(Date.class).size());
    }

    /**
     * Adding the first source column to a {@code SingleInputColumnTransformer}
     * builder must fail with the expected "Unsupported InputColumn type"
     * message and leave the builder unconfigured; adding the second source
     * column must succeed and configure it.
     */
    public void testInvalidInputColumnType() throws Exception {
        final TransformerComponentBuilder<SingleInputColumnTransformer> tjb =
                ajb.addTransformer(SingleInputColumnTransformer.class);
        assertEquals(0, tjb.getInputColumns().size());
        assertFalse(tjb.isConfigured());

        try {
            tjb.addInputColumn(ajb.getSourceColumns().get(0));
            fail("Exception expected");
        } catch (final IllegalArgumentException e) {
            assertEquals("Unsupported InputColumn type: class java.lang.Integer, expected: class java.lang.String",
                    e.getMessage());
        }
        assertFalse(tjb.isConfigured());

        tjb.addInputColumn(ajb.getSourceColumns().get(1));
        assertEquals(1, tjb.getInputColumns().size());
        assertTrue(tjb.isConfigured());
    }

    /**
     * A {@code SingleInputColumnTransformer} builder must report no output
     * columns before an input column is added, and two afterwards.
     */
    public void testNoOutputWhenNotConfigured() throws Exception {
        final TransformerComponentBuilder<SingleInputColumnTransformer> tjb =
                ajb.addTransformer(SingleInputColumnTransformer.class);

        // not yet configured
        assertEquals(0, tjb.getOutputColumns().size());

        tjb.addInputColumn(new MockInputColumn<>("email", String.class));
        assertEquals(2, tjb.getOutputColumns().size());
    }

    /**
     * Sets the input directly on the component instance obtained from the
     * builder and expects the builder to report itself as configured and to
     * expose that input through its configured properties.
     */
    public void testConfigureByConfigurableBean() throws Exception {
        final IdGenerator idGenerator = new PrefixedIdGenerator("");

        final TransformerDescriptor<ConvertToNumberTransformer> descriptor =
                Descriptors.ofTransformer(ConvertToNumberTransformer.class);
        final TransformerComponentBuilder<ConvertToNumberTransformer> builder =
                new TransformerComponentBuilder<>(new AnalysisJobBuilder(null), descriptor, idGenerator);
        assertFalse(builder.isConfigured());

        final ConvertToNumberTransformer configurableBean = builder.getComponentInstance();
        final InputColumn<String> input = new MockInputColumn<>("foo", String.class);
        configurableBean.setInput(input);

        assertTrue(builder.isConfigured(true));

        final ConfiguredPropertyDescriptor propertyDescriptor =
                descriptor.getConfiguredPropertiesForInput().iterator().next();
        final InputColumn<?>[] value = (InputColumn<?>[]) builder.getConfiguredProperties().get(propertyDescriptor);
        assertEquals("[MockInputColumn[name=foo]]", Arrays.toString(value));
    }

    /**
     * Checks the ids and names of the output columns of a
     * {@link TransformerMock} builder while input columns are removed, re-added
     * and finally set as a whole, with the first output column renamed along
     * the way.
     */
    public void testReplaceAutomaticOutputColumnNames() throws Exception {
        final IdGenerator idGenerator = new PrefixedIdGenerator("");

        final TransformerDescriptor<TransformerMock> descriptor = Descriptors.ofTransformer(TransformerMock.class);
        final TransformerComponentBuilder<TransformerMock> builder =
                new TransformerComponentBuilder<>(new AnalysisJobBuilder(new DataCleanerConfigurationImpl()),
                        descriptor, idGenerator);

        final MockInputColumn<String> colA = new MockInputColumn<>("A", String.class);
        final MockInputColumn<String> colB = new MockInputColumn<>("B", String.class);
        final MockInputColumn<String> colC = new MockInputColumn<>("C", String.class);
        builder.addInputColumn(colA);
        builder.addInputColumn(colB);
        builder.addInputColumn(colC);

        // three inputs are expected to yield three automatically named outputs
        List<MutableInputColumn<?>> outputColumns = builder.getOutputColumns();
        assertEquals(3, outputColumns.size());
        assertEquals("[TransformedInputColumn[id=trans-0001-0002,name=Transformer mock (1)], "
                + "TransformedInputColumn[id=trans-0001-0003,name=Transformer mock (2)], "
                + "TransformedInputColumn[id=trans-0001-0004,name=Transformer mock (3)]]", outputColumns.toString());

        // remove an input, then rename the first output column through the
        // list that was fetched before the removal
        builder.removeInputColumn(colB);
        outputColumns.get(0).setName("Foo A");

        outputColumns = builder.getOutputColumns();
        assertEquals(2, outputColumns.size());
        assertEquals("[TransformedInputColumn[id=trans-0001-0002,name=Foo A], "
                + "TransformedInputColumn[id=trans-0001-0003,name=Transformer mock (2)]]", outputColumns.toString());

        // re-adding the input is expected to produce a third output column
        // with a new id (0005) while the rename of the first one is kept
        builder.addInputColumn(colB);
        outputColumns = builder.getOutputColumns();
        assertEquals(3, outputColumns.size());
        assertEquals("[TransformedInputColumn[id=trans-0001-0002,name=Foo A], "
                + "TransformedInputColumn[id=trans-0001-0003,name=Transformer mock (2)], "
                + "TransformedInputColumn[id=trans-0001-0005,name=Transformer mock (3)]]", outputColumns.toString());

        // setting the whole input array at once must not change the outputs
        final ConfiguredPropertyDescriptor inputColumnProperty =
                descriptor.getConfiguredPropertiesForInput().iterator().next();
        builder.setConfiguredProperty(inputColumnProperty, new InputColumn[] { colA, colB, colC });
        outputColumns = builder.getOutputColumns();
        assertEquals(3, outputColumns.size());
        assertEquals("[TransformedInputColumn[id=trans-0001-0002,name=Foo A], "
                + "TransformedInputColumn[id=trans-0001-0003,name=Transformer mock (2)], "
                + "TransformedInputColumn[id=trans-0001-0005,name=Transformer mock (3)]]", outputColumns.toString());

        // the automatically generated name stays available after the rename
        assertEquals("Transformer mock (1)", outputColumns.get(0).getInitialName());
    }
}