/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.test.full.scenarios; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.apache.metamodel.DataContext; import org.apache.metamodel.MetaModelHelper; import org.apache.metamodel.data.Row; import org.apache.metamodel.schema.Table; import org.datacleaner.api.AnalyzerResult; import org.datacleaner.api.InputRow; import org.datacleaner.configuration.DataCleanerConfiguration; import org.datacleaner.configuration.DataCleanerConfigurationImpl; import org.datacleaner.connection.Datastore; import org.datacleaner.connection.DatastoreConnection; import org.datacleaner.job.AnalysisJob; import org.datacleaner.job.ComponentJob; import org.datacleaner.job.CompoundComponentRequirement; import org.datacleaner.job.FilterOutcome; import org.datacleaner.job.builder.AnalysisJobBuilder; import org.datacleaner.job.builder.AnalyzerComponentBuilder; import org.datacleaner.job.builder.FilterComponentBuilder; import org.datacleaner.job.builder.TransformerComponentBuilder; import org.datacleaner.job.runner.AnalysisResultFuture; import org.datacleaner.job.runner.AnalysisRunner; import org.datacleaner.job.runner.AnalysisRunnerImpl; import org.datacleaner.result.ListResult; import org.datacleaner.test.MockAnalyzer; import org.datacleaner.test.MockTransformer; import org.datacleaner.test.TestHelper; import org.datacleaner.test.mock.EvenOddFilter; import org.datacleaner.test.mock.EvenOddFilter.Category; import junit.framework.TestCase; /** * A test case that simulates a "cleansing street", ie. a series of (two) * transformations with filters inbetween. There are two paths: A successful * "street" and a "bucket" for all the rejections. The component requirement for * the bucket is a {@link CompoundComponentRequirement} consisting of all the * non-successful filter outcomes.. */ public class CompoundComponentRequirementRejectionStreetTest extends TestCase { private final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb"); private final DataCleanerConfiguration configuration = new DataCleanerConfigurationImpl().withDatastores(datastore); private final int recordsInTable = 214; public void testScenario() throws Throwable { final AnalysisJob job; try (DatastoreConnection connection = datastore.openConnection()) { final DataContext dataContext = connection.getDataContext(); final Table table = dataContext.getTableByQualifiedLabel("PUBLIC.CUSTOMERS"); final Row row = MetaModelHelper .executeSingleRowQuery(dataContext, dataContext.query().from(table).selectCount().toQuery()); assertEquals(recordsInTable, ((Number) row.getValue(0)).intValue()); try (AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration)) { jobBuilder.setDatastore(datastore); jobBuilder.addSourceColumns("CUSTOMERS.CONTACTFIRSTNAME"); jobBuilder.addSourceColumns("CUSTOMERS.CONTACTLASTNAME"); // although not semantically correct, we pretend that EVEN is // the // success-state in our cleansing street and that ODD is the // reject-state. final Category valid = org.datacleaner.test.mock.EvenOddFilter.Category.EVEN; final Category invalid = org.datacleaner.test.mock.EvenOddFilter.Category.ODD; final TransformerComponentBuilder<MockTransformer> trans1 = jobBuilder.addTransformer(MockTransformer.class); trans1.setName("trans1"); trans1.addInputColumn(jobBuilder.getSourceColumns().get(0)); final FilterComponentBuilder<EvenOddFilter, org.datacleaner.test.mock.EvenOddFilter.Category> filter1 = jobBuilder.addFilter(EvenOddFilter.class); filter1.setName("filter1"); filter1.addInputColumn(trans1.getOutputColumns().get(0)); final TransformerComponentBuilder<MockTransformer> trans2 = jobBuilder.addTransformer(MockTransformer.class); trans2.setName("trans2"); trans2.addInputColumn(jobBuilder.getSourceColumns().get(1)); trans2.setRequirement(filter1, valid); final FilterComponentBuilder<EvenOddFilter, org.datacleaner.test.mock.EvenOddFilter.Category> filter2 = jobBuilder.addFilter(EvenOddFilter.class); filter2.setName("filter2"); filter2.addInputColumn(trans2.getOutputColumns().get(0)); final AnalyzerComponentBuilder<MockAnalyzer> analyzer1 = jobBuilder.addAnalyzer(MockAnalyzer.class); analyzer1.setName("success"); analyzer1.addInputColumn(jobBuilder.getSourceColumns().get(0)); analyzer1.addInputColumn(jobBuilder.getSourceColumns().get(1)); analyzer1.addInputColumn(trans1.getOutputColumns().get(0)); analyzer1.addInputColumn(trans2.getOutputColumns().get(0)); analyzer1.setRequirement(filter2, valid); final FilterOutcome invalid1 = filter1.getFilterOutcome(invalid); final FilterOutcome invalid2 = filter2.getFilterOutcome(invalid); final AnalyzerComponentBuilder<MockAnalyzer> analyzer2 = jobBuilder.addAnalyzer(MockAnalyzer.class); analyzer2.setName("rejects"); analyzer2.addInputColumn(jobBuilder.getSourceColumns().get(0)); analyzer2.addInputColumn(jobBuilder.getSourceColumns().get(1)); analyzer2.setComponentRequirement(new CompoundComponentRequirement(invalid1, invalid2)); job = jobBuilder.toAnalysisJob(); } } final AnalysisRunner runner = new AnalysisRunnerImpl(configuration); final AnalysisResultFuture resultFuture = runner.run(job); resultFuture.await(); if (resultFuture.isErrornous()) { throw resultFuture.getErrors().get(0); } int recordsInResults = 0; final Map<ComponentJob, AnalyzerResult> map = resultFuture.getResultMap(); for (final Entry<ComponentJob, AnalyzerResult> entry : map.entrySet()) { final ComponentJob componentJob = entry.getKey(); @SuppressWarnings("unchecked") final ListResult<InputRow> result = (ListResult<InputRow>) entry.getValue(); final List<InputRow> values = result.getValues(); final int recordsInResult = values.size(); recordsInResults += recordsInResult; switch (componentJob.getName()) { case "success": case "rejects": // expected states assertTrue("Expected records in all buckets of the cleansing street, but did not find any in: " + componentJob, recordsInResult > 0); assertTrue("Expected records to be distributed across buckets, but found all in: " + componentJob, recordsInResult != recordsInTable); break; default: fail("Unexpected component in result map: " + componentJob); } } assertEquals(recordsInTable, recordsInResults); } }