/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.job.runner; import org.datacleaner.beans.NumberAnalyzer; import org.datacleaner.beans.filter.EqualsFilter; import org.datacleaner.beans.filter.NullCheckFilter; import org.datacleaner.components.maxrows.MaxRowsFilter; import org.datacleaner.configuration.DataCleanerConfiguration; import org.datacleaner.configuration.DataCleanerConfigurationImpl; import org.datacleaner.connection.Datastore; import org.datacleaner.connection.DatastoreCatalogImpl; import org.datacleaner.job.AnalysisJob; import org.datacleaner.job.builder.AnalysisJobBuilder; import org.datacleaner.job.builder.FilterComponentBuilder; import org.datacleaner.job.concurrent.TaskListener; import org.datacleaner.job.concurrent.TaskRunner; import org.datacleaner.job.tasks.Task; import org.datacleaner.lifecycle.LifeCycleHelper; import org.datacleaner.test.TestHelper; import org.junit.Assert; import junit.framework.TestCase; public class RowProcessingMetricsImplTest extends TestCase { private Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb"); private DataCleanerConfiguration configuration = new DataCleanerConfigurationImpl().withDatastoreCatalog(new DatastoreCatalogImpl(datastore)); private AnalysisJob job; public void testGetExpectedRowCountNoFilter() throws Exception { final AnalysisJobBuilder ajb = createAnalysisJobBuilder(); job = ajb.toAnalysisJob(); assertEquals(23, getExpectedRowCount()); } private AnalysisJobBuilder createAnalysisJobBuilder() { final AnalysisJobBuilder ajb = new AnalysisJobBuilder(configuration); ajb.setDatastore(datastore); ajb.addSourceColumns("PUBLIC.EMPLOYEES.EMPLOYEENUMBER"); ajb.addAnalyzer(NumberAnalyzer.class).addInputColumns(ajb.getSourceColumns()); return ajb; } public void testGetExpectedRowCountMaxRows() throws Exception { final AnalysisJobBuilder ajb = createAnalysisJobBuilder(); final FilterComponentBuilder<MaxRowsFilter, MaxRowsFilter.Category> filter = ajb.addFilter(MaxRowsFilter.class); filter.getComponentInstance().setMaxRows(10); ajb.setDefaultRequirement(filter.getFilterOutcome(MaxRowsFilter.Category.VALID)); job = ajb.toAnalysisJob(); assertEquals(10, getExpectedRowCount()); } public void testGetExpectedRowCountEquals() throws Exception { final AnalysisJobBuilder ajb = createAnalysisJobBuilder(); final FilterComponentBuilder<EqualsFilter, EqualsFilter.Category> filter = ajb.addFilter(EqualsFilter.class); filter.addInputColumns(ajb.getSourceColumns()); filter.getComponentInstance().setValues(new String[] { "1002", "1165" }); ajb.setDefaultRequirement(filter.getFilterOutcome(EqualsFilter.Category.EQUALS)); job = ajb.toAnalysisJob(); assertEquals(2, getExpectedRowCount()); } public void testGetExpectedRowCountMultipleFilters() throws Exception { final AnalysisJobBuilder ajb = createAnalysisJobBuilder(); // there's 21 records that are not 1056 or 1165 final FilterComponentBuilder<EqualsFilter, EqualsFilter.Category> filter1 = ajb.addFilter(EqualsFilter.class); filter1.addInputColumns(ajb.getSourceColumns()); filter1.getComponentInstance().setValues(new String[] { "1056", "1165" }); // there's 1 record which has a reportsto value of null. final FilterComponentBuilder<NullCheckFilter, NullCheckFilter.NullCheckCategory> filter2 = ajb.addFilter(NullCheckFilter.class); ajb.addSourceColumns("PUBLIC.EMPLOYEES.REPORTSTO"); filter2.addInputColumn(ajb.getSourceColumnByName("reportsto")); filter2.getComponentInstance().setConsiderEmptyStringAsNull(true); filter2.setRequirement(filter1.getFilterOutcome(EqualsFilter.Category.NOT_EQUALS)); ajb.getAnalyzerComponentBuilders().get(0) .setRequirement(filter2.getFilterOutcome(NullCheckFilter.NullCheckCategory.NOT_NULL)); job = ajb.toAnalysisJob(); assertEquals(21 - 1, getExpectedRowCount()); } private int getExpectedRowCount() { final AnalysisListener analysisListener = new InfoLoggingAnalysisListener(); final TaskRunner taskRunner = configuration.getEnvironment().getTaskRunner(); final ErrorAwareAnalysisListener errorListener = new ErrorAwareAnalysisListener(); final LifeCycleHelper lifeCycleHelper = new LifeCycleHelper(configuration, job, true); final RowProcessingPublishers publishers = new RowProcessingPublishers(job, analysisListener, errorListener, taskRunner, lifeCycleHelper); final RowProcessingPublisher publisher = publishers.getRowProcessingPublisher(publishers.getStreams()[0]); publisher.initializeConsumers(new TaskListener() { @Override public void onError(final Task arg0, final Throwable t) { Assert.fail(t.getMessage()); } @Override public void onComplete(final Task arg0) { } @Override public void onBegin(final Task arg0) { } }); final RowProcessingMetricsImpl metrics = new RowProcessingMetricsImpl(publishers, publisher); return metrics.getExpectedRows(); } }