/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.beans.referentialintegrity;

import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.components.maxrows.MaxRowsFilter;
import org.datacleaner.components.maxrows.MaxRowsFilter.Category;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreCatalogImpl;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.builder.FilterComponentBuilder;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunnerImpl;
import org.datacleaner.test.TestHelper;
import org.junit.Test;

/**
 * Tests that {@link ReferentialIntegrityAnalyzerReducer} combines several
 * partial {@link ReferentialIntegrityAnalyzerResult}s into a single result.
 *
 * Each partial result is produced by running a {@link ReferentialIntegrityAnalyzer}
 * behind a {@link MaxRowsFilter} on the "customers" table of the "orderdb"
 * sample datastore, with a different "First row" / "Max rows" configuration
 * per job.
 */
public class ReferentialIntegrityAnalyzerReducerTest {

    /** Name of the source column that is used as the foreign key in every job. */
    private static final String FOREIGN_KEY_COLUMN = "SALESREPEMPLOYEENUMBER";

    /**
     * Runs three jobs with different row windows, verifies each partial
     * result, reduces them and verifies that the reduced result holds the
     * sample rows of all three partial results in the order they were added.
     *
     * @throws Throwable
     *             the first error reported by a failed analysis job
     */
    @Test
    public void testVanilla() throws Throwable {
        final AnalysisJobBuilder jobBuilder1 = getAnalysisJobBuilder();
        final AnalysisJobBuilder jobBuilder2 = getAnalysisJobBuilder();
        final AnalysisJobBuilder jobBuilder3 = getAnalysisJobBuilder();

        final ReferentialIntegrityAnalyzerResult partialResult1 = getPartialResult(jobBuilder1, 1, 100);
        final ReferentialIntegrityAnalyzerResult partialResult2 = getPartialResult(jobBuilder2, 101, 1);
        final ReferentialIntegrityAnalyzerResult partialResult3 = getPartialResult(jobBuilder3, 102, null);

        // Assert what we have in the first partial result
        assertSampleRows(partialResult1, jobBuilder1.getSourceColumnByName(FOREIGN_KEY_COLUMN), -1);

        // Assert what we have in the second partial result
        assertSampleRows(partialResult2, jobBuilder2.getSourceColumnByName(FOREIGN_KEY_COLUMN), -1);

        // Assert what we have in the third partial result
        assertSampleRows(partialResult3, jobBuilder3.getSourceColumnByName(FOREIGN_KEY_COLUMN), 0);

        final Collection<ReferentialIntegrityAnalyzerResult> partialResults = new ArrayList<>();
        partialResults.add(partialResult1);
        partialResults.add(partialResult2);
        partialResults.add(partialResult3);

        final ReferentialIntegrityAnalyzerReducer reducer = new ReferentialIntegrityAnalyzerReducer();
        final ReferentialIntegrityAnalyzerResult reducedResult = reducer.reduce(partialResults);

        // Assert what we have in the reduced result
        assertSampleRows(reducedResult, jobBuilder1.getSourceColumnByName(FOREIGN_KEY_COLUMN), -1, -1, 0);
    }

    /**
     * Asserts that a result reports exactly one annotated row and one sample
     * row per expected value, and that the sample rows carry the expected
     * values (in order) in the given column.
     *
     * @param result
     *            the result to inspect
     * @param column
     *            the column whose value is read from each sample row
     * @param expectedValues
     *            the expected column values, one per sample row, in order
     */
    private static void assertSampleRows(final ReferentialIntegrityAnalyzerResult result,
            final InputColumn<?> column, final Object... expectedValues) {
        assertEquals(expectedValues.length, result.getAnnotatedRowCount());

        final List<InputRow> rows = result.getSampleRows();
        assertEquals(expectedValues.length, rows.size());

        for (int i = 0; i < expectedValues.length; i++) {
            assertEquals(expectedValues[i], rows.get(i).getValue(column));
        }
    }

    /**
     * Creates a job builder on the "orderdb" sample datastore with the
     * CUSTOMERNUMBER and SALESREPEMPLOYEENUMBER columns of the "customers"
     * table as source columns.
     *
     * @return a new job builder; {@link #getPartialResult} closes it
     */
    private AnalysisJobBuilder getAnalysisJobBuilder() {
        final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb");
        final DataCleanerConfigurationImpl configuration =
                new DataCleanerConfigurationImpl().withDatastoreCatalog(new DatastoreCatalogImpl(datastore));

        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        jobBuilder.setDatastore(datastore);
        jobBuilder.addSourceColumns("customers.CUSTOMERNUMBER");
        jobBuilder.addSourceColumns("customers.SALESREPEMPLOYEENUMBER");
        return jobBuilder;
    }

    /**
     * Adds a {@link MaxRowsFilter} and a {@link ReferentialIntegrityAnalyzer}
     * (requiring the filter's VALID outcome) to the job builder, runs the
     * resulting job and returns the analyzer's result.
     *
     * The job builder is always closed once the job has been built (or has
     * failed to build), before the job is run.
     *
     * @param jobBuilder
     *            the job builder to configure; closed by this method
     * @param firstRow
     *            value for the filter's "First row" property, or null to
     *            leave the property untouched
     * @param maxRows
     *            value for the filter's "Max rows" property, or null to
     *            leave the property untouched
     * @return the first {@link ReferentialIntegrityAnalyzerResult} of the run
     * @throws Throwable
     *             the first error reported by the result future, if the run
     *             failed
     */
    private ReferentialIntegrityAnalyzerResult getPartialResult(final AnalysisJobBuilder jobBuilder,
            final Integer firstRow, final Integer maxRows) throws Throwable {
        final AnalysisJob analysisJob;
        try {
            final InputColumn<?> salesRepEmployeeNumber = jobBuilder.getSourceColumnByName(FOREIGN_KEY_COLUMN);

            final FilterComponentBuilder<MaxRowsFilter, Category> maxRowsFilter =
                    jobBuilder.addFilter(MaxRowsFilter.class);
            maxRowsFilter.addInputColumn(salesRepEmployeeNumber);
            if (firstRow != null) {
                maxRowsFilter.setConfiguredProperty("First row", firstRow);
            }
            if (maxRows != null) {
                maxRowsFilter.setConfiguredProperty("Max rows", maxRows);
            }

            final AnalyzerComponentBuilder<ReferentialIntegrityAnalyzer> referentialIntegrityAnalyzer =
                    jobBuilder.addAnalyzer(ReferentialIntegrityAnalyzer.class);
            referentialIntegrityAnalyzer.setRequirement(
                    maxRowsFilter.getFilterOutcome(MaxRowsFilter.Category.VALID));

            final ReferentialIntegrityAnalyzer referentialIntegrity =
                    referentialIntegrityAnalyzer.getComponentInstance();
            referentialIntegrity.foreignKey = salesRepEmployeeNumber;
            referentialIntegrity.cacheLookups = true;
            referentialIntegrity.datastore = jobBuilder.getDatastore();
            referentialIntegrity.schemaName = "PUBLIC";
            referentialIntegrity.tableName = "employees";
            referentialIntegrity.columnName = "EMPLOYEENUMBER";

            analysisJob = jobBuilder.toAnalysisJob();
        } finally {
            // Close the builder even when configuring or building the job
            // fails, so that a broken setup does not leave it open.
            jobBuilder.close();
        }

        final AnalysisResultFuture resultFuture =
                new AnalysisRunnerImpl(jobBuilder.getConfiguration()).run(analysisJob);
        resultFuture.await();

        if (resultFuture.isErrornous()) {
            throw resultFuture.getErrors().get(0);
        }

        return resultFuture.getResults(ReferentialIntegrityAnalyzerResult.class).get(0);
    }
}