/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.test.full.scenarios;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.datacleaner.api.AnalyzerResult;
import org.datacleaner.beans.valuedist.ValueDistributionAnalyzerResult;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.configuration.DataCleanerEnvironmentImpl;
import org.datacleaner.connection.DatastoreCatalog;
import org.datacleaner.connection.DatastoreCatalogImpl;
import org.datacleaner.descriptors.ClasspathScanDescriptorProvider;
import org.datacleaner.descriptors.DescriptorProvider;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.JaxbJobReader;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunner;
import org.datacleaner.job.runner.AnalysisRunnerImpl;
import org.datacleaner.result.CrosstabResult;
import org.datacleaner.result.renderer.CrosstabTextRenderer;
import org.datacleaner.test.TestHelper;
import junit.framework.TestCase;
public class AnalyzerJobPartitioningTest extends TestCase {
public void testScenario() throws Exception {
final DatastoreCatalog datastoreCatalog =
new DatastoreCatalogImpl(TestHelper.createSampleDatabaseDatastore("my database"));
final DescriptorProvider descriptorProvider =
new ClasspathScanDescriptorProvider().scanPackage("org.datacleaner.beans", true);
final DataCleanerConfiguration conf = new DataCleanerConfigurationImpl().withDatastoreCatalog(datastoreCatalog)
.withEnvironment(new DataCleanerEnvironmentImpl().withDescriptorProvider(descriptorProvider));
final AnalysisRunner runner = new AnalysisRunnerImpl(conf);
final AnalysisJobBuilder jobBuilder =
new JaxbJobReader(conf).create(new File("src/test/resources/example-job-partitioning.xml"));
final AnalysisJob analysisJob = jobBuilder.toAnalysisJob();
assertEquals(6, analysisJob.getAnalyzerJobs().size());
final AnalysisResultFuture resultFuture = runner.run(analysisJob);
assertTrue(resultFuture.isSuccessful());
final List<AnalyzerResult> results = resultFuture.getResults();
int vdResults = 0;
final List<CrosstabResult> saResults = new ArrayList<>();
for (final AnalyzerResult analyzerResult : results) {
if (analyzerResult instanceof ValueDistributionAnalyzerResult) {
vdResults++;
} else if (analyzerResult instanceof CrosstabResult) {
saResults.add((CrosstabResult) analyzerResult);
} else {
fail("Unexpected result: " + analyzerResult);
}
}
assertEquals(4, vdResults);
assertEquals(2, saResults.size());
final int dimensionIndex = saResults.get(0).getCrosstab().getDimensionIndex("Column");
Collections.sort(saResults, (o1, o2) -> {
final int count1 = o1.getCrosstab().getDimension(dimensionIndex).getCategoryCount();
final int count2 = o2.getCrosstab().getDimension(dimensionIndex).getCategoryCount();
return count1 - count2;
});
final String[] resultLines1 = new CrosstabTextRenderer().render(saResults.get(0)).split("\n");
assertEquals(" CUSTOMERNAME ", resultLines1[0]);
assertEquals("Row count 214 ", resultLines1[1]);
final String[] resultLines2 = new CrosstabTextRenderer().render(saResults.get(1)).split("\n");
assertEquals(" FIRSTNAME LASTNAME EMAIL ", resultLines2[0]);
assertEquals("Row count 23 23 23 ", resultLines2[1]);
}
}