/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.beans;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.metamodel.util.FileResource;
import org.datacleaner.api.InputColumn;
import org.datacleaner.components.convert.ConvertToBooleanTransformer;
import org.datacleaner.components.maxrows.MaxRowsFilter;
import org.datacleaner.components.maxrows.MaxRowsFilter.Category;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreCatalogImpl;
import org.datacleaner.data.MutableInputColumn;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.FilterOutcome;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.builder.FilterComponentBuilder;
import org.datacleaner.job.builder.TransformerComponentBuilder;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunnerImpl;
import org.datacleaner.result.Crosstab;
import org.datacleaner.result.renderer.CrosstabTextRenderer;
import org.junit.Test;
/**
 * Checks that {@link BooleanAnalyzerReducer} combines partial {@link BooleanAnalyzerResult}s into the same
 * rendered result as a single {@link BooleanAnalyzer} run over the whole window.
 *
 * <p>
 * The test runs the analyzer four times against {@code src/test/resources/testBooleanAnalyzer.txt}: once over
 * a window starting at row 1 with a maximum of 16 rows, and three times over consecutive windows of 5 rows
 * (starting at rows 1, 6 and 11). The three partial results are reduced, and the reduced result is asserted
 * against exactly the same expected text as the single full run.
 * </p>
 */
public class BooleanAnalyzerReducerTest {

    @Test
    public void test() throws Throwable {
        // Each job builder is created right before the call that consumes and closes it, so a failure in an
        // earlier run cannot leave later builders open.
        final BooleanAnalyzerResult fullResult = getPartialResult(getAnalysisJobBuilder(), 1, 16);
        final BooleanAnalyzerResult partialResult1 = getPartialResult(getAnalysisJobBuilder(), 1, 5);
        final BooleanAnalyzerResult partialResult2 = getPartialResult(getAnalysisJobBuilder(), 6, 5);
        final BooleanAnalyzerResult partialResult3 = getPartialResult(getAnalysisJobBuilder(), 11, 5);

        // assert partial result1
        assertColumnStatistics(partialResult1,
                " b1 (as boolean) b2 (as boolean) ",
                "Row count 5 5 ",
                "Null count 0 0 ",
                "True count 3 2 ",
                "False count 2 3 ");
        assertValueCombinations(partialResult1,
                " b1 (as boolean) b2 (as boolean) Frequency ",
                "Most frequent 1 0 3 ",
                "Least frequent 0 1 2 ");

        // assert partial result2
        assertColumnStatistics(partialResult2,
                " b1 (as boolean) b2 (as boolean) ",
                "Row count 5 5 ",
                "Null count 1 0 ",
                "True count 2 3 ",
                "False count 2 2 ");
        assertValueCombinations(partialResult2,
                " b1 (as boolean) b2 (as boolean) Frequency ",
                "Most frequent 1 1 2 ",
                "Combination 1 <null> 0 1 ",
                "Combination 2 0 1 1 ",
                "Least frequent 0 0 1 ");

        // assert partial result3
        assertColumnStatistics(partialResult3,
                " b1 (as boolean) b2 (as boolean) ",
                "Row count 5 5 ",
                "Null count 0 2 ",
                "True count 2 1 ",
                "False count 3 2 ");
        assertValueCombinations(partialResult3,
                " b1 (as boolean) b2 (as boolean) Frequency ",
                "Most frequent 1 <null> 1 ",
                "Combination 1 1 0 1 ",
                "Combination 2 0 <null> 1 ",
                "Combination 3 0 1 1 ",
                "Least frequent 0 0 1 ");

        final List<BooleanAnalyzerResult> partialResults = new ArrayList<>();
        partialResults.add(partialResult1);
        partialResults.add(partialResult2);
        partialResults.add(partialResult3);

        final BooleanAnalyzerReducer booleanAnalyzerReducer = new BooleanAnalyzerReducer();
        final BooleanAnalyzerResult reducedResults = booleanAnalyzerReducer.reduce(partialResults);

        // The reduced result and the single full run are checked against the very same expectations:
        // that equality is what this test is about.
        final String[] expectedColumnStatistics = {
            " b1 (as boolean) b2 (as boolean) ",
            "Row count 15 15 ",
            "Null count 1 2 ",
            "True count 7 6 ",
            "False count 7 7 ",
        };
        final String[] expectedValueCombinations = {
            " b1 (as boolean) b2 (as boolean) Frequency ",
            "Most frequent 1 0 4 ",
            "Combination 1 0 1 4 ",
            "Combination 2 1 1 2 ",
            "Combination 3 0 0 2 ",
            "Combination 4 <null> 0 1 ",
            "Combination 5 1 <null> 1 ",
            "Least frequent 0 <null> 1 ",
        };

        // assert reduced results
        assertColumnStatistics(reducedResults, expectedColumnStatistics);
        assertValueCombinations(reducedResults, expectedValueCombinations);

        // assert full result
        assertColumnStatistics(fullResult, expectedColumnStatistics);
        assertValueCombinations(fullResult, expectedValueCombinations);
    }

    /**
     * Renders the column statistics crosstab of the given result as text and compares it line by line.
     *
     * @param result the analyzer result whose column statistics are checked
     * @param expectedLines the expected rendered lines, in order
     */
    private static void assertColumnStatistics(final BooleanAnalyzerResult result, final String... expectedLines) {
        final Crosstab<Number> columnStatistics = result.getColumnStatisticsCrosstab();
        assertRenderedLines(new CrosstabTextRenderer().render(columnStatistics), expectedLines);
    }

    /**
     * Renders the value combination crosstab of the given result as text and compares it line by line.
     *
     * @param result the analyzer result whose value combinations are checked
     * @param expectedLines the expected rendered lines, in order
     */
    private static void assertValueCombinations(final BooleanAnalyzerResult result, final String... expectedLines) {
        assertRenderedLines(new CrosstabTextRenderer().render(result.getValueCombinationCrosstab()), expectedLines);
    }

    /**
     * Splits rendered text on {@code "\n"} and asserts first the number of lines and then each line.
     *
     * @param rendered the text produced by the crosstab renderer
     * @param expectedLines the expected lines, in order
     */
    private static void assertRenderedLines(final String rendered, final String[] expectedLines) {
        final String[] actualLines = rendered.split("\n");
        assertEquals(expectedLines.length, actualLines.length);
        for (int i = 0; i < expectedLines.length; i++) {
            assertEquals(expectedLines[i], actualLines[i]);
        }
    }

    /**
     * Completes the given job builder with a {@link MaxRowsFilter}, a {@link ConvertToBooleanTransformer} that
     * only receives rows with the filter's {@code VALID} outcome, and a {@link BooleanAnalyzer} fed with the
     * transformer's output columns; then runs the job and returns the analyzer's result.
     *
     * <p>
     * The builder is always closed by this method, also when assembling the job fails.
     * </p>
     *
     * @param jobBuilder a builder that already has its datastore and source columns configured
     * @param firstRow value for the filter's "First row" property, or {@code null} to leave it untouched
     * @param maxRows value for the filter's "Max rows" property, or {@code null} to leave it untouched
     * @return the first {@link BooleanAnalyzerResult} reported by the run
     * @throws Throwable the first error reported by the run, or any failure while building the job
     */
    @SuppressWarnings("unchecked")
    private BooleanAnalyzerResult getPartialResult(final AnalysisJobBuilder jobBuilder, final Integer firstRow,
            final Integer maxRows) throws Throwable {
        final AnalysisJob analysisJob;
        try {
            final InputColumn<?>[] inputColumns = jobBuilder.getSourceColumns().toArray(new InputColumn[2]);

            final FilterComponentBuilder<MaxRowsFilter, Category> maxRowsFilter =
                    jobBuilder.addFilter(MaxRowsFilter.class);
            maxRowsFilter.addInputColumn(inputColumns[0]);
            if (firstRow != null) {
                maxRowsFilter.setConfiguredProperty("First row", firstRow);
            }
            if (maxRows != null) {
                maxRowsFilter.setConfiguredProperty("Max rows", maxRows);
            }
            maxRowsFilter.setConfiguredProperty("Order column", null);

            final TransformerComponentBuilder<ConvertToBooleanTransformer> convertToBoolean =
                    jobBuilder.addTransformer(ConvertToBooleanTransformer.class);
            convertToBoolean.addInputColumns(inputColumns);
            final FilterOutcome filterOutcome = maxRowsFilter.getFilterOutcome(MaxRowsFilter.Category.VALID);
            convertToBoolean.setRequirement(filterOutcome);

            final AnalyzerComponentBuilder<BooleanAnalyzer> booleanAnalyzerBuilder =
                    jobBuilder.addAnalyzer(BooleanAnalyzer.class);
            final BooleanAnalyzer booleanAnalyzer = booleanAnalyzerBuilder.getComponentInstance();
            // The analyzer's columns are assigned directly on the component instance rather than through the
            // component builder.
            booleanAnalyzer._columns = convertToBoolean.getOutputColumns().toArray(new MutableInputColumn[2]);

            analysisJob = jobBuilder.toAnalysisJob();
        } finally {
            // Release the builder even if wiring the components or building the job threw.
            jobBuilder.close();
        }

        final AnalysisResultFuture resultFuture =
                new AnalysisRunnerImpl(jobBuilder.getConfiguration()).run(analysisJob);
        resultFuture.await();
        if (resultFuture.isErrornous()) {
            throw resultFuture.getErrors().get(0);
        }
        return resultFuture.getResults(BooleanAnalyzerResult.class).get(0);
    }

    /**
     * Creates a job builder over a CSV datastore backed by {@code src/test/resources/testBooleanAnalyzer.txt}
     * with the source columns {@code b1} and {@code b2} added.
     *
     * @return a new job builder; the caller is responsible for closing it
     */
    private AnalysisJobBuilder getAnalysisJobBuilder() {
        final File file = new File("src/test/resources/testBooleanAnalyzer.txt");
        assertTrue(file.exists());

        final Datastore datastore = new CsvDatastore("test", new FileResource(file));
        final DataCleanerConfigurationImpl configuration =
                new DataCleanerConfigurationImpl().withDatastoreCatalog(new DatastoreCatalogImpl(datastore));

        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        jobBuilder.setDatastore(datastore);
        jobBuilder.addSourceColumns("b1");
        jobBuilder.addSourceColumns("b2");
        return jobBuilder;
    }
}