/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.cluster;
import java.util.List;
import org.datacleaner.cluster.virtual.VirtualClusterManager;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.connection.Datastore;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.test.TestHelper;
import junit.framework.TestCase;
/**
 * Tests for {@link DistributedAnalysisRunner}, executed against in-process
 * {@link VirtualClusterManager} instances with varying numbers of virtual
 * nodes.
 *
 * Most scenarios delegate to {@link ClusterTestHelper}, which holds the actual
 * job definitions and assertions; this class mainly selects the configuration
 * and the cluster size to run them with.
 */
public class DistributedAnalysisRunnerTest extends TestCase {

    /**
     * Runs the helper's "no expected records" job using only the test
     * configuration (no explicit cluster manager is passed).
     */
    public void testNoRecords() throws Throwable {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);
        ClusterTestHelper.runNoExpectedRecordsJob(configuration);
    }

    /**
     * Runs the helper's job-cancellation scenario on a two-node virtual
     * cluster.
     */
    public void testCancel() throws Throwable {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);
        ClusterTestHelper.runCancelJobJob(configuration, new VirtualClusterManager(configuration, 2));
    }

    /**
     * Runs the helper's "existing max rows" scenario on a two-node virtual
     * cluster.
     */
    public void testExistingMaxRowsScenario() throws Throwable {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);
        ClusterTestHelper.runExistingMaxRowsJob(configuration, new VirtualClusterManager(configuration, 2));
    }

    /**
     * Runs the helper's concat-and-insert job on a single-node virtual
     * cluster.
     */
    public void testVanillaScenarioSingleSlave() throws Throwable {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);
        ClusterTestHelper.runConcatAndInsertJob(configuration, new VirtualClusterManager(configuration, 1));
    }

    /**
     * Runs the helper's concat-and-insert job on a four-node virtual cluster.
     */
    public void testVanillaScenarioFourSlaves() throws Throwable {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);
        ClusterTestHelper.runConcatAndInsertJob(configuration, new VirtualClusterManager(configuration, 4));
    }

    /**
     * Runs the helper's completeness / value matcher analyzer job on virtual
     * clusters of 1, 10 and 3 nodes, reusing the same configuration.
     */
    public void testRunCompletenessAnalyzer() throws Throwable {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);

        // run with only a single node to verify a baseline scenario
        ClusterTestHelper
                .runCompletenessAndValueMatcherAnalyzerJob(configuration, new VirtualClusterManager(configuration, 1));

        // then repeat the same job with larger clusters
        ClusterTestHelper
                .runCompletenessAndValueMatcherAnalyzerJob(configuration, new VirtualClusterManager(configuration, 10));
        ClusterTestHelper
                .runCompletenessAndValueMatcherAnalyzerJob(configuration, new VirtualClusterManager(configuration, 3));
    }

    /**
     * Runs the helper's basic analyzers job on virtual clusters of 1, 6 and 10
     * nodes, reusing the same configuration.
     */
    public void testRunBasicAnalyzers() throws Throwable {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);

        // run with only a single node to verify a baseline scenario
        ClusterTestHelper.runBasicAnalyzersJob(configuration, new VirtualClusterManager(configuration, 1));

        // then repeat the same job with larger clusters
        ClusterTestHelper.runBasicAnalyzersJob(configuration, new VirtualClusterManager(configuration, 6));
        ClusterTestHelper.runBasicAnalyzersJob(configuration, new VirtualClusterManager(configuration, 10));
    }

    /**
     * Runs the helper's error handling job on a single-node virtual cluster
     * and expects exactly two errors, in a fixed order.
     *
     * NOTE(review): this is the only scenario that passes {@code false} as the
     * second argument of {@code createConfiguration}; presumably that makes
     * the run single-threaded and hence the error list deterministic - confirm
     * against ClusterTestHelper.
     */
    public void testErrorHandlingSingleSlave() throws Exception {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), false);
        final List<Throwable> errors =
                ClusterTestHelper.runErrorHandlingJob(configuration, new VirtualClusterManager(configuration, 1));

        // Check the count first so that an unexpected number of errors is
        // reported as an assertion failure listing the errors, rather than as
        // an IndexOutOfBoundsException from the lookups below.
        assertEquals("Unexpected errors: " + errors, 2, errors.size());
        assertEquals("I am just a dummy transformer!", errors.get(0).getMessage());
        assertEquals("A previous exception has occurred", errors.get(1).getMessage());
    }

    /**
     * Runs the helper's error handling job on a four-node virtual cluster.
     * Every reported error must carry one of the two known messages, and at
     * least eight errors must be reported in total.
     */
    public void testErrorHandlingFourSlaves() throws Exception {
        final DataCleanerConfiguration configuration = ClusterTestHelper.createConfiguration(getName(), true);
        final List<Throwable> errors =
                ClusterTestHelper.runErrorHandlingJob(configuration, new VirtualClusterManager(configuration, 4));

        for (final Throwable throwable : errors) {
            final String message = throwable.getMessage();
            final boolean expectedMessage = "I am just a dummy transformer!".equals(message)
                    || "A previous exception has occurred".equals(message);
            if (!expectedMessage) {
                fail("Unexpected exception: " + message + " (" + throwable.getClass().getName() + ")");
            }
        }

        // there might be (a lot) more than 8 errors since each node was
        // multi-threaded
        assertTrue("Expected at least 8 errors but got " + errors.size(), errors.size() >= 8);
    }

    /**
     * Verifies that running a job containing {@link MockAnalyzerWithoutReducer}
     * makes the runner throw an {@link UnsupportedOperationException} with the
     * message "Job is not distributable!".
     */
    public void testUndistributableAnalyzer() throws Exception {
        final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb");
        final DataCleanerConfiguration configuration = new DataCleanerConfigurationImpl().withDatastores(datastore);

        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        try {
            jobBuilder.setDatastore(datastore);
            jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNAME");

            // Going by its name, this mock analyzer has no result reducer,
            // which is presumably what makes the job non-distributable.
            final AnalyzerComponentBuilder<MockAnalyzerWithoutReducer> analyzer =
                    jobBuilder.addAnalyzer(MockAnalyzerWithoutReducer.class);
            analyzer.addInputColumns(jobBuilder.getSourceColumns());

            final AnalysisJob job = jobBuilder.toAnalysisJob();

            final DistributedAnalysisRunner runner =
                    new DistributedAnalysisRunner(configuration, new VirtualClusterManager(configuration, 2));

            try {
                runner.run(job);
                fail("Exception expected");
            } catch (final UnsupportedOperationException e) {
                assertEquals("Job is not distributable!", e.getMessage());
            }
        } finally {
            // close the builder even when the job setup itself fails
            jobBuilder.close();
        }
    }

    /**
     * Verifies that a failure raised while reducing the results of
     * {@link MockAnalyzerWithBadReducer} is reported as the single error of an
     * unsuccessful {@link AnalysisResultFuture}, instead of the result being
     * reported as successful.
     */
    public void testErrorHandlingInReductionPhase() throws Exception {
        final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb");
        final DataCleanerConfiguration configuration = new DataCleanerConfigurationImpl().withDatastores(datastore);

        final AnalysisJob job;
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        try {
            jobBuilder.setDatastore(datastore);
            jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNAME");

            // This mock analyzer's reducer is expected to fail; see the error
            // message asserted at the end of this test.
            final AnalyzerComponentBuilder<MockAnalyzerWithBadReducer> analyzer =
                    jobBuilder.addAnalyzer(MockAnalyzerWithBadReducer.class);
            analyzer.addInputColumns(jobBuilder.getSourceColumns());

            job = jobBuilder.toAnalysisJob();
        } finally {
            // the builder is closed before the job runs, and also when the
            // job setup fails
            jobBuilder.close();
        }

        final DistributedAnalysisRunner runner =
                new DistributedAnalysisRunner(configuration, new VirtualClusterManager(configuration, 2));

        final AnalysisResultFuture result = runner.run(job);

        if (result.isSuccessful()) {
            fail("Expected result to be erroneous. Got result: " + result.getResults());
        }

        final List<Throwable> errors = result.getErrors();

        // Check the count first so that an unexpected number of errors is
        // reported together with the errors themselves.
        assertEquals("Unexpected errors: " + errors, 1, errors.size());
        assertEquals("Failed to reduce results for ImmutableAnalyzerJob[name=null,analyzer=Analyzer with bad reducer]: "
                + "Damn, I failed during reduction phase", errors.get(0).getMessage());
    }
}