/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.test.full.scenarios;

import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import javax.inject.Named;

import org.apache.metamodel.schema.Column;
import org.datacleaner.api.Analyzer;
import org.datacleaner.api.Close;
import org.datacleaner.api.Configured;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.configuration.DataCleanerEnvironment;
import org.datacleaner.configuration.DataCleanerEnvironmentImpl;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.connection.SchemaNavigator;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.concurrent.PreviousErrorsExistException;
import org.datacleaner.job.runner.AnalysisJobFailedException;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunnerImpl;
import org.datacleaner.result.NumberResult;
import org.datacleaner.test.ActivityAwareMultiThreadedTaskRunner;
import org.datacleaner.test.TestHelper;
import org.datacleaner.util.CollectionUtils2;

import junit.framework.AssertionFailedError;
import junit.framework.TestCase;

/**
 * Tests that a job in which one of the row processing consumers fails is
 * handled gracefully: the errors are reported through the result future, all
 * begun tasks finish, and the failing component is still closed.
 */
public class ErrorInRowProcessingConsumerTest extends TestCase {

    private static final String ILLEGAL_STATE_MESSAGE = "This analyzer can only analyze two rows!";
    private static final String PREVIOUS_ERRORS_MESSAGE = "A previous exception has occurred";

    /**
     * Set to {@code true} by {@link ErrornousAnalyzer#close()}; reset at the
     * start of {@link #testScenario()}.
     */
    private static final AtomicBoolean closed = new AtomicBoolean();

    /**
     * Analyzer that counts the rows it receives and throws an
     * {@link IllegalStateException} on the third one.
     */
    @Named("Errornous analyzer")
    public static class ErrornousAnalyzer implements Analyzer<NumberResult> {

        private final AtomicInteger counter = new AtomicInteger(0);

        @Configured
        InputColumn<String> inputColumn;

        /**
         * @return the number of rows counted so far
         */
        @Override
        public NumberResult getResult() {
            return new NumberResult(counter.get());
        }

        /**
         * Validates the incoming row and fails deliberately on the third
         * invocation.
         *
         * @param row the row to consume; must carry a non-null value for the
         *            configured column
         * @param distinctCount expected to be 1
         * @throws IllegalStateException when called for the third time
         */
        @Override
        public void run(final InputRow row, final int distinctCount) {
            assertNotNull(inputColumn);
            assertNotNull(row);
            assertEquals(1, distinctCount);

            final String value = row.getValue(inputColumn);
            assertNotNull(value);

            final int count = counter.incrementAndGet();
            if (count == 3) {
                throw new IllegalStateException("This analyzer can only analyze two rows!");
            }
        }

        /**
         * Records that the component was closed so the test can verify it.
         */
        @Close
        public void close() {
            closed.set(true);
        }
    }

    /**
     * Runs a job containing the {@link ErrornousAnalyzer} and verifies the
     * reported errors, that every begun task finished, and that the analyzer
     * was closed.
     */
    public void testScenario() throws Exception {
        closed.set(false);

        final ActivityAwareMultiThreadedTaskRunner taskRunner = new ActivityAwareMultiThreadedTaskRunner();

        final Datastore datastore = TestHelper.createSampleDatabaseDatastore("my db");
        final DataCleanerEnvironment environment = new DataCleanerEnvironmentImpl().withTaskRunner(taskRunner);
        final DataCleanerConfiguration conf =
                new DataCleanerConfigurationImpl().withDatastores(datastore).withEnvironment(environment);

        final AnalysisJob job = buildJob(conf, datastore);

        final AnalysisResultFuture resultFuture = new AnalysisRunnerImpl(conf).run(job);

        assertTrue(resultFuture.isErrornous());

        // isErrornous should be blocking
        assertTrue(resultFuture.isDone());

        // the amount of errors may vary depending on the thread scheduling
        final List<Throwable> reportedErrors = resultFuture.getErrors();
        final int numErrors = reportedErrors.size();
        assertTrue(numErrors + " errors found, 2 or 3 expected!", numErrors == 2 || numErrors == 3);

        try {
            resultFuture.getResults();
            fail("Exception expected");
        } catch (final AnalysisJobFailedException e) {
            final String message = e.getMessage();
            if (numErrors == 2) {
                assertEquals("The analysis ended with 2 errors: ["
                        + "IllegalStateException: This analyzer can only analyze two rows!,"
                        + "PreviousErrorsExistException: A previous exception has occurred]", message);
            } else {
                // With a third error the exact text is not pinned here; only
                // require that the two entries known from the 2-error case
                // are mentioned.
                assertTrue(message,
                        message.contains("IllegalStateException: This analyzer can only analyze two rows!"));
                assertTrue(message,
                        message.contains("PreviousErrorsExistException: A previous exception has occurred"));
            }
        }

        // sort the errors to make the order deterministic
        final List<Throwable> errors = CollectionUtils2.sorted(reportedErrors,
                (o1, o2) -> o1.getClass().getName().compareTo(o2.getClass().getName()));

        assertError(IllegalStateException.class, ILLEGAL_STATE_MESSAGE, errors.get(0));

        if (numErrors == 3) {
            // this is caused by the assertion
            // ("assertEquals(1, distinctCount);")
            // above
            assertError(AssertionFailedError.class, "expected:<1> but was:<2>", errors.get(1));
            assertError(PreviousErrorsExistException.class, PREVIOUS_ERRORS_MESSAGE, errors.get(2));
        } else {
            assertError(PreviousErrorsExistException.class, PREVIOUS_ERRORS_MESSAGE, errors.get(1));
        }

        final int taskCount = taskRunner.assertAllBegunTasksFinished(500);
        assertTrue("taskCount was: " + taskCount, taskCount > 4);

        assertTrue(closed.get());
    }

    /**
     * Builds a job that feeds the EMPLOYEES.EMAIL column into an
     * {@link ErrornousAnalyzer}.
     */
    private static AnalysisJob buildJob(final DataCleanerConfiguration conf, final Datastore datastore) {
        try (AnalysisJobBuilder ajb = new AnalysisJobBuilder(conf)) {
            ajb.setDatastore(datastore);

            final Column column;
            // Close the connection opened for the schema lookup instead of
            // leaving it open for the rest of the test run.
            try (DatastoreConnection connection = datastore.openConnection()) {
                final SchemaNavigator schemaNavigator = connection.getSchemaNavigator();
                column = schemaNavigator.convertToColumn("PUBLIC.EMPLOYEES.EMAIL");
            }
            assertNotNull(column);

            ajb.addSourceColumn(column);
            ajb.addAnalyzer(ErrornousAnalyzer.class).addInputColumn(ajb.getSourceColumns().get(0));

            return ajb.toAnalysisJob();
        }
    }

    /**
     * Asserts that {@code actual} has exactly the given class and message.
     */
    private static void assertError(final Class<? extends Throwable> expectedType, final String expectedMessage,
            final Throwable actual) {
        assertEquals(expectedType, actual.getClass());
        assertEquals(expectedMessage, actual.getMessage());
    }
}