/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.test.full.scenarios;
import static org.junit.Assert.*;
import java.util.List;
import org.apache.commons.lang.SerializationUtils;
import org.datacleaner.api.OutputDataStream;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.configuration.DataCleanerEnvironment;
import org.datacleaner.connection.Datastore;
import org.datacleaner.data.MetaModelInputColumn;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.AnalyzerJob;
import org.datacleaner.job.OutputDataStreamJob;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunnerImpl;
import org.datacleaner.result.ListResult;
import org.datacleaner.result.SimpleAnalysisResult;
import org.datacleaner.test.MockAnalyzer;
import org.datacleaner.test.MockOutputDataStreamAnalyzer;
import org.datacleaner.test.TestEnvironment;
import org.datacleaner.test.TestHelper;
import org.junit.Test;
/**
* Basic acceptance test for DC issue #224: Output DataSet (renamed to 'data
* stream') producers and jobs. This test uses the additions to the builder API
* to build a job with {@link OutputDataStream}s and executes it to verify the
* invocation and completion of the {@link OutputDataStreamJob}s.
*/
public class JobWithOutputDataStreamsTest {

    private final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb");

    // final for consistency with the sibling fields — it is assigned once and never reassigned
    private final DataCleanerEnvironment environment = TestEnvironment.getEnvironment();

    private final DataCleanerConfiguration configuration =
            new DataCleanerConfigurationImpl().withDatastores(datastore).withEnvironment(environment);

    /**
     * Builds a job containing an analyzer with output data streams, verifies the stream is
     * represented on the built (immutable) job, then executes it and checks both the parent
     * analyzer's result and the output-data-stream analyzer's result after a serialization
     * round trip.
     *
     * @throws Throwable the first execution error, if the job run was erroneous
     */
    @Test(timeout = 30 * 1000)
    public void testSimpleBuildAndExecuteScenario() throws Throwable {
        final AnalysisJob job = buildJobWithConsumedOutputDataStream();

        // do some assertions on the built job to check that the data stream is
        // represented there also
        assertEquals(1, job.getAnalyzerJobs().size());
        final AnalyzerJob analyzerJob1 = job.getAnalyzerJobs().get(0);
        assertEquals("analyzer1", analyzerJob1.getName());

        final OutputDataStreamJob[] outputDataStreamJobs = analyzerJob1.getOutputDataStreamJobs();
        assertEquals(1, outputDataStreamJobs.length);

        final OutputDataStreamJob outputDataStreamJob = outputDataStreamJobs[0];
        assertEquals("foo bar records", outputDataStreamJob.getOutputDataStream().getName());

        // the nested job consumes the two columns of the "foo bar records" stream
        final AnalysisJob job2 = outputDataStreamJob.getJob();
        assertEquals(2, job2.getSourceColumns().size());
        assertEquals("foo", job2.getSourceColumns().get(0).getName());
        assertEquals("bar", job2.getSourceColumns().get(1).getName());
        assertEquals(1, job2.getAnalyzerJobs().size());

        final AnalyzerJob analyzerJob2 = job2.getAnalyzerJobs().get(0);
        assertEquals("analyzer2", analyzerJob2.getName());

        // now run the job(s)
        final SimpleAnalysisResult deSerializedResult = runJobAndRoundTripResult(job);

        // the first result should be trivial - it was also there before issue
        // #224
        final ListResult<?> result1 = (ListResult<?>) deSerializedResult.getResult(analyzerJob1);
        assertNotNull(result1);
        assertEquals(71, result1.getValues().size());

        // this result is the "new part" of issue #224
        final ListResult<?> result2 = (ListResult<?>) deSerializedResult.getResult(analyzerJob2);
        assertNotNull(result2);
        assertEquals(144, result2.getValues().size());

        final Object lastElement = result2.getValues().get(result2.getValues().size() - 1);
        assertEquals("MetaModelInputRow[Row[values=[baz, null]]]", lastElement.toString());
    }

    /**
     * Uses the builder API to create a job with 'analyzer1' (producing output data streams) and
     * 'analyzer2' consuming the "foo bar records" stream, asserting the builder-side behavior of
     * {@link OutputDataStream} discovery, reuse and consumption along the way.
     *
     * @return the built, immutable {@link AnalysisJob}
     */
    private AnalysisJob buildJobWithConsumedOutputDataStream() {
        try (AnalysisJobBuilder ajb = new AnalysisJobBuilder(configuration)) {
            ajb.setDatastore(datastore);
            ajb.addSourceColumns("customers.contactfirstname");
            ajb.addSourceColumns("customers.contactlastname");
            ajb.addSourceColumns("customers.city");

            final AnalyzerComponentBuilder<MockOutputDataStreamAnalyzer> analyzer1 =
                    ajb.addAnalyzer(MockOutputDataStreamAnalyzer.class);

            // analyzer is still unconfigured
            assertEquals(0, analyzer1.getOutputDataStreams().size());

            // now configure it
            final List<MetaModelInputColumn> sourceColumns = ajb.getSourceColumns();
            analyzer1.setName("analyzer1");
            analyzer1.addInputColumn(sourceColumns.get(0));
            assertTrue(analyzer1.isConfigured());

            final List<OutputDataStream> dataStreams = analyzer1.getOutputDataStreams();
            assertEquals(2, dataStreams.size());
            assertEquals("foo bar records", dataStreams.get(0).getName());
            assertEquals("counter records", dataStreams.get(1).getName());

            final OutputDataStream dataStream = analyzer1.getOutputDataStream("foo bar records");
            // assert that the same instance is reused when re-referred to
            assertSame(dataStreams.get(0), dataStream);

            // the stream is still not "consumed" yet
            assertFalse(analyzer1.isOutputDataStreamConsumed(dataStream));

            final AnalysisJobBuilder outputDataStreamJobBuilder = analyzer1.getOutputDataStreamJobBuilder(dataStream);
            final List<MetaModelInputColumn> outputDataStreamColumns = outputDataStreamJobBuilder.getSourceColumns();
            assertEquals(2, outputDataStreamColumns.size());
            assertEquals("MetaModelInputColumn[foo bar records.foo]", outputDataStreamColumns.get(0).toString());
            assertEquals("MetaModelInputColumn[foo bar records.bar]", outputDataStreamColumns.get(1).toString());

            // the stream is still not "consumed" because no components exist in
            // the output stream
            assertFalse(analyzer1.isOutputDataStreamConsumed(dataStream));

            final AnalyzerComponentBuilder<MockAnalyzer> analyzer2 =
                    outputDataStreamJobBuilder.addAnalyzer(MockAnalyzer.class);
            analyzer2.addInputColumns(outputDataStreamColumns);
            analyzer2.setName("analyzer2");
            assertTrue(analyzer2.isConfigured());

            // now the stream is consumed
            assertTrue(analyzer1.isOutputDataStreamConsumed(dataStream));

            return ajb.toAnalysisJob();
        }
    }

    /**
     * Runs the job, rethrows the first error if execution was erroneous, asserts that both the
     * parent and the output-data-stream analyzer produced a result, and round-trips the result
     * through Java serialization to verify it remains serializable.
     *
     * @param job the job to execute
     * @return the deserialized {@link SimpleAnalysisResult}
     * @throws Throwable the first error reported by the result future
     */
    private SimpleAnalysisResult runJobAndRoundTripResult(final AnalysisJob job) throws Throwable {
        final AnalysisRunnerImpl runner = new AnalysisRunnerImpl(configuration);
        final AnalysisResultFuture resultFuture = runner.run(job);
        resultFuture.await();
        if (resultFuture.isErrornous()) {
            throw resultFuture.getErrors().get(0);
        }
        assertEquals(2, resultFuture.getResults().size());

        final byte[] serialized = SerializationUtils.serialize(new SimpleAnalysisResult(resultFuture.getResultMap()));
        return (SimpleAnalysisResult) SerializationUtils.deserialize(serialized);
    }
}