/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.cluster;
import java.util.List;
import java.util.Map;
import org.eobjects.analyzer.beans.CompletenessAnalyzer;
import org.eobjects.analyzer.beans.CompletenessAnalyzer.Condition;
import org.eobjects.analyzer.beans.CompletenessAnalyzerResult;
import org.eobjects.analyzer.beans.NumberAnalyzer;
import org.eobjects.analyzer.beans.NumberAnalyzerResult;
import org.eobjects.analyzer.beans.StringAnalyzer;
import org.eobjects.analyzer.beans.StringAnalyzerResult;
import org.eobjects.analyzer.beans.filter.EqualsFilter;
import org.eobjects.analyzer.beans.filter.MaxRowsFilter;
import org.eobjects.analyzer.beans.filter.MaxRowsFilter.Category;
import org.eobjects.analyzer.beans.filter.ValidationCategory;
import org.eobjects.analyzer.beans.transform.ConcatenatorTransformer;
import org.eobjects.analyzer.beans.valuematch.ValueMatchAnalyzer;
import org.eobjects.analyzer.beans.valuematch.ValueMatchAnalyzerResult;
import org.eobjects.analyzer.beans.writers.InsertIntoTableAnalyzer;
import org.eobjects.analyzer.beans.writers.WriteBufferSizeOption;
import org.eobjects.analyzer.configuration.AnalyzerBeansConfiguration;
import org.eobjects.analyzer.configuration.AnalyzerBeansConfigurationImpl;
import org.eobjects.analyzer.connection.Datastore;
import org.eobjects.analyzer.connection.DatastoreCatalog;
import org.eobjects.analyzer.connection.DatastoreCatalogImpl;
import org.eobjects.analyzer.connection.DatastoreConnection;
import org.eobjects.analyzer.connection.JdbcDatastore;
import org.eobjects.analyzer.connection.UpdateableDatastoreConnection;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.data.MetaModelInputColumn;
import org.eobjects.analyzer.descriptors.Descriptors;
import org.eobjects.analyzer.descriptors.SimpleDescriptorProvider;
import org.eobjects.analyzer.job.AnalysisJob;
import org.eobjects.analyzer.job.ComponentJob;
import org.eobjects.analyzer.job.builder.AnalysisJobBuilder;
import org.eobjects.analyzer.job.builder.AnalyzerJobBuilder;
import org.eobjects.analyzer.job.builder.FilterJobBuilder;
import org.eobjects.analyzer.job.builder.TransformerJobBuilder;
import org.eobjects.analyzer.job.concurrent.MultiThreadedTaskRunner;
import org.eobjects.analyzer.job.concurrent.SingleThreadedTaskRunner;
import org.eobjects.analyzer.job.concurrent.TaskRunner;
import org.eobjects.analyzer.job.runner.AnalysisResultFuture;
import org.eobjects.analyzer.job.runner.JobStatus;
import org.eobjects.analyzer.result.AnalyzerResult;
import org.eobjects.analyzer.test.TestHelper;
import org.apache.metamodel.UpdateCallback;
import org.apache.metamodel.UpdateScript;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.schema.Schema;
import org.junit.Assert;
/**
 * Collection of static helper methods that build, run and verify analysis
 * jobs through a {@link DistributedAnalysisRunner}. Each <code>run...</code>
 * method takes a configuration (typically created with
 * {@link #createConfiguration(String, boolean)}) and performs its own JUnit
 * assertions, so that the same scenarios can be reused by tests of different
 * {@link ClusterManager} implementations.
 */
public class ClusterTestHelper {

    /**
     * Creates a {@link AnalyzerBeansConfiguration} object (based on a few
     * parameters), typically to use in test methods of this class.
     *
     * The configuration contains two datastores: the sample database
     * "orderdb" and a datastore named "csv". Despite its name, the latter is
     * an H2 in-memory JDBC database (one per test name) in which a table
     * "testtable" with the columns "id" and "name" is created if it does not
     * already exist.
     *
     * @param testName
     *            name of the test; used as the name of the in-memory database
     * @param multiThreaded
     *            whether to use a {@link MultiThreadedTaskRunner} (true) or a
     *            {@link SingleThreadedTaskRunner} (false)
     * @return the configuration, with task runner, datastore catalog and
     *         descriptor provider set
     */
    public static AnalyzerBeansConfiguration createConfiguration(String testName, boolean multiThreaded) {
        final JdbcDatastore csvDatastore = new JdbcDatastore("csv", "jdbc:h2:mem:" + testName, "org.h2.Driver", "SA",
                "", true);

        final UpdateableDatastoreConnection con = csvDatastore.openConnection();
        try {
            con.getUpdateableDataContext().executeUpdate(new UpdateScript() {
                @Override
                public void run(UpdateCallback callback) {
                    Schema schema = callback.getDataContext().getDefaultSchema();
                    if (schema.getTableByName("testtable") != null) {
                        // table was already created by an earlier invocation
                        return;
                    }
                    callback.createTable(schema, "testtable").withColumn("id").ofType(ColumnType.INTEGER)
                            .asPrimaryKey().withColumn("name").ofType(ColumnType.VARCHAR).execute();
                }
            });
        } finally {
            // release the connection even if the table creation fails
            con.close();
        }

        final Datastore databaseDatastore = TestHelper.createSampleDatabaseDatastore("orderdb");
        final DatastoreCatalog datastoreCatalog = new DatastoreCatalogImpl(databaseDatastore, csvDatastore);

        final TaskRunner taskRunner;
        if (multiThreaded) {
            taskRunner = new MultiThreadedTaskRunner(20);
        } else {
            taskRunner = new SingleThreadedTaskRunner();
        }

        // register the components that are used by the jobs of this class
        final SimpleDescriptorProvider descriptorProvider = new SimpleDescriptorProvider(true);
        descriptorProvider.addFilterBeanDescriptor(Descriptors.ofFilter(MaxRowsFilter.class));
        descriptorProvider.addTransformerBeanDescriptor(Descriptors.ofTransformer(MockTransformerThatWillFail.class));
        descriptorProvider.addTransformerBeanDescriptor(Descriptors.ofTransformer(ConcatenatorTransformer.class));
        descriptorProvider.addAnalyzerBeanDescriptor(Descriptors.ofAnalyzer(InsertIntoTableAnalyzer.class));
        descriptorProvider.addAnalyzerBeanDescriptor(Descriptors.ofAnalyzer(CompletenessAnalyzer.class));
        descriptorProvider.addAnalyzerBeanDescriptor(Descriptors.ofAnalyzer(ValueMatchAnalyzer.class));
        descriptorProvider.addAnalyzerBeanDescriptor(Descriptors.ofAnalyzer(MockAnalyzerWithBadReducer.class));

        final AnalyzerBeansConfiguration configuration = new AnalyzerBeansConfigurationImpl().replace(taskRunner)
                .replace(datastoreCatalog).replace(descriptorProvider);
        return configuration;
    }

    /**
     * Runs a job that verifies that errors (caused by the
     * {@link MockTransformerThatWillFail} dummy component) are picked up
     * correctly from the slave nodes.
     *
     * @param configuration
     *            the configuration to build and run the job with
     * @param clusterManager
     *            the cluster manager that the job is distributed through
     * @return the list of errors returned, to perform further assertions
     */
    public static List<Throwable> runErrorHandlingJob(AnalyzerBeansConfiguration configuration,
            ClusterManager clusterManager) {
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        try {
            jobBuilder.setDatastore("orderdb");
            jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNUMBER");

            final TransformerJobBuilder<MockTransformerThatWillFail> transformer = jobBuilder
                    .addTransformer(MockTransformerThatWillFail.class);
            transformer.addInputColumns(jobBuilder.getSourceColumns());

            final AnalyzerJobBuilder<CompletenessAnalyzer> analyzer = jobBuilder
                    .addAnalyzer(CompletenessAnalyzer.class);
            analyzer.addInputColumns(transformer.getOutputColumns());
            analyzer.setConfiguredProperty("Conditions",
                    new CompletenessAnalyzer.Condition[] { CompletenessAnalyzer.Condition.NOT_BLANK_OR_NULL });

            // build the job
            final AnalysisJob job = jobBuilder.toAnalysisJob();

            // run the job in a distributed fashion
            final DistributedAnalysisRunner runner = new DistributedAnalysisRunner(configuration, clusterManager);
            final AnalysisResultFuture resultFuture = runner.run(job);

            // read the status once, so that the value tested is also the
            // value reported
            final JobStatus initialStatus = resultFuture.getStatus();
            if (initialStatus != JobStatus.NOT_FINISHED && initialStatus != JobStatus.ERRORNOUS) {
                Assert.fail("Unexpected job status: " + initialStatus);
            }

            resultFuture.await();

            if (resultFuture.isSuccessful()) {
                Assert.fail("Job that was supposed to fail was succesful! Results: " + resultFuture.getResultMap());
            }
            Assert.assertEquals(JobStatus.ERRORNOUS, resultFuture.getStatus());

            final List<Throwable> errors = resultFuture.getErrors();
            Assert.assertNotNull(errors);
            Assert.assertFalse(errors.isEmpty());
            return errors;
        } finally {
            // also close the builder when one of the assertions above fails
            jobBuilder.close();
        }
    }

    /**
     * Runs a job with a {@link StringAnalyzer} and a {@link NumberAnalyzer}
     * on the CUSTOMERNUMBER and COUNTRY columns of the CUSTOMERS table and
     * verifies the metrics of the two (reduced) results.
     *
     * @param configuration
     *            the configuration to build and run the job with
     * @param clusterManager
     *            the cluster manager that the job is distributed through
     * @throws Throwable
     *             the first error of the job, if the job was errornous
     */
    public static void runBasicAnalyzersJob(AnalyzerBeansConfiguration configuration, ClusterManager clusterManager)
            throws Throwable {
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        final AnalysisResultFuture resultFuture;
        try {
            // build a job that analyzes the string and number columns
            jobBuilder.setDatastore("orderdb");
            jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNUMBER");
            jobBuilder.addSourceColumns("CUSTOMERS.COUNTRY");

            final AnalyzerJobBuilder<StringAnalyzer> stringAnalyzer = jobBuilder.addAnalyzer(StringAnalyzer.class);
            stringAnalyzer.addInputColumns(jobBuilder.getAvailableInputColumns(String.class));

            final AnalyzerJobBuilder<NumberAnalyzer> numberAnalyzer = jobBuilder.addAnalyzer(NumberAnalyzer.class);
            numberAnalyzer.addInputColumns(jobBuilder.getAvailableInputColumns(Number.class));

            final AnalysisJob job = jobBuilder.toAnalysisJob();

            // run the job in a distributed fashion
            final DistributedAnalysisRunner runner = new DistributedAnalysisRunner(configuration, clusterManager);
            resultFuture = runner.run(job);

            final JobStatus initialStatus = resultFuture.getStatus();
            Assert.assertTrue("Unexpected job status: " + initialStatus, initialStatus == JobStatus.NOT_FINISHED
                    || initialStatus == JobStatus.SUCCESSFUL);
        } finally {
            jobBuilder.close();
        }

        resultFuture.await();
        throwFirstErrorIfAny(resultFuture);

        Assert.assertEquals(JobStatus.SUCCESSFUL, resultFuture.getStatus());

        final List<AnalyzerResult> results = resultFuture.getResults();
        Assert.assertEquals(2, results.size());

        for (AnalyzerResult analyzerResult : results) {
            Assert.assertNotNull(analyzerResult);
            if (analyzerResult instanceof StringAnalyzerResult) {
                final StringAnalyzerResult stringAnalyzerResult = (StringAnalyzerResult) analyzerResult;
                final InputColumn<String>[] columns = stringAnalyzerResult.getColumns();
                Assert.assertEquals(1, columns.length);

                final InputColumn<String> column = columns[0];
                Assert.assertEquals("COUNTRY", column.getName());

                // test reduction: various ways of aggregating crosstab metrics
                // - min, max, avg, sum
                Assert.assertEquals(122, stringAnalyzerResult.getRowCount(column));
                Assert.assertEquals(1, stringAnalyzerResult.getMinWords(column));
                Assert.assertEquals(2, stringAnalyzerResult.getMaxWords(column));
                Assert.assertEquals(5.71, stringAnalyzerResult.getAvgChars(column), 0.1d);
                Assert.assertEquals(697, stringAnalyzerResult.getTotalCharCount(column));
            } else if (analyzerResult instanceof NumberAnalyzerResult) {
                final NumberAnalyzerResult numberAnalyzerResult = (NumberAnalyzerResult) analyzerResult;
                final InputColumn<? extends Number>[] columns = numberAnalyzerResult.getColumns();
                Assert.assertEquals(1, columns.length);

                final InputColumn<? extends Number> column = columns[0];
                Assert.assertEquals("CUSTOMERNUMBER", column.getName());

                Assert.assertEquals(122, numberAnalyzerResult.getRowCount(column));
                Assert.assertEquals(36161.0, numberAnalyzerResult.getSum(column).doubleValue(), 0.1);
                Assert.assertEquals(296.4, numberAnalyzerResult.getMean(column).doubleValue(), 0.1);
                Assert.assertEquals(496, numberAnalyzerResult.getHighestValue(column).doubleValue(), 0.1);
                Assert.assertEquals(103.0, numberAnalyzerResult.getLowestValue(column).doubleValue(), 0.1);
                Assert.assertEquals(117.0, numberAnalyzerResult.getStandardDeviation(column).doubleValue(), 0.8);
                // no median is expected in the result of the distributed job
                Assert.assertNull(numberAnalyzerResult.getMedian(column));
            } else {
                Assert.fail("Unexpected analyzer result found: " + analyzerResult);
            }
        }
    }

    /**
     * Runs a job with a {@link CompletenessAnalyzer} (on five columns of the
     * CUSTOMERS table) and a {@link ValueMatchAnalyzer} (on the COUNTRY
     * column) and verifies the counts and annotated rows of both results.
     *
     * @param configuration
     *            the configuration to build and run the job with
     * @param clusterManager
     *            the cluster manager that the job is distributed through
     * @throws Throwable
     *             the first error of the job, if the job was errornous
     */
    public static void runCompletenessAndValueMatcherAnalyzerJob(AnalyzerBeansConfiguration configuration,
            ClusterManager clusterManager) throws Throwable {
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        final AnalysisResultFuture resultFuture;
        try {
            // build a job that checks completeness of all source columns and
            // matches the countries against a list of expected values
            jobBuilder.setDatastore("orderdb");
            jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNUMBER", "CUSTOMERS.CONTACTFIRSTNAME",
                    "CUSTOMERS.CONTACTLASTNAME", "CUSTOMERS.COUNTRY", "CUSTOMERS.ADDRESSLINE2");

            final List<MetaModelInputColumn> cols = jobBuilder.getSourceColumns();

            final AnalyzerJobBuilder<CompletenessAnalyzer> completeness = jobBuilder
                    .addAnalyzer(CompletenessAnalyzer.class);
            completeness.addInputColumns(cols);

            // one condition per input column
            final Condition[] conditions = new CompletenessAnalyzer.Condition[cols.size()];
            for (int i = 0; i < conditions.length; i++) {
                conditions[i] = Condition.NOT_BLANK_OR_NULL;
            }
            completeness.setConfiguredProperty("Conditions", conditions);

            final AnalyzerJobBuilder<ValueMatchAnalyzer> valueMatch = jobBuilder
                    .addAnalyzer(ValueMatchAnalyzer.class);
            valueMatch.addInputColumn(jobBuilder.getSourceColumnByName("COUNTRY"));
            valueMatch.setConfiguredProperty("Expected values", new String[] { "United States", "USA", "Denmark",
                    "Danmark", "Netherlands" });

            final AnalysisJob job = jobBuilder.toAnalysisJob();

            // run the job in a distributed fashion
            final DistributedAnalysisRunner runner = new DistributedAnalysisRunner(configuration, clusterManager);
            resultFuture = runner.run(job);

            Assert.assertEquals(JobStatus.NOT_FINISHED, resultFuture.getStatus());
        } finally {
            jobBuilder.close();
        }

        resultFuture.await();
        throwFirstErrorIfAny(resultFuture);

        Assert.assertEquals(JobStatus.SUCCESSFUL, resultFuture.getStatus());

        final List<AnalyzerResult> results = resultFuture.getResults();
        Assert.assertEquals(2, results.size());

        for (AnalyzerResult analyzerResult : results) {
            Assert.assertNotNull(analyzerResult);
            if (analyzerResult instanceof CompletenessAnalyzerResult) {
                // Check completeness analyzer result
                final CompletenessAnalyzerResult completenessAnalyzerResult = (CompletenessAnalyzerResult) analyzerResult;
                Assert.assertEquals(109, completenessAnalyzerResult.getInvalidRowCount());

                final InputRow[] rows = completenessAnalyzerResult.getRows();
                Assert.assertNotNull(rows);
                Assert.assertTrue("No annotated rows available in CompletenessAnalyzer's result", rows.length > 0);
            } else if (analyzerResult instanceof ValueMatchAnalyzerResult) {
                final ValueMatchAnalyzerResult valueMatchAnalyzerResult = (ValueMatchAnalyzerResult) analyzerResult;
                Assert.assertEquals(0, valueMatchAnalyzerResult.getNullCount());
                Assert.assertEquals(83, valueMatchAnalyzerResult.getUnexpectedValueCount().intValue());

                InputRow[] rows = valueMatchAnalyzerResult.getAnnotatedRowsForUnexpectedValues().getRows();
                Assert.assertTrue(rows.length > 0);
                Assert.assertTrue(rows.length <= 83);

                Assert.assertEquals(2, valueMatchAnalyzerResult.getCount("Denmark").intValue());

                rows = valueMatchAnalyzerResult.getAnnotatedRowsForValue("Denmark").getRows();
                Assert.assertEquals(2, rows.length);
                for (InputRow row : rows) {
                    // the order of the two rows is not asserted
                    final String rowString = row.toString();
                    final boolean assert1 = rowString
                            .equals("MetaModelInputRow[Row[values=[145, Jytte, Petersen, Denmark, null]]]");
                    final boolean assert2 = rowString
                            .equals("MetaModelInputRow[Row[values=[227, Palle, Ibsen, Denmark, null]]]");
                    Assert.assertTrue("Unexpected 'Denmark' row: " + rowString, assert1 || assert2);
                }
            } else {
                Assert.fail("Unexpected analyzer result found: " + analyzerResult);
            }
        }
    }

    /**
     * Builds a job that contains a {@link MaxRowsFilter} and verifies that
     * the {@link DistributedAnalysisRunner} refuses to run it, because the
     * filter is not distributable.
     *
     * @param configuration
     *            the configuration to build and run the job with
     * @param clusterManager
     *            the cluster manager to create the runner with
     * @throws Throwable
     */
    public static void runExistingMaxRowsJob(AnalyzerBeansConfiguration configuration, ClusterManager clusterManager)
            throws Throwable {
        final AnalysisJob job;
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        try {
            jobBuilder.setDatastore("orderdb");
            jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNUMBER", "CUSTOMERS.CONTACTFIRSTNAME",
                    "CUSTOMERS.CONTACTLASTNAME");

            final InputColumn<?> col1 = jobBuilder.getSourceColumnByName("CONTACTFIRSTNAME");
            final InputColumn<?> col2 = jobBuilder.getSourceColumnByName("CONTACTLASTNAME");

            final FilterJobBuilder<MaxRowsFilter, Category> filter = jobBuilder.addFilter(MaxRowsFilter.class);
            filter.getComponentInstance().setFirstRow(5);
            filter.getComponentInstance().setMaxRows(20);

            final AnalyzerJobBuilder<StringAnalyzer> analyzer = jobBuilder.addAnalyzer(StringAnalyzer.class);
            analyzer.addInputColumn(col1);
            analyzer.addInputColumn(col2);
            analyzer.setRequirement(filter, MaxRowsFilter.Category.VALID);

            job = jobBuilder.toAnalysisJob();
        } finally {
            jobBuilder.close();
        }

        final DistributedAnalysisRunner runner = new DistributedAnalysisRunner(configuration, clusterManager);
        try {
            runner.run(job);
            // an AssertionError is not an Exception, so it is not caught
            // by the catch clause below
            Assert.fail("Exception expected");
        } catch (Exception e) {
            Assert.assertEquals("Component is not distributable: ImmutableFilterJob[name=null,filter=Max rows]",
                    e.getMessage());
        }
    }

    /**
     * Runs a simple job that is fully distributable and should be able to
     * execute in all contexts. The job does one transformation (concatenates
     * two fields) and inserts this field, together with a source field, into
     * another table.
     *
     * @param configuration
     *            the configuration to build and run the job with; must
     *            contain the datastores "orderdb" and "csv"
     * @param clusterManager
     *            the cluster manager that the job is distributed through
     * @throws Throwable
     *             the first error of the job, if the job was errornous
     */
    public static void runConcatAndInsertJob(AnalyzerBeansConfiguration configuration, ClusterManager clusterManager)
            throws Throwable {
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        try {
            final Datastore csvDatastore = configuration.getDatastoreCatalog().getDatastore("csv");
            final Datastore dbDatastore = configuration.getDatastoreCatalog().getDatastore("orderdb");

            // each connection is guarded by its own try/finally, so that a
            // failure to open the second one does not leak the first one
            final DatastoreConnection csvCon = csvDatastore.openConnection();
            try {
                final DatastoreConnection dbCon = dbDatastore.openConnection();
                try {
                    // the insertion target is the first table of the "csv"
                    // datastore
                    final Schema schema = csvCon.getDataContext().getDefaultSchema();
                    final String schemaName = schema.getName();
                    final String tableName = schema.getTable(0).getName();

                    // build the job
                    final AnalysisJob job = buildConcatAndInsertJob(jobBuilder, csvDatastore, schemaName, tableName);

                    // run the job in a distributed fashion
                    final DistributedAnalysisRunner runner = new DistributedAnalysisRunner(configuration,
                            clusterManager);
                    final AnalysisResultFuture resultFuture = runner.run(job);

                    Assert.assertEquals(JobStatus.NOT_FINISHED, resultFuture.getStatus());

                    resultFuture.await();
                    throwFirstErrorIfAny(resultFuture);

                    Assert.assertEquals(JobStatus.SUCCESSFUL, resultFuture.getStatus());

                    // check that the target table has the same amount of
                    // records as the CUSTOMERS table of orderdb.
                    final DataSet ds1 = dbCon.getDataContext().query().from("CUSTOMERS").selectCount().execute();
                    try {
                        final DataSet ds2 = csvCon.getDataContext().query().from(tableName).selectCount().execute();
                        try {
                            Assert.assertTrue(ds1.next());
                            Assert.assertTrue(ds2.next());
                            Assert.assertEquals(ds1.getRow().toString(), ds2.getRow().toString());
                        } finally {
                            ds2.close();
                        }
                    } finally {
                        ds1.close();
                    }

                    // await multiple times to ensure that second time isn't
                    // distorting the result
                    resultFuture.await();
                    resultFuture.await();

                    // check that the analysis result elements are there...
                    final Map<ComponentJob, AnalyzerResult> resultMap = resultFuture.getResultMap();
                    Assert.assertEquals(1, resultMap.size());
                    Assert.assertEquals(
                            "{ImmutableAnalyzerJob[name=null,analyzer=Insert into table]=122 inserts executed}",
                            resultMap.toString());
                } finally {
                    dbCon.close();
                }
            } finally {
                csvCon.close();
            }
        } finally {
            jobBuilder.close();
        }
    }

    /**
     * Runs a job of which the filter criteria (CUSTOMERNUMBER equal to
     * "-1000000") are expected to yield 0 records. The job is run with a
     * {@link ClusterManager} that throws an exception on any invocation, to
     * verify that nothing is dispatched to the cluster in this situation.
     *
     * @param configuration
     *            the configuration to build and run the job with
     * @throws Throwable
     *             the first error of the job, if the job was errornous
     */
    public static void runNoExpectedRecordsJob(AnalyzerBeansConfiguration configuration) throws Throwable {
        final AnalysisJob job;
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        try {
            // build a job that analyzes the string columns of only those
            // records that pass the equals filter
            jobBuilder.setDatastore("orderdb");
            jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNUMBER", "CUSTOMERS.CONTACTFIRSTNAME",
                    "CUSTOMERS.CONTACTLASTNAME");

            final FilterJobBuilder<EqualsFilter, ValidationCategory> equalsFilter = jobBuilder
                    .addFilter(EqualsFilter.class);
            equalsFilter.addInputColumn(jobBuilder.getSourceColumnByName("CUSTOMERNUMBER"));
            equalsFilter.getComponentInstance().setValues(new String[] { "-1000000" });

            final AnalyzerJobBuilder<StringAnalyzer> stringAnalyzer = jobBuilder.addAnalyzer(StringAnalyzer.class);
            stringAnalyzer.addInputColumns(jobBuilder.getAvailableInputColumns(String.class));
            stringAnalyzer.setRequirement(equalsFilter, ValidationCategory.VALID);

            job = jobBuilder.toAnalysisJob();
        } finally {
            jobBuilder.close();
        }

        final String notInvokedMessage = "Since this job should yield 0 expected records, this method should not be invoked";

        final DistributedAnalysisRunner analysisRunner = new DistributedAnalysisRunner(configuration,
                new ClusterManager() {
                    @Override
                    public JobDivisionManager getJobDivisionManager() {
                        throw new IllegalStateException(notInvokedMessage);
                    }

                    @Override
                    public AnalysisResultFuture dispatchJob(AnalysisJob job, DistributedJobContext context)
                            throws Exception {
                        throw new IllegalStateException(notInvokedMessage);
                    }
                });

        final AnalysisResultFuture resultFuture = analysisRunner.run(job);
        resultFuture.await();
        throwFirstErrorIfAny(resultFuture);

        final List<AnalyzerResult> results = resultFuture.getResults();
        Assert.assertEquals(1, results.size());

        final AnalyzerResult analyzerResult = results.get(0);
        Assert.assertTrue(analyzerResult instanceof StringAnalyzerResult);
    }

    /**
     * Starts the same job as
     * {@link #runConcatAndInsertJob(AnalyzerBeansConfiguration, ClusterManager)}
     * , cancels it right after it has been started and verifies that the
     * result future reports the job as cancelled.
     *
     * @param configuration
     *            the configuration to build and run the job with; must
     *            contain the datastores "orderdb" and "csv"
     * @param clusterManager
     *            the cluster manager that the job is distributed through
     * @throws Throwable
     */
    public static void runCancelJobJob(AnalyzerBeansConfiguration configuration, ClusterManager clusterManager)
            throws Throwable {
        final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration);
        try {
            final Datastore csvDatastore = configuration.getDatastoreCatalog().getDatastore("csv");
            final DatastoreConnection csvCon = csvDatastore.openConnection();
            try {
                // the insertion target is the first table of the "csv"
                // datastore
                final Schema schema = csvCon.getDataContext().getDefaultSchema();
                final String schemaName = schema.getName();
                final String tableName = schema.getTable(0).getName();

                // build the job
                final AnalysisJob job = buildConcatAndInsertJob(jobBuilder, csvDatastore, schemaName, tableName);

                // run the job in a distributed fashion
                final DistributedAnalysisRunner runner = new DistributedAnalysisRunner(configuration, clusterManager);
                final AnalysisResultFuture resultFuture = runner.run(job);

                resultFuture.cancel();

                Assert.assertTrue(resultFuture.isCancelled());
            } finally {
                csvCon.close();
            }
        } finally {
            jobBuilder.close();
        }
    }

    /**
     * Configures the job builder with a job that reads three columns of the
     * CUSTOMERS table of "orderdb", concatenates first name and last name
     * (separated by a space) and inserts the customer number and the
     * concatenated name into the columns "id" and "name" of the target table.
     *
     * @param jobBuilder
     *            the (empty) job builder to configure; it is not closed by
     *            this method
     * @param targetDatastore
     *            the datastore to insert into
     * @param schemaName
     *            the name of the schema of the target table
     * @param tableName
     *            the name of the target table
     * @return the job that was built
     */
    private static AnalysisJob buildConcatAndInsertJob(AnalysisJobBuilder jobBuilder, Datastore targetDatastore,
            String schemaName, String tableName) {
        jobBuilder.setDatastore("orderdb");
        jobBuilder.addSourceColumns("CUSTOMERS.CUSTOMERNUMBER", "CUSTOMERS.CONTACTFIRSTNAME",
                "CUSTOMERS.CONTACTLASTNAME");

        // concatenate firstname + lastname
        final TransformerJobBuilder<ConcatenatorTransformer> concatenator = jobBuilder
                .addTransformer(ConcatenatorTransformer.class);
        concatenator.addInputColumn(jobBuilder.getSourceColumnByName("CONTACTFIRSTNAME"));
        concatenator.addInputColumn(jobBuilder.getSourceColumnByName("CONTACTLASTNAME"));
        concatenator.setConfiguredProperty("Separator", " ");

        // insert into the target table
        final AnalyzerJobBuilder<InsertIntoTableAnalyzer> insert = jobBuilder
                .addAnalyzer(InsertIntoTableAnalyzer.class);
        insert.setConfiguredProperty("Datastore", targetDatastore);
        insert.addInputColumn(jobBuilder.getSourceColumnByName("CUSTOMERNUMBER"));
        insert.addInputColumn(concatenator.getOutputColumns().get(0));
        insert.setConfiguredProperty("Schema name", schemaName);
        insert.setConfiguredProperty("Table name", tableName);
        insert.setConfiguredProperty("Column names", new String[] { "id", "name" });
        insert.setConfiguredProperty("Buffer size", WriteBufferSizeOption.TINY);

        return jobBuilder.toAnalysisJob();
    }

    /**
     * Rethrows the first error of the job, if the job is errornous. Does
     * nothing otherwise.
     *
     * @param resultFuture
     *            the result future of the job
     * @throws Throwable
     *             the first element of the errors of the result future
     */
    private static void throwFirstErrorIfAny(AnalysisResultFuture resultFuture) throws Throwable {
        if (resultFuture.isErrornous()) {
            final List<Throwable> errors = resultFuture.getErrors();
            throw errors.get(0);
        }
    }
}