/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.job;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import javax.xml.datatype.DatatypeFactory;
import junit.framework.TestCase;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.FileHelper;
import org.easymock.EasyMock;
import org.eobjects.analyzer.beans.StringAnalyzer;
import org.eobjects.analyzer.beans.dategap.DateGapAnalyzer;
import org.eobjects.analyzer.beans.filter.NullCheckFilter;
import org.eobjects.analyzer.beans.filter.SingleWordFilter;
import org.eobjects.analyzer.beans.filter.ValidationCategory;
import org.eobjects.analyzer.beans.standardize.EmailStandardizerTransformer;
import org.eobjects.analyzer.beans.stringpattern.PatternFinderAnalyzer;
import org.eobjects.analyzer.beans.transform.ConcatenatorTransformer;
import org.eobjects.analyzer.configuration.AnalyzerBeansConfiguration;
import org.eobjects.analyzer.configuration.AnalyzerBeansConfigurationImpl;
import org.eobjects.analyzer.connection.CsvDatastore;
import org.eobjects.analyzer.connection.Datastore;
import org.eobjects.analyzer.connection.DatastoreCatalogImpl;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.MutableInputColumn;
import org.eobjects.analyzer.descriptors.Descriptors;
import org.eobjects.analyzer.descriptors.SimpleDescriptorProvider;
import org.eobjects.analyzer.job.builder.AnalysisJobBuilder;
import org.eobjects.analyzer.job.builder.AnalyzerJobBuilder;
import org.eobjects.analyzer.job.builder.FilterJobBuilder;
import org.eobjects.analyzer.job.builder.TransformerJobBuilder;
import org.eobjects.analyzer.job.jaxb.JobMetadataType;
import org.eobjects.analyzer.test.MockAnalyzer;
import org.eobjects.analyzer.test.TestHelper;
public class JaxbJobWriterTest extends TestCase {
// mock metadata factory used in this test case because we will otherwise
// have time-dependent dates in the metadata which will make it difficult to
// compare results
private JaxbJobMetadataFactory _metadataFactory;
private JaxbJobWriter _writer;
/**
 * Creates the writer under test, backed by a metadata factory that always
 * emits the same author, job version and creation date, so that the written
 * XML does not depend on when the test is run.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();
    _metadataFactory = new JaxbJobMetadataFactoryImpl() {
        @Override
        protected void buildMainSection(JobMetadataType jobMetadata, AnalysisJob analysisJob) throws Exception {
            jobMetadata.setAuthor("John Doe");
            jobMetadata.setJobVersion("2.0");
            // arguments: year, month, day, hour, minute, second, millisecond, timezone offset
            jobMetadata.setCreatedDate(DatatypeFactory.newInstance().newXMLGregorianCalendar(2010, 11, 12, 13, 48,
                    0, 0, 0));
        }
    };
    _writer = new JaxbJobWriter(new AnalyzerBeansConfigurationImpl(), _metadataFactory);
}
/**
 * Builds a job on a CSV file whose last column has a blank name, writes it
 * and checks that the blank-named column is written with a path ending in
 * the table name plus a trailing dot. The written job is then read back and
 * its source columns are compared against the expected listing.
 */
public void testColumnPathWhenColumnNameIsBlank() throws Exception {
    final CsvDatastore ds = new CsvDatastore("input", "src/test/resources/csv_with_blank_column_name.txt", null,
            ';', "UTF8");

    final SimpleDescriptorProvider descriptorProvider = new SimpleDescriptorProvider();
    descriptorProvider.addAnalyzerBeanDescriptor(Descriptors.ofAnalyzer(MockAnalyzer.class));

    final DatastoreCatalogImpl datastoreCatalog = new DatastoreCatalogImpl(ds);
    final AnalyzerBeansConfiguration conf = new AnalyzerBeansConfigurationImpl().replace(datastoreCatalog).replace(
            descriptorProvider);

    final AnalysisJob builtJob;
    try (final AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(conf)) {
        jobBuilder.setDatastore(ds);

        final Table table = jobBuilder.getDatastoreConnection().getDataContext().getDefaultSchema().getTable(0);
        assertEquals("[foo, bar, baz, ]", Arrays.toString(table.getColumnNames()));
        assertEquals(4, table.getColumnCount());

        jobBuilder.addSourceColumns(table.getColumns());

        final AnalyzerJobBuilder<MockAnalyzer> analyzer = jobBuilder.addAnalyzer(MockAnalyzer.class);
        analyzer.addInputColumns(jobBuilder.getSourceColumns());

        builtJob = jobBuilder.toAnalysisJob();
    }

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    _writer.write(builtJob, out);
    final byte[] bytes = out.toByteArray();

    // Decode explicitly as UTF-8 instead of relying on the platform default
    // charset, so the check behaves the same on every machine.
    final String str = new String(bytes, "UTF-8");
    assertTrue(str,
            str.contains("<column id=\"col_3\" path=\"csv_with_blank_column_name.txt.\" type=\"STRING\"/>"));

    final AnalysisJob readJob = new JaxbJobReader(conf).read(new ByteArrayInputStream(bytes));
    final List<InputColumn<?>> sourceColumns = readJob.getSourceColumns();
    assertEquals("[MetaModelInputColumn[resources.csv_with_blank_column_name.txt.foo], "
            + "MetaModelInputColumn[resources.csv_with_blank_column_name.txt.bar], "
            + "MetaModelInputColumn[resources.csv_with_blank_column_name.txt.baz], "
            + "MetaModelInputColumn[resources.csv_with_blank_column_name.txt.]]", sourceColumns.toString());
}
/**
 * Reads the example job that uses an "any" component requirement, checks
 * the requirement of its first analyzer and compares the re-written job
 * with the benchmark file.
 */
public void testReadAndWriteAnyComponentRequirementJob() throws Exception {
    AnalysisJob job = readExampleRequirementJob("src/test/resources/example-job-any-component-requirement.xml");

    ComponentRequirement requirement = job.getAnalyzerJobs().get(0).getComponentRequirement();
    assertEquals("AnyComponentRequirement[]", requirement.toString());

    assertMatchesBenchmark(job, "JaxbJobWriterTest-testReadAndWriteAnyComponentRequirementJob.xml");
}

/**
 * Reads the example job that uses a compound component requirement, checks
 * the requirement of its first analyzer and compares the re-written job
 * with the benchmark file.
 */
public void testReadAndWriteCompoundComponentRequirementJob() throws Exception {
    AnalysisJob job = readExampleRequirementJob("src/test/resources/example-job-compound-component-requirement.xml");

    ComponentRequirement requirement = job.getAnalyzerJobs().get(0).getComponentRequirement();
    assertEquals("FilterOutcome[category=NOT_NULL] OR FilterOutcome[category=NULL]", requirement.toString());

    assertMatchesBenchmark(job, "JaxbJobWriterTest-testReadAndWriteCompoundComponentRequirementJob.xml");
}

/**
 * Reads a job file against the sample database datastore "my database",
 * with the null check filter, concatenator transformer and string analyzer
 * descriptors registered.
 *
 * @param jobFilePath
 *            path of the job XML file to read
 * @return the analysis job built from the file
 */
private AnalysisJob readExampleRequirementJob(String jobFilePath) throws Exception {
    Datastore ds = TestHelper.createSampleDatabaseDatastore("my database");

    SimpleDescriptorProvider descriptorProvider = new SimpleDescriptorProvider();
    descriptorProvider.addFilterBeanDescriptor(Descriptors.ofFilter(NullCheckFilter.class));
    descriptorProvider.addTransformerBeanDescriptor(Descriptors.ofTransformer(ConcatenatorTransformer.class));
    descriptorProvider.addAnalyzerBeanDescriptor(Descriptors.ofAnalyzer(StringAnalyzer.class));

    AnalyzerBeansConfiguration conf = new AnalyzerBeansConfigurationImpl().replace(new DatastoreCatalogImpl(ds))
            .replace(descriptorProvider);

    JaxbJobReader reader = new JaxbJobReader(conf);
    try (AnalysisJobBuilder jobBuilder = reader.create(new File(jobFilePath))) {
        return jobBuilder.toAnalysisJob();
    }
}
/**
 * Configures a date gap analyzer on the ORDERDATE and SHIPPEDDATE columns
 * of the sample database, writes the job and verifies the produced XML line
 * by line (double quotes are replaced with underscores to keep the expected
 * strings readable).
 */
public void testNullColumnProperty() throws Exception {
    Datastore ds = TestHelper.createSampleDatabaseDatastore("db");
    AnalyzerBeansConfiguration conf = new AnalyzerBeansConfigurationImpl().replace(new DatastoreCatalogImpl(ds));

    try (AnalysisJobBuilder ajb = new AnalysisJobBuilder(conf)) {
        ajb.setDatastore(ds);

        DateGapAnalyzer dga = ajb.addAnalyzer(DateGapAnalyzer.class).getComponentInstance();

        // Add the columns by path through the job builder instead of opening
        // two extra datastore connections that were never closed.
        ajb.addSourceColumns("PUBLIC.ORDERS.ORDERDATE", "PUBLIC.ORDERS.SHIPPEDDATE");

        // Both source columns are TIMESTAMP columns (see the expected XML
        // below), so the casts to InputColumn<Date> are safe.
        @SuppressWarnings("unchecked")
        InputColumn<Date> fromColumn = (InputColumn<Date>) ajb.getSourceColumnByName("ORDERDATE");
        @SuppressWarnings("unchecked")
        InputColumn<Date> toColumn = (InputColumn<Date>) ajb.getSourceColumnByName("SHIPPEDDATE");

        dga.setFromColumn(fromColumn);
        dga.setToColumn(toColumn);
        dga.setSingleDateOverlaps(true);

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        _writer.write(ajb.toAnalysisJob(), baos);

        // Decode explicitly as UTF-8 (the encoding declared in the XML
        // header) instead of the platform default charset.
        String str = baos.toString("UTF-8");
        str = str.replace('"', '_');

        String[] lines = str.split("\n");
        assertEquals(27, lines.length);

        assertEquals("<?xml version=_1.0_ encoding=_UTF-8_ standalone=_yes_?>", lines[0]);
        assertEquals("<job xmlns=_http://eobjects.org/analyzerbeans/job/1.0_>", lines[1]);
        assertEquals(" <job-metadata>", lines[2]);
        assertEquals(" <job-version>2.0</job-version>", lines[3]);
        assertEquals(" <author>John Doe</author>", lines[4]);
        assertEquals(" <created-date>2010-11-12Z</created-date>", lines[5]);
        assertEquals(" </job-metadata>", lines[6]);
        assertEquals(" <source>", lines[7]);
        assertEquals(" <data-context ref=_db_/>", lines[8]);
        assertEquals(" <columns>", lines[9]);
        assertEquals(" <column id=_col_0_ path=_ORDERS.ORDERDATE_ type=_TIMESTAMP_/>", lines[10]);
        assertEquals(" <column id=_col_1_ path=_ORDERS.SHIPPEDDATE_ type=_TIMESTAMP_/>", lines[11]);
        assertEquals(" </columns>", lines[12]);
        assertEquals(" </source>", lines[13]);
        assertEquals(" <transformation/>", lines[14]);
        assertEquals(" <analysis>", lines[15]);
        assertEquals(" <analyzer>", lines[16]);
        assertEquals(" <descriptor ref=_Date gap analyzer_/>", lines[17]);
        assertEquals(" <properties>", lines[18]);
        assertEquals(
                " <property name=_Count intersecting from and to dates as overlaps_ value=_true_/>",
                lines[19]);
        assertEquals(" <property name=_Fault tolerant switch from/to dates_ value=_true_/>",
                lines[20]);
        assertEquals(" </properties>", lines[21]);
        assertEquals(" <input ref=_col_0_ name=_From column_/>", lines[22]);
        assertEquals(" <input ref=_col_1_ name=_To column_/>", lines[23]);
        assertEquals(" </analyzer>", lines[24]);
        assertEquals(" </analysis>", lines[25]);
        assertEquals("</job>", lines[26]);
    }
}
/**
 * Writes a mocked job that has a datastore named "myds" but no columns,
 * transformers, filters or analyzers, and verifies the resulting XML
 * envelope line by line (double quotes are replaced with underscores to
 * keep the expected strings readable).
 */
public void testEmptyJobEnvelope() throws Exception {
    AnalysisJob job = EasyMock.createMock(AnalysisJob.class);
    EasyMock.expect(job.getMetadata()).andReturn(AnalysisJobMetadata.EMPTY_METADATA).anyTimes();

    Datastore ds = EasyMock.createMock(Datastore.class);

    EasyMock.expect(job.getDatastore()).andReturn(ds);

    EasyMock.expect(ds.getName()).andReturn("myds");

    EasyMock.expect(job.getSourceColumns()).andReturn(new ArrayList<InputColumn<?>>());
    EasyMock.expect(job.getTransformerJobs()).andReturn(new ArrayList<TransformerJob>());
    EasyMock.expect(job.getFilterJobs()).andReturn(new ArrayList<FilterJob>());
    EasyMock.expect(job.getAnalyzerJobs()).andReturn(new ArrayList<AnalyzerJob>());

    EasyMock.replay(job, ds);

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    _writer.write(job, baos);

    // Decode explicitly as UTF-8 (the encoding declared in the XML header)
    // instead of the platform default charset.
    String str = baos.toString("UTF-8");
    str = str.replace('"', '_');

    String[] lines = str.split("\n");
    assertEquals(14, lines.length);

    assertEquals("<?xml version=_1.0_ encoding=_UTF-8_ standalone=_yes_?>", lines[0]);
    assertEquals("<job xmlns=_http://eobjects.org/analyzerbeans/job/1.0_>", lines[1]);
    assertEquals(" <job-metadata>", lines[2]);
    assertEquals(" <job-version>2.0</job-version>", lines[3]);
    assertEquals(" <author>John Doe</author>", lines[4]);
    assertEquals(" <created-date>2010-11-12Z</created-date>", lines[5]);
    assertEquals(" </job-metadata>", lines[6]);
    assertEquals(" <source>", lines[7]);
    assertEquals(" <data-context ref=_myds_/>", lines[8]);
    assertEquals(" <columns/>", lines[9]);
    assertEquals(" </source>", lines[10]);
    assertEquals(" <transformation/>", lines[11]);
    assertEquals(" <analysis/>", lines[12]);
    assertEquals("</job>", lines[13]);

    EasyMock.verify(job, ds);
}
/**
 * Grows a job step by step (analyzer, transformer, filters, pattern
 * finders, component names) and after every step compares the written XML
 * with the corresponding benchmark file.
 */
public void testCompareWithBenchmarkFiles() throws Exception {
    Datastore sampleDb = TestHelper.createSampleDatabaseDatastore("my db");
    AnalyzerBeansConfiguration configuration = new AnalyzerBeansConfigurationImpl()
            .replace(new DatastoreCatalogImpl(sampleDb));

    try (AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration)) {
        jobBuilder.setDatastore("my db");
        jobBuilder.addSourceColumns("PUBLIC.EMPLOYEES.FIRSTNAME", "PUBLIC.EMPLOYEES.LASTNAME",
                "PUBLIC.EMPLOYEES.EMAIL");

        InputColumn<?> firstName = jobBuilder.getSourceColumnByName("FIRSTNAME");
        InputColumn<?> lastName = jobBuilder.getSourceColumnByName("LASTNAME");
        InputColumn<?> email = jobBuilder.getSourceColumnByName("EMAIL");

        // step 1: a string analyzer on the two name columns
        AnalyzerJobBuilder<StringAnalyzer> stringAnalyzer = jobBuilder.addAnalyzer(StringAnalyzer.class);
        stringAnalyzer.addInputColumns(firstName, lastName);
        assertMatchesBenchmark(jobBuilder.toAnalysisJob(), "JaxbJobWriterTest-file1.xml");

        // step 2: an email standardizer whose output feeds the string analyzer
        TransformerJobBuilder<EmailStandardizerTransformer> emailStandardizer = jobBuilder
                .addTransformer(EmailStandardizerTransformer.class);
        emailStandardizer.addInputColumn(email);
        stringAnalyzer.addInputColumns(emailStandardizer.getOutputColumns());
        assertMatchesBenchmark(jobBuilder.toAnalysisJob(), "JaxbJobWriterTest-file2.xml");

        // step 3: a null check filter that the string analyzer depends on
        FilterJobBuilder<NullCheckFilter, NullCheckFilter.NullCheckCategory> nullCheck = jobBuilder
                .addFilter(NullCheckFilter.class);
        nullCheck.addInputColumn(firstName);
        stringAnalyzer.setRequirement(nullCheck, "NOT_NULL");
        assertMatchesBenchmark(jobBuilder.toAnalysisJob(), "JaxbJobWriterTest-file3.xml");

        // step 4: a first pattern finder on the first name and the username
        AnalyzerJobBuilder<PatternFinderAnalyzer> firstPatternFinder = jobBuilder
                .addAnalyzer(PatternFinderAnalyzer.class);
        makeCrossPlatformCompatible(firstPatternFinder);
        MutableInputColumn<?> username = emailStandardizer.getOutputColumnByName("Username");
        firstPatternFinder.addInputColumn(firstName)
                .addInputColumn(username)
                .getComponentInstance()
                .setEnableMixedTokens(false);
        assertMatchesBenchmark(jobBuilder.toAnalysisJob(), "JaxbJobWriterTest-file4.xml");

        // step 5: a single word filter and a second pattern finder requiring its INVALID outcome
        FilterJobBuilder<SingleWordFilter, ValidationCategory> singleWord = jobBuilder
                .addFilter(SingleWordFilter.class);
        singleWord.addInputColumn(username);
        AnalyzerJobBuilder<PatternFinderAnalyzer> secondPatternFinder = jobBuilder
                .addAnalyzer(PatternFinderAnalyzer.class);
        secondPatternFinder.addInputColumn(emailStandardizer.getOutputColumns().get(1));
        secondPatternFinder.setRequirement(singleWord, ValidationCategory.INVALID);
        makeCrossPlatformCompatible(secondPatternFinder);
        assertMatchesBenchmark(jobBuilder.toAnalysisJob(), "JaxbJobWriterTest-file5.xml");

        // step 6: explicit names on the transformer, filters and pattern finders
        emailStandardizer.setName("trans1");
        nullCheck.setName("fjb1");
        singleWord.setName("fjb2");
        firstPatternFinder.setName("pf 1");
        secondPatternFinder.setName("pf 2");
        assertMatchesBenchmark(jobBuilder.toAnalysisJob(), "JaxbJobWriterTest-file6.xml");
    }
}
/**
 * Pins the decimal separator, minus sign and thousands separator of a
 * pattern finder to fixed characters, so that the written job does not vary
 * with the locale of the machine running the test.
 *
 * @param pfb
 *            the job builder whose pattern finder instance is adjusted
 */
private void makeCrossPlatformCompatible(AnalyzerJobBuilder<PatternFinderAnalyzer> pfb) {
    final PatternFinderAnalyzer patternFinder = pfb.getComponentInstance();
    patternFinder.setDecimalSeparator('.');
    patternFinder.setMinusSign('-');
    patternFinder.setThousandsSeparator(',');
}
/**
 * Writes the job to <code>target/test-output/&lt;filename&gt;</code> and
 * asserts that the written content equals the benchmark file of the same
 * name in <code>src/test/resources/</code>.
 *
 * @param analysisJob
 *            the job to write
 * @param filename
 *            name of both the output file and the benchmark file
 * @throws Exception
 *             if writing or reading either file fails
 */
private void assertMatchesBenchmark(AnalysisJob analysisJob, String filename) throws Exception {
    final File outputFolder = new File("target/test-output/");
    if (!outputFolder.exists()) {
        assertTrue("Could not create output folder!", outputFolder.mkdirs());
    }

    final File benchmarkFolder = new File("src/test/resources/");

    final File outputFile = new File(outputFolder, filename);
    try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outputFile))) {
        _writer.write(analysisJob, bos);
        bos.flush();
    }
    final String output = FileHelper.readFileAsString(outputFile);

    final File benchmarkFile = new File(benchmarkFolder, filename);
    if (!benchmarkFile.exists()) {
        // Fail explicitly rather than comparing the message with the output;
        // the generated content is included so a new benchmark file can be
        // created from it.
        fail("No benchmark file '" + filename + "' exists! Generated output was:\n" + output);
    }

    final String benchmark = FileHelper.readFileAsString(benchmarkFile);
    assertEquals(benchmark, output);
}
}