/** * AnalyzerBeans * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.eobjects.analyzer.job; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Collections; import java.util.List; import junit.framework.TestCase; import org.eobjects.analyzer.beans.StringAnalyzerResult; import org.eobjects.analyzer.beans.convert.ConvertToDateTransformer; import org.eobjects.analyzer.beans.dategap.DateGapAnalyzerResult; import org.eobjects.analyzer.beans.dategap.DateGapTextRenderer; import org.eobjects.analyzer.beans.transform.DateMaskMatcherTransformer; import org.eobjects.analyzer.beans.valuedist.ValueDistributionAnalyzerResult; import org.eobjects.analyzer.configuration.AnalyzerBeansConfiguration; import org.eobjects.analyzer.configuration.AnalyzerBeansConfigurationImpl; import org.eobjects.analyzer.configuration.SourceColumnMapping; import org.eobjects.analyzer.connection.CsvDatastore; import org.eobjects.analyzer.connection.Datastore; import org.eobjects.analyzer.connection.DatastoreCatalog; import org.eobjects.analyzer.connection.DatastoreCatalogImpl; import org.eobjects.analyzer.connection.DatastoreConnection; import org.eobjects.analyzer.data.InputColumn; import org.eobjects.analyzer.data.MetaModelInputColumn; import org.eobjects.analyzer.descriptors.ClasspathScanDescriptorProvider; import org.eobjects.analyzer.descriptors.DescriptorProvider; import org.eobjects.analyzer.job.builder.AnalysisJobBuilder; import org.eobjects.analyzer.job.builder.AnalyzerJobBuilder; import org.eobjects.analyzer.job.builder.TransformerJobBuilder; import org.eobjects.analyzer.job.runner.AnalysisResultFuture; import org.eobjects.analyzer.job.runner.AnalysisRunner; import org.eobjects.analyzer.job.runner.AnalysisRunnerImpl; import org.eobjects.analyzer.result.AnalyzerResult; import org.eobjects.analyzer.result.CrosstabResult; import org.eobjects.analyzer.result.renderer.CrosstabTextRenderer; import org.eobjects.analyzer.test.TestHelper; import org.eobjects.analyzer.util.SchemaNavigator; import org.apache.metamodel.util.ToStringComparator; public class JaxbJobReaderTest extends TestCase { private final DescriptorProvider descriptorProvider = new ClasspathScanDescriptorProvider().scanPackage( "org.eobjects.analyzer.beans", true).scanPackage("org.eobjects.analyzer.result", true); private final DatastoreCatalog datastoreCatalog = new DatastoreCatalogImpl( TestHelper.createSampleDatabaseDatastore("my database")); private final AnalyzerBeansConfiguration conf = new AnalyzerBeansConfigurationImpl().replace(datastoreCatalog) .replace(descriptorProvider); public void testReadComponentNames() throws Exception { JobReader<InputStream> reader = new JaxbJobReader(conf); AnalysisJob job = reader .read(new FileInputStream(new File("src/test/resources/example-job-component-names.xml"))); assertEquals(1, job.getAnalyzerJobs().size()); assertEquals("analyzer_1", job.getAnalyzerJobs().iterator().next().getName()); assertEquals(2, job.getFilterJobs().size()); assertEquals("single_word_1", job.getFilterJobs().iterator().next().getName()); assertEquals(1, job.getTransformerJobs().size()); assertEquals("email_std_1", job.getTransformerJobs().iterator().next().getName()); } public void testReadMetadataFull() throws Exception { JobReader<InputStream> reader = new JaxbJobReader(conf); AnalysisJobMetadata metadata = reader.readMetadata(new FileInputStream(new File( "src/test/resources/example-job-metadata.xml"))); assertEquals("Kasper Sørensen", metadata.getAuthor()); assertEquals("my database", metadata.getDatastoreName()); assertEquals("Job metadata", metadata.getJobName()); assertEquals("An example job with complete metadata", metadata.getJobDescription()); assertEquals("1.1", metadata.getJobVersion()); assertEquals("[PUBLIC.PERSONS.FIRSTNAME, PUBLIC.PERSONS.LASTNAME]", metadata.getSourceColumnPaths().toString()); assertEquals("propertyValue", metadata.getProperties().get("propertyName")) ; assertNotNull(metadata.getCreatedDate()); assertNotNull(metadata.getUpdatedDate()); } public void testReadMetadataNone() throws Exception { JobReader<InputStream> reader = new JaxbJobReader(new AnalyzerBeansConfigurationImpl()); AnalysisJobMetadata metadata = reader.readMetadata(new FileInputStream(new File( "src/test/resources/example-job-valid.xml"))); assertNull(metadata.getAuthor()); assertNull(metadata.getJobName()); assertNull(metadata.getJobDescription()); assertNull(metadata.getJobVersion()); assertTrue(metadata.getProperties().isEmpty()); assertEquals("my database", metadata.getDatastoreName()); assertEquals("[PUBLIC.EMPLOYEES.FIRSTNAME, PUBLIC.EMPLOYEES.LASTNAME, PUBLIC.EMPLOYEES.EMAIL]", metadata .getSourceColumnPaths().toString()); assertNull(metadata.getCreatedDate()); assertNull(metadata.getUpdatedDate()); } public void testSimpleFilter() throws Exception { JaxbJobReader reader = new JaxbJobReader(conf); AnalysisJobBuilder jobBuilder = reader.create(new File("src/test/resources/example-job-simple-filter.xml")); assertEquals(1, jobBuilder.getFilterJobBuilders().size()); assertEquals(3, jobBuilder.getAnalyzerJobBuilders().size()); AnalysisJob analysisJob = jobBuilder.toAnalysisJob(); AnalysisResultFuture resultFuture = new AnalysisRunnerImpl(conf).run(analysisJob); List<AnalyzerResult> results = resultFuture.getResults(); assertEquals(3, results.size()); // sort it to make sure test is deterministic Collections.sort(results, ToStringComparator.getComparator()); // the first result is for the unfiltered String analyzer CrosstabResult res3 = (CrosstabResult) results.get(0); assertEquals(1, res3.getCrosstab().where("Column", "FIRSTNAME").where("Measures", "Min words").get()); assertEquals(2, res3.getCrosstab().where("Column", "FIRSTNAME").where("Measures", "Max words").get()); // this result represents the single manager (one unique and no repeated // values) ValueDistributionAnalyzerResult res1 = (ValueDistributionAnalyzerResult) results.get(1); assertEquals("[[<unique>->1]]", res1.getValueCounts().toString()); assertEquals(1, res1.getUniqueCount().intValue()); // this result represents all the employees: Two repeated values and 18 // unique ValueDistributionAnalyzerResult res2 = (ValueDistributionAnalyzerResult) results.get(2); assertEquals(18, res2.getUniqueCount().intValue()); assertEquals("[[<unique>->18], [Gerard->2], [Leslie->2]]", res2.getValueCounts().toString()); } public void testNamedInputs() throws Exception { JaxbJobReader factory = new JaxbJobReader(conf); AnalysisJobBuilder jobBuilder = factory.create(new File("src/test/resources/example-job-named-inputs.xml")); assertEquals(true, jobBuilder.isConfigured()); assertEquals(2, jobBuilder.getTransformerJobBuilders().size()); List<AnalyzerJobBuilder<?>> analyzerJobBuilders = jobBuilder.getAnalyzerJobBuilders(); assertEquals(1, analyzerJobBuilders.size()); AnalyzerJobBuilder<?> analyzerJobBuilder = analyzerJobBuilders.get(0); AnalyzerJob analyzerJob = analyzerJobBuilder.toAnalyzerJob(); BeanConfiguration configuration = analyzerJob.getConfiguration(); InputColumn<?> col1 = (InputColumn<?>) configuration.getProperty(analyzerJob.getDescriptor() .getConfiguredProperty("From column")); assertEquals("date 1", col1.getName()); InputColumn<?> col2 = (InputColumn<?>) configuration.getProperty(analyzerJob.getDescriptor() .getConfiguredProperty("To column")); assertEquals("date 2", col2.getName()); AnalysisJob analysisJob = jobBuilder.toAnalysisJob(); AnalysisResultFuture resultFuture = new AnalysisRunnerImpl(conf).run(analysisJob); List<AnalyzerResult> results = resultFuture.getResults(); assertEquals(1, results.size()); DateGapAnalyzerResult result = (DateGapAnalyzerResult) results.get(0); String[] resultLines = new DateGapTextRenderer().render(result).split("\n"); assertEquals(58, resultLines.length); assertEquals(" - time gap: 2003-01-18 to 2003-01-29", resultLines[0]); assertEquals(" - time gap: 2003-02-09 to 2003-02-11", resultLines[1]); assertEquals(" - time gap: 2003-05-16 to 2003-05-20", resultLines[2]); assertEquals(" - time gap: 2003-07-23 to 2003-07-24", resultLines[3]); assertEquals(" - time gap: 2003-08-21 to 2003-08-25", resultLines[4]); assertEquals(" - time gap: 2003-09-02 to 2003-09-03", resultLines[5]); assertEquals(" - time gap: 2003-11-03 to 2003-11-04", resultLines[6]); assertEquals(" - time gap: 2003-12-17 to 2004-01-02", resultLines[7]); assertEquals(" - time gap: 2004-05-24 to 2004-05-26", resultLines[8]); assertEquals(" - time gap: 2004-09-22 to 2004-09-27", resultLines[9]); assertEquals(" - time gap: 2004-12-24 to 2005-01-05", resultLines[10]); assertEquals(" - time gap: 2005-05-28 to 2005-05-29", resultLines[11]); assertEquals(" - time overlap: 2003-01-09 to 2003-01-18", resultLines[12]); assertEquals(" - time overlap: 2003-01-31 to 2003-02-07", resultLines[13]); assertEquals(" - time overlap: 2005-05-29 to 2005-06-08", resultLines[57]); } public void testInvalidRead() throws Exception { JaxbJobReader factory = new JaxbJobReader(new AnalyzerBeansConfigurationImpl()); try { factory.create(new File("src/test/resources/example-job-invalid.xml")); fail("Exception expected"); } catch (IllegalArgumentException e) { String message = e.getMessage(); assertTrue(message, message.startsWith("javax.xml.bind.UnmarshalException: unexpected element " + "(uri:\"http://eobjects.org/analyzerbeans/job/1.0\", local:\"datacontext\").")); } } public void testMissingDatastore() throws Exception { JaxbJobReader factory = new JaxbJobReader(new AnalyzerBeansConfigurationImpl()); try { factory.create(new File("src/test/resources/example-job-valid.xml")); fail("Exception expected"); } catch (NoSuchDatastoreException e) { assertEquals("No such datastore: my database", e.getMessage()); } } public void testMissingTransformerDescriptor() throws Exception { JaxbJobReader factory = new JaxbJobReader(conf); try { factory.create(new File("src/test/resources/example-job-missing-descriptor.xml")); fail("Exception expected"); } catch (NoSuchComponentException e) { assertEquals("No such Transformer descriptor: tokenizerDescriptor", e.getMessage()); } } public void testValidJob() throws Exception { JaxbJobReader factory = new JaxbJobReader(conf); AnalysisJobBuilder builder = factory.create(new File("src/test/resources/example-job-valid.xml")); assertTrue(builder.isConfigured()); List<MetaModelInputColumn> sourceColumns = builder.getSourceColumns(); assertEquals(3, sourceColumns.size()); assertEquals("MetaModelInputColumn[PUBLIC.EMPLOYEES.FIRSTNAME]", sourceColumns.get(0).toString()); assertEquals("MetaModelInputColumn[PUBLIC.EMPLOYEES.LASTNAME]", sourceColumns.get(1).toString()); assertEquals("MetaModelInputColumn[PUBLIC.EMPLOYEES.EMAIL]", sourceColumns.get(2).toString()); assertEquals(1, builder.getTransformerJobBuilders().size()); assertEquals( "[TransformedInputColumn[id=trans-0001-0002,name=username], TransformedInputColumn[id=trans-0001-0003,name=domain]]", builder.getTransformerJobBuilders().get(0).getOutputColumns().toString()); assertEquals("[TransformedInputColumn[id=trans-0001-0002,name=username], " + "TransformedInputColumn[id=trans-0001-0003,name=domain], " + "MetaModelInputColumn[PUBLIC.EMPLOYEES.FIRSTNAME], " + "MetaModelInputColumn[PUBLIC.EMPLOYEES.LASTNAME]]", Arrays.toString(builder.getAnalyzerJobBuilders().get(0).toAnalyzerJob().getInput())); List<AnalyzerResult> results = new AnalysisRunnerImpl(conf).run(builder.toAnalysisJob()).getResults(); assertEquals(1, results.size()); CrosstabResult crosstabResult = (CrosstabResult) results.get(0); String[] resultLines = crosstabResult.toString(-1).split("\n"); assertEquals(85, resultLines.length); assertEquals("Crosstab:", resultLines[0]); assertEquals("FIRSTNAME,Avg chars: 5.391304347826087", resultLines[1]); assertEquals("FIRSTNAME,Avg white spaces: 0.043478260869565216", resultLines[2]); assertEquals("FIRSTNAME,Blank count: 0", resultLines[3]); assertEquals("FIRSTNAME,Diacritic chars: 0", resultLines[4]); assertEquals("FIRSTNAME,Digit chars: 0", resultLines[5]); } public void testUsingSourceColumnMapping() throws Throwable { Datastore datastore = TestHelper.createSampleDatabaseDatastore("another datastore name"); JobReader<InputStream> reader = new JaxbJobReader(conf); AnalysisJobMetadata metadata = reader.readMetadata(new FileInputStream(new File( "src/test/resources/example-job-valid.xml"))); SourceColumnMapping sourceColumnMapping = new SourceColumnMapping(metadata.getSourceColumnPaths()); assertFalse(sourceColumnMapping.isSatisfied()); assertEquals("[PUBLIC.EMPLOYEES.EMAIL, PUBLIC.EMPLOYEES.FIRSTNAME, PUBLIC.EMPLOYEES.LASTNAME]", sourceColumnMapping.getPaths().toString()); sourceColumnMapping.setDatastore(datastore); DatastoreConnection con = datastore.openConnection(); SchemaNavigator sn = con.getSchemaNavigator(); sourceColumnMapping.setColumn("PUBLIC.EMPLOYEES.EMAIL", sn.convertToColumn("PUBLIC.CUSTOMERS.PHONE")); sourceColumnMapping.setColumn("PUBLIC.EMPLOYEES.FIRSTNAME", sn.convertToColumn("PUBLIC.CUSTOMERS.CONTACTFIRSTNAME")); sourceColumnMapping.setColumn("PUBLIC.EMPLOYEES.LASTNAME", sn.convertToColumn("PUBLIC.CUSTOMERS.CONTACTLASTNAME")); assertEquals("[]", sourceColumnMapping.getUnmappedPaths().toString()); assertTrue(sourceColumnMapping.isSatisfied()); AnalysisJob job = reader.read(new FileInputStream(new File("src/test/resources/example-job-valid.xml")), sourceColumnMapping); assertEquals("another datastore name", job.getDatastore().getName()); assertEquals("[MetaModelInputColumn[PUBLIC.CUSTOMERS.CONTACTFIRSTNAME], " + "MetaModelInputColumn[PUBLIC.CUSTOMERS.CONTACTLASTNAME], " + "MetaModelInputColumn[PUBLIC.CUSTOMERS.PHONE]]", job.getSourceColumns().toString()); AnalysisRunner runner = new AnalysisRunnerImpl(conf); AnalysisResultFuture resultFuture = runner.run(job); if (!resultFuture.isSuccessful()) { throw resultFuture.getErrors().get(0); } AnalyzerResult res = resultFuture.getResults().get(0); assertTrue(res instanceof StringAnalyzerResult); String[] resultLines = new CrosstabTextRenderer().render((CrosstabResult) res).split("\n"); assertEquals( " username domain CONTACTFIRSTNAME CONTACTLASTNAME ", resultLines[0]); assertEquals( "Row count 122 122 122 122 ", resultLines[1]); assertEquals( "Null count 122 122 0 0 ", resultLines[2]); } public void testReadVariables() throws Exception { CsvDatastore datastore = new CsvDatastore("date-datastore", "src/test/resources/example-dates.csv"); AnalyzerBeansConfiguration configuration = new AnalyzerBeansConfigurationImpl().replace( new DatastoreCatalogImpl(datastore)).replace(descriptorProvider); JaxbJobReader reader = new JaxbJobReader(configuration); File file = new File("src/test/resources/example-job-variables.xml"); assertTrue(file.exists()); AnalysisJobBuilder ajb = reader.create(file); List<TransformerJobBuilder<?>> tjbs = ajb.getTransformerJobBuilders(); DateMaskMatcherTransformer dateMaskMatcherTransformer = (DateMaskMatcherTransformer) tjbs.get(0) .getComponentInstance(); assertEquals("[yyyy-MM-dd]", Arrays.toString(dateMaskMatcherTransformer.getDateMasks())); ConvertToDateTransformer convertToDateTransformer = (ConvertToDateTransformer) tjbs.get(1) .getComponentInstance(); assertEquals("[yyyy-MM-dd]", Arrays.toString(convertToDateTransformer.getDateMasks())); assertEquals("2000-01-01", new SimpleDateFormat("yyyy-MM-dd").format(convertToDateTransformer.getNullReplacement())); } }