/** * AnalyzerBeans * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.eobjects.analyzer.job.runner; import java.util.ArrayList; import java.util.List; import junit.framework.TestCase; import org.eobjects.analyzer.beans.StringAnalyzer; import org.eobjects.analyzer.beans.api.Analyzer; import org.eobjects.analyzer.beans.api.Filter; import org.eobjects.analyzer.beans.api.Transformer; import org.eobjects.analyzer.beans.filter.MaxRowsFilter; import org.eobjects.analyzer.beans.filter.NullCheckFilter; import org.eobjects.analyzer.beans.filter.NullCheckFilter.NullCheckCategory; import org.eobjects.analyzer.beans.standardize.EmailStandardizerTransformer; import org.eobjects.analyzer.beans.stringpattern.PatternFinderAnalyzer; import org.eobjects.analyzer.configuration.AnalyzerBeansConfiguration; import org.eobjects.analyzer.configuration.AnalyzerBeansConfigurationImpl; import org.eobjects.analyzer.connection.CsvDatastore; import org.eobjects.analyzer.connection.Datastore; import org.eobjects.analyzer.connection.DatastoreCatalogImpl; import org.eobjects.analyzer.connection.DatastoreConnection; import org.eobjects.analyzer.data.InputColumn; import org.eobjects.analyzer.data.MutableInputColumn; import org.eobjects.analyzer.descriptors.AnalyzerBeanDescriptor; import org.eobjects.analyzer.descriptors.FilterBeanDescriptor; import org.eobjects.analyzer.descriptors.TransformerBeanDescriptor; import org.eobjects.analyzer.job.AnalyzerJob; import org.eobjects.analyzer.job.FilterJob; import org.eobjects.analyzer.job.TransformerJob; import org.eobjects.analyzer.job.builder.AnalysisJobBuilder; import org.eobjects.analyzer.job.builder.AnalyzerJobBuilder; import org.eobjects.analyzer.job.builder.FilterJobBuilder; import org.eobjects.analyzer.job.builder.TransformerJobBuilder; import org.eobjects.analyzer.lifecycle.LifeCycleHelper; import org.eobjects.analyzer.test.TestHelper; import org.eobjects.analyzer.util.SourceColumnFinder; import org.apache.metamodel.query.Query; import org.apache.metamodel.schema.Column; public class RowProcessingQueryOptimizerTest extends TestCase { private final LifeCycleHelper lifeCycleHelper = new LifeCycleHelper(null, null, true); private Datastore datastore; private AnalyzerBeansConfiguration conf; private AnalysisJobBuilder ajb; private FilterJobBuilder<MaxRowsFilter, MaxRowsFilter.Category> maxRowsBuilder; private AnalyzerJobBuilder<StringAnalyzer> stringAnalyzerBuilder; private DatastoreConnection con; private Column lastnameColumn; private InputColumn<?> lastNameInputColumn; private ArrayList<RowProcessingConsumer> consumers; private Query baseQuery; private SourceColumnFinder sourceColumnFinder; @Override protected void setUp() throws Exception { super.setUp(); // set up a common fixture with a simple Max rows filter and a String // analyzer on the LASTNAME // column datastore = TestHelper.createSampleDatabaseDatastore("mydb"); conf = new AnalyzerBeansConfigurationImpl().replace(new DatastoreCatalogImpl(datastore)); ajb = new AnalysisJobBuilder(conf); ajb.setDatastore(datastore); maxRowsBuilder = ajb.addFilter(MaxRowsFilter.class); stringAnalyzerBuilder = ajb.addAnalyzer(StringAnalyzer.class); stringAnalyzerBuilder.setRequirement(maxRowsBuilder, MaxRowsFilter.Category.VALID); con = conf.getDatastoreCatalog().getDatastore("mydb").openConnection(); lastnameColumn = con.getSchemaNavigator().convertToColumn("EMPLOYEES.LASTNAME"); ajb.addSourceColumn(lastnameColumn); lastNameInputColumn = ajb.getSourceColumnByName("lastname"); stringAnalyzerBuilder.addInputColumn(lastNameInputColumn); sourceColumnFinder = new SourceColumnFinder(); consumers = new ArrayList<RowProcessingConsumer>(); consumers.add(createConsumer(maxRowsBuilder)); consumers.add(createConsumer(stringAnalyzerBuilder)); baseQuery = con.getDataContext().query().from("EMPLOYEES").select("LASTNAME").toQuery(); } @Override protected void tearDown() throws Exception { super.tearDown(); con.close(); } public void testSimpleOptimization() throws Exception { RowProcessingQueryOptimizer optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertTrue(optimizer.isOptimizable()); Query optimizedQuery = optimizer.getOptimizedQuery(); Integer maxRows = optimizedQuery.getMaxRows(); assertNotNull("No max rows specified!", maxRows); assertEquals(1000, maxRows.intValue()); } public void testAlwaysOptimizableFilter() throws Exception { Datastore datastore = new CsvDatastore("foo", "src/test/resources/projects.csv"); RowProcessingQueryOptimizer optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertTrue(optimizer.isOptimizable()); FilterJobBuilder<?, ?> fjb = ajb.addFilter(NullCheckFilter.class).addInputColumn(lastNameInputColumn); maxRowsBuilder.setRequirement(fjb, NullCheckCategory.NOT_NULL); consumers.add(0, createConsumer(fjb)); optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertFalse(optimizer.isOptimizable()); } public void testOptimizedChainedTransformer() throws Exception { TransformerJobBuilder<EmailStandardizerTransformer> emailStdBuilder = ajb .addTransformer(EmailStandardizerTransformer.class); Column emailColumn = con.getSchemaNavigator().convertToColumn("EMPLOYEES.EMAIL"); ajb.addSourceColumn(emailColumn); InputColumn<?> emailInputColumn = ajb.getSourceColumnByName("email"); emailStdBuilder.addInputColumn(emailInputColumn); // reconfigure the string analyzer to depend on transformed columns stringAnalyzerBuilder.clearInputColumns(); List<MutableInputColumn<?>> outputColumns = emailStdBuilder.getOutputColumns(); stringAnalyzerBuilder.addInputColumns(outputColumns); // remove the string analyzer and add the transformer in between consumers.remove(1); consumers.add(createConsumer(emailStdBuilder)); consumers.add(createConsumer(stringAnalyzerBuilder)); RowProcessingQueryOptimizer optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); // not optimizable because the transformer doesn't have the requirement assertFalse(optimizer.isOptimizable()); consumers.remove(2); consumers.remove(1); emailStdBuilder.setRequirement(maxRowsBuilder, MaxRowsFilter.Category.VALID); consumers.add(createConsumer(emailStdBuilder)); consumers.add(createConsumer(stringAnalyzerBuilder)); optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertTrue(optimizer.isOptimizable()); // even without the requirement, the string analyzer should still be // optimizable, because of it's dependency to the email standardizer stringAnalyzerBuilder.setRequirement(null); consumers.remove(2); consumers.add(createConsumer(stringAnalyzerBuilder)); optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertTrue(optimizer.isOptimizable()); } public void testDontOptimizeWhenComponentsHaveNoRequirements() throws Exception { AnalyzerJobBuilder<PatternFinderAnalyzer> patternFinderBuilder = ajb.addAnalyzer(PatternFinderAnalyzer.class); patternFinderBuilder.addInputColumn(lastNameInputColumn); consumers.add(createConsumer(patternFinderBuilder)); RowProcessingQueryOptimizer optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertFalse(optimizer.isOptimizable()); } public void testMultipleOptimizations() throws Exception { FilterJobBuilder<NullCheckFilter, NullCheckFilter.NullCheckCategory> notNullBuilder = ajb .addFilter(NullCheckFilter.class); Column emailColumn = con.getSchemaNavigator().convertToColumn("EMPLOYEES.EMAIL"); ajb.addSourceColumn(emailColumn); InputColumn<?> emailInputColumn = ajb.getSourceColumnByName("email"); notNullBuilder.addInputColumn(emailInputColumn); notNullBuilder.setRequirement(maxRowsBuilder, MaxRowsFilter.Category.VALID); stringAnalyzerBuilder.setRequirement(notNullBuilder, NullCheckCategory.NOT_NULL); consumers.remove(1); consumers.add(createConsumer(notNullBuilder)); consumers.add(createConsumer(stringAnalyzerBuilder)); RowProcessingQueryOptimizer optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertTrue(optimizer.isOptimizable()); List<RowProcessingConsumer> optimizedConsumers = optimizer.getOptimizedConsumers(); assertEquals(1, optimizedConsumers.size()); Query q = optimizer.getOptimizedQuery(); assertEquals( "SELECT \"EMPLOYEES\".\"LASTNAME\" FROM PUBLIC.\"EMPLOYEES\" WHERE \"EMPLOYEES\".\"EMAIL\" IS NOT NULL", q.toSql()); assertEquals(1000, q.getMaxRows().intValue()); } public void testMultipleOutcomesUsed() throws Exception { AnalyzerJobBuilder<PatternFinderAnalyzer> patternFinderBuilder = ajb.addAnalyzer(PatternFinderAnalyzer.class); patternFinderBuilder.addInputColumn(lastNameInputColumn); patternFinderBuilder.setRequirement(maxRowsBuilder, MaxRowsFilter.Category.INVALID); consumers.add(createConsumer(patternFinderBuilder)); RowProcessingQueryOptimizer optimizer = new RowProcessingQueryOptimizer(datastore, consumers, baseQuery); assertFalse(optimizer.isOptimizable()); } private FilterConsumer createConsumer(FilterJobBuilder<?, ?> filterJobBuilder) { FilterJob filterJob = filterJobBuilder.toFilterJob(); FilterBeanDescriptor<?, ?> descriptor = filterJob.getDescriptor(); Filter<?> filter = descriptor.newInstance(); lifeCycleHelper.assignConfiguredProperties(descriptor, filter, filterJob.getConfiguration()); FilterConsumer consumer = new FilterConsumer(filter, filterJob, filterJobBuilder.getInput(), sourceColumnFinder); return consumer; } private TransformerConsumer createConsumer(TransformerJobBuilder<?> transformerJobBuilder) { TransformerJob transformerJob = transformerJobBuilder.toTransformerJob(); TransformerBeanDescriptor<?> descriptor = transformerJob.getDescriptor(); Transformer<?> transformer = descriptor.newInstance(); lifeCycleHelper.assignConfiguredProperties(descriptor, transformer, transformerJob.getConfiguration()); TransformerConsumer consumer = new TransformerConsumer(transformer, transformerJob, transformerJobBuilder.getInput(), sourceColumnFinder); return consumer; } private AnalyzerConsumer createConsumer(AnalyzerJobBuilder<?> analyzerBuilder) { AnalyzerJob analyzerJob = analyzerBuilder.toAnalyzerJob(); AnalyzerBeanDescriptor<?> descriptor = analyzerJob.getDescriptor(); Analyzer<?> analyzer = descriptor.newInstance(); lifeCycleHelper.assignConfiguredProperties(descriptor, analyzer, analyzerJob.getConfiguration()); AnalyzerConsumer consumer = new AnalyzerConsumer(analyzer, analyzerJob, analyzerBuilder.getInput(), sourceColumnFinder); return consumer; } }