/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.job.runner;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.Row;
import org.apache.metamodel.jdbc.JdbcDataContext;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.Func;
import org.apache.metamodel.util.LazyRef;
import org.apache.metamodel.util.Predicate;
import org.eobjects.analyzer.beans.api.Analyzer;
import org.eobjects.analyzer.beans.api.Filter;
import org.eobjects.analyzer.beans.api.Transformer;
import org.eobjects.analyzer.configuration.ContextAwareInjectionManager;
import org.eobjects.analyzer.configuration.InjectionManager;
import org.eobjects.analyzer.connection.Datastore;
import org.eobjects.analyzer.connection.DatastoreConnection;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.MetaModelInputRow;
import org.eobjects.analyzer.descriptors.ComponentDescriptor;
import org.eobjects.analyzer.job.AnalysisJob;
import org.eobjects.analyzer.job.AnalyzerJob;
import org.eobjects.analyzer.job.BeanConfiguration;
import org.eobjects.analyzer.job.ComponentJob;
import org.eobjects.analyzer.job.ConfigurableBeanJob;
import org.eobjects.analyzer.job.FilterJob;
import org.eobjects.analyzer.job.FilterOutcome;
import org.eobjects.analyzer.job.HasFilterOutcomes;
import org.eobjects.analyzer.job.TransformerJob;
import org.eobjects.analyzer.job.concurrent.ForkTaskListener;
import org.eobjects.analyzer.job.concurrent.JoinTaskListener;
import org.eobjects.analyzer.job.concurrent.RunNextTaskTaskListener;
import org.eobjects.analyzer.job.concurrent.TaskListener;
import org.eobjects.analyzer.job.concurrent.TaskRunnable;
import org.eobjects.analyzer.job.concurrent.TaskRunner;
import org.eobjects.analyzer.job.tasks.CloseTaskListener;
import org.eobjects.analyzer.job.tasks.CollectResultsTask;
import org.eobjects.analyzer.job.tasks.ConsumeRowTask;
import org.eobjects.analyzer.job.tasks.InitializeReferenceDataTask;
import org.eobjects.analyzer.job.tasks.InitializeTask;
import org.eobjects.analyzer.job.tasks.RunRowProcessingPublisherTask;
import org.eobjects.analyzer.job.tasks.Task;
import org.eobjects.analyzer.lifecycle.LifeCycleHelper;
import org.eobjects.analyzer.util.SystemProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public final class RowProcessingPublisher {
/** Class-wide SLF4J logger. */
private final static Logger logger = LoggerFactory.getLogger(RowProcessingPublisher.class);
/**
 * The owning collection of publishers. It is the source of the analysis job,
 * datastore, analysis listener, task runner and life-cycle helper used by
 * this class.
 */
private final RowProcessingPublishers _publishers;
/** The table whose rows are queried; every physical column must belong to it. */
private final Table _table;
/** The columns selected by the base query, kept in insertion order. */
private final Set<Column> _physicalColumns = new LinkedHashSet<Column>();
/** The consumers (analyzers, transformers and filters) registered on this publisher. */
private final List<RowProcessingConsumer> _consumers = new ArrayList<RowProcessingConsumer>();
/** Lazily loaded reference to the query optimizer, see createQueryOptimizerRef(). */
private final LazyRef<RowProcessingQueryOptimizer> _queryOptimizerRef;
/**
 * Starts out true and is set to false by processRows when the row task
 * listener reports an error. It is also handed to every CloseTaskListener.
 */
private final AtomicBoolean _successful = new AtomicBoolean(true);
/**
 * Creates a publisher for the rows of a single table.
 *
 * Unless the system property named by
 * {@link SystemProperties#QUERY_SELECTCLAUSE_OPTIMIZE} is set to "true"
 * (case-insensitive), all source columns of the analysis job that belong to
 * the table are pre-selected as physical columns of the query.
 *
 * @param publishers
 *            the owning {@link RowProcessingPublishers}, must not be null
 * @param table
 *            the table to query, must not be null
 * @throws IllegalArgumentException
 *             if either argument is null
 */
public RowProcessingPublisher(RowProcessingPublishers publishers, Table table) {
    if (publishers == null) {
        throw new IllegalArgumentException("RowProcessingPublishers cannot be null");
    }
    if (table == null) {
        throw new IllegalArgumentException("Table cannot be null");
    }
    _publishers = publishers;
    _table = table;
    _queryOptimizerRef = createQueryOptimizerRef();

    // NOTE(review): QUERY_SELECTCLAUSE_OPTIMIZE is treated here as the NAME
    // of a system property, so its value has to be looked up. Comparing
    // "true" with the constant itself can never match, which made the
    // aggressive select clause optimization impossible to enable.
    final String optimizeSetting = System.getProperty(SystemProperties.QUERY_SELECTCLAUSE_OPTIMIZE);
    final boolean aggressiveOptimizeSelectClause = "true".equalsIgnoreCase(optimizeSetting);

    if (!aggressiveOptimizeSelectClause) {
        final Collection<InputColumn<?>> sourceColumns = publishers.getAnalysisJob().getSourceColumns();
        final List<Column> columns = new ArrayList<Column>();
        for (InputColumn<?> sourceColumn : sourceColumns) {
            final Column column = sourceColumn.getPhysicalColumn();
            // only columns of this publisher's table can be queried here
            if (column != null && table.equals(column.getTable())) {
                columns.add(column);
            }
        }
        addPhysicalColumns(columns.toArray(new Column[columns.size()]));
    }
}
/**
 * Creates a metrics object for this row processing session. Consumers are
 * assumed to have been initialized before this is called, see
 * {@link #initializeConsumers(TaskListener)}.
 *
 * @return a new {@link RowProcessingMetrics} instance bound to this
 *         publisher
 */
public RowProcessingMetrics getRowProcessingMetrics() {
    return new RowProcessingMetricsImpl(_publishers, this);
}
/**
 * Gets the table whose rows this publisher processes.
 *
 * @return the table given at construction time, never null
 */
public Table getTable() {
return _table;
}
/**
 * Inspects the primary keys of the processed table. If all primary key
 * columns are among the source columns of the AnalysisJob, they are added
 * to the physically queried columns. If the table has no primary keys, or
 * at least one of them is not a source column, nothing is added.
 *
 * Adding the primary keys to the query is a trade-off: It helps a lot in
 * making eg. annotated rows referenceable to the source table, but it may
 * also potentially make the job heavier to execute since a lot of (unique)
 * values will be retrieved.
 */
public void addPrimaryKeysIfSourced() {
    final Column[] primaryKeyColumns = _table.getPrimaryKeys();
    if (primaryKeyColumns == null || primaryKeyColumns.length == 0) {
        logger.info("No primary keys defined for table {}, not pre-selecting primary keys", _table.getName());
        return;
    }

    final AnalysisJob analysisJob = _publishers.getAnalysisJob();
    final Collection<InputColumn<?>> sourceInputColumns = analysisJob.getSourceColumns();
    final List<Column> sourceColumns = CollectionUtils.map(sourceInputColumns, new Func<InputColumn<?>, Column>() {
        @Override
        public Column eval(InputColumn<?> inputColumn) {
            return inputColumn.getPhysicalColumn();
        }
    });

    // all-or-nothing: a partial primary key is of no use for referencing rows
    for (Column primaryKeyColumn : primaryKeyColumns) {
        if (!sourceColumns.contains(primaryKeyColumn)) {
            // the message has a placeholder, so the column name must be
            // passed along (it was previously logged as a literal "{}")
            logger.info("Primary key column {} not added to source columns, not pre-selecting primary keys",
                    primaryKeyColumn.getName());
            return;
        }
    }

    addPhysicalColumns(primaryKeyColumns);
}
/**
 * Builds the lazy reference that creates the query optimizer on demand. The
 * base query selects the currently registered physical columns from the
 * table, and the optimizer is given the consumers in sorted order.
 *
 * @return a lazy reference to the {@link RowProcessingQueryOptimizer}
 */
private LazyRef<RowProcessingQueryOptimizer> createQueryOptimizerRef() {
    final LazyRef<RowProcessingQueryOptimizer> optimizerRef = new LazyRef<RowProcessingQueryOptimizer>() {
        @Override
        protected RowProcessingQueryOptimizer fetch() {
            final Datastore ds = _publishers.getDatastore();
            try (final DatastoreConnection connection = ds.openConnection()) {
                final DataContext context = connection.getDataContext();

                final Column[] selectColumns = _physicalColumns.toArray(new Column[_physicalColumns.size()]);
                final Query query = context.query().from(_table).select(selectColumns).toQuery();
                logger.debug("Base query for row processing: {}", query);

                final List<RowProcessingConsumer> orderedConsumers = sortConsumers(_consumers);
                return new RowProcessingQueryOptimizer(ds, orderedConsumers, query);
            } catch (RuntimeException failure) {
                // log here before rethrowing the failure unchanged
                logger.error("Failed to build query optimizer! {}", failure.getMessage(), failure);
                throw failure;
            }
        }
    };
    return optimizerRef;
}
/**
 * Sorts a list of consumers into their execution order. When debug logging
 * is enabled, the resulting order is logged as a numbered list.
 *
 * @param consumers
 *            the consumers to sort
 * @return the consumers in the order produced by
 *         {@link RowProcessingConsumerSorter}
 */
public static List<RowProcessingConsumer> sortConsumers(List<RowProcessingConsumer> consumers) {
    final List<RowProcessingConsumer> ordered = new RowProcessingConsumerSorter(consumers)
            .createProcessOrderedConsumerList();

    if (!logger.isDebugEnabled()) {
        return ordered;
    }

    logger.debug("Row processing order ({} consumers):", ordered.size());
    int position = 0;
    for (RowProcessingConsumer consumer : ordered) {
        position++;
        logger.debug(" {}) {}", position, consumer);
    }
    return ordered;
}
/**
 * Requests that the lazily created query optimizer is loaded.
 */
public void initialize() {
// can safely load query optimizer in separate thread here
_queryOptimizerRef.requestLoad();
}
/**
 * Registers columns to be selected by the row processing query. Columns
 * that were already registered are not added twice.
 *
 * @param columns
 *            the columns to add, all of which must belong to this
 *            publisher's table
 * @throws IllegalArgumentException
 *             if a column belongs to another table. Columns preceding the
 *             offending one have already been added at that point.
 */
public void addPhysicalColumns(Column... columns) {
    for (final Column candidate : columns) {
        final Table owner = candidate.getTable();
        if (_table.equals(owner)) {
            _physicalColumns.add(candidate);
            continue;
        }
        throw new IllegalArgumentException("Column does not pertain to the correct table. Expected table: "
                + _table + ", actual table: " + owner);
    }
}
/**
 * Gets the query optimizer, loading it if necessary.
 *
 * @return the query optimizer, never null
 * @throws RuntimeException
 *             the original failure, if loading failed with a runtime
 *             exception
 * @throws IllegalStateException
 *             wrapping the recorded error in any other case where no
 *             optimizer is available
 */
public RowProcessingQueryOptimizer getQueryOptimizer() {
    final RowProcessingQueryOptimizer loaded = _queryOptimizerRef.get();
    if (loaded != null) {
        return loaded;
    }

    // no optimizer available: surface whatever error the reference recorded
    final Throwable cause = _queryOptimizerRef.getError();
    if (cause instanceof RuntimeException) {
        throw (RuntimeException) cause;
    }
    throw new IllegalStateException(cause);
}
/**
 * Gets the optimized query used for row processing.
 *
 * @return the optimized query of {@link #getQueryOptimizer()}
 */
public Query getQuery() {
    final RowProcessingQueryOptimizer optimizer = getQueryOptimizer();
    return optimizer.getOptimizedQuery();
}
/**
 * Fires the actual row processing. This method assumes that consumers have
 * been initialized and the publisher is ready to start processing.
 *
 * The optimized query is executed and one {@link ConsumeRowTask} is
 * submitted to the task runner per row. This method does not return a
 * value: if the task listener reports an error, the publisher is marked as
 * unsuccessful and the analysis listener is NOT notified of row processing
 * success.
 *
 * @param rowProcessingMetrics
 *            the metrics passed on to the analysis listener and to each row
 *            task
 *
 * @see #runRowProcessing(Queue, TaskListener)
 */
public void processRows(RowProcessingMetrics rowProcessingMetrics) {
final RowProcessingQueryOptimizer queryOptimizer = getQueryOptimizer();
final Query finalQuery = queryOptimizer.getOptimizedQuery();
// when the query declares a first row, it is passed to the id generator
final RowIdGenerator idGenerator;
if (finalQuery.getFirstRow() == null) {
idGenerator = new SimpleRowIdGenerator();
} else {
idGenerator = new SimpleRowIdGenerator(finalQuery.getFirstRow());
}
final AnalysisJob analysisJob = _publishers.getAnalysisJob();
final AnalysisListener analysisListener = _publishers.getAnalysisListener();
final TaskRunner taskRunner = _publishers.getTaskRunner();
// notify the listener about every analyzer, and hand the shared id
// generator to every transformer
for (RowProcessingConsumer rowProcessingConsumer : _consumers) {
if (rowProcessingConsumer instanceof AnalyzerConsumer) {
final AnalyzerConsumer analyzerConsumer = (AnalyzerConsumer) rowProcessingConsumer;
final AnalyzerJob analyzerJob = analyzerConsumer.getComponentJob();
final AnalyzerMetrics metrics = rowProcessingMetrics.getAnalysisJobMetrics().getAnalyzerMetrics(
analyzerJob);
analysisListener.analyzerBegin(analysisJob, analyzerJob, metrics);
}
if (rowProcessingConsumer instanceof TransformerConsumer) {
((TransformerConsumer) rowProcessingConsumer).setRowIdGenerator(idGenerator);
}
}
// rows are consumed by the optimizer's consumers and outcomes, not by
// the raw _consumers list
final List<RowProcessingConsumer> consumers = queryOptimizer.getOptimizedConsumers();
final Collection<? extends FilterOutcome> availableOutcomes = queryOptimizer.getOptimizedAvailableOutcomes();
analysisListener.rowProcessingBegin(analysisJob, rowProcessingMetrics);
final RowConsumerTaskListener taskListener = new RowConsumerTaskListener(analysisJob, analysisListener,
taskRunner);
final Datastore datastore = _publishers.getDatastore();
try (final DatastoreConnection con = datastore.openConnection()) {
final DataContext dataContext = con.getDataContext();
if (logger.isDebugEnabled()) {
// for JDBC, log the query as rewritten by the data context's query
// rewriter
final String queryString;
if (dataContext instanceof JdbcDataContext) {
final JdbcDataContext jdbcDataContext = (JdbcDataContext) dataContext;
queryString = jdbcDataContext.getQueryRewriter().rewriteQuery(finalQuery);
} else {
queryString = finalQuery.toSql();
}
logger.debug("Final query: {}", queryString);
logger.debug("Final query firstRow={}, maxRows={}", finalQuery.getFirstRow(), finalQuery.getMaxRows());
}
// represents the distinct count of rows as well as the number of
// tasks to execute
int numTasks = 0;
try (final DataSet dataSet = dataContext.executeQuery(finalQuery)) {
final ConsumeRowHandler consumeRowHandler = new ConsumeRowHandler(consumers, availableOutcomes);
while (dataSet.next()) {
// stop reading further rows once a task has reported an error
if (taskListener.isErrornous()) {
break;
}
numTasks++;
final Row metaModelRow = dataSet.getRow();
final int rowId = idGenerator.nextPhysicalRowId();
final MetaModelInputRow inputRow = new MetaModelInputRow(rowId, metaModelRow);
final ConsumeRowTask task = new ConsumeRowTask(consumeRowHandler, rowProcessingMetrics, inputRow,
analysisListener, numTasks);
taskRunner.run(task, taskListener);
}
}
// the data set is closed at this point; the connection is still open
// while awaiting the submitted tasks
taskListener.awaitTasks(numTasks);
}
if (taskListener.isErrornous()) {
// record the failure and skip the success notification
_successful.set(false);
return;
}
analysisListener.rowProcessingSuccess(analysisJob, rowProcessingMetrics);
}
/**
 * Registers an analyzer as a consumer of this publisher's rows.
 *
 * @param analyzer
 *            the analyzer component
 * @param analyzerJob
 *            the job that the analyzer represents
 * @param inputColumns
 *            the input columns of the analyzer
 */
public void addAnalyzerBean(Analyzer<?> analyzer, AnalyzerJob analyzerJob, InputColumn<?>[] inputColumns) {
    final AnalyzerConsumer analyzerConsumer = new AnalyzerConsumer(analyzer, analyzerJob, inputColumns,
            _publishers);
    addConsumer(analyzerConsumer);
}
/**
 * Registers a transformer as a consumer of this publisher's rows.
 *
 * @param transformer
 *            the transformer component
 * @param transformerJob
 *            the job that the transformer represents
 * @param inputColumns
 *            the input columns of the transformer
 */
public void addTransformerBean(Transformer<?> transformer, TransformerJob transformerJob,
        InputColumn<?>[] inputColumns) {
    final TransformerConsumer transformerConsumer = new TransformerConsumer(transformer, transformerJob,
            inputColumns, _publishers);
    addConsumer(transformerConsumer);
}
/**
 * Registers a filter as a consumer of this publisher's rows.
 *
 * @param filter
 *            the filter component
 * @param filterJob
 *            the job that the filter represents
 * @param inputColumns
 *            the input columns of the filter
 */
public void addFilterBean(final Filter<?> filter, final FilterJob filterJob, final InputColumn<?>[] inputColumns) {
    final FilterConsumer filterConsumer = new FilterConsumer(filter, filterJob, inputColumns, _publishers);
    addConsumer(filterConsumer);
}
/**
 * Determines whether any registered consumer produces the given filter
 * outcome.
 *
 * @param prerequisiteOutcome
 *            the outcome to look for
 * @return true if a consumer whose component job has filter outcomes
 *         exposes an outcome that equals (as per
 *         {@code FilterOutcome.isEquals}) the given one, false otherwise
 */
public boolean containsOutcome(final FilterOutcome prerequisiteOutcome) {
    for (final RowProcessingConsumer candidate : _consumers) {
        final ComponentJob job = candidate.getComponentJob();
        if (!(job instanceof HasFilterOutcomes)) {
            // this kind of job cannot produce any outcomes
            continue;
        }
        final HasFilterOutcomes outcomeSource = (HasFilterOutcomes) job;
        final Collection<FilterOutcome> producedOutcomes = outcomeSource.getFilterOutcomes();
        for (FilterOutcome produced : producedOutcomes) {
            if (produced.isEquals(prerequisiteOutcome)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Appends a consumer to the list of consumers of this publisher.
 *
 * @param consumer
 *            the consumer to register
 */
private void addConsumer(final RowProcessingConsumer consumer) {
_consumers.add(consumer);
}
/**
 * Gets the consumers whose component job is a {@link ConfigurableBeanJob}.
 *
 * @return the matching subset of the registered consumers
 */
public List<RowProcessingConsumer> getConfigurableConsumers() {
    final Predicate<RowProcessingConsumer> hasConfigurableJob = new Predicate<RowProcessingConsumer>() {
        @Override
        public Boolean eval(RowProcessingConsumer input) {
            return input.getComponentJob() instanceof ConfigurableBeanJob<?>;
        }
    };
    return CollectionUtils.filter(_consumers, hasConfigurableJob);
}
/**
 * Runs the whole row processing logic, start to finish, including
 * initialization, process rows, result collection and cleanup/closing
 * resources.
 *
 * The stages are wired together back to front through task listeners:
 * consumer initialization, reference data initialization, the row
 * processing task, one result collection task per configurable consumer,
 * one close task per configurable consumer, and finally the given
 * finishedTaskListener. Only the consumer initialization is kicked off
 * directly by this method.
 *
 * @param resultQueue
 *            a queue on which to append results
 * @param finishedTaskListener
 *            a task listener which will be invoked once the processing is
 *            done.
 *
 * @see #processRows(RowProcessingMetrics)
 * @see #initializeConsumers(TaskListener)
 */
public void runRowProcessing(Queue<JobAndResult> resultQueue, TaskListener finishedTaskListener) {
    final LifeCycleHelper helper = _publishers.getLifeCycleHelper();
    final TaskRunner runner = _publishers.getTaskRunner();

    final List<RowProcessingConsumer> consumers = getConfigurableConsumers();
    final int consumerCount = consumers.size();

    // last stage: close every component, then report to the caller's listener
    final TaskListener closeJoin = new JoinTaskListener(consumerCount, finishedTaskListener);
    final List<TaskRunnable> closeStage = new ArrayList<TaskRunnable>(consumerCount);
    for (RowProcessingConsumer consumer : consumers) {
        closeStage.add(createCloseTask(consumer, closeJoin));
    }
    final TaskListener collectDoneListener = new ForkTaskListener("collect results", runner, closeStage);

    // result collection stage: one runnable per consumer, so that the join
    // count matches. Consumers without results get a runnable with a null
    // task.
    final TaskListener collectJoin = new JoinTaskListener(consumerCount, collectDoneListener);
    final List<TaskRunnable> collectStage = new ArrayList<TaskRunnable>();
    for (RowProcessingConsumer consumer : consumers) {
        final Task collectTask = createCollectResultTask(consumer, resultQueue);
        collectStage.add(new TaskRunnable(collectTask, collectJoin));
    }
    final TaskListener runDoneListener = new ForkTaskListener("run row processing", runner, collectStage);

    // row processing stage
    final RowProcessingMetrics metrics = getRowProcessingMetrics();
    final RunRowProcessingPublisherTask publisherTask = new RunRowProcessingPublisherTask(this, metrics);

    // reference data initialization stage
    final TaskListener referenceDataDoneListener = new ForkTaskListener("Initialize row consumers", runner,
            Arrays.asList(new TaskRunnable(publisherTask, runDoneListener)));
    final RunNextTaskTaskListener consumersInitializedListener = new RunNextTaskTaskListener(runner,
            new InitializeReferenceDataTask(helper), referenceDataDoneListener);

    // kick off the initialization
    initializeConsumers(consumersInitializedListener);
}
/**
 * Initializes consumers of this {@link RowProcessingPublisher}. Once
 * consumers are initialized, row processing can begin, expected rows can be
 * calculated and more.
 *
 * One initialization task per configurable consumer is handed to the task
 * runner. All of them share a join listener that wraps the given listener.
 *
 * @param finishedListener
 *            the listener wrapped by the join listener
 */
public void initializeConsumers(TaskListener finishedListener) {
    final List<RowProcessingConsumer> consumers = getConfigurableConsumers();
    final TaskListener joinListener = new JoinTaskListener(consumers.size(), finishedListener);
    final TaskRunner runner = _publishers.getTaskRunner();

    for (RowProcessingConsumer consumer : consumers) {
        final TaskRunnable initTask = createInitTask(consumer, joinListener);
        runner.run(initTask);
    }
}
/**
 * Closes consumers of this {@link RowProcessingPublisher}. Usually this
 * will be done automatically when
 * {@link #runRowProcessing(Queue, TaskListener)} is invoked.
 *
 * One close task per configurable consumer is handed to the task runner,
 * with no follow-up listener attached.
 */
public void closeConsumers() {
    final List<RowProcessingConsumer> consumers = getConfigurableConsumers();
    final TaskRunner runner = _publishers.getTaskRunner();

    for (RowProcessingConsumer consumer : consumers) {
        final TaskRunnable closeTask = createCloseTask(consumer, null);
        runner.run(closeTask);
    }
}
/**
 * Creates the task that collects the result of a consumer.
 *
 * @param consumer
 *            the consumer to collect a result from
 * @param resultQueue
 *            the queue passed on to the created task
 * @return a {@link CollectResultsTask} for analyzer consumers, or null for
 *         transformer and filter consumers
 * @throws IllegalStateException
 *             if the consumer is of any other type
 */
private Task createCollectResultTask(RowProcessingConsumer consumer, Queue<JobAndResult> resultQueue) {
    if (consumer instanceof TransformerConsumer || consumer instanceof FilterConsumer) {
        // no result collection task for these consumers
        return null;
    }
    if (!(consumer instanceof AnalyzerConsumer)) {
        throw new IllegalStateException("Unknown consumer type: " + consumer);
    }

    final Analyzer<?> analyzer = ((AnalyzerConsumer) consumer).getComponent();
    final AnalysisJob job = _publishers.getAnalysisJob();
    final AnalysisListener listener = _publishers.getAnalysisListener();
    return new CollectResultsTask(analyzer, job, consumer.getComponentJob(), resultQueue, listener);
}
/**
 * Creates the runnable that closes the component of a consumer. The
 * runnable carries no task of its own; the work is delegated to a
 * {@link CloseTaskListener}.
 *
 * @param consumer
 *            the consumer whose component is to be closed
 * @param closeTaskListener
 *            the listener handed to the {@link CloseTaskListener}, may be
 *            null (as done by {@link #closeConsumers()})
 * @return the runnable to hand to a task runner
 */
private TaskRunnable createCloseTask(RowProcessingConsumer consumer, TaskListener closeTaskListener) {
    final LifeCycleHelper helper = _publishers.getLifeCycleHelper();
    final ComponentDescriptor<?> componentDescriptor = consumer.getComponentJob().getDescriptor();
    final Object bean = consumer.getComponent();

    final CloseTaskListener closingListener = new CloseTaskListener(helper, componentDescriptor, bean,
            _successful, closeTaskListener);
    return new TaskRunnable(null, closingListener);
}
/**
 * Creates the runnable that initializes the component of a consumer. The
 * component gets its own {@link LifeCycleHelper} whose injection manager is
 * aware of the component job.
 *
 * @param consumer
 *            a consumer whose component job is a
 *            {@link ConfigurableBeanJob}
 * @param listener
 *            the listener attached to the created runnable
 * @return the runnable to hand to a task runner
 */
private TaskRunnable createInitTask(RowProcessingConsumer consumer, TaskListener listener) {
    final ComponentJob job = consumer.getComponentJob();
    final Object bean = consumer.getComponent();
    final BeanConfiguration beanConfiguration = ((ConfigurableBeanJob<?>) job).getConfiguration();
    final ComponentDescriptor<?> componentDescriptor = job.getDescriptor();

    // make a component-context specific injection manager
    final LifeCycleHelper sharedHelper = _publishers.getLifeCycleHelper();
    final boolean includeNonDistributedTasks = sharedHelper.isIncludeNonDistributedTasks();
    final AnalysisJob analysisJob = _publishers.getAnalysisJob();
    final InjectionManager sharedInjectionManager = sharedHelper.getInjectionManager();
    final ReferenceDataActivationManager activationManager = sharedHelper.getReferenceDataActivationManager();
    final ContextAwareInjectionManager componentInjectionManager = new ContextAwareInjectionManager(
            sharedInjectionManager, analysisJob, job, _publishers.getAnalysisListener());
    final LifeCycleHelper componentHelper = new LifeCycleHelper(componentInjectionManager, activationManager,
            includeNonDistributedTasks);

    final InitializeTask initializeTask = new InitializeTask(componentHelper, componentDescriptor, bean,
            beanConfiguration);
    return new TaskRunnable(initializeTask, listener);
}
/**
 * Describes this publisher by the qualified label of its table and the
 * number of registered consumers.
 */
@Override
public String toString() {
    final String tableLabel = _table.getQualifiedLabel();
    final int consumerCount = _consumers.size();
    return "RowProcessingPublisher[table=" + tableLabel + ", consumers=" + consumerCount + "]";
}
/**
 * Gets the jobs of all analyzer consumers registered on this publisher.
 *
 * @return the analyzer jobs in consumer registration order; an empty array
 *         if there are none
 */
public AnalyzerJob[] getAnalyzerJobs() {
    final List<AnalyzerJob> jobs = new ArrayList<AnalyzerJob>();
    for (RowProcessingConsumer candidate : _consumers) {
        if (!(candidate instanceof AnalyzerConsumer)) {
            continue;
        }
        final AnalyzerConsumer analyzerConsumer = (AnalyzerConsumer) candidate;
        jobs.add(analyzerConsumer.getComponentJob());
    }
    final AnalyzerJob[] result = new AnalyzerJob[jobs.size()];
    return jobs.toArray(result);
}
}