/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.job.runner; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Queue; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.metamodel.query.Query; import org.datacleaner.api.HasAnalyzerResult; import org.datacleaner.job.AnalysisJob; import org.datacleaner.job.ComponentJob; import org.datacleaner.job.FilterOutcome; import org.datacleaner.job.concurrent.ForkTaskListener; import org.datacleaner.job.concurrent.JoinTaskListener; import org.datacleaner.job.concurrent.TaskListener; import org.datacleaner.job.concurrent.TaskRunnable; import org.datacleaner.job.concurrent.TaskRunner; import org.datacleaner.job.tasks.CloseTaskListener; import org.datacleaner.job.tasks.CollectResultsTask; import org.datacleaner.job.tasks.InitializeTask; import org.datacleaner.job.tasks.Task; import org.datacleaner.lifecycle.LifeCycleHelper; import org.datacleaner.util.SourceColumnFinder; public abstract class AbstractRowProcessingPublisher implements RowProcessingPublisher { private final AtomicBoolean _success; private final RowProcessingPublishers _publishers; private final RowProcessingStream _stream; private final List<RowProcessingConsumer> _consumers; private final SourceColumnFinder _sourceColumnFinder; public AbstractRowProcessingPublisher(final RowProcessingPublishers publishers, final RowProcessingStream stream) { if (publishers == null) { throw new IllegalArgumentException("RowProcessingPublishers cannot be null"); } if (stream == null) { throw new IllegalArgumentException("RowProcessingStream cannot be null"); } _publishers = publishers; _stream = stream; _sourceColumnFinder = new SourceColumnFinder(); _sourceColumnFinder.addSources(stream.getAnalysisJob()); _consumers = new ArrayList<>(); _success = new AtomicBoolean(true); } @Override public ErrorAware getErrorAware() { return _publishers.getErrorAware(); } /** * Gets a {@link RowProcessingQueryOptimizer} instance from the subclass, * used to get the query and any optimized consumer list. * * @return */ protected abstract RowProcessingQueryOptimizer getQueryOptimizer(); /** * Equivalent method to {@link #processRows(RowProcessingMetrics)}, which * returns whether or not the processing went well. * * Subclasses should not be invoking * {@link AnalysisListener#rowProcessingSuccess(AnalysisJob, RowProcessingMetrics)} * . * * @param analysisListener * @param rowProcessingMetrics * @return */ protected abstract boolean processRowsInternal(AnalysisListener analysisListener, RowProcessingMetrics rowProcessingMetrics); @Override public final SourceColumnFinder getSourceColumnFinder() { return _sourceColumnFinder; } @Override public final RowProcessingStream getStream() { return _stream; } @Override public final List<RowProcessingConsumer> getConsumers() { return _consumers; } protected final List<RowProcessingConsumer> getConsumersSorted() { final List<RowProcessingConsumer> consumers = getConsumers(); final RowProcessingConsumerSorter sorter = new RowProcessingConsumerSorter(consumers); return sorter.createProcessOrderedConsumerList(); } @Override public final void registerConsumer(final RowProcessingConsumer consumer) { _consumers.add(consumer); } @Override public final RowProcessingMetrics getRowProcessingMetrics() { return new RowProcessingMetricsImpl(_publishers, this); } @Override public final RowProcessingConsumer getConsumer(final ComponentJob componentJob) { for (final RowProcessingConsumer consumer : _consumers) { if (componentJob.equals(consumer.getComponentJob())) { return consumer; } } return null; } @Override public final RowProcessingPublishers getPublishers() { return _publishers; } @Override public final Query getQuery() { return getQueryOptimizer().getOptimizedQuery(); } @Override public final ConsumeRowHandler createConsumeRowHandler() { final RowProcessingQueryOptimizer queryOptimizer = getQueryOptimizer(); final Query finalQuery = queryOptimizer.getOptimizedQuery(); final RowIdGenerator idGenerator; if (finalQuery.getFirstRow() == null) { idGenerator = new SimpleRowIdGenerator(); } else { idGenerator = new SimpleRowIdGenerator(finalQuery.getFirstRow()); } final RowProcessingPublishers publishers = getPublishers(); final AnalysisListener analysisListener = publishers.getAnalysisListener(); for (final RowProcessingConsumer consumer : getConsumers()) { final ComponentJob componentJob = consumer.getComponentJob(); final ComponentMetrics metrics = new AnalysisJobMetricsImpl(consumer.getAnalysisJob(), publishers).getComponentMetrics(componentJob); analysisListener.componentBegin(getStream().getAnalysisJob(), componentJob, metrics); if (consumer instanceof TransformerConsumer) { ((TransformerConsumer) consumer).setRowIdGenerator(idGenerator); } } final List<RowProcessingConsumer> consumers = queryOptimizer.getOptimizedConsumers(); final Collection<? extends FilterOutcome> availableOutcomes = queryOptimizer.getOptimizedAvailableOutcomes(); return new ConsumeRowHandler(consumers, availableOutcomes); } protected final Task createCollectResultTask(final RowProcessingConsumer consumer, final Queue<JobAndResult> resultQueue) { final Object component = consumer.getComponent(); if (component instanceof HasAnalyzerResult) { final HasAnalyzerResult<?> hasAnalyzerResult = (HasAnalyzerResult<?>) component; final AnalysisListener analysisListener = _publishers.getAnalysisListener(); return new CollectResultsTask(hasAnalyzerResult, _stream.getAnalysisJob(), consumer.getComponentJob(), resultQueue, analysisListener); } return null; } protected final TaskRunnable createCloseTask(final RowProcessingConsumer consumer, final TaskListener closeTaskListener) { final LifeCycleHelper lifeCycleHelper = _publishers.getConsumerSpecificLifeCycleHelper(consumer); final CloseTaskListener taskListener = new CloseTaskListener(lifeCycleHelper, this, consumer, _success, closeTaskListener, _publishers.getAnalysisListener(), _stream.getAnalysisJob()); return new TaskRunnable(null, taskListener); } protected final TaskRunnable createInitTask(final RowProcessingConsumer consumer, final TaskListener listener) { final LifeCycleHelper lifeCycleHelper = _publishers.getConsumerSpecificLifeCycleHelper(consumer); final InitializeTask task = new InitializeTask(lifeCycleHelper, this, consumer); return new TaskRunnable(task, listener); } @Override public final void processRows(final RowProcessingMetrics rowProcessingMetrics) { final AnalysisListener analysisListener = getAnalysisListener(); final boolean success = processRowsInternal(analysisListener, rowProcessingMetrics); if (!success) { _success.set(false); return; } analysisListener.rowProcessingSuccess(getAnalysisJob(), rowProcessingMetrics); } @Override public final boolean runRowProcessing(final Queue<JobAndResult> resultQueue, final TaskListener finishedTaskListener) { if (!isReadyForRowProcessing()) { return false; } final List<TaskRunnable> postProcessingTasks = createPostProcessingTasks(resultQueue, finishedTaskListener); return runRowProcessingInternal(postProcessingTasks); } protected boolean isReadyForRowProcessing() { return true; } protected abstract boolean runRowProcessingInternal(List<TaskRunnable> postProcessingTasks); private List<TaskRunnable> createPostProcessingTasks(final Queue<JobAndResult> resultQueue, final TaskListener finishedTaskListener) { final List<RowProcessingConsumer> configurableConsumers = getConsumers(); final int numConsumers = configurableConsumers.size(); // add tasks for closing components final JoinTaskListener closeTaskListener = new JoinTaskListener(numConsumers, finishedTaskListener); final List<TaskRunnable> closeTasks = new ArrayList<>(); for (final RowProcessingConsumer consumer : configurableConsumers) { closeTasks.add(createCloseTask(consumer, closeTaskListener)); } final TaskListener getResultCompletionListener = new ForkTaskListener("collect results (" + getStream() + ")", getTaskRunner(), closeTasks); // add tasks for collecting results final TaskListener getResultTaskListener = new JoinTaskListener(numConsumers, getResultCompletionListener); final List<TaskRunnable> getResultTasks = new ArrayList<>(); for (final RowProcessingConsumer consumer : configurableConsumers) { final Task collectResultTask = createCollectResultTask(consumer, resultQueue); if (collectResultTask == null) { getResultTasks.add(new TaskRunnable(null, getResultTaskListener)); } else { getResultTasks.add(new TaskRunnable(collectResultTask, getResultTaskListener)); } } return getResultTasks; } @Override public final String toString() { return getClass().getSimpleName() + "[stream=" + getStream() + ", consumers=" + _consumers.size() + "]"; } protected final TaskRunner getTaskRunner() { return getPublishers().getTaskRunner(); } @Override public final AnalysisJob getAnalysisJob() { return getStream().getAnalysisJob(); } @Override public final AnalysisListener getAnalysisListener() { return getPublishers().getAnalysisListener(); } /** * Initializes consumers of this {@link SourceTableRowProcessingPublisher}. * * This method will not initialize consumers containing * {@link MultiStreamComponent}s. Ensure that * {@link #initializeMultiStreamConsumers(Set)} is also invoked. * * Once consumers are initialized, row processing can begin, expected rows * can be calculated and more. * * @param finishedListener */ @Override public final void initializeConsumers(final TaskListener finishedListener) { final TaskRunner taskRunner = getTaskRunner(); final List<RowProcessingConsumer> configurableConsumers = getConsumers(); final int numConfigurableConsumers = configurableConsumers.size(); final JoinTaskListener initFinishedListener = new JoinTaskListener(numConfigurableConsumers, finishedListener); for (final RowProcessingConsumer consumer : configurableConsumers) { final TaskRunnable task = createInitTask(consumer, initFinishedListener); taskRunner.run(task); } } /** * Closes consumers of this {@link SourceTableRowProcessingPublisher}. * Usually this will be done automatically when * {@link #runRowProcessing(Queue, TaskListener)} is invoked. */ @Override public final void closeConsumers() { final TaskRunner taskRunner = getTaskRunner(); final List<RowProcessingConsumer> configurableConsumers = getConsumers(); for (final RowProcessingConsumer consumer : configurableConsumers) { final TaskRunnable task = createCloseTask(consumer, null); taskRunner.run(task); } } }