/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.job.runner;
import java.util.ArrayList;
import java.util.List;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.LazyRef;
import org.apache.metamodel.util.Ref;
import org.datacleaner.components.convert.ConvertToNumberTransformer;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.job.AnalyzerJob;
import org.datacleaner.job.ComponentJob;
final class RowProcessingMetricsImpl implements RowProcessingMetrics {
private final RowProcessingPublishers _publishers;
private final RowProcessingPublisher _publisher;
private final Ref<Integer> _expectedRows;
public RowProcessingMetricsImpl(final RowProcessingPublishers publishers, final RowProcessingPublisher publisher) {
_publishers = publishers;
_publisher = publisher;
_expectedRows = createExpectedRowsRef();
}
@Override
public AnalysisJobMetrics getAnalysisJobMetrics() {
return _publishers.getAnalysisJobMetrics();
}
@Override
public Query getQuery() {
return _publisher.getQuery();
}
@Override
public RowProcessingStream getStream() {
return _publisher.getStream();
}
@Override
public Table getTable() {
return _publisher.getStream().getTable();
}
@Override
public int getExpectedRows() {
final Integer expectedRows = _expectedRows.get();
return expectedRows.intValue();
}
@Override
public ComponentJob[] getResultProducers() {
final List<ComponentJob> resultProducers = new ArrayList<>();
for (final RowProcessingConsumer consumer : _publisher.getConsumers()) {
if (consumer.isResultProducer()) {
resultProducers.add(consumer.getComponentJob());
}
}
return resultProducers.toArray(new ComponentJob[resultProducers.size()]);
}
@Override
public AnalyzerJob[] getAnalyzerJobs() {
final List<AnalyzerJob> analyzerJobs = new ArrayList<>();
for (final RowProcessingConsumer consumer : _publisher.getConsumers()) {
if (consumer instanceof AnalyzerConsumer) {
final AnalyzerJob analyzerJob = ((AnalyzerConsumer) consumer).getComponentJob();
analyzerJobs.add(analyzerJob);
}
}
return analyzerJobs.toArray(new AnalyzerJob[analyzerJobs.size()]);
}
private Ref<Integer> createExpectedRowsRef() {
// TODO: This only seems valid for source tables
return new LazyRef<Integer>() {
@Override
protected Integer fetch() {
int expectedRows = -1;
{
final Query originalQuery = getQuery();
final Query countQuery = originalQuery.clone();
countQuery.setMaxRows(null);
countQuery.setFirstRow(null);
countQuery.getSelectClause().removeItems();
countQuery.getOrderByClause().removeItems();
countQuery.selectCount();
countQuery.getSelectClause().getItem(0).setFunctionApproximationAllowed(true);
final Datastore datastore = _publisher.getStream().getAnalysisJob().getDatastore();
try (DatastoreConnection connection = datastore.openConnection()) {
try (DataSet countDataSet = connection.getDataContext().executeQuery(countQuery)) {
if (countDataSet.next()) {
final Number count =
ConvertToNumberTransformer.transformValue(countDataSet.getRow().getValue(0));
if (count != null) {
expectedRows = count.intValue();
}
}
}
}
final Integer maxRows = originalQuery.getMaxRows();
if (maxRows != null) {
expectedRows = Math.min(expectedRows, maxRows.intValue());
}
}
return expectedRows;
}
};
}
}