/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.job.runner;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.lang.ArrayUtils;
import org.apache.metamodel.query.Query;
import org.datacleaner.api.Filter;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.QueryOptimizedFilter;
import org.datacleaner.components.maxrows.MaxRowsFilter;
import org.datacleaner.connection.Datastore;
import org.datacleaner.descriptors.FilterDescriptor;
import org.datacleaner.job.ComponentJob;
import org.datacleaner.job.ComponentRequirement;
import org.datacleaner.job.FilterOutcome;
import org.datacleaner.job.HasComponentRequirement;
import org.datacleaner.job.HasFilterOutcomes;
import org.datacleaner.job.InputColumnSinkJob;
import org.datacleaner.job.InputColumnSourceJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Default {@link RowProcessingQueryOptimizer} implementation
*/
public class RowProcessingQueryOptimizerImpl implements RowProcessingQueryOptimizer {
private static final Logger logger = LoggerFactory.getLogger(RowProcessingQueryOptimizerImpl.class);
private static final Class<?>[] ALWAYS_OPTIMIZABLE = new Class[] { MaxRowsFilter.class };
private final Datastore _datastore;
private final Query _baseQuery;
private final List<RowProcessingConsumer> _consumers;
private final Map<FilterConsumer, FilterOutcome> _optimizedFilters;
public RowProcessingQueryOptimizerImpl(final Datastore datastore, final List<RowProcessingConsumer> consumers,
final Query baseQuery) {
_datastore = datastore;
_consumers = consumers;
_baseQuery = baseQuery;
_optimizedFilters = new HashMap<>();
init();
}
private void init() {
int consumerIndex = 0;
for (final RowProcessingConsumer consumer : _consumers) {
if (consumer instanceof FilterConsumer) {
final FilterConsumer filterConsumer = (FilterConsumer) consumer;
if (!isOptimizable(filterConsumer)) {
logger.debug("Breaking optimization. Not optimizable: {}", filterConsumer);
// if it can be established that the filter is not
// optimizable at all (either because it is not an
// QueryOptimizableFilter or because input is not physical
// columns), then abort.
break;
}
final Collection<FilterOutcome> outcomes = filterConsumer.getComponentJob().getFilterOutcomes();
FilterOutcome optimizableOutcome = null;
for (final FilterOutcome outcome : outcomes) {
final boolean optimizable = isOptimizable(filterConsumer, outcome, consumerIndex);
if (optimizable) {
if (optimizableOutcome != null) {
// cannot have multiple optimizable outcomes for a
// single filter
break;
}
optimizableOutcome = outcome;
}
}
if (optimizableOutcome == null) {
break;
}
_optimizedFilters.put(filterConsumer, optimizableOutcome);
}
consumerIndex++;
}
}
private boolean isOptimizable(final FilterConsumer filterConsumer) {
final FilterDescriptor<?, ?> descriptor = filterConsumer.getComponentJob().getDescriptor();
if (!descriptor.isQueryOptimizable()) {
logger.debug("FilterBeanDescriptor not optimizable: {}", descriptor);
return false;
}
final InputColumn<?>[] input = filterConsumer.getRequiredInput();
for (final InputColumn<?> inputColumn : input) {
if (inputColumn.isVirtualColumn()) {
logger.debug("InputColumn is virtual: {}, so filter is not optimizable: {}", inputColumn,
filterConsumer);
return false;
}
}
return true;
}
private boolean isOptimizable(final FilterConsumer filterConsumer, final FilterOutcome filterOutcome,
final int consumerIndex) {
if (!filterConsumer.isQueryOptimizable(filterOutcome)) {
// the filter is not optimizable
return false;
}
if (!_datastore.getPerformanceCharacteristics().isQueryOptimizationPreferred()) {
// the datastore doesn't prefer query optimization
final Class<?> filterClass = filterConsumer.getComponentJob().getDescriptor().getComponentClass();
if (!ArrayUtils.contains(ALWAYS_OPTIMIZABLE, filterClass)) {
logger.debug("Datastore performance characteristics indicate that query optimization will "
+ "not improve performance for {}, stopping", filterConsumer);
// the filter is not in the "always optimizable" set.
return false;
}
}
final Set<InputColumn<?>> satisfiedColumns = new HashSet<>();
final Set<FilterOutcome> satisfiedRequirements = new HashSet<>();
satisfiedRequirements.add(filterOutcome);
for (int i = consumerIndex + 1; i < _consumers.size(); i++) {
boolean independentComponent = true;
final RowProcessingConsumer nextConsumer = _consumers.get(i);
final ComponentJob componentJob = nextConsumer.getComponentJob();
if (componentJob instanceof HasComponentRequirement) {
final ComponentRequirement componentRequirement = componentJob.getComponentRequirement();
if (componentRequirement != null) {
final Collection<FilterOutcome> requirements = componentRequirement.getProcessingDependencies();
for (final FilterOutcome requirement : requirements) {
if (!satisfiedRequirements.contains(requirement)) {
logger.debug("Requirement {} is not met using query optimization of {}", requirement,
filterConsumer);
return false;
} else {
independentComponent = false;
}
}
}
}
if (componentJob instanceof InputColumnSinkJob) {
final InputColumn<?>[] requiredColumns = ((InputColumnSinkJob) componentJob).getInput();
for (final InputColumn<?> column : requiredColumns) {
if (column.isVirtualColumn()) {
if (!satisfiedColumns.contains(column)) {
logger.debug("InputColumn {} is available at query time, and therefore not satisfied "
+ "for query optimization of {}", column, filterConsumer);
return false;
} else {
independentComponent = false;
}
}
}
}
if (independentComponent) {
// totally independent components prohibit optimization
logger.debug("Component {} is completely independent. Position in chain is not determinable, "
+ "so optimization cannot be done.", filterConsumer);
return false;
}
// this component is accepted now, add it's outcomes to the
// satisfied requirements
if (componentJob instanceof HasFilterOutcomes) {
final Collection<FilterOutcome> outcomes = ((HasFilterOutcomes) componentJob).getFilterOutcomes();
for (final FilterOutcome outcome : outcomes) {
satisfiedRequirements.add(outcome);
}
}
if (componentJob instanceof InputColumnSourceJob) {
final InputColumn<?>[] output = ((InputColumnSourceJob) componentJob).getOutput();
for (final InputColumn<?> column : output) {
satisfiedColumns.add(column);
}
}
}
return true;
}
@Override
public Query getOptimizedQuery() {
Query query = _baseQuery;
final Set<Entry<FilterConsumer, FilterOutcome>> entries = _optimizedFilters.entrySet();
if (!entries.isEmpty()) {
// create a copy/clone of the original query
query = query.clone();
for (final Entry<FilterConsumer, FilterOutcome> entry : entries) {
final FilterConsumer consumer = entry.getKey();
final FilterOutcome outcome = entry.getValue();
final Filter<?> filter = consumer.getComponent();
@SuppressWarnings("rawtypes") final QueryOptimizedFilter queryOptimizedFilter =
(QueryOptimizedFilter) filter;
@SuppressWarnings("unchecked") final Query newQuery =
queryOptimizedFilter.optimizeQuery(query, outcome.getCategory());
query = newQuery;
}
}
return query;
}
@Override
public List<RowProcessingConsumer> getOptimizedConsumers() {
final List<RowProcessingConsumer> result = new ArrayList<>(_consumers);
for (final FilterConsumer filterConsumer : _optimizedFilters.keySet()) {
if (filterConsumer.isRemoveableUponOptimization()) {
result.remove(filterConsumer);
}
}
return result;
}
@Override
public Set<? extends RowProcessingConsumer> getEliminatedConsumers() {
return _optimizedFilters.keySet();
}
@Override
public Collection<? extends FilterOutcome> getOptimizedAvailableOutcomes() {
return _optimizedFilters.values();
}
@Override
public boolean isOptimizable() {
return !_optimizedFilters.isEmpty();
}
}