/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.job.runner;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.lang.ArrayUtils;
import org.apache.metamodel.query.Query;
import org.eobjects.analyzer.beans.api.Filter;
import org.eobjects.analyzer.beans.api.QueryOptimizedFilter;
import org.eobjects.analyzer.beans.filter.MaxRowsFilter;
import org.eobjects.analyzer.connection.Datastore;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.descriptors.FilterBeanDescriptor;
import org.eobjects.analyzer.job.ComponentJob;
import org.eobjects.analyzer.job.ComponentRequirement;
import org.eobjects.analyzer.job.FilterOutcome;
import org.eobjects.analyzer.job.HasComponentRequirement;
import org.eobjects.analyzer.job.HasFilterOutcomes;
import org.eobjects.analyzer.job.InputColumnSinkJob;
import org.eobjects.analyzer.job.InputColumnSourceJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Optimizer that will apply possible optimizations coming from
 * {@link QueryOptimizedFilter} instances in the job.
 *
 * <p>
 * On construction the list of consumers is walked in order. For every
 * {@link FilterConsumer} encountered, at most one {@link FilterOutcome} is
 * selected to be expressed directly in the query. The walk stops at the first
 * filter for which no such outcome can be selected.
 */
public class RowProcessingQueryOptimizer {

    private static final Logger logger = LoggerFactory.getLogger(RowProcessingQueryOptimizer.class);

    /**
     * Filter classes that are optimized even when the datastore reports that
     * query optimization is not preferred.
     */
    private static final Class<?>[] ALWAYS_OPTIMIZABLE = new Class[] { MaxRowsFilter.class };

    private final Datastore _datastore;
    private final Query _baseQuery;
    private final List<RowProcessingConsumer> _consumers;

    /**
     * The filters selected for optimization, mapped to the outcome that is
     * pushed into the query. Insertion-ordered so that the filters are applied
     * to the query in the same order as they appear in the consumer list.
     */
    private final Map<FilterConsumer, FilterOutcome> _optimizedFilters;

    /**
     * Creates the optimizer and immediately determines which filters can be
     * optimized.
     *
     * @param datastore
     *            the datastore whose performance characteristics decide
     *            whether query optimization is preferred
     * @param consumers
     *            the consumers of the row processing, in processing order
     * @param baseQuery
     *            the query to use as the basis for {@link #getOptimizedQuery()}
     */
    public RowProcessingQueryOptimizer(Datastore datastore, List<RowProcessingConsumer> consumers, Query baseQuery) {
        _datastore = datastore;
        _consumers = consumers;
        _baseQuery = baseQuery;
        _optimizedFilters = new LinkedHashMap<FilterConsumer, FilterOutcome>();
        init();
    }

    /**
     * Walks the consumers in order and registers every filter (with its
     * optimizable outcome) until a filter is met that cannot be optimized.
     * Consumers that are not {@link FilterConsumer}s are skipped.
     */
    private void init() {
        int consumerIndex = 0;
        for (final RowProcessingConsumer consumer : _consumers) {
            if (consumer instanceof FilterConsumer) {
                final FilterConsumer filterConsumer = (FilterConsumer) consumer;

                if (!isOptimizable(filterConsumer)) {
                    logger.debug("Breaking optimization. Not optimizable: {}", filterConsumer);
                    // if it can be established that the filter is not
                    // optimizable at all (either because it is not a
                    // QueryOptimizedFilter or because input is not physical
                    // columns), then abort.
                    break;
                }

                final Collection<FilterOutcome> outcomes = filterConsumer.getComponentJob().getFilterOutcomes();
                FilterOutcome optimizableOutcome = null;
                for (final FilterOutcome outcome : outcomes) {
                    final boolean optimizable = isOptimizable(filterConsumer, outcome, consumerIndex);
                    if (optimizable) {
                        if (optimizableOutcome != null) {
                            // cannot have multiple optimizable outcomes for a
                            // single filter.
                            // NOTE(review): this only stops the search; the
                            // first optimizable outcome found is still used
                            // below - confirm that this is intended.
                            break;
                        }
                        optimizableOutcome = outcome;
                    }
                }

                if (optimizableOutcome == null) {
                    // no outcome of this filter can be optimized, so no
                    // further filters are considered either
                    break;
                }

                _optimizedFilters.put(filterConsumer, optimizableOutcome);
            }
            consumerIndex++;
        }
    }

    /**
     * Determines if a filter is optimizable at all, regardless of outcome.
     *
     * @param filterConsumer
     *            the filter to check
     * @return false if the filter's descriptor is not query optimizable or if
     *         any of its required input columns is virtual, true otherwise
     */
    private boolean isOptimizable(FilterConsumer filterConsumer) {
        final FilterBeanDescriptor<?, ?> descriptor = filterConsumer.getComponentJob().getDescriptor();
        if (!descriptor.isQueryOptimizable()) {
            logger.debug("FilterBeanDescriptor not optimizable: {}", descriptor);
            return false;
        }

        final InputColumn<?>[] input = filterConsumer.getRequiredInput();
        for (InputColumn<?> inputColumn : input) {
            if (inputColumn.isVirtualColumn()) {
                logger.debug("InputColumn is virtual: {}, so filter is not optimizable: {}", inputColumn,
                        filterConsumer);
                return false;
            }
        }

        return true;
    }

    /**
     * Determines if a specific outcome of a filter can be optimized. This
     * requires that the filter accepts the outcome for optimization, that the
     * datastore prefers query optimization (or the filter class is in
     * {@link #ALWAYS_OPTIMIZABLE}), and that every consumer placed after the
     * filter depends, directly or transitively, on the outcome.
     *
     * @param filterConsumer
     *            the filter to check
     * @param filterOutcome
     *            the outcome of the filter to check
     * @param consumerIndex
     *            the index of the filter in the list of consumers
     * @return true if the outcome can be optimized, false otherwise
     */
    private boolean isOptimizable(final FilterConsumer filterConsumer, final FilterOutcome filterOutcome,
            final int consumerIndex) {
        if (!filterConsumer.isQueryOptimizable(filterOutcome)) {
            // the filter is not optimizable
            return false;
        }

        if (!_datastore.getPerformanceCharacteristics().isQueryOptimizationPreferred()) {
            // the datastore doesn't prefer query optimization
            final Class<?> filterClass = filterConsumer.getComponentJob().getDescriptor().getComponentClass();
            if (!ArrayUtils.contains(ALWAYS_OPTIMIZABLE, filterClass)) {
                logger.debug(
                        "Datastore performance characteristics indicate that query optimization will not improve performance for {}, stopping",
                        filterConsumer);
                // the filter is not in the "always optimizable" set.
                return false;
            }
        }

        // the columns and requirements that are known to be produced
        // downstream of (and thereby dependent on) the outcome in question
        final Set<InputColumn<?>> satisfiedColumns = new HashSet<InputColumn<?>>();
        final Set<FilterOutcome> satisfiedRequirements = new HashSet<FilterOutcome>();
        satisfiedRequirements.add(filterOutcome);

        for (int i = consumerIndex + 1; i < _consumers.size(); i++) {
            boolean independentComponent = true;

            final RowProcessingConsumer nextConsumer = _consumers.get(i);
            final ComponentJob componentJob = nextConsumer.getComponentJob();

            if (componentJob instanceof HasComponentRequirement) {
                final ComponentRequirement componentRequirement = componentJob.getComponentRequirement();
                if (componentRequirement != null) {
                    final Collection<FilterOutcome> requirements = componentRequirement.getProcessingDependencies();
                    for (final FilterOutcome requirement : requirements) {
                        if (!satisfiedRequirements.contains(requirement)) {
                            logger.debug("Requirement {} is not met using query optimization of {}", requirement,
                                    filterConsumer);
                            return false;
                        } else {
                            independentComponent = false;
                        }
                    }
                }
            }

            if (componentJob instanceof InputColumnSinkJob) {
                final InputColumn<?>[] requiredColumns = ((InputColumnSinkJob) componentJob).getInput();
                for (InputColumn<?> column : requiredColumns) {
                    // only virtual columns are checked here; other columns
                    // do not need to be produced by an earlier component
                    if (column.isVirtualColumn()) {
                        if (!satisfiedColumns.contains(column)) {
                            logger.debug(
                                    "InputColumn {} is not available at query time, and therefore not satisfied for query optimization of {}",
                                    column, filterConsumer);
                            return false;
                        } else {
                            independentComponent = false;
                        }
                    }
                }
            }

            if (independentComponent) {
                // totally independent components prohibit optimization
                logger.debug(
                        "Component {} is completely independent. Position in chain is not determinable, so optimization cannot be done.",
                        nextConsumer);
                return false;
            }

            // this component is accepted now, add its outcomes to the
            // satisfied requirements
            if (componentJob instanceof HasFilterOutcomes) {
                final Collection<FilterOutcome> outcomes = ((HasFilterOutcomes) componentJob).getFilterOutcomes();
                satisfiedRequirements.addAll(outcomes);
            }

            // ... and its output columns to the satisfied columns
            if (componentJob instanceof InputColumnSourceJob) {
                final InputColumn<?>[] output = ((InputColumnSourceJob) componentJob).getOutput();
                Collections.addAll(satisfiedColumns, output);
            }
        }

        return true;
    }

    /**
     * Gets the optimized query. The base query itself is not modified; a clone
     * of it is passed through each optimized filter in turn.
     *
     * @return a clone of the base query with the optimizations of all
     *         optimized filters applied, in the order of the consumer list
     */
    public Query getOptimizedQuery() {
        // create a copy/clone of the original query
        Query q = _baseQuery.clone();

        final Set<Entry<FilterConsumer, FilterOutcome>> entries = _optimizedFilters.entrySet();
        for (Entry<FilterConsumer, FilterOutcome> entry : entries) {
            final FilterConsumer consumer = entry.getKey();
            final FilterOutcome outcome = entry.getValue();
            final Filter<?> filter = consumer.getComponent();

            // the raw type is needed because the category type parameter of
            // the filter is not known here
            @SuppressWarnings("rawtypes")
            final QueryOptimizedFilter queryOptimizedFilter = (QueryOptimizedFilter) filter;

            @SuppressWarnings("unchecked")
            final Query newQuery = queryOptimizedFilter.optimizeQuery(q, outcome.getCategory());
            q = newQuery;
        }
        return q;
    }

    /**
     * Gets the optimized list of {@link RowProcessingConsumer}s. This list
     * consists of the original consumers, except those optimized filters that
     * report to be removeable upon optimization.
     *
     * @return a new list holding the consumers that remain after optimization
     */
    public List<RowProcessingConsumer> getOptimizedConsumers() {
        final List<RowProcessingConsumer> result = new ArrayList<RowProcessingConsumer>(_consumers);
        for (FilterConsumer filterConsumer : _optimizedFilters.keySet()) {
            if (filterConsumer.isRemoveableUponOptimization()) {
                result.remove(filterConsumer);
            }
        }
        return result;
    }

    /**
     * Gets the {@link RowProcessingConsumer}s that were selected for
     * optimization of the query. Note that this includes every optimized
     * filter, also those that are not removed by
     * {@link #getOptimizedConsumers()}.
     *
     * @return an unmodifiable view of the optimized filter consumers
     */
    public Set<? extends RowProcessingConsumer> getEliminatedConsumers() {
        final Set<FilterConsumer> consumers = Collections.unmodifiableSet(_optimizedFilters.keySet());
        return consumers;
    }

    /**
     * Gets the {@link FilterOutcome}s that have been optimized by the query.
     *
     * @return an unmodifiable view of the outcomes that are expressed in the
     *         optimized query
     */
    public Collection<? extends FilterOutcome> getOptimizedAvailableOutcomes() {
        final Collection<FilterOutcome> outcomes = Collections.unmodifiableCollection(_optimizedFilters.values());
        return outcomes;
    }

    /**
     * Determines if the query has been optimized or not.
     *
     * @return true if at least one filter was selected for optimization
     */
    public boolean isOptimizable() {
        return !_optimizedFilters.isEmpty();
    }
}