/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.api.sql.optimizers.name.manual_batching;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import ch.epfl.data.squall.api.sql.optimizers.Optimizer;
import ch.epfl.data.squall.api.sql.optimizers.name.CostParams;
import ch.epfl.data.squall.api.sql.optimizers.name.NameCompGen;
import ch.epfl.data.squall.api.sql.util.ImproperParallelismException;
import ch.epfl.data.squall.api.sql.util.ParserUtil;
import ch.epfl.data.squall.api.sql.visitors.jsql.SQLVisitor;
import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.query_plans.QueryBuilder;
import ch.epfl.data.squall.utilities.SystemParameters;
/*
* For left-deep plans, parallelism obtained from cost formula
*/
/**
 * Optimizer that enumerates join plans level by level (each level joins the
 * last component of an existing subplan with one more source) and returns
 * the complete plan with the smallest predicted total latency.
 *
 * Subplans for which the component generator reports an
 * {@link ImproperParallelismException} are discarded during enumeration.
 */
public class ManualBatchingOptimizer implements Optimizer {
    private static final Logger LOG = Logger
            .getLogger(ManualBatchingOptimizer.class);

    private final SQLVisitor _pq;
    private final Map _map;

    /**
     * @param map
     *            configuration map; it is parsed into the query
     *            representation and later read for DIP_TOTAL_SRC_PAR
     */
    public ManualBatchingOptimizer(Map map) {
        _map = map;
        _pq = ParserUtil.parseQuery(map);
    }

    /**
     * Generates the equi-join of the two components inside {@code ncg} and,
     * if that succeeds, appends {@code ncg} to {@code listNcg}.
     *
     * If the generator throws {@link ImproperParallelismException}, the
     * subplan is logged and dropped (it is not added to the list).
     */
    private void addEquiJoinNotSuboptimal(Component firstComp,
            Component secondComp, NameCompGen ncg, List<NameCompGen> listNcg) {
        try {
            ncg.generateEquiJoin(firstComp, secondComp);
        } catch (final ImproperParallelismException exc) {
            final StringBuilder errorMsg = new StringBuilder();
            errorMsg.append(
                    "This subplan will never generated the optimal query plan, so it's thrown:")
                    .append("\n");
            errorMsg.append(exc.getMessage()).append("\n");
            LOG.info(errorMsg.toString());
            // the subplan is discarded: do not add it to the list
            return;
        }
        listNcg.add(ncg);
    }

    /**
     * Returns the plan with the smallest predicted total latency.
     *
     * If {@code ncgList} is empty there is no plan to choose from; the
     * condition is logged as an error and the JVM is terminated with exit
     * status 1.
     */
    private NameCompGen chooseBest(List<NameCompGen> ncgList) {
        if (ncgList.isEmpty()) {
            final String errorMsg = "No query plans can be efficiently executed with specified parallelisms.\n"
                    + "Try to reduce DIP_TOTAL_SRC_PAR in config file.";
            // fatal condition: report it at ERROR level before exiting
            LOG.error(errorMsg);
            System.exit(1);
        }
        return ncgList.get(getMinTotalLatencyIndex(ncgList));
    }

    /**
     * Builds candidate plans level by level, picks the one with the smallest
     * predicted latency, stores its parallelism and batch settings into the
     * configuration map and returns its query builder.
     *
     * @return the query builder of the chosen plan
     * @throws IllegalStateException
     *             if the query has no data sources
     */
    @Override
    public QueryBuilder generate() {
        final int totalSourcePar = SystemParameters.getInt(_map,
                "DIP_TOTAL_SRC_PAR");
        final ManualBatchingCompGenFactory factory = new ManualBatchingCompGenFactory(
                _map, _pq.getTan(), totalSourcePar);
        final List<String> sourceNames = factory.getParAssigner()
                .getSortedSourceNames();
        final int numSources = sourceNames.size();

        if (numSources == 0)
            // without this guard no plan is ever selected and the code below
            // fails with a NullPointerException on the null plan
            throw new IllegalStateException(
                    "Cannot generate a query plan: the query has no data sources.");

        NameCompGen optimal = null;

        // **************creating single-relation plans********************
        if (numSources == 1) {
            optimal = factory.create();
            optimal.generateDataSource(sourceNames.get(0));
        }

        // **************creating 2-way joins********************
        List<NameCompGen> currentLevel = createTwoWayJoins(factory,
                sourceNames);
        if (numSources == 2)
            optimal = chooseBest(currentLevel);

        // **************creating multi-way joins********************
        for (int level = 2; level < numSources; level++) {
            currentLevel = extendByOneSource(currentLevel);
            if (level == numSources - 1)
                // last level: all sources are joined, choose the final plan
                optimal = chooseBest(currentLevel);
            // TODO: on intermediate levels, subplans with the same ancestor
            // set could be pruned down to the one with the smallest total
            // parallelism; this is not implemented
        }

        ParserUtil.parallelismToMap(optimal, _map);
        ParserUtil.batchesToMap(optimal, _map);
        LOG.info("Predicted latency is " + getTotalLatency(optimal));
        return optimal.getQueryBuilder();
    }

    /**
     * Creates one candidate plan per (source, joined source) pair reported by
     * the join table expression, keeping only those whose equi-join could be
     * generated.
     */
    private List<NameCompGen> createTwoWayJoins(
            ManualBatchingCompGenFactory factory, List<String> sourceNames) {
        final List<NameCompGen> plans = new ArrayList<NameCompGen>();
        for (final String firstCompName : sourceNames) {
            final List<String> joinedWith = _pq.getJte()
                    .getJoinedWithSingleDir(firstCompName);
            if (joinedWith == null)
                continue;
            for (final String secondCompName : joinedWith) {
                final NameCompGen ncg = factory.create();
                final Component first = ncg.generateDataSource(firstCompName);
                final Component second = ncg
                        .generateDataSource(secondCompName);
                addEquiJoinNotSuboptimal(first, second, ncg, plans);
            }
        }
        return plans;
    }

    /**
     * Extends every plan of the previous level by joining its last component
     * with each source it can be joined with, and returns the plans of the
     * next level.
     */
    private List<NameCompGen> extendByOneSource(List<NameCompGen> previousLevel) {
        final List<NameCompGen> nextLevel = new ArrayList<NameCompGen>();
        for (final NameCompGen ncg : previousLevel) {
            Component firstComp = ncg.getQueryBuilder().getLastComponent();
            final List<String> ancestors = ParserUtil
                    .getSourceNameList(firstComp);
            final List<String> joinedWith = _pq.getJte().getJoinedWith(
                    ancestors);
            for (final String compName : joinedWith) {
                NameCompGen newNcg = ncg;
                if (joinedWith.size() > 1) {
                    // a deep copy is needed only when the same subplan is
                    // extended in more than one way
                    newNcg = ncg.deepCopy();
                    firstComp = newNcg.getQueryBuilder().getLastComponent();
                }
                final Component secondComp = newNcg
                        .generateDataSource(compName);
                addEquiJoinNotSuboptimal(firstComp, secondComp, newNcg,
                        nextLevel);
            }
        }
        return nextLevel;
    }

    /**
     * Returns the index of the plan with the smallest total latency; on ties
     * the earliest plan wins. The list must not be empty.
     */
    private int getMinTotalLatencyIndex(List<NameCompGen> ncgList) {
        int minLatencyIndex = 0;
        double minLatency = getTotalLatency(ncgList.get(0));
        for (int i = 1; i < ncgList.size(); i++) {
            final double candidateLatency = getTotalLatency(ncgList.get(i));
            if (candidateLatency < minLatency) {
                minLatencyIndex = i;
                minLatency = candidateLatency;
            }
        }
        return minLatencyIndex;
    }

    // TODO: should compare them by parallelism as well, and not only by
    // totalLatency
    // we could also do some pruning
    /**
     * Returns the total average latency stored in the cost parameters of the
     * plan's last component (it is computed as the query plan is built).
     */
    private double getTotalLatency(NameCompGen ncg) {
        final Map<String, CostParams> allParams = ncg.getCompCost();
        final Component lastComponent = ncg.getQueryBuilder()
                .getLastComponent();
        final CostParams lastParams = allParams.get(lastComponent.getName());
        return lastParams.getTotalAvgLatency();
    }
}