NameCostOptimizer.java example

Explorer
squall-master
/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.api.sql.optimizers.name;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import ch.epfl.data.squall.api.sql.optimizers.Optimizer;
import ch.epfl.data.squall.api.sql.util.ImproperParallelismException;
import ch.epfl.data.squall.api.sql.util.ParserUtil;
import ch.epfl.data.squall.api.sql.visitors.jsql.SQLVisitor;
import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.query_plans.QueryBuilder;
import ch.epfl.data.squall.utilities.SystemParameters;

/**
 * Optimizer for lefty plans; parallelism is obtained from a cost formula.
 *
 * <p>
 * Plans are built level by level: first every 2-way join between sources,
 * then each surviving plan is extended by joining its last component with one
 * more data source. Subplans for which {@code generateEquiJoin} throws
 * {@link ImproperParallelismException} are dropped. Among the remaining
 * candidates the plan with the smallest total parallelism (as reported by
 * {@code ParserUtil.getTotalParallelism}) is chosen.
 */
public class NameCostOptimizer implements Optimizer {
    private static final Logger LOG = Logger
            .getLogger(NameCostOptimizer.class);

    private final SQLVisitor _pq;
    private final Map _map;

    /**
     * Parses the query described by the given configuration map.
     *
     * @param map
     *            configuration map; it is handed to
     *            {@code ParserUtil.parseQuery} and later read for
     *            {@code DIP_TOTAL_SRC_PAR}
     */
    public NameCostOptimizer(Map map) {
        _map = map;
        _pq = ParserUtil.parseQuery(map);
    }

    /**
     * Generates the equi-join of the two components inside {@code ncg} and,
     * if that succeeds, appends {@code ncg} to {@code listNcg}.
     *
     * @param firstComp
     *            left input of the join
     * @param secondComp
     *            right input of the join
     * @param ncg
     *            generator that owns both components
     * @param listNcg
     *            output list of accepted (not suboptimal) plans
     */
    private void addEquiJoinNotSuboptimal(Component firstComp,
            Component secondComp, NameCompGen ncg, List<NameCompGen> listNcg) {
        try {
            ncg.generateEquiJoin(firstComp, secondComp);
        } catch (final ImproperParallelismException exc) {
            // a suboptimal subplan is only reported and never added to the
            // list
            final StringBuilder errorMsg = new StringBuilder();
            errorMsg.append(
                    "This subplan will never generated the optimal query plan, so it's thrown:")
                    .append("\n");
            errorMsg.append(exc.getMessage()).append("\n");
            LOG.info(errorMsg.toString());
            return;
        }
        listNcg.add(ncg);
    }

    /**
     * Returns the plan with the smallest total parallelism; on ties the
     * first such plan in the list wins.
     *
     * <p>
     * If the list is empty, the problem is logged and the JVM is terminated
     * with exit status 1.
     *
     * @param ncgList
     *            candidate plans
     * @return the candidate with the minimum total parallelism
     */
    private NameCompGen chooseBest(List<NameCompGen> ncgList) {
        if (ncgList.isEmpty()) {
            final String errorMsg = "No query plans can be efficiently executed with specified parallelisms.\n"
                    + "Try to reduce DIP_TOTAL_SRC_PAR in config file.";
            // fatal condition: log at ERROR so the reason for the exit is
            // not filtered out together with INFO messages
            LOG.error(errorMsg);
            System.exit(1);
        }

        return ncgList.get(getMinTotalParIndex(ncgList));
    }

    /**
     * Builds the query plan with the smallest total parallelism and stores
     * its parallelism in the configuration map via
     * {@code ParserUtil.parallelismToMap}.
     *
     * @return the query builder of the chosen plan
     * @throws IllegalStateException
     *             if the query has no source relations
     */
    @Override
    public QueryBuilder generate() {
        final int totalSourcePar = SystemParameters.getInt(_map,
                "DIP_TOTAL_SRC_PAR");
        final NameCompGenFactory factory = new NameCompGenFactory(_map,
                _pq.getTan(), totalSourcePar);
        final List<String> sourceNames = factory.getParAssigner()
                .getSortedSourceNames();
        final int numSources = sourceNames.size();
        if (numSources == 0) {
            // without this check no branch below assigns a plan and the
            // method would fail later with an unexplained
            // NullPointerException
            throw new IllegalStateException(
                    "No source relations found in the query; cannot generate a query plan.");
        }
        NameCompGen optimal = null;

        // **************creating single-relation plans********************
        if (numSources == 1) {
            optimal = factory.create();
            optimal.generateDataSource(sourceNames.get(0));
        }

        // **************creating 2-way joins********************
        List<NameCompGen> currentLevel = createTwoWayJoins(factory,
                sourceNames);
        if (numSources == 2) {
            optimal = chooseBest(currentLevel);
        }

        // **************creating multi-way joins********************
        for (int level = 2; level < numSources; level++) {
            List<NameCompGen> nextLevel = extendByOneSource(currentLevel);

            if (level == numSources - 1) {
                // last level, chooseOptimal
                optimal = chooseBest(nextLevel);
            } else {
                // filtering - for NCGs with the same ancestor set, choose
                // the one with the smallest totalParallelism
                nextLevel = pruneSubplans(nextLevel);
            }

            currentLevel = nextLevel;
        }

        ParserUtil.parallelismToMap(optimal, _map);

        return optimal.getQueryBuilder();
    }

    /**
     * Creates one plan per (source, joined-with source) pair and keeps those
     * whose equi-join could be generated.
     */
    private List<NameCompGen> createTwoWayJoins(NameCompGenFactory factory,
            List<String> sourceNames) {
        final List<NameCompGen> plans = new ArrayList<NameCompGen>();
        for (final String firstCompName : sourceNames) {
            final List<String> joinedWith = _pq.getJte()
                    .getJoinedWithSingleDir(firstCompName);
            if (joinedWith == null) {
                continue;
            }
            for (final String secondCompName : joinedWith) {
                // every candidate gets its own generator
                final NameCompGen ncg = factory.create();
                final Component first = ncg.generateDataSource(firstCompName);
                final Component second = ncg
                        .generateDataSource(secondCompName);
                addEquiJoinNotSuboptimal(first, second, ncg, plans);
            }
        }
        return plans;
    }

    /**
     * Extends every plan by joining its last component with each source
     * that its ancestors can be joined with.
     */
    private List<NameCompGen> extendByOneSource(List<NameCompGen> plans) {
        final List<NameCompGen> extended = new ArrayList<NameCompGen>();
        for (final NameCompGen ncg : plans) {
            final Component lastComp = ncg.getQueryBuilder()
                    .getLastComponent();
            final List<String> ancestors = ParserUtil
                    .getSourceNameList(lastComp);
            final List<String> joinedWith = _pq.getJte().getJoinedWith(
                    ancestors);
            if (joinedWith == null) {
                // guarded like the single-direction lookup in the 2-way
                // step. NOTE(review): confirm whether getJoinedWith can
                // actually return null
                continue;
            }
            // doing deepCopy only if there are multiple tables to be joined
            // with; a single extension can reuse the generator itself
            final boolean copyNeeded = joinedWith.size() > 1;
            for (final String compName : joinedWith) {
                NameCompGen newNcg = ncg;
                Component firstComp = lastComp;
                if (copyNeeded) {
                    newNcg = ncg.deepCopy();
                    firstComp = newNcg.getQueryBuilder().getLastComponent();
                }

                final Component secondComp = newNcg
                        .generateDataSource(compName);
                addEquiJoinNotSuboptimal(firstComp, secondComp, newNcg,
                        extended);
            }
        }
        return extended;
    }

    /**
     * Returns the index of the first plan with the smallest total
     * parallelism.
     *
     * @param ncgList
     *            non-empty list of plans
     */
    private int getMinTotalParIndex(List<NameCompGen> ncgList) {
        int totalPar = ParserUtil.getTotalParallelism(ncgList.get(0));
        int minParIndex = 0;
        for (int i = 1; i < ncgList.size(); i++) {
            final int currentTotalPar = ParserUtil
                    .getTotalParallelism(ncgList.get(i));
            // strict comparison keeps the earliest plan on ties
            if (currentTotalPar < totalPar) {
                minParIndex = i;
                totalPar = currentTotalPar;
            }
        }
        return minParIndex;
    }

    /**
     * Groups the plans by the set of source names of their last component
     * and keeps, per group, every plan that has the group's minimum total
     * parallelism.
     *
     * @param ncgList
     *            plans of one level
     * @return the plans that survive pruning
     */
    private List<NameCompGen> pruneSubplans(List<NameCompGen> ncgList) {
        final Map<Set<String>, List<NameCompGen>> collection = new HashMap<Set<String>, List<NameCompGen>>();

        // filling in the collection with the appropriate key-value structure
        for (final NameCompGen ncg : ncgList) {
            final Set<String> ancestors = ParserUtil.getSourceNameSet(ncg
                    .getQueryBuilder().getLastComponent());
            ParserUtil.addToCollection(ancestors, ncg, collection);
        }

        final List<NameCompGen> pruned = new ArrayList<NameCompGen>();
        // for each key (which is a set of ancestors) choose only the best
        for (final Map.Entry<Set<String>, List<NameCompGen>> entry : collection
                .entrySet()) {
            final List<NameCompGen> valueList = entry.getValue();

            // total parallelism is computed once per plan and reused for
            // both the minimum and the filter below
            final int[] totalPars = new int[valueList.size()];
            int minTotalPar = Integer.MAX_VALUE;
            for (int i = 0; i < totalPars.length; i++) {
                totalPars[i] = ParserUtil.getTotalParallelism(valueList
                        .get(i));
                if (totalPars[i] < minTotalPar) {
                    minTotalPar = totalPars[i];
                }
            }

            // all the equivalent plans having minimum totalParallelism are
            // added; there might be multiple of them
            for (int i = 0; i < totalPars.length; i++) {
                if (totalPars[i] == minTotalPar) {
                    pruned.add(valueList.get(i));
                }
            }
        }
        return pruned;
    }

}