/*
* Copyright 2009-2016 Tilmann Zaeschke. All rights reserved.
*
* This file is part of ZooDB.
*
* ZooDB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ZooDB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ZooDB. If not, see <http://www.gnu.org/licenses/>.
*
* See the README and COPYING files for further information.
*/
package org.zoodb.internal.query;
import java.util.Comparator;
import java.util.Date;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.zoodb.api.impl.ZooPC;
import org.zoodb.internal.ZooClassDef;
import org.zoodb.internal.ZooFieldDef;
import org.zoodb.internal.query.QueryParser.FNCT_OP;
import org.zoodb.internal.server.index.BitTools;
import org.zoodb.internal.util.DBLogger;
/**
 * Query optimizer for the instances of one class definition. It analyses a query tree
 * and produces {@link QueryAdvice} objects that describe which field index (if any) should
 * be used and which key range of that index has to be scanned.
 */
public class QueryOptimizer {
/** The class definition whose fields are inspected for available indexes. */
private final ZooClassDef clsDef;
/**
 * A lookup map for all characters that indicate a (non-indexable) regex String.
 * The table is indexed by character code and only covers the codes 0..255; a character
 * has to be range-checked before it is used as an index into this table.
 */
private static final boolean[] REGEX_CHARS = new boolean[256];
static {
    //Besides wildcards and quantifiers this also lists grouping, counted repetition,
    //anchor and character-class characters. Without them a pattern such as "(?i)abc" or
    //"ab(c)" would be mistaken for (partially) literal text.
    char[] regexChars = {
            '.', '\\', '+', '*', '?', '|', '$', '^', '[', ']', '(', ')', '{', '}'};
    for (char c: regexChars) {
        REGEX_CHARS[c] = true;
    }
}
/**
 * Creates an optimizer for queries on the given class.
 *
 * @param clsDef The class definition whose indexed fields are considered when an index
 * is chosen.
 */
public QueryOptimizer(ZooClassDef clsDef) {
this.clsDef = clsDef;
}
/**
 * Determine index to use.
 *
 * Policy:
 * 1) Check if index are available. If not, do not perform any further query analysis (for now)
 *    -&gt; Query rewriting may still be able to optimize really stupid queries.
 * 2) Create sub-queries
 * 3) Analyse sub-queries to determine best index to use. Result may imply that index usage is
 *    pointless (whole index range required). This could also be if one sub-query does not use
 *    any index, in which case using an index for the rest slightly increases disk access
 *    (index read) but reduces CPU needs (only sub-query to process, not whole query).
 * 4a) For each sub-query, determine index with smallest range/density.
 * 4b) Check for required sorting. Using an according index can be of advantage, even if range
 *    is larger.
 * 5) Merge queries with same index and overlapping ranges
 * 6) merge results
 *
 * @param queryTree The root of the query tree. The tree is handed to
 * {@code createSubs()} and its nodes are rewritten by the optimizer.
 * @return A non-empty list of advices, one per remaining (sub-)query. The list never
 * contains {@code null}.
 */
public List<QueryAdvice> determineIndexToUse(QueryTreeNode queryTree) {
    List<QueryAdvice> advices = new LinkedList<QueryAdvice>();

    // step 1 - is there any index at all?
    boolean hasIndex = false;
    for (ZooFieldDef f: clsDef.getAllFields()) {
        if (f.isIndexed()) {
            hasIndex = true;
            break;
        }
    }
    if (!hasIndex) {
        //no index usage
        advices.add( new QueryAdvice(queryTree) );
        return advices;
    }

    //step 2 - sub-queries
    //We split the query tree at every OR into sub queries, such that every sub-query contains
    //the full query but only one side of every OR. All ORs are removed.
    //-> Optimization: We remove only (and split only at) ORs where at least on branch
    //   uses an index. TODO
    List<QueryTreeNode> subQueries = new LinkedList<QueryTreeNode>();
    subQueries.add(queryTree);
    queryTree.createSubs(subQueries);

    for (QueryTreeNode sq: subQueries) {
        optimize(sq);
    }

    //TODO filter out terms that cannot become true.
    //if none is left, return empty set.

    //The two maps are scratch space that is reused (and therefore cleared) for every sub-query.
    IdentityHashMap<ZooFieldDef, Long> minMap = new IdentityHashMap<ZooFieldDef, Long>();
    IdentityHashMap<ZooFieldDef, Long> maxMap = new IdentityHashMap<ZooFieldDef, Long>();
    for (QueryTreeNode sq: subQueries) {
        advices.add(determineIndexToUseSub(sq, minMap, maxMap));
        minMap.clear();
        maxMap.clear();
    }

    //TODO merge queries
    //E.g.:
    // - if none uses an index (or at least one doesn't), return only the full query
    // - if ranges overlap, try to merge?

    //TODO optimisation: merge queries
    //for example the following query returns two identical sub-queries:
    //"_int == 123 || _int == 123" --> This is bad and should be avoided.

    //check for show-stoppers
    //-> in their case, we simply run the un-split query on the full type extent.
    for (QueryAdvice qa: advices) {
        //assuming that the term is not an empty term (contradicting sub-terms)
        if (qa == null) {
            //No advice available for one sub-query: fall back to a plain advice for the
            //query, as in step 1. Never hand out a list that contains 'null'.
            advices.clear();
            advices.add( new QueryAdvice(queryTree) );
            return advices;
        }
        //TODO instead of fixed values, use min/max of index.
        if (qa.getMin() <= Long.MIN_VALUE && qa.getMax() >= Long.MAX_VALUE) {
            //ah, one of them iterates over the whole result set.
            //(returning from inside the loop, so modifying the list here is safe)
            advices.clear();
            advices.add(qa);
            return advices;
        }
    }

    //check for overlapping / global min/max
    mergeAdvices(advices);

    return advices;
}
/**
 * Orders advices by ascending minimum key. Advices with an identical minimum are
 * ordered by ascending maximum key; advices with identical minimum and maximum compare
 * as equal.
 */
private static class AdviceComparator implements Comparator<QueryAdvice> {
    @Override
    public int compare(QueryAdvice o1, QueryAdvice o2) {
        int byMin = Long.compare(o1.getMin(), o2.getMin());
        if (byMin != 0) {
            return byMin;
        }
        //same lower bound -> the upper bound decides
        return Long.compare(o1.getMax(), o2.getMax());
    }
}
/**
 * Merges advices that use the same index and whose key ranges overlap, to avoid duplicate
 * loading effort and duplicate results. Advices that do not overlap are left untouched.
 * <p>
 * If at least one merge happened, the content of {@code advices} is replaced by the merged
 * advices (grouped by index, ordered by minimum key). Otherwise the list is not modified.
 * A merged advice is the advice with the smaller minimum, with its maximum raised to cover
 * the absorbed advice.
 *
 * @param advices The advices to merge; modified in place.
 */
private void mergeAdvices(List<QueryAdvice> advices) {
    if (advices.size() < 2) {
        //shortcut, nothing to merge
        return;
    }

    //group advices by index, sorted by min (then max) value
    IdentityHashMap<ZooFieldDef, TreeSet<QueryAdvice>> map =
            new IdentityHashMap<ZooFieldDef, TreeSet<QueryAdvice>>();
    for (QueryAdvice qa: advices) {
        TreeSet<QueryAdvice> subList = map.get(qa.getIndex());
        if (subList == null) {
            subList = new TreeSet<QueryAdvice>(new AdviceComparator());
            map.put(qa.getIndex(), subList);
        }
        subList.add(qa);
    }

    //merge, one pass per index
    boolean merged = false;
    for (TreeSet<QueryAdvice> subList: map.values()) {
        //every set contains at least the advice that caused its creation
        Iterator<QueryAdvice> iter = subList.iterator();
        QueryAdvice prev = iter.next();
        while (iter.hasNext()) {
            QueryAdvice current = iter.next();
            if (prev.getMax() >= current.getMin()) {
                //Overlap. Only ever extend the range: 'current' may lie completely inside
                //'prev', in which case the maximum of 'prev' must not be lowered.
                if (current.getMax() > prev.getMax()) {
                    prev.setMax(current.getMax());
                }
                iter.remove();
                merged = true;
            } else {
                prev = current;
            }
        }
    }

    if (merged) {
        advices.clear();
        for (TreeSet<QueryAdvice> subList: map.values()) {
            advices.addAll(subList);
        }
    }
}
/**
 * Determines the index and key range to use for a single sub-query.
 * <p>
 * For every term that compares an indexed field with a fixed value, the term's value is
 * converted into an index key and used to narrow the [min, max] key range recorded for that
 * field. Terms with a function on the left hand side are delegated to
 * {@code determineIndexToUseSubForQueryFunctions()}. The resulting ranges are turned into
 * an advice by {@code createQueryAdvice()}.
 *
 * @param queryTree This is a sub-query that does not contain OR operands.
 * @param minMap Lower key bound per indexed field; entries are added and updated by this
 * method.
 * @param maxMap Upper key bound per indexed field; entries are added and updated by this
 * method.
 * @return QueryAdvice
 * @throws IllegalArgumentException if a field type or an operator is not supported.
 */
private QueryAdvice determineIndexToUseSub(QueryTreeNode queryTree,
        IdentityHashMap<ZooFieldDef, Long> minMap,
        IdentityHashMap<ZooFieldDef, Long> maxMap) {
    //TODO determine the Lists directly by assigning ZooFields to term during parsing?
    QueryTreeIterator iter = queryTree.termIterator();
    while (iter.hasNext()) {
        QueryTerm term = iter.next();
        if (!term.isRhsFixed() || term.isLhsFunction()) {
            //ignore terms with variable rhs and functions on the LHS
            //TODO we currently support only indexes on references, not on paths
            if (term.isLhsFunction()) {
                determineIndexToUseSubForQueryFunctions(minMap, maxMap, term.getLhsFunction());
            }
            continue;
        }
        ZooFieldDef f = term.getLhsFieldDef();
        if (f == null || !f.isIndexed()) {
            //ignore fields that are not indexed
            continue;
        }

        if (minMap.get(f) == null) {
            //needs initialization
            //even if we don't narrow the values, min/max allow ordered traversal
            minMap.put(f, f.getMinValue());
            maxMap.put(f, f.getMaxValue());
        }

        Object termVal = term.getValue(null);
        //TODO if(term.isRef())?!?!?!
        //TODO implement term.isIndexable() ?!?!?
        //TODO swap left/right side of query term such that indexed field is always on the left
        //     and the constant is on the right.
        Long value;
        switch (f.getJdoType()) {
        case PRIMITIVE:
            switch (f.getPrimitiveType()) {
            case BOOLEAN:
                //pointless..., well pretty much, unless someone uses this to distinguish
                //very few 'true' from many 'false' or vice versa.
                continue;
            case DOUBLE:
                //accept any numeric constant, e.g. an Integer in "dbl == 3"
                value = BitTools.toSortableLong(((Number) termVal).doubleValue());
                break;
            case FLOAT:
                //accept any numeric constant, see DOUBLE
                value = BitTools.toSortableLong(((Number) termVal).floatValue());
                break;
            case CHAR:
                //A Character is not a Number, so this case must not fall through
                //into the integer cases below.
                value = (long)((Character)termVal).charValue();
                break;
            case BYTE:
            case INT:
            case LONG:
            case SHORT:
                value = ((Number)termVal).longValue();
                break;
            default:
                throw new IllegalArgumentException("Type: " + f.getPrimitiveType());
            }
            break;
        case STRING:
            value = BitTools.toSortableLong(
                    termVal == QueryTerm.NULL ? null : (String)termVal);
            break;
        case REFERENCE:
            value = (termVal == QueryTerm.NULL ?
                    BitTools.NULL : ((ZooPC)termVal).jdoZooGetOid());
            break;
        case DATE:
            value = (termVal == QueryTerm.NULL ? 0 : ((Date)termVal).getTime());
            break;
        default:
            throw new IllegalArgumentException("Type: " + f.getJdoType());
        }

        switch (term.getOp()) {
        case EQ: {
            //TODO check range and exit if EQ does not fit in remaining range
            minMap.put(f, value);
            maxMap.put(f, value);
            break;
        }
        case L:
            if (value < maxMap.get(f)) {
                maxMap.put(f, value - 1); //TODO does this work with floats?
            }
            break;
        case LE:
            if (value < maxMap.get(f)) {
                maxMap.put(f, value);
            }
            break;
        case A:
            if (value > minMap.get(f)) {
                minMap.put(f, value + 1); //TODO does this work with floats?
            }
            break;
        case AE:
            if (value > minMap.get(f)) {
                minMap.put(f, value);
            }
            break;
        case NE:
        case STR_matches:
        case STR_contains_NON_JDO:
        case STR_endsWith:
            //ignore, these operators do not narrow the key range
            break;
        case STR_startsWith:
            setKeysForStringStartsWith((String) term.getValue(null), f, minMap, maxMap);
            break;
        default:
            throw new IllegalArgumentException("Name: " + term.getOp());
        }

        //TODO take into account not-operators (x>1 && x<10) && !(x>5 && X <6) ??
        // -> Hopefully this optimization is marginal and negligible.
        //But it may break everything!
    }

    return createQueryAdvice(minMap, maxMap, queryTree);
}
/**
 * Narrows the key range of an indexed String field for a {@code startsWith()} or
 * {@code matches()} function term. The function is ignored unless it operates on an indexed
 * field of the evaluated object itself and its argument is a constant.
 * <p>
 * For {@code matches()}, a pattern without any regex character is treated as an exact
 * match. Otherwise only the literal text in front of the first regex character is used as
 * a prefix, such that the key range always covers every possible match:
 * the character in front of a quantifier ('?', '*', '{') is optional and therefore not part
 * of the prefix, and a pattern that contains an alternative ('|') gets no prefix at all.
 *
 * @param minMap Lower key bound per indexed field; updated by this method.
 * @param maxMap Upper key bound per indexed field; updated by this method.
 * @param fn The function on the left hand side of the query term.
 */
private void determineIndexToUseSubForQueryFunctions(
        IdentityHashMap<ZooFieldDef, Long> minMap,
        IdentityHashMap<ZooFieldDef, Long> maxMap,
        QueryFunction fn) {

    //we can use indexes only for startsWith() and matches()
    if (!FNCT_OP.STR_startsWith.equals(fn.op()) && !FNCT_OP.STR_matches.equals(fn.op())) {
        return;
    }

    //we can use index only when operating on a local field
    QueryFunction f0 = fn.getParams()[0];
    if (!FNCT_OP.FIELD.equals(f0.op())) {
        return;
    }
    if (f0.getParams()[0].op() != FNCT_OP.THIS) {
        //TODO we don't support path queries yet, i.e. the string field must belong to
        //the currently evaluated main-object, not to a referenced object.
        return;
    }

    ZooFieldDef f = f0.getFieldDef();
    if (f == null || !f.isIndexed()) {
        //ignore fields that are not indexed
        return;
    }

    QueryFunction f1 = fn.getParams()[1];
    if (!f1.isConstant()) {
        return;
    }
    Object param1 = f1.evaluate(null, null);

    if (minMap.get(f) == null) {
        //needs initialization
        //even if we don't narrow the values, min/max allow ordered traversal
        minMap.put(f, f.getMinValue());
        maxMap.put(f, f.getMaxValue());
    }

    switch (fn.op()) {
    case STR_matches: {
        String str = (String) param1;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            boolean isQuantifier = c == '?' || c == '*' || c == '{';
            //The lookup table covers only part of the char range, so check the bounds.
            if (isQuantifier || (c < REGEX_CHARS.length && REGEX_CHARS[c])) {
                //if we have a regex that does not simply result in full match we
                //simply use the leading part for a startsWith() query.
                int prefixLen = i;
                if (isQuantifier && prefixLen > 0) {
                    //the quantified character may be absent from a matching String
                    prefixLen--;
                    if (prefixLen > 0
                            && Character.isHighSurrogate(str.charAt(prefixLen - 1))) {
                        //do not cut a surrogate pair in half
                        prefixLen--;
                    }
                }
                if (str.indexOf('|', i) >= 0) {
                    //an alternative may match Strings that do not share the prefix
                    prefixLen = 0;
                }
                if (prefixLen == 0) {
                    DBLogger.info("Ignoring index on String query because of regex characters.");
                }
                setKeysForStringStartsWith(str.substring(0, prefixLen), f, minMap, maxMap);
                return;
            }
        }
        //no regex characters: the pattern is a plain String that has to match exactly
        long key = BitTools.toSortableLong(str);
        if (key > minMap.get(f)) {
            minMap.put(f, key);
        }
        if (key < maxMap.get(f)) {
            maxMap.put(f, key);
        }
        break;
    }
    case STR_startsWith:
        setKeysForStringStartsWith((String) param1, f, minMap, maxMap);
        break;
    default: //nothing
    }
}
/**
 * Narrows the key range recorded for field {@code f} to the keys computed for the given
 * String prefix. A bound is only replaced if the new bound is tighter than the recorded one.
 *
 * @param prefix The prefix that matching Strings start with.
 * @param f The indexed field; both maps must already contain an entry for it.
 * @param minMap Lower key bound per indexed field.
 * @param maxMap Upper key bound per indexed field.
 */
private void setKeysForStringStartsWith(String prefix, ZooFieldDef f,
        IdentityHashMap<ZooFieldDef, Long> minMap,
        IdentityHashMap<ZooFieldDef, Long> maxMap) {
    final long lowerKey = BitTools.toSortableLongPrefixMinHash(prefix);
    final long upperKey = BitTools.toSortableLongPrefixMaxHash(prefix);

    //raise the lower bound?
    long currentLower = minMap.get(f);
    if (currentLower < lowerKey) {
        minMap.put(f, lowerKey);
    }

    //lower the upper bound?
    long currentUpper = maxMap.get(f);
    if (currentUpper > upperKey) {
        maxMap.put(f, upperKey);
    }
}
/**
 * Creates the advice for one sub-query from the key ranges collected per indexed field.
 * If no range was collected, an advice without index is returned. If several fields have a
 * range, the field with the narrowest range is advised.
 *
 * @param minMap Lower key bound per indexed field.
 * @param maxMap Upper key bound per indexed field; contains the same fields as minMap.
 * @param queryTree The sub-query the advice is created for.
 * @return The advice for the sub-query, never {@code null}.
 */
private QueryAdvice createQueryAdvice(
        IdentityHashMap<ZooFieldDef, Long> minMap,
        IdentityHashMap<ZooFieldDef, Long> maxMap,
        QueryTreeNode queryTree) {
    if (minMap.isEmpty()) {
        //return default query
        return new QueryAdvice(queryTree);
    }

    //the advised index to use...
    // start with first
    ZooFieldDef def = minMap.keySet().iterator().next();
    QueryAdvice qa = new QueryAdvice(queryTree);
    qa.setIndex( def );
    qa.setMin( minMap.get(def) );
    qa.setMax( maxMap.get(def) );

    //only one index left? -> Easy!!!
    //TODO well, better not use it if it covers the whole range? Maybe for sorting?
    if (minMap.size() == 1) {
        //NOTE(review): this path returns before the String range extension at the end of
        //this method is applied - confirm that this is intended for String indexes.
        return qa;
    }

    for (Map.Entry<ZooFieldDef, Long> me2: minMap.entrySet()) {
        long min2 = me2.getValue();
        long max2 = maxMap.get(me2.getKey());
        if (isNarrowerRange(min2, max2, qa.getMin(), qa.getMax())) {
            qa.setIndex( me2.getKey() );
            qa.setMin( min2 );
            qa.setMax( max2 );
        }
    }

    if (qa.getIndex().isString()) {
        //For String we have to extend the range because of the trailing hashcode
        qa.setMin(BitTools.getMinPosInPage(qa.getMin()));
        qa.setMax(BitTools.getMaxPosInPage(qa.getMax()));
    }

    return qa;
}

/**
 * Checks whether range A is strictly narrower than range B. The width 'max - min' of a
 * range can exceed Long.MAX_VALUE (e.g. for [Long.MIN_VALUE, Long.MAX_VALUE]), so the
 * widths are compared as unsigned values. An inverted range (max &lt; min) contains no key
 * and counts as having width 0.
 */
private static boolean isNarrowerRange(long minA, long maxA, long minB, long maxB) {
    long widthA = maxA < minA ? 0 : maxA - minA;
    long widthB = maxB < minB ? 0 : maxB - minB;
    //flipping the sign bit turns a signed comparison into an unsigned comparison
    return (widthA ^ Long.MIN_VALUE) < (widthB ^ Long.MIN_VALUE);
}
/**
 * Applies tree rewrites to a (sub-)query. Currently this only strips unary nodes.
 *
 * @param q The root node of the (sub-)query; its nodes are modified in place.
 */
private void optimize(QueryTreeNode q) {
stripUnaryNodes(q);
}
/**
 * Removes superfluous unary nodes from the tree below (and including) the given node.
 * The tree is modified in place.
 * <ul>
 * <li>A unary node with a child node in {@code n1} takes over operator, terms and
 * children of that child, repeatedly, until it is no longer such a node.</li>
 * <li>A remaining unary node that has a parent is bypassed: the parent slot that referenced
 * the node receives the node's {@code n1} and {@code t1}.</li>
 * </ul>
 * Afterwards the method recurses into both children of the node.
 *
 * @param q The node to start with.
 */
private void stripUnaryNodes(QueryTreeNode q) {
    while (q.isUnary() && q.n1 != null) {
        //this is a unary root node that shouldn't be one
        q.op = q.n1.op;
        q.n2 = q.n1.n2;
        q.t2 = q.n1.t2;
        q.t1 = q.n1.t1;
        //n1 has to be replaced last, the assignments above still read from it
        q.n1 = q.n1.n1;
        q.relateToChildren();
    }

    //check unary nodes if they are not root / pull down leaf-unaries
    if (q.isUnary() && q.p != null) {
        if (q.p.n1 == q) {
            q.p.n1 = q.n1;
            q.p.t1 = q.t1;
        } else {
            q.p.n2 = q.n1;
            q.p.t2 = q.t1;
        }
        //In both cases it is 'n1' that moved up, so 'n1' needs the new parent.
        //(presumably 'n1' is always null here because of the loop above - confirm)
        if (q.n1 != null) {
            q.n1.p = q.p;
        }
    }

    if (q.n1 != null) {
        stripUnaryNodes(q.n1);
    }
    if (q.n2 != null) {
        stripUnaryNodes(q.n2);
    }
}
}