package lux.compiler;

import java.util.ArrayList;
import java.util.Arrays;

import lux.index.IndexConfiguration;
import lux.query.BooleanPQuery;
import lux.query.BooleanPQuery.Clause;
import lux.query.MatchAllPQuery;
import lux.query.ParseableQuery;
import lux.query.SpanBooleanPQuery;
import lux.query.SpanMatchAll;
import lux.query.SpanNearPQuery;
import lux.xml.ValueType;
import lux.xpath.AbstractExpression;

import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.SortField;

/**
 * Wraps a Lucene Query, with advice as to how to process its results as XPath.
 * For now, simply distinguishes the two cases: whether the results are in fact
 * supposed to be the results of the original XPath evaluation, or if further
 * evaluation is needed.
 */
/*
 * We could also tell: whether the query will return the correct document set;
 * it's possible that we may sometimes retrieve documents that don't match.
 * We're not allowed to miss a document, though. Some evaluators that return the
 * correct doc set still may need additional evaluation though if the results
 * are not to be documents.
 *
 * We go to great lengths to maintain return type info, yet it is only used in one place
 * that has any other purpose than maintaining this info:
 * PathOptimizer.optimizeFunCall checks if the return type is document when determining
 * whether to replace count() with lux:count()
 */
public class XPathQuery {

    // a Lucene query corresponding to an XQuery expression
    private final ParseableQuery pquery;

    // a Lucene query corresponding to the base expression of an XQuery expression:
    // this ignores any constraints deriving from predicates and is used for combining
    // path queries
    private ParseableQuery pathQuery;

    private ValueType valueType;

    private final boolean immutable;

    /**
     * bitmask holding facts proven about the query; generally these facts enable different
     * optimizations. In the comments, we refer to the "result type" of the query meaning the
     * result type of the xpath expression that the query was generated from.
     */
    private long facts;

    /**
     * A Lucene sort order to be applied to the query. This will have been computed from an
     * XQuery order by expression.
     */
    private SortField[] sortFields;

    /**
     * @return the sort fields recorded on this query, or null if none were set
     */
    public SortField[] getSortFields() {
        return sortFields;
    }

    /**
     * Records the sort order on this query. When the argument is non-null the IGNORABLE
     * fact is cleared as well.
     * @param sortFields the sort fields to store; may be null
     */
    public void setSortFields(SortField[] sortFields) {
        this.sortFields = sortFields;
        if (sortFields != null) {
            // NOTE(review): on an immutable query setFact returns a modified copy, which is
            // discarded here; only mutable queries actually have IGNORABLE cleared.
            setFact (IGNORABLE, false); // prevent the fields from getting dropped
        }
    }

    /**
     * A query is exact iff its xpath expression returns exactly one value per document, and the
     * generated lucene query returns exactly those documents satisfying the xpath expression.
     * EXACT <=> MINIMAL and SINGULAR, and we never use EXACT explicitly
     */
    public static final int EXACT=0x00000001;

    /**
     * A query is minimal if it returns all, and only, those documents satisfying the xpath expression.
     * Exact queries are all minimal.
     */
    public static final int MINIMAL=0x00000002;

    /**
     * An expression is singular if it returns a single result for every matching document.
     * An XPathQuery is singular if it was generated from a singular expression, and therefore
     * its expression returns the same number of results as the lucene query.
     */
    public static final int SINGULAR=0x00000004;

    /**
     * A query is boolean_false if its result type is boolean, and the existence of a single
     * query result indicates a 'false()' value
     */
    public static final int BOOLEAN_FALSE=0x00000010;

    /**
     * If a query a is ignorable, then combine(a,b) = b unless b is also ignorable,
     * in which case combine(a,b) = a|b
     */
    public static final int IGNORABLE=0x00000020;

    /** queries that match all documents (have no filter) are empty. */
    public static final int EMPTY=0x00000040;

    /** queries whose path query constraint is already incorporated into the main query */
    public static final int PATH_COMBINED=0x00000080;

    public final static XPathQuery MATCH_ALL = new XPathQuery(MatchAllPQuery.getInstance(), MatchAllPQuery.getInstance(), MINIMAL|SINGULAR|EMPTY, ValueType.DOCUMENT, true);

    private final static XPathQuery PATH_MATCH_ALL = new XPathQuery(SpanMatchAll.getInstance(), SpanMatchAll.getInstance(), MINIMAL|SINGULAR|EMPTY, ValueType.DOCUMENT, true);

    /**
     * @param query a Lucene query
     * @param pathQuery a Lucene query representing a constraint that the context item's path exists
     * @param resultFacts a bitmask with interesting facts about this query
     * @param valueType the type of results returned by the xpath expression, as specifically as
     * can be determined.
     * @param immutable whether this query may be changed - set true for some internal statics like MATCH_ALL
     */
    protected XPathQuery(ParseableQuery query, ParseableQuery pathQuery, long resultFacts, ValueType valueType, boolean immutable) {
        this.pquery = query;
        this.pathQuery = pathQuery;
        this.facts = resultFacts;
        // setType must run before the immutable field is assigned: while that field still
        // holds its default value (false), setType updates this instance in place instead of
        // returning a copy.
        setType (valueType);
        this.immutable = immutable;
    }

    /**
     * Creates a query with no path query.
     * @param query a Lucene query
     * @param resultFacts a bitmask with interesting facts about this query
     * @param valueType the type of results returned by the xpath expression
     * @param immutable whether this query may be changed
     */
    protected XPathQuery(ParseableQuery query, long resultFacts, ValueType valueType, boolean immutable) {
        this (query, null, resultFacts, valueType, immutable);
    }

    /**
     * Creates a mutable query with no path query.
     * @param query a Lucene query
     * @param resultFacts a bitmask with interesting facts about this query
     * @param valueType the type of results returned by the xpath expression
     */
    protected XPathQuery(ParseableQuery query, long resultFacts, ValueType valueType) {
        this (query, resultFacts, valueType, false);
    }

    /**
     * @param query the query on which the result is based
     * @param pathQuery a Lucene query representing a constraint that the context item's path exists; may be null
     * @param resultFacts the facts to use in the new query
     * @param valueType the result type of the new query
     * @param indexConfig the indexer configuration (not consulted by this method)
     * @param sortFields the sort fields to record in the query
     * @return a new, mutable query built from the given parts.
     */
    public static XPathQuery getQuery (ParseableQuery query, ParseableQuery pathQuery, long resultFacts, ValueType valueType, IndexConfiguration indexConfig, SortField[] sortFields) {
        XPathQuery q;
        q = new XPathQuery (query, resultFacts, valueType);
        q.setPathQuery(pathQuery);
        q.setSortFields(sortFields);
        return q;
    }

    /*
     * decides whether to create a pathQuery or a main query based on the type of the termQuery argument.
     */
    public static XPathQuery getQuery(ParseableQuery termQuery, long facts, ValueType type, IndexConfiguration indexConfig, SortField[] sortFields) {
        if (termQuery.isSpanCompatible()) {
            return getQuery (null, termQuery, facts, type, indexConfig, sortFields);
        }
        return getQuery (termQuery, null, facts, type, indexConfig, sortFields);
    }

    /**
     * @param indexConfig the index configuration
     * @return the shared span-based match-all query when the INDEX_PATHS option is set,
     * otherwise {@link #MATCH_ALL}
     */
    public static XPathQuery getMatchAllQuery (IndexConfiguration indexConfig) {
        if (indexConfig.isOption(IndexConfiguration.INDEX_PATHS)) {
            return PATH_MATCH_ALL;
        }
        return MATCH_ALL;
    }

    /**
     * @return whether it is known that the query will return the minimal set of
     *         documents containing the required result value. If false, some
     *         documents may be returned that will eventually need to be
     *         discarded if they don't match the xpath.
     */
    public boolean isMinimal() {
        return (facts & MINIMAL) != 0;
    }

    /**
     * @return the result type recorded for this query
     */
    public ValueType getResultType() {
        return valueType;
    }

    /**
     * Combines this query with another.
     * @param occur the occurrence specifier for this query
     * @param precursor the other query
     * @param precursorOccur the occurrence specifier for the precursor query
     * @param type the return type of the combined query
     * @param config the index configuration
     * @return the combined query
     */
    public XPathQuery combineBooleanQueries(Occur occur, XPathQuery precursor, Occur precursorOccur, ValueType type, IndexConfiguration config) {
        XPathQuery result = combineIgnorableQueries(occur, precursor);
        if (result != null) {
            return result;
        }
        long resultFacts = combineQueryFacts (this, precursor);
        ParseableQuery combined = combineBoolean (this.pquery, occur, precursor.pquery, precursorOccur);
        SortField[] combinedSorts = combineSortFields(precursor);
        XPathQuery q = getQuery(combined, null, resultFacts, type, config, combinedSorts);
        q.setPathQuery(pathQuery);
        return q;
    }

    /**
     * Handles the case where exactly one of the two queries is IGNORABLE and they are
     * combined with MUST: the ignorable query is dropped and the other one is returned.
     * @param occur the boolean operator used to combine
     * @param precursor the other query
     * @return the surviving query, or null if this special case does not apply
     */
    private XPathQuery combineIgnorableQueries(Occur occur, XPathQuery precursor) {
        if (occur == Occur.MUST && isFact(IGNORABLE) != precursor.isFact(IGNORABLE)) {
            if (isFact(IGNORABLE)) {
                if (isEmpty() && isMinimal()) {
                    return precursor;
                } else {
                    return precursor.setFact(MINIMAL, false); // we are losing some information by ignoring this query
                }
            } else {
                if (precursor.isEmpty() && precursor.isMinimal()) {
                    return this;
                } else {
                    return setFact (MINIMAL, false); // we are losing some information by ignoring the precursor query
                }
            }
        }
        return null;
    }

    /**
     * Merges the sort fields of this query with those of the precursor. This query's fields
     * come first; a precursor field is skipped only if it equals the last of this query's
     * fields.
     * @param precursor the other query
     * @return the merged sort fields, or null if neither query has any
     */
    private SortField[] combineSortFields(XPathQuery precursor) {
        if (sortFields == null) {
            // nothing of our own: use the precursor's fields, which may themselves be null
            return precursor.sortFields;
        }
        if (precursor.sortFields == null) {
            return sortFields;
        }
        ArrayList<SortField> combined = new ArrayList<SortField>(Arrays.asList(sortFields));
        // An empty (non-null) array has no last element; guard against indexing at -1.
        SortField prevSort = combined.isEmpty() ? null : combined.get(combined.size() - 1);
        for (SortField sortField : precursor.sortFields) {
            if (! sortField.equals(prevSort)) {
                combined.add(sortField);
            }
        }
        return combined.toArray(new SortField[combined.size()]);
    }

    /**
     * Combines this query with another, separated by the given distance. Generates Lucene SpanQuerys, and
     * the constituent queries must be span queries as well.
     * @param precursor the other query
     * @param occur the boolean operator used to combine
     * @param isPredicate whether the enclosing expression is a predicate (or a path expression)
     * @param type the return type of the combined query
     * @param distance the distance between the queries
     * @param config the index configuration
     * @return the combined query
     */
    public XPathQuery combineSpanQueries(XPathQuery precursor, Occur occur, boolean isPredicate, ValueType type, int distance, IndexConfiguration config) {
        XPathQuery result = combineIgnorableQueries(occur, precursor);
        if (result != null) {
            return result.setType(type);
        }
        long resultFacts = combineQueryFacts (this, precursor);
        ParseableQuery combined, pathCombined;
        if (this.pathQuery == null) {
            pathCombined = precursor.pathQuery;
            if (precursor.isFact(PATH_COMBINED)) {
                resultFacts |= PATH_COMBINED;
            }
        } else if (precursor.pathQuery == null) {
            pathCombined = this.pathQuery;
            if (isFact(PATH_COMBINED)) {
                resultFacts |= PATH_COMBINED;
            }
        } else {
            pathCombined = combineSpans (this.pathQuery, occur, precursor.pathQuery, distance);
            // we're adding something new to the path query, so it isn't known to be subsumed by the main query
            resultFacts &= ~PATH_COMBINED;
        }
        combined = combineBoolean(pquery, occur, precursor.pquery, occur);
        if (isPredicate) {
            // if this is a predicate, use existing pathQuery without combining, and add the combined pathQuery
            // to the main query
            combined = combineBoolean (combined, occur, pathCombined, occur);
            pathCombined = pathQuery;
            resultFacts |= PATH_COMBINED; // indicate that the path query has been incorporated into the main query, so it is redundant
        }
        SortField[] combinedSorts = combineSortFields(precursor);
        XPathQuery q = new XPathQuery(combined, resultFacts, type);
        q.setPathQuery(pathCombined);
        q.setSortFields(combinedSorts);
        return q;
    }

    /**
     * Computes the facts of a combination: if either side is both EMPTY and MINIMAL the
     * other side's facts are used unchanged; otherwise the two bitmasks are intersected.
     */
    private static long combineQueryFacts (XPathQuery a, XPathQuery b) {
        if (b.isEmpty() && b.isMinimal()) {
            return a.facts;
        } else if (a.isEmpty() && a.isMinimal()) {
            return b.facts;
        } else {
            return combineFacts(a.facts, b.facts) ;
        }
    }

    /**
     * Joins two queries into a boolean query, short-circuiting when one side is null, when
     * both are equal, or when one side is a match-all and the other is not negated.
     */
    private static ParseableQuery combineBoolean (ParseableQuery a, Occur aOccur, ParseableQuery b, Occur bOccur) {
        if (a == null || a.equals(b)) {
            return b;
        }
        if (b == null) {
            return a;
        }
        if (a.isMatchAll()) {
            if (bOccur != Occur.MUST_NOT) {
                return b;
            }
        }
        if (b.isMatchAll()) {
            if (aOccur != Occur.MUST_NOT) {
                return a;
            }
        }
        return new BooleanPQuery(new BooleanPQuery.Clause(a, aOccur), new BooleanPQuery.Clause(b, bOccur));
    }

    /**
     * Joins two span-compatible queries, either at a fixed distance (distance &gt;= 0) or as
     * a span boolean query when no distance is known.
     */
    private static ParseableQuery combineSpans (ParseableQuery a, Occur occur, ParseableQuery b, int distance) {
        // don't create a span query for //foo; a single term is enough
        // distance < 0 means no distance could be computed
        if (a.isMatchAll() && occur != Occur.MUST_NOT && (distance > 90 || distance < 0)) {
            if (occur == Occur.SHOULD) {
                return a;
            }
            return b;
        }
        if (b.isMatchAll()) {
            if (occur == Occur.SHOULD) {
                return b;
            }
            return a;
        }
        if (distance >= 0) {
            // there is a specific distance (path steps separate by /*/, say)
            return combineFiniteSpan(a, occur, b, distance);
        }
        // distance = -1
        if (a.equals(b)) {
            return a;
        }
        return new SpanBooleanPQuery(occur, a, b);
    }

    /**
     * Builds an ordered span-near query for two queries a known distance apart.
     * @throws IllegalArgumentException if occur is anything other than MUST
     */
    private static ParseableQuery combineFiniteSpan(ParseableQuery a, Occur occur, ParseableQuery b, int distance) {
        if (occur != Occur.MUST) {
            throw new IllegalArgumentException ("unsupported boolean combination for span query: " + occur);
        }
        assert (! (a instanceof SpanBooleanPQuery && b instanceof SpanBooleanPQuery));
        if ((a instanceof SpanBooleanPQuery && ((SpanBooleanPQuery) a).getOccur() == Occur.MUST) ||
            (b instanceof SpanBooleanPQuery && ((SpanBooleanPQuery) b).getOccur() == Occur.MUST)) {
            return combineBooleanWithSpan(a, b, distance);
        }
        if (a == MatchAllPQuery.getInstance()) {
            // substitute the span-flavoured match-all for the plain one inside a span-near query
            return new SpanNearPQuery(distance, true, SpanMatchAll.getInstance(), b);
        }
        return new SpanNearPQuery(distance, true, a, b);
    }

    /**
     * Distributes a span-near over the clauses of whichever argument is a SpanBooleanPQuery
     * (a is preferred when both are). Clauses that are not span-compatible are copied through
     * unchanged.
     */
    private static ParseableQuery combineBooleanWithSpan(ParseableQuery a, ParseableQuery b, int distance) {
        // ((A NEAR B) AND C) NEAR D => ((A NEAR B) AND (C NEAR D))
        // but what about
        // A NEAR (B AND C) => (A NEAR B) AND (B NEAR C)
        SpanBooleanPQuery bq = (SpanBooleanPQuery) ((a instanceof SpanBooleanPQuery) ? a : b);
        Clause[] bclauses = bq.getClauses();
        Clause[] clauses = new Clause [bclauses.length];
        for (int i = 0; i < clauses.length; i++) {
            Clause clause = bclauses[i];
            ParseableQuery query = clause.getQuery();
            if (! query.isSpanCompatible()) {
                clauses[i] = clause;
                continue;
            }
            if (bq == a) {
                clauses[i] = new Clause (new SpanNearPQuery (distance, true, query, b), clause.getOccur());
            } else {
                clauses[i] = new Clause (new SpanNearPQuery (distance, true, a, query), clause.getOccur());
            }
        }
        return new SpanBooleanPQuery (clauses);
    }

    /** Intersects two fact bitmasks: only facts present in both survive. */
    private static final long combineFacts (long facts2, long facts3) {
        return facts2 & facts3;
    }

    /**
     * @return whether the EMPTY fact is set on this query
     */
    public boolean isEmpty() {
        return isFact(EMPTY);
    }

    /**
     * @return the main query's string form (empty if there is none), followed by the path
     * query in braces when one is present
     */
    @Override
    public String toString () {
        StringBuilder buf = new StringBuilder ();
        buf.append(pquery == null ? "" : pquery.toString());
        if (pathQuery != null) {
            buf.append('{').append(pathQuery.toString()).append('}');
        }
        return buf.toString();
    }

    /**
     * Sets or clears a fact. A mutable query is modified in place and returned; an immutable
     * query is left untouched and a modified mutable copy is returned instead.
     * @param fact the bit(s) to change
     * @param t true to set the bits, false to clear them
     * @return the query carrying the updated facts
     */
    public XPathQuery setFact(int fact, boolean t) {
        XPathQuery query = this;
        if (immutable) {
            // NOTE(review): the copy is built without pathQuery or sortFields; confirm that
            // dropping them is intended.
            query = new XPathQuery (this.pquery, facts, valueType);
        }
        if (t) {
            query.facts |= fact;
        } else {
            query.facts &= (~fact);
        }
        return query;
    }

    /**
     * @param fact the bit(s) to test
     * @return true if every bit of fact is set on this query
     */
    public final boolean isFact (int fact) {
        return (facts & fact) == fact;
    }

    /**
     * @return the raw facts bitmask
     */
    public long getFacts() {
        return facts;
    }

    /**
     * Sets the result type, adjusting type-dependent facts: BOOLEAN_FALSE is always cleared,
     * SINGULAR is cleared for BOOLEAN and set for DOCUMENT. A mutable query is modified in
     * place; an immutable one yields a modified mutable copy.
     * @param type the new result type; null is treated as ValueType.VALUE
     * @return the query carrying the new type
     */
    public XPathQuery setType(ValueType type) {
        XPathQuery query;
        if (type == null) {
            type = ValueType.VALUE;
        }
        if (immutable) {
            // NOTE(review): as in setFact, this copy carries neither pathQuery nor sortFields.
            query = new XPathQuery (pquery, facts, type);
        } else {
            query = this;
        }
        query.valueType = type;
        query.facts &= ~BOOLEAN_FALSE;
        if (query.valueType == ValueType.BOOLEAN) {
            query.facts &= ~SINGULAR;
        } else if (query.valueType == ValueType.DOCUMENT) {
            query.facts |= SINGULAR;
        }
        // no other type info is stored in facts since it's not needed by search()
        return query;
    }

    /**
     * @return whether this query was created as immutable
     */
    public boolean isImmutable() {
        return immutable;
    }

    /**
     * Delegates to the full query's toXmlNode.
     * @param defaultField passed through to the full query
     * @param config passed through to the full query
     * @return the expression produced by the full query
     */
    public AbstractExpression toXmlNode(String defaultField, IndexConfiguration config) {
        return getFullQuery().toXmlNode(defaultField, config);
    }

    /**
     * @return the main query, without the path constraint; may be null
     */
    public ParseableQuery getBooleanQuery() {
        return pquery;
    }

    /**
     * A query generated by a predicate expression. Predicates store their base query,
     * rather than their predicated filter query, as the base for path combinations, and
     * also set the filter query to add in as an additional filter.
     * @return the filter query.
     */
    public ParseableQuery getPathQuery() {
        return pathQuery;
    }

    /**
     * @param baseQuery the path query to store; may be null
     */
    public void setPathQuery(ParseableQuery baseQuery) {
        this.pathQuery = baseQuery;
    }

    /**
     * @return the main query with the path query added as a required clause, unless there is
     * no path query or it is already marked PATH_COMBINED; when the main query is null or
     * matches everything, the path query alone is returned
     */
    public ParseableQuery getFullQuery () {
        ParseableQuery q = getBooleanQuery();
        if (pathQuery != null && !isFact(PATH_COMBINED)) {
            // add in the path constraint in baseQuery
            if (q == null || q.isMatchAll()) {
                q = pathQuery;
            } else {
                q = new BooleanPQuery (Occur.MUST, q, pathQuery);
            }
        }
        return q;
    }

}

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */