/*
* eXist Open Source Native XML Database
* Copyright (C) 2013 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.xquery.modules.range;
import org.exist.collections.Collection;
import org.exist.dom.persistent.DocumentSet;
import org.exist.dom.persistent.NodeSet;
import org.exist.dom.QName;
import org.exist.indexing.range.RangeIndex;
import org.exist.indexing.range.RangeIndexConfig;
import org.exist.indexing.range.RangeIndexWorker;
import org.exist.storage.IndexSpec;
import org.exist.xmldb.XmldbURI;
import org.exist.xquery.*;
import org.exist.xquery.util.Error;
import org.exist.xquery.value.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
public class FieldLookup extends Function implements Optimizable {
// Parameter list shared by all fixed-operator signatures (field-eq, field-ne, ...):
// the field name(s) followed by the keys to look up.
private final static SequenceType[] PARAMETER_TYPE = new SequenceType[] {
new FunctionParameterSequenceType("fields", Type.STRING, Cardinality.ONE_OR_MORE,
"The name of the field(s) to search"),
new FunctionParameterSequenceType("keys", Type.ATOMIC, Cardinality.ZERO_OR_MORE,
"The keys to look up for each field.")
};
/**
 * Signatures of all lookup functions implemented by this class: the generic
 * {@code field} function, which takes an explicit list of operators, and one
 * {@code field-<operator>} function per fixed operator.
 */
public final static FunctionSignature[] signatures = {
    new FunctionSignature(
        new QName("field", RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
        "General field lookup function. Normally this will be used by the query optimizer.",
        new SequenceType[] {
            new FunctionParameterSequenceType("fields", Type.STRING, Cardinality.ONE_OR_MORE,
                "The name of the field(s) to search"),
            new FunctionParameterSequenceType("operators", Type.STRING, Cardinality.ONE_OR_MORE,
                "The operators to use as strings: eq, lt, gt, contains ..."),
            new FunctionParameterSequenceType("keys", Type.ATOMIC, Cardinality.ZERO_OR_MORE,
                "The keys to look up for each field.")
        },
        new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
            "all nodes from the field set whose node value matches the key according to the given operator."),
        true
    ),
    comparisonSignature("field-eq",
        "General field lookup function based on equality comparison. Normally this will be used by the query optimizer.",
        "all nodes from the field set whose node value is equal to the key."),
    comparisonSignature("field-ne",
        "General field lookup function based on non-equality comparison. Normally this will be used by the query optimizer.",
        "all nodes from the field set whose node value is not equal to the key."),
    comparisonSignature("field-gt",
        "General field lookup function based on greater-than comparison. Normally this will be used by the query optimizer.",
        "all nodes from the field set whose node value is greater than the key."),
    comparisonSignature("field-lt",
        "General field lookup function based on less-than comparison. Normally this will be used by the query optimizer.",
        "all nodes from the field set whose node value is less than the key."),
    comparisonSignature("field-le",
        "General field lookup function based on less-than-equal comparison. Normally this will be used by the query optimizer.",
        "all nodes from the field set whose node value is less than or equal to the key."),
    comparisonSignature("field-ge",
        "General field lookup function based on greater-than-equal comparison. Normally this will be used by the query optimizer.",
        "all nodes from the field set whose node value is greater than or equal to the key."),
    comparisonSignature("field-starts-with",
        "Used by optimizer to optimize a starts-with() function call",
        "all nodes from the field set whose node value starts with the key."),
    comparisonSignature("field-ends-with",
        "Used by optimizer to optimize an ends-with() function call",
        "all nodes from the field set whose node value ends with the key."),
    comparisonSignature("field-contains",
        "Used by optimizer to optimize a contains() function call",
        "all nodes from the field set whose node value contains the key."),
    comparisonSignature("field-matches",
        "Used by optimizer to optimize a matches() function call",
        "all nodes from the field set whose node value matches the regular expression.")
};

/**
 * Builds the signature of a fixed-operator lookup function. All of them share
 * the (fields, keys) parameter list and return zero or more nodes.
 *
 * @param localName local name of the function inside the range module namespace
 * @param description function description
 * @param returnDescription description of the returned node sequence
 * @return the assembled signature
 */
private static FunctionSignature comparisonSignature(final String localName, final String description,
        final String returnDescription) {
    return new FunctionSignature(
        new QName(localName, RangeIndexModule.NAMESPACE_URI, RangeIndexModule.PREFIX),
        description,
        PARAMETER_TYPE,
        new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, returnDescription),
        true
    );
}
// Result of the last preSelect() call; when non-null, eval() filters it against the
// context instead of querying the index again. Cleared by resetState(false).
private NodeSet preselectResult = null;
// Optional expression evaluated by eval() in place of the index lookup when the
// context sequence is not a persistent node set; also consulted for its primary axis.
protected Expression fallback = null;
/**
 * Creates a lookup function bound to the given query context and signature.
 *
 * @param context the query context passed on to the superclass
 * @param signature the signature this instance was called as
 */
public FieldLookup(XQueryContext context, FunctionSignature signature) {
super(context, signature);
}
/**
 * Sets the expression that eval() uses when the context is not a persistent node set.
 *
 * @param expression the fallback expression; may be null to disable the fallback
 */
public void setFallback(Expression expression) {
this.fallback = expression;
}
/**
 * Installs the call arguments, wrapping each in a dynamic cardinality check that
 * requires at least one item.
 *
 * Argument 1 holds the field names. When called as {@code field}, argument 2
 * holds the operator names. All remaining arguments are key sequences and are
 * simplified before being wrapped.
 *
 * @param arguments the argument expressions in call order
 * @throws XPathException if simplifying an argument fails
 */
public void setArguments(List<Expression> arguments) throws XPathException {
    steps.clear();

    Expression fieldNames = arguments.get(0);
    fieldNames = new DynamicCardinalityCheck(context, Cardinality.ONE_OR_MORE, fieldNames,
            new Error(Error.FUNC_PARAM_CARDINALITY, "1", mySignature));
    steps.add(fieldNames);

    int firstKey = 1;
    if (isCalledAs("field")) {
        Expression operatorNames = arguments.get(1);
        operatorNames = new DynamicCardinalityCheck(context, Cardinality.ONE_OR_MORE, operatorNames,
                new Error(Error.FUNC_PARAM_CARDINALITY, "2", mySignature));
        steps.add(operatorNames);
        firstKey++;
    }

    for (int i = firstKey; i < arguments.size(); i++) {
        Expression key = arguments.get(i).simplify();
        // report the real 1-based position of the offending argument in the error
        key = new DynamicCardinalityCheck(context, Cardinality.ONE_OR_MORE, key,
                new Error(Error.FUNC_PARAM_CARDINALITY, String.valueOf(i + 1), mySignature));
        steps.add(key);
    }
}
/**
 * Runs the superclass analysis on a copy of the given context info and records
 * the context id of the original info on this expression.
 *
 * @param contextInfo the analysis context supplied by the caller
 * @throws XPathException if the superclass analysis fails
 */
@Override
public void analyze(AnalyzeContextInfo contextInfo) throws XPathException {
    final AnalyzeContextInfo localInfo = new AnalyzeContextInfo(contextInfo);
    super.analyze(localInfo);
    contextId = contextInfo.getContextId();
}
/**
 * Queries the range index for the requested fields and caches the result so
 * that a following {@link #eval(Sequence, Item)} can filter it against its
 * context instead of querying again.
 *
 * @param contextSequence the current context; if non-null and not a persistent
 *        set, no index can apply and an empty set is returned. If null, the
 *        statically known documents are searched.
 * @param useContext whether the context nodes are passed on to the index query
 * @return the nodes found by the index
 * @throws XPathException if an operator name is unknown, the operator count does
 *         not match the field count, or the index query fails
 */
@Override
public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XPathException {
    if (contextSequence != null && !contextSequence.isPersistentSet()) {
        // in-memory docs won't have an index
        return NodeSet.EMPTY_SET;
    }

    final long start = System.currentTimeMillis();
    // the expression can be called multiple times, so we need to clear the previous preselectResult
    preselectResult = null;

    final Sequence fieldSeq = getArgument(0).eval(contextSequence);
    final RangeIndex.Operator[] operators;
    int j = 1;
    if (isCalledAs("field")) {
        final Sequence operatorSeq = getArgument(1).eval(contextSequence);
        operators = new RangeIndex.Operator[operatorSeq.getItemCount()];
        int i = 0;
        for (final SequenceIterator si = operatorSeq.iterate(); si.hasNext(); i++) {
            final String operatorName = si.nextItem().getStringValue();
            final RangeIndex.Operator operator = RangeIndexModule.OPERATOR_MAP.get(operatorName);
            if (operator == null) {
                throw new XPathException(this, "Unknown operator: " + operatorName);
            }
            operators[i] = operator;
        }
        j++;
    } else {
        operators = new RangeIndex.Operator[fieldSeq.getItemCount()];
        Arrays.fill(operators, getOperator());
    }
    if (operators.length != fieldSeq.getItemCount()) {
        throw new XPathException(this, "Number of operators specified must correspond to number of fields queried");
    }

    // NOTE(review): unlike eval(), keys are only atomized here and not converted
    // to the configured index type of each field - confirm this is intended.
    final Sequence[] keys = new Sequence[getArgumentCount() - j];
    for (int i = j; i < getArgumentCount(); i++) {
        keys[i - j] = Atomize.atomize(getArgument(i).eval(contextSequence));
    }

    // a null context is handled the same way eval() does: search the statically known documents
    final DocumentSet docs = contextSequence == null
            ? context.getStaticallyKnownDocuments()
            : contextSequence.getDocumentSet();
    final NodeSet contextSet = (useContext && contextSequence != null) ? contextSequence.toNodeSet() : null;

    final RangeIndexWorker index = (RangeIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(RangeIndex.ID);
    try {
        preselectResult = index.queryField(getExpressionId(), docs, contextSet, fieldSeq, keys, operators, NodeSet.DESCENDANT);
    } catch (IOException e) {
        throw new XPathException(this, "Error while querying range index: " + e.getMessage(), e);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("preselect for " + Arrays.toString(keys) + " on "
                + (contextSequence == null ? 0 : contextSequence.getItemCount())
                + " returned " + preselectResult.getItemCount()
                + " and took " + (System.currentTimeMillis() - start));
    }
    if (context.getProfiler().traceFunctions()) {
        context.getProfiler().traceIndexUsage(context, "new-range", this, PerformanceStats.OPTIMIZED_INDEX, System.currentTimeMillis() - start);
    }
    return preselectResult;
}
/**
 * Evaluates the lookup.
 *
 * If the context is not a persistent set, the fallback expression is evaluated
 * instead (or the empty sequence is returned when there is none). If a
 * preselect result is cached, it is filtered against the context. Otherwise
 * the range index is queried directly, after converting each key sequence to
 * the index type configured for its field.
 *
 * @param contextSequence the current context sequence, may be null
 * @param contextItem the current context item; when non-null it replaces the context sequence
 * @return the matching nodes
 * @throws XPathException if an operator name is unknown, operator or key counts
 *         do not match the fields, a key cannot be converted, or the index query fails
 */
@Override
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
    if (contextItem != null) {
        contextSequence = contextItem.toSequence();
    }

    if (contextSequence != null && !contextSequence.isPersistentSet()) {
        // in-memory docs won't have an index
        if (fallback == null) {
            return Sequence.EMPTY_SEQUENCE;
        }
        return fallback.eval(contextSequence, contextItem);
    }

    if (preselectResult != null) {
        if (contextSequence == null) {
            // nothing to filter against; mirrors the direct-query path, which also
            // returns the unfiltered index result when there is no context
            return preselectResult;
        }
        return preselectResult.selectAncestorDescendant(contextSequence.toNodeSet(), NodeSet.DESCENDANT, true, getContextId(), true);
    }

    final long start = System.currentTimeMillis();

    final DocumentSet docs;
    final NodeSet contextSet;
    if (contextSequence == null) {
        docs = context.getStaticallyKnownDocuments();
        contextSet = null;
    } else {
        docs = contextSequence.getDocumentSet();
        contextSet = contextSequence.toNodeSet();
    }

    final Sequence fields = getArgument(0).eval(contextSequence);
    final RangeIndex.Operator[] operators;
    int j = 1;
    if (isCalledAs("field")) {
        final Sequence operatorSeq = getArgument(1).eval(contextSequence);
        operators = new RangeIndex.Operator[operatorSeq.getItemCount()];
        int i = 0;
        for (final SequenceIterator si = operatorSeq.iterate(); si.hasNext(); i++) {
            final String operatorName = si.nextItem().getStringValue();
            final RangeIndex.Operator operator = RangeIndexModule.OPERATOR_MAP.get(operatorName);
            if (operator == null) {
                throw new XPathException(this, "Unknown operator: " + operatorName);
            }
            operators[i] = operator;
        }
        j++;
    } else {
        operators = new RangeIndex.Operator[fields.getItemCount()];
        Arrays.fill(operators, getOperator());
    }
    if (operators.length != fields.getItemCount()) {
        throw new XPathException(this, "Number of operators specified must correspond to number of fields queried");
    }

    final Sequence[] keys = new Sequence[getArgumentCount() - j];
    if (keys.length < fields.getItemCount()) {
        throw new XPathException(this, "Number of keys to look up must correspond to number of fields specified");
    }

    // Fields and key arguments are paired by position, so the fields must be
    // walked in sequence order.
    final SequenceIterator fieldIter = fields.iterate();
    for (int i = j; i < getArgumentCount(); i++) {
        final int k = i - j;
        keys[k] = getArgument(i).eval(contextSequence);

        int targetType = Type.ITEM;
        if (fieldIter.hasNext()) {
            final String field = fieldIter.nextItem().getStringValue();
            targetType = getType(contextSequence, field);
        }

        // convert the keys to the type the index was configured with, if one is known
        if (targetType != Type.ITEM && !Type.subTypeOf(keys[k].getItemType(), targetType)) {
            if (keys[k].hasMany()) {
                final Sequence converted = new ValueSequence(keys[k].getItemCount());
                for (final SequenceIterator iterator = keys[k].unorderedIterator(); iterator.hasNext(); ) {
                    converted.add(iterator.nextItem().convertTo(targetType));
                }
                keys[k] = converted;
            } else {
                keys[k] = keys[k].convertTo(targetType);
            }
        }
    }

    NodeSet result;
    final RangeIndexWorker index = (RangeIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(RangeIndex.ID);
    try {
        result = index.queryField(getExpressionId(), docs, contextSet, fields, keys, operators, NodeSet.DESCENDANT);
        if (contextSet != null) {
            if (fallback != null && (fallback.getPrimaryAxis() == Constants.CHILD_AXIS || fallback.getPrimaryAxis() == Constants.ATTRIBUTE_AXIS)) {
                result = result.selectParentChild(contextSet, NodeSet.DESCENDANT, getContextId());
            } else {
                result = result.selectAncestorDescendant(contextSet, NodeSet.DESCENDANT, true, getContextId(), true);
            }
        }
    } catch (IOException e) {
        // keep the cause so the underlying I/O failure stays visible in the stack trace
        throw new XPathException(this, e.getMessage(), e);
    }

    if (context.getProfiler().traceFunctions()) {
        context.getProfiler().traceIndexUsage(context, "new-range", this, PerformanceStats.OPTIMIZED_INDEX, System.currentTimeMillis() - start);
    }
    return result;
}
/**
 * Derives the operator from the name this function was declared with: the part
 * of the local name following the "field-" prefix is looked up in the module's
 * operator map.
 *
 * @return the mapped operator, or whatever the map yields for an unmapped name
 */
private RangeIndex.Operator getOperator() {
    final int prefixLength = "field-".length();
    final String operatorName = getSignature().getName().getLocalPart().substring(prefixLength);
    return RangeIndexModule.OPERATOR_MAP.get(operatorName);
}
/**
 * Determines the type configured for a field in the range index configuration
 * of the collections the context sequence belongs to.
 *
 * Collections below the system collection are skipped. The first configured
 * type other than {@code Type.ITEM} wins.
 *
 * @param contextSequence the sequence whose collections are inspected, may be null
 * @param field the name of the field
 * @return the configured type, or {@code Type.ITEM} if none was found
 */
public int getType(Sequence contextSequence, String field) {
    if (contextSequence != null) {
        final Iterator<Collection> collections = contextSequence.getCollectionIterator();
        while (collections.hasNext()) {
            final Collection current = collections.next();
            if (current.getURI().startsWith(XmldbURI.SYSTEM_COLLECTION_URI)) {
                continue;
            }
            final IndexSpec spec = current.getIndexConfiguration(context.getBroker());
            if (spec == null) {
                continue;
            }
            final RangeIndexConfig rangeConfig = (RangeIndexConfig) spec.getCustomIndexSpec(RangeIndex.ID);
            if (rangeConfig == null) {
                continue;
            }
            final int configuredType = rangeConfig.getType(field);
            if (configuredType != Type.ITEM) {
                return configuredType;
            }
        }
    }
    return Type.ITEM;
}
/**
 * Always reports that this expression can be optimized; the context sequence is not inspected.
 */
@Override
public boolean canOptimize(Sequence contextSequence) {
return true;
}
/**
 * Always returns false.
 */
@Override
public boolean optimizeOnSelf() {
return false;
}
/**
 * Always returns true.
 */
@Override
public boolean optimizeOnChild() {
return true;
}
/**
 * Reports the child axis as the axis to optimize on.
 */
@Override
public int getOptimizeAxis() {
return Constants.CHILD_AXIS;
}
/**
 * Reports the dependencies of this expression: always the context set, plus
 * the context item if the first argument depends on it.
 *
 * @return the combined dependency flags
 */
@Override
public int getDependencies() {
    final boolean needsContextItem = Dependency.dependsOn(getArgument(0), Dependency.CONTEXT_ITEM);
    return needsContextItem
            ? Dependency.CONTEXT_SET + Dependency.CONTEXT_ITEM
            : Dependency.CONTEXT_SET;
}
/**
 * Reports the static return type of this function, which is always a node type.
 */
public int returnsType() {
return Type.NODE;
}
/**
 * Resets this expression and its fallback, if any. The cached preselect
 * result is kept when the reset happens after optimization and dropped otherwise.
 *
 * @param postOptimization true if the reset is triggered after optimization
 */
@Override
public void resetState(boolean postOptimization) {
    super.resetState(postOptimization);
    final Expression alternative = fallback;
    if (alternative != null) {
        alternative.resetState(postOptimization);
    }
    if (postOptimization) {
        return;
    }
    preselectResult = null;
}
}