/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 8, 2011
*/
package com.bigdata.rdf.sparql.ast.eval;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.openrdf.model.Literal;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.NV;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.lexicon.ITextIndexer;
import com.bigdata.rdf.lexicon.ITextIndexer.FullTextQuery;
import com.bigdata.rdf.sparql.ast.ConstantNode;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.FunctionNode;
import com.bigdata.rdf.sparql.ast.FunctionRegistry;
import com.bigdata.rdf.sparql.ast.FunctionRegistry.InFactory;
import com.bigdata.rdf.sparql.ast.IValueExpressionNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.ValueExpressionNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.optimizers.AbstractJoinGroupOptimizer;
import com.bigdata.rdf.store.BDS;
import com.bigdata.search.Hiterator;
import com.bigdata.search.IHit;
/**
* Converts a {@link BDS#SEARCH_IN_SEARCH} function call (inside a filter) into
* an IN filter using the full text index to determine the IN set.
*
* Convert:
*
* <pre>
* filter(&lt;BDS.SEARCH_IN_SEARCH&gt;(?o,"foo")) .
* </pre>
*
* To:
*
* <pre>
* filter(?o IN ("foo", "foo bar", "hello foo", ...)) .
* </pre>
*
* This is a way of using the full text index to filter instead of using regex.
*/
public class ASTSearchInSearchOptimizer extends AbstractJoinGroupOptimizer {
// private static final Logger log = Logger.getLogger(ASTSearchInSearchOptimizer.class);
// static private long time = 0;
/**
* Optimize the join group.
*/
protected void optimizeJoinGroup(final AST2BOpContext ctx,
final StaticAnalysis sa, final IBindingSet[] bSets, final JoinGroupNode group) {
// final long start = System.currentTimeMillis();
for (FilterNode node : group.getChildren(FilterNode.class)) {
optimize(ctx, sa, group, node);
}
// time += (System.currentTimeMillis() - start);
// System.err.println(time);
}
/**
* Optimize a single FilterNode. We want to replace a search in search
* function node with an In function node.
*/
protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
final JoinGroupNode group, final FilterNode filterNode) {
final IValueExpressionNode veNode = filterNode.getValueExpressionNode();
if (veNode instanceof FunctionNode) {
final FunctionNode funcNode = (FunctionNode) veNode;
if (funcNode.getFunctionURI().equals(BDS.SEARCH_IN_SEARCH)) {
filterNode.setArg(0, convert(ctx, funcNode));
}
}
}
/**
* Perform the conversion from one function (BDS.SEARCH_IN_SEARCH) to
* another (IN).
*/
protected FunctionNode convert(final AST2BOpContext ctx,
final FunctionNode searchInSearch) {
final VarNode var = (VarNode) searchInSearch.get(0);
final Literal search = (Literal) ((ConstantNode) searchInSearch.get(1)).getValue();
final String match;
if (searchInSearch.arity() > 2) {
final Literal l = (Literal) ((ConstantNode) searchInSearch.get(2)).getValue();
match = l.getLabel();
} else {
match = "ANY";
}
final String regex;
if (searchInSearch.arity() > 3) {
final Literal l = (Literal) ((ConstantNode) searchInSearch.get(3)).getValue();
regex = l.getLabel();
} else {
regex = null;
}
final IV[] hits = getHits(ctx, search, match, regex);
final ValueExpressionNode[] args = new ValueExpressionNode[hits.length+1];
args[0] = var;
for (int i = 0; i < hits.length; i++) {
args[i+1] = new ConstantNode(new Constant<IV>(hits[i]));
}
final Map<String, Object> props = NV.asMap(
new NV(InFactory.Annotations.ALLOW_LITERALS, true));
final FunctionNode in = new FunctionNode(FunctionRegistry.IN, props, args);
return in;
}
/**
* Collect the hits for the IN filter.
*/
@SuppressWarnings("unchecked")
protected IV[] getHits(final AST2BOpContext ctx,
final Literal search, final String match, final String regex) {
final ITextIndexer<IHit> textIndex = (ITextIndexer<IHit>)
ctx.getAbstractTripleStore().getLexiconRelation().getSearchEngine();
if (textIndex == null)
throw new UnsupportedOperationException("No free text index?");
String s = search.getLabel();
final boolean prefixMatch;
if (s.indexOf('*') >= 0) {
prefixMatch = true;
s = s.replaceAll("\\*", "");
} else {
prefixMatch = false;
}
final Hiterator<IHit> it = textIndex.search(new FullTextQuery(
s,//
search.getLanguage(),//
prefixMatch,//
regex,
match != null && match.equalsIgnoreCase("ALL"),
match != null && match.equalsIgnoreCase("EXACT")
));
final IV[] hits = new IV[it.size()];
int i = 0;
while (it.hasNext()) {
hits[i++] = (IV) it.next().getDocId();
}
return hits;
}
}