/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 9, 2011
*/
package com.bigdata.rdf.sparql.ast.eval;
import java.io.Serializable;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import com.bigdata.bop.BOp;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.Var;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.constraints.RangeBOp;
import com.bigdata.rdf.internal.impl.literal.XSDNumericIV;
import com.bigdata.rdf.lexicon.ITextIndexer;
import com.bigdata.rdf.lexicon.ITextIndexer.FullTextQuery;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.rdf.sparql.ast.ConstantNode;
import com.bigdata.rdf.sparql.ast.GroupNodeBase;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.TermNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions;
import com.bigdata.rdf.sparql.ast.service.BigdataServiceCall;
import com.bigdata.rdf.sparql.ast.service.IServiceOptions;
import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams;
import com.bigdata.rdf.sparql.ast.service.ServiceFactory;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.BD;
import com.bigdata.rdf.store.BDS;
import com.bigdata.search.Hiterator;
import com.bigdata.search.IHit;
import com.bigdata.striterator.ChunkedWrappedIterator;
import com.bigdata.striterator.SingleValueChunkedIterator;
import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.Striterator;
/**
* A factory for a search service. It accepts a group consisting of search magic
* predicates. See {@link BD#SEARCH}.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*
* TODO Add support for slicing the join using tools like CUTOFF (to
* limit the #of solutions) and specifying a {@link RangeBOp} (to
* impose the desired key range). CUTOFF and a single-threaded pipeline
* join can guarantee that we report out the top-N subjects in terms of
* the hit relevance. {@link RangeBOp} would work on a cluster. None of
* this is perfectly satisfactory.
*/
public class SearchServiceFactory extends AbstractServiceFactoryBase {
/** Logger for this factory; also used by the nested service call for trace output. */
private static final Logger log = Logger.getLogger(SearchServiceFactory.class);

/*
 * The options reported for this service.
 *
 * Note: This could extend the base class to allow for search service
 * configuration options.
 */
private final BigdataNativeServiceOptions serviceOptions;

/**
 * Creates the factory with a fresh {@link BigdataNativeServiceOptions}
 * instance on which the "run first" flag has been set.
 */
public SearchServiceFactory() {
    final BigdataNativeServiceOptions options = new BigdataNativeServiceOptions();
    options.setRunFirst(true);
    this.serviceOptions = options;
}
/**
 * Returns the service options that were configured when this factory was
 * constructed. The same instance is returned on every call.
 */
@Override
public BigdataNativeServiceOptions getServiceOptions() {
    return this.serviceOptions;
}
/**
 * Builds the service call that will execute a single free text search.
 *
 * @param params
 *            supplies the triple store and the SERVICE node whose graph
 *            pattern holds the search magic predicates.
 *
 * @return the {@link BigdataServiceCall} for the one search request found
 *         in the graph pattern.
 *
 * @throws IllegalArgumentException
 *             if <code>params</code>, its triple store or its service node
 *             is <code>null</code>.
 * @throws RuntimeException
 *             if the graph pattern contains no search request, or contains
 *             search requests for more than one search variable.
 */
public BigdataServiceCall create(final ServiceCallCreateParams params) {
    if (params == null) {
        throw new IllegalArgumentException();
    }
    final AbstractTripleStore store = params.getTripleStore();
    if (store == null) {
        throw new IllegalArgumentException();
    }
    final ServiceNode serviceNode = params.getServiceNode();
    if (serviceNode == null) {
        throw new IllegalArgumentException();
    }

    // Gather the search predicates, grouped by their search variable.
    final Map<IVariable<?>, Map<URI, StatementPatternNode>> searches =
            verifyGraphPattern(store, serviceNode.getGraphPattern());
    if (searches == null) {
        throw new RuntimeException("Not a search request.");
    }
    if (searches.size() != 1) {
        throw new RuntimeException(
                "Multiple search requests may not be combined.");
    }

    // Exactly one search variable remains: unpack and validate it.
    final Map.Entry<IVariable<?>, Map<URI, StatementPatternNode>> only =
            searches.entrySet().iterator().next();
    final IVariable<?> searchVar = only.getKey();
    final Map<URI, StatementPatternNode> predicates = only.getValue();
    validateSearch(searchVar, predicates);

    // The ServiceCall object which will execute this query.
    return new SearchCall(store, searchVar, predicates, getServiceOptions());
}
/**
 * Collect the search magic predicates found in the group, organized by
 * search variable and then by predicate.
 * <p>
 * Nested groups are rejected. Every statement pattern in the group must be
 * a known search predicate (constant predicate in the {@link BDS#NAMESPACE}
 * namespace and listed in {@link ASTSearchOptimizer#searchUris}) whose
 * subject is a variable. Children which are neither groups nor statement
 * patterns are skipped.
 * <p>
 * All such search patterns are reported back by this method, but the
 * service can only be invoked for a single search variable at a time. The
 * caller will detect both the absence of any search and the presence of
 * more than one search and throw an exception.
 * <p>
 * The same predicate may not be used twice with the same search variable.
 * The per-variable map is keyed by predicate, so a second pattern would
 * otherwise silently replace the first one; that case is reported as an
 * error here, at the point where the collision can still be observed.
 *
 * @param database
 *            the triple store (not currently used by this method).
 * @param group
 *            the graph pattern of the SERVICE node.
 *
 * @return the search patterns by search variable and predicate, or
 *         <code>null</code> iff the group contains no search predicates.
 *
 * @throws RuntimeException
 *             if the group contains a nested group, a statement pattern
 *             which is not a valid search predicate, or a search predicate
 *             which is repeated for the same search variable.
 */
private Map<IVariable<?>, Map<URI, StatementPatternNode>> verifyGraphPattern(
        final AbstractTripleStore database,
        final GroupNodeBase<IGroupMemberNode> group) {
    // lazily allocate iff we find some search predicates in this group.
    Map<IVariable<?>, Map<URI, StatementPatternNode>> tmp = null;
    final int arity = group.arity();
    for (int i = 0; i < arity; i++) {
        final BOp child = group.get(i);
        if (child instanceof GroupNodeBase<?>) {
            throw new RuntimeException("Nested groups are not allowed.");
        }
        if (!(child instanceof StatementPatternNode)) {
            // Only statement patterns are examined.
            continue;
        }
        final StatementPatternNode sp = (StatementPatternNode) child;
        final TermNode p = sp.p();
        if (!p.isConstant())
            throw new RuntimeException("Expecting search predicate: "
                    + sp);
        final URI uri = (URI) ((ConstantNode) p).getValue();
        if (!uri.stringValue().startsWith(BDS.NAMESPACE))
            throw new RuntimeException("Expecting search predicate: "
                    + sp);
        /*
         * Some search predicate.
         */
        if (!ASTSearchOptimizer.searchUris.contains(uri))
            throw new RuntimeException("Unknown search predicate: "
                    + uri);
        final TermNode s = sp.s();
        if (!s.isVariable())
            throw new RuntimeException(
                    "Subject of search predicate is constant: " + sp);
        final IVariable<?> searchVar = ((VarNode) s)
                .getValueExpression();
        // Lazily allocate map.
        if (tmp == null) {
            tmp = new LinkedHashMap<IVariable<?>, Map<URI, StatementPatternNode>>();
        }
        // Lazily allocate the predicate map for that searchVar.
        Map<URI, StatementPatternNode> statementPatterns = tmp
                .get(searchVar);
        if (statementPatterns == null) {
            statementPatterns = new LinkedHashMap<URI, StatementPatternNode>();
            tmp.put(searchVar, statementPatterns);
        }
        /*
         * Add the search predicate for that searchVar. A non-null return
         * from put() means the predicate was already present, which would
         * otherwise be lost without any diagnostic.
         */
        if (statementPatterns.put(uri, sp) != null) {
            throw new RuntimeException(
                    "Search predicate appears multiple times for same search variable: predicate="
                            + uri + ", searchVar=" + searchVar);
        }
    }
    return tmp;
}
/**
 * Validate the search predicates collected for one search variable.
 * <p>
 * The {@link BDS#SEARCH} predicate is required. Each predicate is checked
 * for the kind of object it accepts: a literal for the search string and
 * the tuning parameters, a variable for the relevance, rank and range
 * count outputs. {@link BDS#MATCH_REGEX} accepts either. A predicate may
 * not be seen twice (the patterns arrive in a map which is presumably
 * keyed by predicate, so that check is defensive).
 *
 * @param searchVar
 *            the search variable (used in error messages only).
 * @param statementPatterns
 *            the search patterns for that variable.
 *
 * @throws RuntimeException
 *             if a predicate is repeated or {@link BDS#SEARCH} is missing.
 * @throws IllegalArgumentException
 *             if the object of a pattern is not of the required kind.
 * @throws AssertionError
 *             if a predicate is not one of those handled here.
 */
private void validateSearch(final IVariable<?> searchVar,
        final Map<URI, StatementPatternNode> statementPatterns) {
    final Set<URI> seen = new LinkedHashSet<URI>();
    for (StatementPatternNode pattern : statementPatterns.values()) {
        final URI predicate = (URI) pattern.p().getValue();
        final boolean firstUse = seen.add(predicate);
        if (!firstUse) {
            throw new RuntimeException(
                    "Search predicate appears multiple times for same search variable: predicate="
                            + predicate + ", searchVar=" + searchVar);
        }

        // Predicates whose object must be a literal.
        final boolean wantsLiteral = predicate.equals(BDS.SEARCH)
                || predicate.equals(BDS.MIN_RANK)
                || predicate.equals(BDS.MAX_RANK)
                || predicate.equals(BDS.MIN_RELEVANCE)
                || predicate.equals(BDS.MAX_RELEVANCE)
                || predicate.equals(BDS.MATCH_ALL_TERMS)
                || predicate.equals(BDS.MATCH_EXACT)
                || predicate.equals(BDS.SUBJECT_SEARCH)
                || predicate.equals(BDS.SEARCH_TIMEOUT);

        // Predicates whose object must be a variable.
        final boolean wantsVariable = predicate.equals(BDS.RELEVANCE)
                || predicate.equals(BDS.RANK)
                || predicate.equals(BDS.RANGE_COUNT);

        if (wantsLiteral) {
            assertObjectIsLiteral(pattern);
        } else if (wantsVariable) {
            assertObjectIsVariable(pattern);
        } else if (!predicate.equals(BDS.MATCH_REGEX)) {
            throw new AssertionError("Unverified search predicate: " + pattern);
        }
        // Otherwise MATCH_REGEX: a variable for the object is equivalent
        // to regex = null, so the object is not checked.
    }
    if (!seen.contains(BDS.SEARCH)) {
        throw new RuntimeException("Required search predicate not found: "
                + BDS.SEARCH + " for searchVar=" + searchVar);
    }
}
/**
 * Verify that the object position of the statement pattern is a constant
 * whose value is a {@link Literal}.
 *
 * @throws IllegalArgumentException
 *             if the object is not a constant or its value is not a
 *             literal.
 */
private void assertObjectIsLiteral(final StatementPatternNode sp) {
    final TermNode object = sp.o();
    // The cast is only evaluated once the object is known to be constant.
    final boolean isLiteral = object.isConstant()
            && ((ConstantNode) object).getValue() instanceof Literal;
    if (!isLiteral) {
        throw new IllegalArgumentException("Object is not literal: " + sp);
    }
}
/**
 * Verify that the object position of the statement pattern is a variable.
 *
 * @throws IllegalArgumentException
 *             if the object is not a variable.
 */
private void assertObjectIsVariable(final StatementPatternNode sp) {
    final TermNode object = sp.o();
    if (object.isVariable()) {
        return;
    }
    throw new IllegalArgumentException("Object must be variable: " + sp);
}
/**
*
* Note: This has the {@link AbstractTripleStore} reference attached. This
* is not a {@link Serializable} object. It MUST run on the query
* controller.
*/
private static class SearchCall implements BigdataServiceCall {
/** The triple store whose lexicon relation supplies the text index. */
private final AbstractTripleStore store;
/** The options reported by {@link #getServiceOptions()}. */
private final IServiceOptions serviceOptions;
/**
 * The object of the search pattern. Its label is the search string and
 * its language is passed through to the text index.
 */
private final Literal query;
/**
 * The variable recorded in the VAR annotation of the search constant, or
 * <code>null</code> if there is none. When non-null it is included in each
 * solution, bound to the IV of {@link #query}.
 */
private final IVariable<IV> search;
/**
 * The variables bound for each hit, in order: the search variable, the
 * relevance variable, the rank variable and, iff {@link #search} is
 * non-null, that variable as well.
 */
private final IVariable<?>[] vars;
/** Optional minimum rank; <code>null</code> selects the BDS default. */
private final Literal minRank;
/** Optional maximum rank; <code>null</code> selects the BDS default. */
private final Literal maxRank;
/** Optional minimum relevance; <code>null</code> selects the BDS default. */
private final Literal minRelevance;
/** Optional maximum relevance; <code>null</code> selects the BDS default. */
private final Literal maxRelevance;
/** The "match all terms" option; <code>false</code> unless specified. */
private final boolean matchAllTerms;
/** The "match exact" option; <code>false</code> unless specified. */
private final boolean matchExact;
/**
 * @deprecated Feature was never completed due to scalability issues.
 * See BZLG-1548, BLZG-563.
 */
@Deprecated
private final boolean subjectSearch;
/**
 * Optional search timeout, passed to the text index in milliseconds;
 * <code>null</code> selects the BDS default.
 */
private final Literal searchTimeout;
/** Optional regular expression; <code>null</code> when not given as a literal. */
private final Literal matchRegex;
/**
 * When non-null, the call reports a single solution binding this variable
 * to the number of hits instead of reporting the hits themselves.
 */
private final IVariable<?> rangeCountVar;
/**
 * Unpacks the search magic predicates into the fields of this call.
 *
 * @param store
 *            the triple store to be searched.
 * @param searchVar
 *            the variable that will be bound to each hit.
 * @param statementPatterns
 *            the search patterns for that variable, by predicate. The
 *            {@link BDS#SEARCH} pattern is expected to be present.
 * @param serviceOptions
 *            the options to report for this call.
 *
 * @throws IllegalArgumentException
 *             if any argument is <code>null</code>.
 */
public SearchCall(
        final AbstractTripleStore store,
        final IVariable<?> searchVar,
        final Map<URI, StatementPatternNode> statementPatterns,
        final IServiceOptions serviceOptions) {

    if (store == null) {
        throw new IllegalArgumentException();
    }
    if (searchVar == null) {
        throw new IllegalArgumentException();
    }
    if (statementPatterns == null) {
        throw new IllegalArgumentException();
    }
    if (serviceOptions == null) {
        throw new IllegalArgumentException();
    }

    this.store = store;
    this.serviceOptions = serviceOptions;

    /*
     * Unpack the "search" magic predicate:
     *
     * [?searchVar bd:search objValue]
     */
    final StatementPatternNode searchPattern = statementPatterns.get(BDS.SEARCH);
    final TermNode searchObject = searchPattern.o();
    this.query = (Literal) searchObject.getValue();

    // Pick up the variable annotated on the search constant, if any.
    IVariable<IV> annotatedVar = null;
    if (searchObject instanceof ConstantNode) {
        final IConstant<IV> constant =
                ((ConstantNode) searchObject).getValueExpression();
        annotatedVar = (IVariable<IV>)
                constant.getProperty(Constant.Annotations.VAR);
    }
    this.search = annotatedVar;

    /*
     * Unpack the search service request parameters.
     */
    IVariable<?> relevanceOut = null;
    IVariable<?> rankOut = null;
    IVariable<?> rangeCountOut = null;
    Literal minRankLit = null;
    Literal maxRankLit = null;
    Literal minRelevanceLit = null;
    Literal maxRelevanceLit = null;
    boolean allTerms = false;
    boolean exact = false;
    boolean subjectCentric = false;
    Literal timeoutLit = null;
    Literal regexLit = null;

    for (StatementPatternNode pattern : statementPatterns.values()) {
        final URI predicate = (URI) pattern.p().getValue();
        final TermNode object = pattern.o();

        // Exactly one of these is expected to be non-null.
        final Literal literal = object.isConstant()
                ? (Literal) pattern.o().getValue()
                : null;
        final IVariable<?> variable = object.isVariable()
                ? (IVariable<?>) pattern.o().getValueExpression()
                : null;

        if (BDS.RELEVANCE.equals(predicate)) {
            relevanceOut = variable;
        } else if (BDS.RANK.equals(predicate)) {
            rankOut = variable;
        } else if (BDS.RANGE_COUNT.equals(predicate)) {
            rangeCountOut = variable;
        } else if (BDS.MIN_RANK.equals(predicate)) {
            minRankLit = literal;
        } else if (BDS.MAX_RANK.equals(predicate)) {
            maxRankLit = literal;
        } else if (BDS.MIN_RELEVANCE.equals(predicate)) {
            minRelevanceLit = literal;
        } else if (BDS.MAX_RELEVANCE.equals(predicate)) {
            maxRelevanceLit = literal;
        } else if (BDS.MATCH_ALL_TERMS.equals(predicate)) {
            allTerms = literal.booleanValue();
        } else if (BDS.MATCH_EXACT.equals(predicate)) {
            exact = literal.booleanValue();
        } else if (BDS.SUBJECT_SEARCH.equals(predicate)) {
            subjectCentric = literal.booleanValue();
        } else if (BDS.SEARCH_TIMEOUT.equals(predicate)) {
            timeoutLit = literal;
        } else if (BDS.MATCH_REGEX.equals(predicate)) {
            regexLit = literal;
        }
    }

    // The relevance and rank slots must be non-null: use anonymous
    // variables when the query did not ask for them.
    final IVariable<?> relevanceSlot =
            relevanceOut == null ? Var.var() : relevanceOut;
    final IVariable<?> rankSlot =
            rankOut == null ? Var.var() : rankOut;

    if (annotatedVar == null) {
        this.vars = new IVariable[] { searchVar, relevanceSlot, rankSlot };
    } else {
        this.vars = new IVariable[] { searchVar, relevanceSlot, rankSlot,
                annotatedVar };
    }

    this.minRank = minRankLit;
    this.maxRank = maxRankLit;
    this.minRelevance = minRelevanceLit;
    this.maxRelevance = maxRelevanceLit;
    this.matchAllTerms = allTerms;
    this.matchExact = exact;
    this.subjectSearch = subjectCentric;
    this.searchTimeout = timeoutLit;
    this.matchRegex = regexLit;
    this.rangeCountVar = rangeCountOut;
}
/**
 * Run the free text search and return the hits.
 *
 * @return the iterator over the hits reported by the text index.
 *
 * @throws UnsupportedOperationException
 *             if the lexicon has no text index.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private Hiterator<IHit<?>> getHiterator() {
    final ITextIndexer<IHit> textIndex = getTextIndex();
    return (Hiterator) textIndex.search(newFullTextQuery());
}

/**
 * Report the number of hits for the free text search without
 * materializing them.
 *
 * @return the count reported by the text index.
 *
 * @throws UnsupportedOperationException
 *             if the lexicon has no text index.
 */
private int getRangeCount() {
    final ITextIndexer<IHit> textIndex = getTextIndex();
    return textIndex.count(newFullTextQuery());
}

/**
 * Resolve the text index to be searched: the subject-centric search engine
 * when the (deprecated) subject search option is set and otherwise the
 * normal search engine of the lexicon relation.
 *
 * @throws UnsupportedOperationException
 *             if that index does not exist.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private ITextIndexer<IHit> getTextIndex() {
    final ITextIndexer<IHit> textIndex = (ITextIndexer)
            (this.subjectSearch ?
                    store.getLexiconRelation().getSubjectCentricSearchEngine() :
                    store.getLexiconRelation().getSearchEngine());
    if (textIndex == null)
        throw new UnsupportedOperationException("No free text index?");
    return textIndex;
}

/**
 * Build the {@link FullTextQuery} described by the fields of this call.
 * Shared by {@link #getHiterator()} and {@link #getRangeCount()} so that
 * both always issue exactly the same query.
 * <p>
 * A <code>*</code> anywhere in the search string turns on prefix matching
 * and every <code>*</code> is then removed from the string. Options that
 * were not specified fall back to the defaults declared by {@link BDS}.
 * The timeout is expressed in milliseconds.
 */
private FullTextQuery newFullTextQuery() {
    final String label = query.getLabel();
    final boolean prefixMatch = label.indexOf('*') >= 0;
    // Literal (non-regex) removal of the wildcard character.
    final String s = prefixMatch ? label.replace("*", "") : label;
    return new FullTextQuery(
            s,//
            query.getLanguage(),//
            prefixMatch,//
            matchRegex == null ? null : matchRegex.stringValue(),
            matchAllTerms,
            matchExact,
            minRelevance == null ? BDS.DEFAULT_MIN_RELEVANCE : minRelevance.doubleValue()/* minCosine */,
            maxRelevance == null ? BDS.DEFAULT_MAX_RELEVANCE : maxRelevance.doubleValue()/* maxCosine */,
            minRank == null ? BDS.DEFAULT_MIN_RANK/*1*/ : minRank.intValue()/* minRank */,
            maxRank == null ? BDS.DEFAULT_MAX_RANK/*Integer.MAX_VALUE*/ : maxRank.intValue()/* maxRank */,
            searchTimeout == null ? BDS.DEFAULT_TIMEOUT/*0L*/ : searchTimeout.longValue()/* timeout */,
            TimeUnit.MILLISECONDS
    );
}
/**
 * {@inheritDoc}
 * <p>
 * Runs the search once and reports either the hits (one solution per hit)
 * or, when a range count variable was given, a single solution binding
 * that variable to the number of hits.
 *
 * FIXME The bindingsClause is ignored. If someone were to bind the
 * subject, rank, or relevance variables then we would need to notice that
 * here. We would also have to produce one solution for each binding
 * set input to the service.
 *
 * @throws UnsupportedOperationException
 *             if more than one source solution is given and any of them
 *             binds a variable that this call would bind.
 */
@Override
@SuppressWarnings({ "rawtypes", "unchecked" })
public ICloseableIterator<IBindingSet> call(
        final IBindingSet[] bindingsClause) {

    final int nsolutions = bindingsClause.length;

    if (nsolutions > 1) {
        /*
         * An incoming binding stream is allowed as long as it does not
         * include any of the variables produced by the search.
         *
         * FIXME The case where one of those variables is bound is not
         * supported. We need to run the search engine for each of the
         * source solutions.
         */
        for (IBindingSet solution : bindingsClause) {
            if (rangeCountVar == null) {
                for (IVariable<?> v : vars) {
                    if (solution.isBound(v)) {
                        throw new UnsupportedOperationException();
                    }
                }
            } else if (solution.isBound(rangeCountVar)) {
                throw new UnsupportedOperationException();
            }
        }
    }

    if (nsolutions == 1 && !bindingsClause[0].isEmpty()) {
        /*
         * Nothing to do. This used to be rejected; it was fixed by putting
         * the ASTBindingAssigner before the ASTSearchOptimizer in the
         * DefaultOptimizerList.
         */
    }

    if (rangeCountVar == null) {
        // Report the hits, converting each one into a binding set.
        final Striterator hits = new Striterator(getHiterator());
        hits.addFilter(new Resolver() {
            private static final long serialVersionUID = 1L;
            @Override
            protected Object resolve(final Object obj) {
                return bind((IHit<?>) obj);
            }
        });
        return new ChunkedWrappedIterator<IBindingSet>(hits);
    }

    // Report only the number of hits.
    final int count = getRangeCount();
    final ListBindingSet countSolution = new ListBindingSet(
            new IVariable[] { rangeCountVar },
            new IConstant[] { new Constant(new XSDNumericIV(count)) });
    return new SingleValueChunkedIterator<IBindingSet>(countSolution);
}
/**
 * Convert an {@link IHit} into an {@link IBindingSet}.
 * <p>
 * The values line up with the variables of this call: the document
 * identifier of the hit, its cosine, its rank and, iff a search variable
 * was annotated on the query, the IV of the query literal.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private IBindingSet bind(final IHit<?> hit) {
    final IConstant docId = new Constant(hit.getDocId()); // searchVar
    final IConstant cosine = new Constant(new XSDNumericIV(hit.getCosine())); // cosine
    final IConstant rank = new Constant(new XSDNumericIV(hit.getRank())); // rank

    final IConstant<?>[] vals;
    if (search == null) {
        vals = new IConstant[] { docId, cosine, rank };
    } else {
        vals = new IConstant[] { docId, cosine, rank,
                new Constant(((BigdataLiteral) query).getIV()) };
    }

    final IBindingSet solution = new ListBindingSet(vars, vals);

    if (log.isTraceEnabled()) {
        log.trace(solution);
        log.trace(query.getClass());
        log.trace(((BigdataLiteral) query).getIV());
        log.trace(((BigdataLiteral) query).getIV().getClass());
    }

    return solution;
}
// private class HitConverter implements ICloseableIterator<IBindingSet> {
//
// private final Hiterator<IHit<?>> src;
//
// private IHit<?> current = null;
// private boolean open = true;
//
// public HitConverter(final Hiterator<IHit<?>> src) {
//
// this.src = src;
//
// }
//
// public void close() {
// if (open) {
// open = false;
// }
// }
//
// public boolean hasNext() {
// if (!open)
// return false;
// if (current != null)
// return true;
// while (src.hasNext()) {
// current = src.next();
// return true;
// }
// return current != null;
// }
//
// public IBindingSet next() {
//
// if (!hasNext())
// throw new NoSuchElementException();
//
// final IHit<?> tmp = current;
//
// current = null;
//
// return newBindingSet(tmp);
//
// }
//
// public void remove() {
//
// throw new UnsupportedOperationException();
//
// }
//
// } // class HitConverter
/**
 * Returns the service options that were supplied to the constructor.
 */
@Override
public IServiceOptions getServiceOptions() {
    return this.serviceOptions;
}
}
}