/*******************************************************************************
* Copyright (c) 2004, 2007 IBM Corporation and Cambridge Semantics Incorporated.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* File: $Source: /cvsroot/slrp/boca/com.ibm.adtech.boca.model/src/com/ibm/adtech/boca/glitter/predicates/TextMatchPredicate.java,v $
* Created by: Wing Yung (<a href="mailto:wingyung@us.ibm.com">wingyung@us.ibm.com</a>)
* Created on: 12/18/2006
* Revision: $Id: TextMatchPredicate.java 227 2007-08-02 13:52:42Z mroy $
*
* Contributors:
* IBM Corporation - initial API and implementation
* Cambridge Semantics Incorporated - Fork to Anzo
*******************************************************************************/
package org.openanzo.glitter.query;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.openanzo.exceptions.LogUtils;
import org.openanzo.glitter.exception.FunctionalPredicateInvocationException;
import org.openanzo.glitter.exception.GlitterException;
import org.openanzo.glitter.util.TypeConversions;
import org.openanzo.rdf.Bindable;
import org.openanzo.rdf.PlainLiteral;
import org.openanzo.rdf.Statement;
import org.openanzo.rdf.TriplePattern;
import org.openanzo.rdf.TriplePatternComponent;
import org.openanzo.rdf.URI;
import org.openanzo.rdf.Variable;
import org.openanzo.rdf.Constants.GRAPHS;
import org.openanzo.rdf.Constants.INDEXER;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Special predicate that allows one to use the text indexer to match literal values.
* <p>
* Example: <code>SELECT ?s WHERE { ?s &lt;http://openanzo.org/predicates/textlike&gt; 'test%' }</code>
*
* @author Wing Yung (<a href="mailto:wingyung@us.ibm.com">wingyung@us.ibm.com</a>)
*
*/
//FIXEXCEPTIONS:Fix strings in exceptions
public class TextMatchPredicate implements FunctionalPredicate {
    private static final Logger log = LoggerFactory.getLogger(TextMatchPredicate.class);

    /**
     * Number of dataset graphs at or above which the graphs are handed over via {@link TextMatchQuery#graphs} instead of being expanded into individual
     * lucene term clauses.
     */
    private static final int DATASET_THRESHOLD = 500;

    /** Bindable taken from the subject of the functional triple pattern; bound to the object of every matching statement. */
    private Bindable var;

    /** Label of the literal found in the object position of the functional triple pattern. */
    private String textMatch;

    /** Additional triple patterns whose object is {@link #var}; collected by {@link #handlesTriplePattern(TriplePattern)}. */
    private final List<TriplePattern> patterns = new ArrayList<TriplePattern>();

    /** Query information supplied through {@link #initialize(QueryInformation)}. */
    private QueryInformation queryInformation;

    /** The triple pattern that invoked this predicate; <code>null</code> until {@link #setFunctionalTriplePattern(TriplePattern)} is called. */
    private TriplePattern functionalTriplePattern = null;

    /**
     * Store the query information that is later consulted for the query dataset.
     *
     * @param qi
     *            information about the query being executed
     */
    public void initialize(QueryInformation qi) {
        this.queryInformation = qi;
    }

    /**
     * Render this predicate together with the functional triple pattern and all handled patterns. Safe to call before the functional triple pattern has
     * been set, in which case that pattern is rendered as "null".
     *
     * @return string representation of this predicate
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(this.getClass().getSimpleName());
        sb.append("(TriplePatternNode(");
        // append(Object) is null-safe, unlike calling toString() on the field directly.
        sb.append(functionalTriplePattern);
        sb.append(")");
        for (TriplePattern tp : patterns) {
            sb.append(", TriplePatternNode(");
            sb.append(tp);
            sb.append(")");
        }
        sb.append(")");
        return sb.toString();
    }

    /**
     * @return always <code>true</code>
     */
    public boolean canBindGraphVariables() {
        return true;
    }

    /**
     * Return the patterns that this special predicate handles
     *
     * @return the patterns that this special predicate handles (the live internal list, not a copy)
     */
    public List<TriplePattern> getPatterns() {
        return patterns;
    }

    /**
     * Create a solution set based on the results of a lucene query. Every statement yields one solution in which {@link #getVar()} is bound to the
     * statement's object. If at least one pattern is handled, the bindable subject and/or predicate of the first pattern are bound to the statement's
     * subject and predicate respectively.
     *
     * @param results
     *            results to convert
     * @param namedGraphVariable
     *            variable binding for namedgraph, or <code>null</code> if the named graph should not be bound
     * @return the converted solution set
     */
    public SolutionSet createSolutionSet(Collection<Statement> results, Variable namedGraphVariable) {
        SolutionList solutions = new SolutionList();
        Bindable subjBindable = null;
        Bindable predBindable = null;
        if (!patterns.isEmpty()) {
            // Only the first pattern is consulted; getLuceneQuery rejects more than one.
            TriplePattern first = patterns.get(0);
            if (first.getSubject() instanceof Bindable) {
                subjBindable = (Bindable) first.getSubject();
            }
            if (first.getPredicate() instanceof Bindable) {
                predBindable = (Bindable) first.getPredicate();
            }
        }
        for (Statement quad : results) {
            PatternSolution sol = new PatternSolutionImpl();
            sol.setBinding(var, quad.getObject());
            if (subjBindable != null) {
                sol.setBinding(subjBindable, quad.getSubject());
            }
            if (predBindable != null) {
                sol.setBinding(predBindable, quad.getPredicate());
            }
            if (namedGraphVariable != null) {
                sol.setBinding(namedGraphVariable, quad.getNamedGraphUri());
            }
            solutions.add(sol);
        }
        return solutions;
    }

    /**
     * @return the textMatch
     */
    public String getTextMatch() {
        return textMatch;
    }

    /**
     * @return the var
     */
    public Bindable getVar() {
        return var;
    }

    /**
     * Create a lucene query for the given configuration
     *
     * @param namedGraph
     *            namedgraph for query, or <code>null</code> to derive the graph restriction from the query dataset
     * @param namedGraphVariable
     *            variable for binding namedgraph; when <code>namedGraph</code> is <code>null</code> this selects the dataset's named graphs (non-null) or
     *            default graphs (null)
     * @return lucene query object, or <code>null</code> if the query cannot have any solutions (the named graph is the all-metadata-graphs URI)
     * @throws GlitterException
     *             if more than one pattern is handled, or if the dataset is needed but {@link #initialize(QueryInformation)} was never called
     */
    public TextMatchQuery getLuceneQuery(org.openanzo.rdf.URI namedGraph, Variable namedGraphVariable) throws GlitterException {
        // For now, only support one pattern; not clear what multiple
        // patterns mean.
        if (patterns.size() > 1) {
            throw new GlitterException("Can't support multiple patterns for textmatch");
        }
        // A text match scoped to the metadata graphs yields no solutions, so skip building a query altogether.
        if (namedGraph != null && namedGraph.equals(GRAPHS.ALL_METADATAGRAPHS)) {
            return null;
        }
        TextMatchQuery query = new TextMatchQuery();
        query.literalQuery = textMatch;
        if (!patterns.isEmpty()) {
            addPatternTerms(query, patterns.get(0));
        }
        if (namedGraph != null) {
            // The wildcard graph URIs cover everything, so only a concrete graph restricts the search.
            if (!namedGraph.equals(GRAPHS.ALL_NAMEDGRAPHS) && !namedGraph.equals(GRAPHS.ALL_GRAPHS)) {
                query.terms.add(graphTerm(namedGraph));
            }
        } else {
            addDatasetGraphRestrictions(query, namedGraphVariable);
        }
        return query;
    }

    /** Adds a term for each of the pattern's subject and predicate that is a constant rather than a bindable. */
    private void addPatternTerms(TextMatchQuery query, TriplePattern pattern) {
        TriplePatternComponent subj = pattern.getSubject();
        TriplePatternComponent pred = pattern.getPredicate();
        if (!(subj instanceof Bindable)) {
            query.terms.add(new TermQuery(new Term(INDEXER.INDEXER_FIELD_SUBJECT, subj.toString())));
        }
        if (!(pred instanceof Bindable)) {
            query.terms.add(new TermQuery(new Term(INDEXER.INDEXER_FIELD_PREDICATE, pred.toString())));
        }
    }

    /** Restricts the query to the graphs of the query dataset: default graphs when no graph variable is given, named graphs otherwise. */
    private void addDatasetGraphRestrictions(TextMatchQuery query, Variable namedGraphVariable) throws GlitterException {
        if (queryInformation == null) {
            throw new GlitterException("TextMatchPredicate must be initialized before a lucene query can be created");
        }
        Set<URI> datasetGraphs;
        if (namedGraphVariable == null) {
            datasetGraphs = queryInformation.getSolutionGenerator().getQueryDataset().getDefaultGraphURIs();
        } else {
            datasetGraphs = queryInformation.getSolutionGenerator().getQueryDataset().getNamedGraphURIs();
        }
        Set<URI> ngraphs = new HashSet<URI>();
        for (URI currURI : datasetGraphs) {
            // Ignore metadatagraphs.
            if (currURI.equals(GRAPHS.ALL_METADATAGRAPHS)) {
                continue;
            }
            // Don't restrict search to certain graphs since these include all named graphs.
            if (currURI.equals(GRAPHS.ALL_GRAPHS) || currURI.equals(GRAPHS.ALL_NAMEDGRAPHS)) {
                ngraphs.clear();
                break;
            }
            ngraphs.add(currURI);
        }
        // NOTE(review): an empty or metadata-only dataset also ends up here with no graph restriction at all,
        // exactly like the all-graphs wildcard case - confirm that an unrestricted search is intended for those.
        if (ngraphs.isEmpty()) {
            return;
        }
        if (ngraphs.size() >= DATASET_THRESHOLD) {
            // Too many graphs to expand into clauses; pass them along separately.
            query.graphs.addAll(ngraphs);
            return;
        }
        if (ngraphs.size() == 1) {
            query.terms.add(graphTerm(ngraphs.iterator().next()));
        } else {
            // Match statements that live in any one of the dataset graphs.
            BooleanQuery anyGraph = new BooleanQuery();
            for (URI uri : ngraphs) {
                anyGraph.add(new BooleanClause(graphTerm(uri), Occur.SHOULD));
            }
            query.terms.add(anyGraph);
        }
    }

    /** Builds the term query that matches the given graph URI in the graph-URI index field. */
    private static TermQuery graphTerm(URI graphUri) {
        return new TermQuery(new Term(INDEXER.INDEXER_FIELD_GRAPH_URI, graphUri.toString()));
    }

    /**
     * Claim a triple pattern if its object equals the variable of this predicate. Claimed patterns are recorded and later returned by
     * {@link #getPatterns()}.
     *
     * @param pattern
     *            candidate pattern
     * @return <code>true</code> if the pattern was claimed
     * @throws FunctionalPredicateInvocationException
     *             declared by the signature; not thrown by this implementation
     */
    public boolean handlesTriplePattern(TriplePattern pattern) throws FunctionalPredicateInvocationException {
        if (pattern.getObject().equals(var)) {
            patterns.add(pattern);
            return true;
        }
        return false;
    }

    /**
     * Set the triple pattern that invokes this predicate. Its subject becomes the bound variable and the label of its object becomes the text to match.
     *
     * @param pattern
     *            functional triple pattern
     * @throws FunctionalPredicateInvocationException
     *             if the subject is not a bindable or the object is not a simple literal
     */
    public void setFunctionalTriplePattern(TriplePattern pattern) throws FunctionalPredicateInvocationException {
        if (!(pattern.getSubject() instanceof Bindable)) {
            throw new FunctionalPredicateInvocationException("Subject of textmatch must be a bindable");
        }
        if (!TypeConversions.isSimpleLiteral(pattern.getObject())) {
            throw new FunctionalPredicateInvocationException("Object of textmatch must be a simple literal");
        }
        this.var = (Bindable) pattern.getSubject();
        this.textMatch = ((PlainLiteral) pattern.getObject()).getLabel();
        functionalTriplePattern = pattern;
    }

    /**
     * @return always <code>true</code>
     */
    public boolean usesDataFromGraphs() {
        return true;
    }

    /**
     * @param costModel
     *            cost model (unused)
     * @return always 0
     */
    public double getCost(NodeCostModel costModel) {
        return 0;
    }

    /**
     * @return the functional triple pattern, or <code>null</code> if none has been set
     */
    public TriplePattern getFunctionalTriplePattern() {
        return functionalTriplePattern;
    }

    /**
     * This implementation never produces solutions itself: it logs a debug message and returns the no-solutions set. The lucene query is exposed through
     * {@link #getLuceneQuery(URI, Variable)} and its results are converted by {@link #createSolutionSet(Collection, Variable)}.
     *
     * @param namedGraph
     *            unused
     * @param namedGraphVariable
     *            unused
     * @param bindingConstraints
     *            unused
     * @return the result of {@link SolutionUtils#noSolutions()}
     * @throws GlitterException
     *             declared by the signature; not thrown by this implementation
     */
    public SolutionSet generateSolutions(URI namedGraph, Variable namedGraphVariable, SolutionSet bindingConstraints) throws GlitterException {
        if (log.isDebugEnabled()) {
            log.debug(LogUtils.GLITTER_MARKER, "TextMatchPredicate.generateSolutions called directly, returning noSolutions.");
        }
        return SolutionUtils.noSolutions();
    }

    /**
     * Text match query that is passed to lucene
     */
    public static class TextMatchQuery {
        /** The literal query to run */
        public String literalQuery = null;
        /** Extra query terms for query */
        public final Collection<Query> terms = new HashSet<Query>();
        /** Graphs over which this query is run */
        public final Collection<URI> graphs = new HashSet<URI>();
    }
}