/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.resolver.lucene;
// Java 2 standard packages
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
// Third party packages
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.MapFieldSelector;
// JRDF
import org.jrdf.graph.BlankNode;
import org.jrdf.graph.Literal;
import org.jrdf.graph.URIReference;
// local packages
import org.mulgara.query.Constraint;
import org.mulgara.query.ConstraintElement;
import org.mulgara.query.LocalNode;
import org.mulgara.query.TuplesException;
import org.mulgara.query.Variable;
import org.mulgara.query.rdf.LiteralImpl;
import org.mulgara.query.rdf.URIReferenceImpl;
import org.mulgara.resolver.spi.GlobalizeException;
import org.mulgara.resolver.spi.LocalizeException;
import org.mulgara.resolver.spi.Resolution;
import org.mulgara.resolver.spi.ResolverSession;
import org.mulgara.store.tuples.AbstractTuples;
import org.mulgara.store.tuples.Annotation;
import org.mulgara.store.tuples.DefinablePrefixAnnotation;
import org.mulgara.store.tuples.MandatoryBindingAnnotation;
import org.mulgara.store.tuples.RowComparator;
import org.mulgara.store.tuples.Tuples;
import org.mulgara.store.tuples.TuplesOperations;
/**
* A {@link Tuples} backed by a {@link FullTextStringIndex}. This is split into two parts, an
* upper layer which mostly delegates all methods to another tuples, and a lower layer which is
* a direct tuples around the lucene search hits. This allows us to conditionally materialize
* the results by materializing the lower-layer tuples.
*
* <p>The observed performance of lucene is such that running one larger query is faster than
* running, say, 100 smaller queries; at the same time, however, retrieving all the documents
* for the results usually takes several times longer than the actual query took. We therefore
* run a single query for the given constraint (rather than, say, running a query on each
* <code>beforeFirst</code> with the currently bound values) which gathers the document id's
* and scores. The documents are then retrieved on-demand by the lower-layer tuples; for most
* queries we then just materialize that tuples rather than creating our own document-cache
* (lucene does not explicitly cache documents) so repeated iterations over the results are
* fast.
*
* @created 2002-03-27
* @author <a href="http://staff.pisoftware.com/raboczi">Simon Raboczi</a>
* @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
* @copyright © 2002-2004 <A href="http://www.PIsoftware.com/">Plugged In
* Software Pty Ltd</A>
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
class FullTextStringIndexTuples extends AbstractTuples implements Resolution, Cloneable {

  /** Logger. */
  private final static Logger logger = Logger.getLogger(FullTextStringIndexTuples.class);

  /** Session used to localize Lucene text into string pool nodes. */
  private final ResolverSession session;

  /** Cached upper bound on the number of rows; -1 until it has been computed. */
  private long rowUpperBound = -1;

  /** The real results; all tuples operations are delegated to this. */
  private Tuples results = null;

  /** The list of variables as found in the constraint (score variable, if any, last) */
  private final List<Variable> constrVariableList = new ArrayList<Variable>(4);

  /** The list of lucene keys corresponding to the variables found in the constraint */
  private final List<String> constrLuceneKeyList = new ArrayList<String>(3);

  /** The index that is queried. */
  private final FullTextStringIndex fullTextStringIndex;

  /** The constraint being resolved. */
  private final LuceneConstraint constraint;

  /** The subject position of the constraint. */
  private final ConstraintElement subjectElement;

  /** The predicate position of the constraint. */
  private final ConstraintElement predicateElement;

  /** The object position of the constraint. */
  private final ConstraintElement objectElement;

  //
  // Constructor
  //

  /**
   * Find the answer to a single constraint.
   *
   * The {@link org.mulgara.query.Answer}'s columns will
   * match the variables in the <var>constraint</var>, except that a magical
   * column called <code>$score</code> is added at the end, containing Lucene's
   * reckoning of how close the matches are.
   *
   * @param fullTextStringIndex the full-text index to run the query against
   * @param constraint the single constraint
   * @param session a session context for globalization, etc
   * @throws TuplesException if the set of triples couldn't be determined
   */
  FullTextStringIndexTuples(FullTextStringIndex fullTextStringIndex,
      LuceneConstraint constraint, ResolverSession session) throws TuplesException {
    this.fullTextStringIndex = fullTextStringIndex;
    this.session = session;
    this.constraint = constraint;

    // process subject
    subjectElement = constraint.getSubject();
    if (subjectElement instanceof Variable) {
      constrVariableList.add((Variable)subjectElement);
      constrLuceneKeyList.add(FullTextStringIndex.SUBJECT_KEY);
    }

    // process predicate
    predicateElement = constraint.getPredicate();
    if (predicateElement instanceof Variable) {
      constrVariableList.add((Variable)predicateElement);
      constrLuceneKeyList.add(FullTextStringIndex.PREDICATE_KEY);
    }

    // process object
    objectElement = constraint.getObject();
    if (objectElement instanceof Variable) {
      constrVariableList.add((Variable)objectElement);
      constrLuceneKeyList.add(FullTextStringIndex.LITERAL_KEY);
    }

    // Get the score variable
    Variable score = constraint.getScoreVar();
    if (score != null) {
      constrVariableList.add(score);
    }

    setVariables(constrVariableList);

    /* run the query now and materialize the result; it is often much faster to run a large query
     * and grab all resulting lucene documents than it is to run many smaller queries. Ideally we
     * would try and figure out which approach is better on a query-by-query basis.
     *
     * One special case is the all-variable query, which retrieves the whole db - this one is not
     * materialized because it's usually only used by export().
     */
    String subject = getString(subjectElement);
    String predicate = getString(predicateElement);
    String object = getString(objectElement);

    Tuples searchHits = new SearchHitsTuples(subject, predicate, object);

    boolean allBound = (subject != null && predicate != null && object != null);
    boolean anyBound = (subject != null || predicate != null || object != null);

    if (!anyBound) {
      // all-variable query: keep the lazy, document-backed tuples
      results = searchHits;
    } else {
      boolean succeeded = false;
      try {
        if (allBound) {
          // nothing left to bind: the answer is just "is there at least one hit?"
          results = searchHits.next() ? TuplesOperations.unconstrained() : TuplesOperations.empty();
        } else {
          long t0 = System.currentTimeMillis();
          results = TuplesOperations.materialize(searchHits);
          if (logger.isDebugEnabled()) {
            logger.debug("materialized " + results.getRowCount() + " lucene results() in " + (System.currentTimeMillis() - t0));
          }
        }
        succeeded = true;
      } finally {
        if (succeeded) {
          // the search hits have been replaced and are no longer needed
          searchHits.close();
        } else {
          // don't leak the hits (or a half-built replacement) when construction fails;
          // close quietly so the original exception is the one that propagates
          if (results != searchHits) closeAfterFailure(results);
          closeAfterFailure(searchHits);
        }
      }
    }
  }

  /**
   * Close a tuples while another exception is already propagating. Any failure to close is
   * logged rather than thrown so that it cannot mask the original exception.
   *
   * @param tuples the tuples to close; may be <code>null</code>
   */
  private static void closeAfterFailure(Tuples tuples) {
    if (tuples == null) return;
    try {
      tuples.close();
    } catch (TuplesException e) {
      logger.warn("Error closing tuples after failed construction", e);
    }
  }

  //
  // Implementation of AbstractTuples methods
  //

  /**
   * Reset the underlying results to before the first row matching the given prefix.
   *
   * @param prefix the values to bind the leading columns to
   * @param suffixTruncation passed through to the underlying results
   * @throws TuplesException if the underlying results fail to reset
   */
  public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
    assert (constraint.getScoreVar() == null ||
            getString(objectElement, prefix, constrVariableList) != null) :
        "Internal error: lucene-query string not bound even though a score is requested";

    results.beforeFirst(prefix, suffixTruncation);
  }

  /**
   * Get the string value of a constraint element when no prefix is bound.
   *
   * @param ce the constraint element
   * @return the string value, or <code>null</code> if the element is not bound
   * @throws TuplesException if the value could not be globalized
   */
  private String getString(ConstraintElement ce) throws TuplesException {
    return getString(ce, NO_PREFIX, constrVariableList);
  }

  /**
   * Get the string value a constraint element is bound to, either because it is a fixed
   * node or because it is a variable that is bound in the given prefix.
   *
   * @param ce the constraint element
   * @param prefix the currently bound prefix values
   * @param vars the variables in column order; a variable's position here is its index into
   *             <var>prefix</var>
   * @return the URI string for URI references, the lexical form for literals, the empty string
   *         for blank nodes, or <code>null</code> if the element is not bound
   * @throws TuplesException if the value could not be globalized or is of an unknown node type
   */
  private String getString(ConstraintElement ce, long[] prefix, List<Variable> vars) throws TuplesException {
    long boundVal = 0;
    if (ce instanceof LocalNode) {
      boundVal = ((LocalNode)ce).getValue();
    } else if (ce instanceof Variable) {
      int idx = vars.indexOf(ce);
      // idx is -1 if the variable is not a column; treat that as unbound instead of indexing prefix[-1]
      boundVal = (idx >= 0 && idx < prefix.length) ? prefix[idx] : 0;
    }

    // 0 is used as the "not bound" marker
    if (boundVal == 0) return null;

    try {
      Object val = session.globalize(boundVal);
      if (val instanceof URIReference) return ((URIReference)val).getURI().toString();
      if (val instanceof Literal) return ((Literal)val).getLexicalForm();
      if (val instanceof BlankNode) return "";

      throw new TuplesException("Unknown node-type for Lucene constraint '" + ce + "': local-value=" + boundVal + ", global-value=" + val + ", class=" + (val != null ? val.getClass() : null));
    } catch (GlobalizeException e) {
      throw new TuplesException("Couldn't globalize value " + boundVal, e);
    }
  }

  /**
   * Close the underlying results, if any.
   *
   * @throws TuplesException if the underlying results fail to close
   */
  public void close() throws TuplesException {
    if (results != null) {
      results.close();
    }
  }

  /**
   * Clone this tuples; the underlying results are cloned too so each clone iterates
   * independently.
   *
   * @return the clone
   */
  public FullTextStringIndexTuples clone() {
    FullTextStringIndexTuples clone = (FullTextStringIndexTuples)super.clone();
    if (results != null) clone.results = (Tuples)results.clone();
    return clone;
  }

  /**
   * @param column the column index
   * @return the value of the given column in the current row of the underlying results
   * @throws TuplesException if the underlying results fail
   */
  public long getColumnValue(int column) throws TuplesException {
    return results.getColumnValue(column);
  }

  /**
   * @return the row count of the underlying results
   * @throws TuplesException if the underlying results fail
   */
  public long getRowCount() throws TuplesException {
    if (results == null) {
      beforeFirst();
    }
    return results.getRowCount();
  }

  /**
   * @return the row upper bound of the underlying results, or an estimate from the index if
   *         there are no results
   * @throws TuplesException if the bound could not be determined
   */
  public long getRowUpperBound() throws TuplesException {
    return (results != null) ? results.getRowUpperBound() : getRowUpperBoundEstimate();
  }

  /**
   * @return the expected row count of the underlying results, or an estimate from the index if
   *         there are no results
   * @throws TuplesException if the count could not be determined
   */
  public long getRowExpectedCount() throws TuplesException {
    return (results != null) ? results.getRowExpectedCount() : getRowUpperBoundEstimate();
  }

  /**
   * Compute (once) and return an upper bound on the number of rows: the row count of the
   * results if available, else the maximum number of documents reported by the index for
   * the constraint's bound values.
   *
   * @return the cached upper bound
   * @throws TuplesException if the index could not be queried
   */
  private long getRowUpperBoundEstimate() throws TuplesException {
    if (rowUpperBound == -1) {
      try {
        rowUpperBound = (results != null) ? results.getRowCount() :
                            fullTextStringIndex.getMaxDocs(getString(subjectElement),
                                                           getString(predicateElement),
                                                           getString(objectElement));
      } catch (FullTextStringIndexException e) {
        throw new TuplesException("Couldn't row upper-bound from text index: subject='" +
                                  getString(subjectElement) + "', predicate='" +
                                  getString(predicateElement) + "', object='" +
                                  getString(objectElement) + "'", e);
      }
    }

    return rowUpperBound;
  }

  /**
   * @return {@link Tuples#ZERO}, {@link Tuples#ONE} or {@link Tuples#MANY}, derived from
   *         {@link #getRowUpperBound}
   * @throws TuplesException if the upper bound could not be determined
   */
  public int getRowCardinality() throws TuplesException {
    long bound = getRowUpperBound();
    if (bound == 0) return Tuples.ZERO;
    if (bound == 1) return Tuples.ONE;
    return Tuples.MANY;
  }

  /**
   * Lucene never generates unbound columns.
   *
   * @return <code>false</code>
   */
  public boolean isColumnEverUnbound(int column) throws TuplesException {
    return false;
  }

  /**
   * @return whether the underlying results report having no duplicates
   * @throws TuplesException if the underlying results fail
   */
  public boolean hasNoDuplicates() throws TuplesException {
    if (results == null) {
      beforeFirst();
    }
    return results.hasNoDuplicates();
  }

  /**
   * @return <code>true</code> if there are underlying results and they are materialized
   */
  public boolean isMaterialized() {
    return (results != null) && results.isMaterialized();
  }

  /**
   * @return the comparator of the underlying results, or <code>null</code> if there are none
   */
  public RowComparator getComparator() {
    if (results != null) {
      return results.getComparator();
    } else {
      return null;
    }
  }

  /**
   * Move the underlying results to the next row.
   *
   * @return <code>true</code> if there is another row
   * @throws TuplesException if the underlying results fail
   */
  public boolean next() throws TuplesException {
    assert results != null : "next() called without beforeFirst()";
    return results.next();
  }

  /**
   * @return the constraint this tuples is the resolution of
   */
  public Constraint getConstraint() {
    return constraint;
  }

  //!!FIXME: I have no idea if this is correct.
  public boolean isComplete() {
    return false;
  }

  /**
   * @return a single-element list containing the underlying results
   */
  public List<Tuples> getOperands() {
    return Collections.singletonList(results);
  }

  /**
   * Get an annotation. A {@link MandatoryBindingAnnotation} for the object variable is
   * returned when the object is a variable and a score is requested; everything else is
   * delegated to the underlying results.
   *
   * @param annotationClass the class of annotation being requested
   * @return the annotation, or <code>null</code> if not available
   * @throws TuplesException if the underlying results fail
   */
  public Annotation getAnnotation(Class<? extends Annotation> annotationClass) throws TuplesException {
    // the object (lucene query string) is required when a score is requested
    if (annotationClass == MandatoryBindingAnnotation.class &&
        objectElement instanceof Variable && constraint.getScoreVar() != null) {
      return new MandatoryBindingAnnotation(new Variable[] { (Variable)objectElement });
    }

    return (results != null) ? results.getAnnotation(annotationClass) : null;
  }

  /**
   * The lower-layer tuples: a direct wrapper around the lucene search hits. The query is run
   * once in the constructor; documents are loaded on demand in {@link #next} and filtered
   * against the values bound by the last {@link #beforeFirst(long[], int)}.
   */
  private class SearchHitsTuples extends AbstractTuples {

    /** The current list of variables (possibly re-ordered from definePrefix()) */
    private final List<Variable> variableList = new ArrayList<Variable>(4);

    /** The list of lucene keys corresponding to the (re-ordered) variable-list */
    private final List<String> luceneKeyList = new ArrayList<String>(3);

    /** The native Lucene query result to represent as a {@link Tuples}. */
    private FullTextStringIndex.Hits hits;

    /** Which fields to load from the documents. */
    private final FieldSelector fieldSelector;

    /**
     * The current document within the {@link #hits}.
     *
     * A Lucene document hit corresponds to a {@link Tuples} row.
     */
    private Document document;

    /** The index of the next {@link #document} within the {@link #hits}. */
    private int nextDocumentIndex = 0;

    /** the currently bound value for the subject, or null if not bound */
    private String subject;

    /** the currently bound value for the predicate, or null if not bound */
    private String predicate;

    /** the currently bound value for the object, or null if not bound */
    private String object;

    /**
     * Run the lucene query for the given values.
     *
     * @param subject the subject to search for, or <code>null</code> if unbound
     * @param predicate the predicate to search for, or <code>null</code> if unbound
     * @param object the lucene query string, or <code>null</code> if unbound
     * @throws TuplesException if the index query fails
     */
    public SearchHitsTuples(final String subject, final String predicate, final String object) throws TuplesException {
      if (logger.isDebugEnabled()) {
        logger.debug("Searching for " + subject + " : " + predicate + " : " + object);
      }

      // run the query
      try {
        hits = fullTextStringIndex.find(subject, predicate, object);
      } catch (FullTextStringIndexException e) {
        throw new TuplesException("Couldn't generate answer from text index: subject='" + subject +
                                  "', predicate='" + predicate + "', object='" + object + "'", e);
      }

      // sort the result in doc-id order for faster document retrieval
      hits.sort();

      // make sure we only load those fields we need (=> faster document retrieval)
      List<String> load = new ArrayList<String>(3);
      if (subject == null) load.add(FullTextStringIndex.SUBJECT_KEY);
      if (predicate == null) load.add(FullTextStringIndex.PREDICATE_KEY);
      if (object == null) load.add(FullTextStringIndex.LITERAL_KEY);
      fieldSelector = new MapFieldSelector(load);

      // prepare for iterating
      document = null;
      nextDocumentIndex = 0;

      variableList.addAll(constrVariableList);
      luceneKeyList.addAll(constrLuceneKeyList);
      setVariables(variableList);
    }

    /**
     * Rewind to before the first hit and remember the values bound by the prefix so that
     * {@link #next} can filter on them. <var>suffixTruncation</var> is not used.
     *
     * @param prefix the values bound to the leading variables
     * @param suffixTruncation ignored
     * @throws TuplesException if a prefix value could not be globalized
     */
    public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
      subject = getString(subjectElement, prefix, variableList);
      predicate = getString(predicateElement, prefix, variableList);
      object = getString(objectElement, prefix, variableList);

      document = null;
      nextDocumentIndex = 0;
    }

    /**
     * Advance to the next hit whose loaded fields match the currently bound values.
     *
     * @return <code>true</code> if a matching hit was found
     * @throws TuplesException if a document could not be loaded
     */
    public boolean next() throws TuplesException {
      try {
        while (nextDocumentIndex < hits.length()) {
          document = hits.doc(nextDocumentIndex++, fieldSelector);
          if (matches(subject, document.get(FullTextStringIndex.SUBJECT_KEY)) &&
              matches(predicate, document.get(FullTextStringIndex.PREDICATE_KEY)) &&
              matches(object, document.get(FullTextStringIndex.LITERAL_KEY))) {
            return true;
          }
        }

        document = null;
        return false;
      } catch (IOException e) {
        throw new TuplesException("Couldn't obtain next Lucene hit", e);
      }
    }

    /**
     * @param bound the bound value, or <code>null</code> if nothing is bound
     * @param found the value in the document, or <code>null</code> if the field is absent
     * @return <code>true</code> unless both values are present and differ
     */
    private boolean matches(String bound, String found) {
      return bound == null || found == null || bound.equals(found);
    }

    /** @return the number of hits */
    public long getRowCount() throws TuplesException {
      return hits.length();
    }

    /** @return the number of hits */
    public long getRowUpperBound() throws TuplesException {
      return hits.length();
    }

    /** @return the number of hits */
    public long getRowExpectedCount() throws TuplesException {
      return hits.length();
    }

    /**
     * @return {@link Tuples#ZERO}, {@link Tuples#ONE} or {@link Tuples#MANY} depending on the
     *         number of hits
     */
    public int getRowCardinality() throws TuplesException {
      switch (hits.length()) {
        case 0: return Tuples.ZERO;
        case 1: return Tuples.ONE;
        default: return Tuples.MANY;
      }
    }

    /** @return an empty list */
    public List<Tuples> getOperands() {
      return Collections.<Tuples>emptyList();
    }

    /**
     * Get the localized value of a column of the current hit. Columns below the number of
     * lucene keys are read from the current document; the column immediately after them is
     * the score of the current hit.
     *
     * @param column the column index
     * @return the local node for the column's value
     * @throws TuplesException if the column does not exist, there is no current row, or the
     *         value could not be read or localized
     */
    public long getColumnValue(int column) throws TuplesException {
      if (column < 0 || column > luceneKeyList.size()) {
        throw new TuplesException("Column " + column + " does not exist");
      }
      if (document == null) {
        // fail with a checked exception rather than a NullPointerException
        throw new TuplesException("Couldn't get column " + column + " value: no current row");
      }

      try {
        if (column < luceneKeyList.size()) {
          String luceneKey = luceneKeyList.get(column);
          // compare by value; reference equality only works for identical interned constants
          if (FullTextStringIndex.LITERAL_KEY.equals(luceneKey)) {
            return session.localize(new LiteralImpl(document.get(luceneKey)));
          } else {
            return session.localize(new URIReferenceImpl(new URI(document.get(luceneKey))));
          }
        } else {
          // Generate the score column
          return session.localize(new LiteralImpl(hits.score(nextDocumentIndex - 1)));
        }
      } catch (IOException e) {
        throw new TuplesException("Couldn't get column " + column + " value", e);
      } catch (LocalizeException e) {
        throw new TuplesException("Couldn't localize column " + column + " value", e);
      } catch (URISyntaxException e) {
        throw new TuplesException("Couldn't get column " + column + " value", e);
      }
    }

    /** @return <code>false</code> */
    public boolean isColumnEverUnbound(int column) throws TuplesException {
      return false;
    }

    /** @return <code>false</code> */
    public boolean hasNoDuplicates() throws TuplesException {
      return false;
    }

    /**
     * Close the lucene hits.
     *
     * @throws TuplesException if the hits fail to close
     */
    public void close() throws TuplesException {
      try {
        if (hits != null) hits.close();
      } catch (IOException ioe) {
        throw new TuplesException("Error closing fulltext index hits", ioe);
      }
    }

    /**
     * @return a clone of this tuples with its own clone of the hits
     */
    public SearchHitsTuples clone() {
      SearchHitsTuples clone = (SearchHitsTuples)super.clone();
      if (hits != null) clone.hits = hits.clone();
      return clone;
    }

    /**
     * Get an annotation. Only {@link DefinablePrefixAnnotation} is supported.
     *
     * @param annotationClass the class of annotation being requested
     * @return the annotation, or <code>null</code> if not supported
     */
    public Annotation getAnnotation(Class<? extends Annotation> annotationClass) throws TuplesException {
      // support re-ordering the variables so any variables can be bound in the prefix
      if (annotationClass == DefinablePrefixAnnotation.class) {
        return new DefinablePrefixAnnotation() {
          public void definePrefix(Set<Variable> boundVars) throws TuplesException {
            Variable scoreVar = constraint.getScoreVar();
            // null check first: some Set implementations reject contains(null)
            if (scoreVar != null && boundVars.contains(scoreVar)) {
              throw new TuplesException("Score variable may not be bound");
            }

            variableList.clear();
            luceneKeyList.clear();

            // bound variables first (in constraint order), then the unbound ones
            for (boolean useBound : new boolean[] { true, false }) {
              for (int idx = 0; idx < constrLuceneKeyList.size(); idx++) {
                Variable var = constrVariableList.get(idx);
                if (boundVars.contains(var) == useBound) {
                  variableList.add(var);
                  luceneKeyList.add(constrLuceneKeyList.get(idx));
                }
              }
            }

            // the score column always stays last
            if (scoreVar != null) variableList.add(scoreVar);

            setVariables(variableList);
          }
        };
      }

      return null;
    }
  }
}