/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
* getModel() contributed by Netymon Pty Ltd on behalf of
* The Australian Commonwealth Government under contract 4500507038.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.resolver.lucene;
// Java 2 standard packages
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URLConnection;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import javax.activation.MimeType;
import javax.activation.MimeTypeParseException;
import javax.transaction.xa.XAResource;
// Log4j
import org.apache.log4j.Logger;
import org.apache.lucene.util.Version;
// JRDF
import org.jrdf.graph.BlankNode;
import org.jrdf.graph.Node;
import org.jrdf.graph.Literal;
import org.jrdf.graph.URIReference;
// Locally written packages
import org.mulgara.query.Constraint;
import org.mulgara.query.ConstraintElement;
import org.mulgara.query.LocalNode;
import org.mulgara.query.QueryException;
import org.mulgara.query.TuplesException;
import org.mulgara.query.Variable;
import org.mulgara.resolver.spi.AbstractXAResource;
import org.mulgara.resolver.spi.AbstractXAResource.RMInfo;
import org.mulgara.resolver.spi.AbstractXAResource.TxInfo;
import org.mulgara.resolver.spi.EmptyResolution;
import org.mulgara.resolver.spi.GlobalizeException;
import org.mulgara.resolver.spi.LocalizeException;
import org.mulgara.resolver.spi.Resolution;
import org.mulgara.resolver.spi.Resolver;
import org.mulgara.resolver.spi.ResolverFactory;
import org.mulgara.resolver.spi.ResolverException;
import org.mulgara.resolver.spi.ResolverSession;
import org.mulgara.resolver.spi.Statements;
import org.mulgara.util.StackTrace;
import org.mulgara.util.conversion.html.HtmlToTextConverter;
/**
* Resolves constraints in models defined by static RDF documents.
*
* @created 2004-04-01
*
* @author <a href="http://staff.pisoftware.com/raboczi">Simon Raboczi</a>
* @copyright © 2003 <A href="http://www.PIsoftware.com/">Plugged In Software Pty Ltd</A>
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
public class LuceneResolver implements Resolver {
/** Logger. */
private static final Logger logger = Logger.getLogger(LuceneResolver.class);
/** System property name used to look up Lucene query language version. */
public final static String LUCENE_VERSION_PROPERTY = "org.mulgara.lucene.version";
/** Default Lucene query language */
public final static Version DEFAULT_LUCENE_VERSION = Version.LUCENE_34;
/** Version of the Lucene query language to use. May be set via system properties for backwards compatibility. */
public final static Version LUCENE_VERSION;
static {
Version ver = DEFAULT_LUCENE_VERSION;
String versionProp = System.getProperty(LUCENE_VERSION_PROPERTY);
if (versionProp != null && versionProp.length() > 0) {
try {
ver = Version.valueOf(versionProp);
} catch (IllegalArgumentException e) {
logger.warn("Illegal Lucene query language version property '" + versionProp +
"', defaulting to " + DEFAULT_LUCENE_VERSION);
}
}
assert ver != null;
LUCENE_VERSION = ver;
}
/**
* The preallocated node identifying the type of temporary model to create
* in the {@link #modifyModel} method.
*/
protected final URI modelTypeURI;
protected final ResolverSession resolverSession;
protected final LuceneResolverFactory resolverFactory;
protected final boolean forWrites;
protected final XAResource xares;
// for abort() only
protected Collection<FullTextStringIndex> indexes;
//
// Constructors
//
/**
* Construct a resolver.
*
* @param modelTypeURI the URI of the lucene model type
* @param resolverSession the session this resolver is associated with
* @param resolverFactory the resolver-factory that created us
* @param forWrites whether we may be getting writes
*/
LuceneResolver(URI modelTypeURI, ResolverSession resolverSession,
LuceneResolverFactory resolverFactory, boolean forWrites) {
// Initialize fields
this.modelTypeURI = modelTypeURI;
this.resolverSession = resolverSession;
this.resolverFactory = resolverFactory;
this.forWrites = forWrites;
this.xares = new LuceneXAResource(10, resolverFactory, this);
}
//
// Methods implementing Resolver
//
/**
* Create a model by treating the <var>model</var> as the {@link java.net.URL} of an
* RDF document and downloading it into the database.
*
* @param model {@inheritDoc}. In this case, it should be the {@link java.net.URL} of
* an RDF/XML document.
* @param modelTypeURI {@inheritDoc}. This field is ignored, because URL
* models are external.
*/
public void createModel(long model, URI modelTypeURI) throws ResolverException, LocalizeException {
if (logger.isDebugEnabled()) {
logger.debug("Create Lucene model " + model);
}
}
public XAResource getXAResource() {
return xares;
}
/**
* Insert or delete RDF statements in a model at a URL.
*/
public void modifyModel(long model, Statements statements, boolean occurs) throws ResolverException {
if (logger.isDebugEnabled()) {
logger.debug("Modify URL model " + model);
}
try {
FullTextStringIndex stringIndex = getFullTextStringIndex(model);
statements.beforeFirst();
while (statements.next()) {
Node subjectNode = resolverSession.globalize(statements.getSubject());
// Do not insert the triple if it contains a blank node in subject.
if (subjectNode instanceof BlankNode) {
if (logger.isInfoEnabled()) {
logger.info(statements.getSubject() + " is blank node; ignoring Lucene insert.");
}
continue;
}
Node predicateNode = resolverSession.globalize(statements.getPredicate());
Node objectNode = resolverSession.globalize(statements.getObject());
// Get the subject's string value.
String subject = ((URIReference) subjectNode).getURI().toString();
// Predicates can only ever be URIReferences.
String predicate = ((URIReference) predicateNode).getURI().toString();
if (objectNode instanceof URIReference) {
URIReference objectURI = (URIReference) objectNode;
String resource = objectURI.getURI().toString();
try {
// Assert or deny the statement in the Lucene model
if (occurs) {
InputStream input = null;
Reader reader = null;
try {
// Connect to the resource's content
URLConnection connection = objectURI.getURI().toURL().
openConnection();
String contentType = connection.getContentType();
if (logger.isDebugEnabled()) {
logger.debug("Content type of resource is " + contentType);
}
MimeType contentMimeType;
try {
contentMimeType = new MimeType(contentType);
} catch (MimeTypeParseException e) {
logger.warn("\"" + contentType + "\" didn't parse as MIME type",
e);
try {
contentMimeType = new MimeType("content", "unknown");
} catch (MimeTypeParseException em) {
throw new ResolverException("Failed to create mime-type", em);
}
}
assert contentMimeType != null;
// If no character encoding is specified, guess at Latin-1
String charSet = contentMimeType.getParameter("charset");
if (charSet == null) {
charSet = "ISO8859-1";
}
assert charSet != null;
// Get the content, performing appropriate character encoding
input = connection.getInputStream();
reader = new InputStreamReader(input, charSet);
// Add a filter if the content type is text/html, to strip out
// HTML keywords that will clutter the index
try {
if (contentMimeType.match(new MimeType("text", "html"))) {
reader = HtmlToTextConverter.convert(reader);
}
} catch (MimeTypeParseException em) {
throw new ResolverException("Failed to create mime-type", em);
}
if (logger.isDebugEnabled()) {
logger.debug("Inserting " + subject + " " + predicate + " " + resource);
}
if (!stringIndex.add(subject, predicate, resource, reader)) {
logger.warn("Unable to add {" + subject + ", " + predicate + ", " +
resource + "} to full text string index");
}
} catch (MalformedURLException e) {
logger.info(resource + " is not a URL; ignoring Lucene insert");
} catch (IOException e) {
throw new ResolverException("Can't obtain content of " + resource, e);
} catch (org.mulgara.util.conversion.html.ParseException e) {
throw new ResolverException("Couldn't parse content of " + resource, e);
} finally {
try {
if (reader != null) reader.close();
else if (input != null) input.close();
} catch (IOException e) {
logger.warn("Ignoring error closing resource content", e);
}
}
} else { // (!occurs)
if (logger.isDebugEnabled()) {
logger.debug("Deleting " + subject + " " + predicate + " " + resource);
}
if (!stringIndex.remove(subject, predicate, resource)) {
logger.warn("Unable to remove {" + subject + ", " + predicate +
", " + resource + "} from full text string index");
}
}
} catch (FullTextStringIndexException e) {
throw new ResolverException("Unable to modify full text index\n" + new StackTrace(e));
}
} else if (objectNode instanceof Literal) {
Literal objectLiteral = (Literal) objectNode;
String literal = objectLiteral.getLexicalForm();
// Insert the statement into the text index
try {
if (occurs) {
if (logger.isDebugEnabled()) {
logger.debug("Inserting " + subject + " " + predicate + " " + literal);
}
if (!stringIndex.add(subject, predicate, literal)) {
logger.warn("Unable to add {" + subject + ", " + predicate +
", " + literal + "} to full text string index");
}
} else {
if (logger.isDebugEnabled()) {
logger.debug("Deleting " + subject + " " + predicate + " " + literal);
}
if (!stringIndex.remove(subject, predicate, literal)) {
logger.warn("Unable to remove {" + subject + ", " + predicate +
", " + literal + "} from full text string index");
}
}
} catch (FullTextStringIndexException e) {
throw new ResolverException("Unable to " + (occurs ? "add" : "delete") + "'" +
literal + "' to full text string index\n" + new StackTrace(e));
}
} else {
if (logger.isInfoEnabled()) {
logger.info(objectNode + " is blank node; ignoring Lucene insert.");
}
}
}
} catch (TuplesException et) {
throw new ResolverException("Error fetching statements", et);
} catch (GlobalizeException eg) {
throw new ResolverException("Error localizing statements", eg);
} catch (IOException ioe) {
throw new ResolverException("Failed to open string index", ioe);
} catch (FullTextStringIndexException ef) {
throw new ResolverException("Error in string index\n" + new StackTrace(ef));
}
}
/**
* Remove the cached model containing the contents of a URL.
*/
public void removeModel(long model) throws ResolverException {
if (logger.isDebugEnabled()) {
logger.debug("Removing full-text model " + model);
}
try {
getFullTextStringIndex(model).removeAll();
} catch (IOException ioe) {
throw new ResolverException("Failed to open string index", ioe);
} catch (FullTextStringIndexException ef) {
throw new ResolverException("Query failed against string index\n" + new StackTrace(ef));
}
}
/**
* Resolve a constraint against an RDF/XML document.
*
* Resolution is by filtration of a URL stream, and thus very slow.
*/
public Resolution resolve(Constraint constraint) throws QueryException {
if (logger.isDebugEnabled()) {
logger.debug("Resolve " + constraint);
}
// check the model
ConstraintElement modelElement = constraint.getModel();
if (modelElement instanceof Variable) {
if (logger.isDebugEnabled()) logger.debug("Ignoring solutions for " + constraint);
return new EmptyResolution(constraint, false);
} else if (!(modelElement instanceof LocalNode)) {
throw new QueryException("Failed to localize Lucene Graph before resolution " + constraint);
}
/* temporary hack because $_from is not resolved before transformation occurs, and hence
* no LuceneConstraint's are created when doing ... from <lucene-model> where ... .
*/
if (!(constraint instanceof LuceneConstraint)) {
constraint = new LuceneConstraint(constraint);
}
// generate the tuples
try {
FullTextStringIndex stringIndex = getFullTextStringIndex(((LocalNode)modelElement).getValue());
return new FullTextStringIndexTuples(stringIndex, (LuceneConstraint)constraint, resolverSession);
} catch (IOException ioe) {
throw new QueryException("Failed to open string index", ioe);
} catch (FullTextStringIndexException ef) {
throw new QueryException("Query failed against string index\n" + new StackTrace(ef));
} catch (TuplesException te) {
throw new QueryException("Failed to query string index", te);
}
}
private FullTextStringIndex getFullTextStringIndex(long model)
throws FullTextStringIndexException, IOException {
FullTextStringIndex index = LuceneXAResource.getCurrentIndexes().get(model);
if (index == null) {
index = new FullTextStringIndex(resolverFactory.getIndexerCache(Long.toString(model)), forWrites);
LuceneXAResource.getCurrentIndexes().put(model, index);
}
return index;
}
public void abort() {
try {
closeIndexes(indexes, false);
} catch (Exception e) {
logger.error("Error closing fulltext index", e);
}
}
private static void closeIndexes(Collection<FullTextStringIndex> indexes, boolean commit)
throws Exception {
Exception exc = null;
for (FullTextStringIndex index : indexes) {
try {
if (commit) {
// index.optimize();
index.commit();
} else {
index.rollback();
}
} catch (Exception e) {
if (exc == null)
exc = e;
else
logger.error("Error rolling back fulltext index", e);
} finally {
try {
index.close();
} catch (Exception e) {
if (exc == null)
exc = e;
else
logger.error("Error closing fulltext index", e);
}
}
}
if (exc != null)
throw exc;
}
/**
* An XAResource to manage the lucene indexes.
*/
private static class LuceneXAResource
extends AbstractXAResource<RMInfo<LuceneXAResource.LuceneTxInfo>,LuceneXAResource.LuceneTxInfo> {
private static final ThreadLocal<Map<Long, FullTextStringIndex>> currentIndexes = new ThreadLocal<Map<Long, FullTextStringIndex>>();
private final LuceneResolver resolver;
/**
* Construct a {@link LuceneXAResource} with a specified transaction timeout.
*
* @param transactionTimeout transaction timeout period, in seconds
* @param resolverFactory the resolver-factory we belong to
*/
public LuceneXAResource(int transactionTimeout, ResolverFactory resolverFactory, LuceneResolver resolver) {
super(transactionTimeout, resolverFactory);
this.resolver = resolver;
}
protected RMInfo<LuceneTxInfo> newResourceManager() {
return new RMInfo<LuceneTxInfo>();
}
protected LuceneTxInfo newTransactionInfo() {
return new LuceneTxInfo();
}
public static Map<Long, FullTextStringIndex> getCurrentIndexes() {
return currentIndexes.get();
}
//
// Methods implementing XAResource
//
protected void doStart(LuceneTxInfo tx, int flags, boolean isNew) {
currentIndexes.set(tx.indexes);
resolver.indexes = tx.indexes.values();
}
protected void doEnd(LuceneTxInfo tx, int flags) {
currentIndexes.set(null);
}
protected int doPrepare(LuceneTxInfo tx) throws Exception {
for (FullTextStringIndex index : tx.indexes.values())
index.prepare();
return XA_OK;
}
protected void doCommit(LuceneTxInfo tx) throws Exception {
closeIndexes(tx.indexes.values(), true);
tx.indexes.clear(); // so transactionCompleted does not close a second time
}
protected void doRollback(LuceneTxInfo tx) throws Exception {
closeIndexes(tx.indexes.values(), false);
tx.indexes.clear(); // so transactionCompleted does not close a second time
}
protected void doForget(LuceneTxInfo tx) {
}
protected void transactionCompleted(LuceneTxInfo tx) {
super.transactionCompleted(tx);
try {
closeIndexes(tx.indexes.values(), false);
} catch (Exception e) {
logger.error("Error closing fulltext index", e);
}
}
static class LuceneTxInfo extends TxInfo {
public final Map<Long, FullTextStringIndex> indexes = new HashMap<Long, FullTextStringIndex>();
}
}
}