/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.content.mbox;
// Java 2 standard packages
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
// Third party packages
import org.jrdf.graph.*;
import org.apache.log4j.Logger; // Apache Log4J
// Locally written packages
import org.mulgara.content.*;
import org.mulgara.content.mbox.parser.model.*;
import org.mulgara.content.mbox.parser.model.exception.*;
import org.mulgara.query.TuplesException;
import org.mulgara.query.Variable;
import org.mulgara.resolver.spi.LocalizeException;
import org.mulgara.resolver.spi.ResolverSession;
import org.mulgara.resolver.spi.Statements;
import org.mulgara.store.tuples.AbstractTuples;
import org.mulgara.store.tuples.Tuples;
/**
* Parses an RFC822 compliant mbox into {@link Statements};
*
* @created 2004-08-24
*
* @author Mark Ludlow
*
* @version $Revision: 1.8 $
*
* @modified $Date: 2005/01/05 04:57:39 $ @maintenanceAuthor $Author: newmana $
*
* @company <a href="mailto:info@PIsoftware.com">Plugged In Software</a>
*
* @copyright © 2004 <a href="http://www.PIsoftware.com/">Plugged In
* Software Pty Ltd</a>
*
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
public class MBoxStatements extends AbstractTuples implements Statements {
/** Logger. */
private static final Logger logger =
Logger.getLogger(MBoxStatements.class.getName());
/** Column index for subjects */
public static final int SUBJECT = 0;
/** Column index for predicates */
public static final int PREDICATE = 1;
/** Column index for predicates */
public static final int OBJECT = 2;
/** The session used to globalize the RDF nodes from the stream. */
private ResolverSession resolverSession;
/** The number of statements in the MBox data. */
private long rowCount;
/** The current row. If the cursor is not on a row, this will be <code>null</code> */
private Triple tripleStatement;
/** The MBox manager we will use to manage our mboxes */
private MBoxManager mboxManager;
/** The mbox we are dealing with */
private MBox mbox;
/** The location of the MBox file. */
private URL url;
/** The mapping of blank nodes to their localised value */
protected static HashMap<Node,Long> blankNodeMap;
/** The content object containing information about the mbox file */
private Content content;
//
// Constructors
//
/**
* Construct an mbox parser.
*
* @param content The content object allowing acces to an input stream
* @param resolverSession session against which to localize RDF nodes
*
* @throws IllegalArgumentException if <var>inputStream</var> or
* <var>resolverSession</var> are <code>null</code>
* @throws NotModifiedException if these statements already exist in a valid
* cached copy
* @throws TuplesException if the <var>inputStream</var> can't be parsed as
* mbox
*/
MBoxStatements(Content content, ResolverSession resolverSession) throws
NotModifiedException, TuplesException {
// Validate "url" parameter
if (content == null) {
throw new IllegalArgumentException("Null \"content\" parameter");
}
// Validate "resolverSession" parameter
if (resolverSession == null) {
throw new IllegalArgumentException("Null \"resolverSession\" parameter");
}
// Initialize fields
try {
this.url = content.getURI() == null ? null : content.getURI().toURL();
} catch (MalformedURLException e) {
this.url = null;
}
this.resolverSession = resolverSession;
// Store the content object
this.content = content;
// Fix the magical column names for RDF statements
setVariables(new Variable[] {new Variable("subject"),
new Variable("predicate"),
new Variable("object")});
if (logger.isInfoEnabled()) {
logger.info("!! Created MBox");
}
// Initialise blank node map
blankNodeMap = new HashMap<Node,Long>();
// Load in the RDF conversion of the given mbox url
loadMBox();
}
/**
* Load in the RDF conversion from the given URL to allow for navigation
* and editing.
*
* @throws NotModifiedException if there's a valid cached version of this
* mbox
* @throws TuplesException
*/
private void loadMBox() throws NotModifiedException, TuplesException {
if (logger.isInfoEnabled()) logger.info("!! Loading in mbox data");
if (mboxManager == null) {
// Container for the model factory
ModelFactory factory = null;
try {
factory = ModelFactory.getInstance();
} catch (FactoryException factoryException) {
throw new TuplesException("Unable to initialise factory to create MBox parser.", factoryException);
}
try {
// Initialise the mbox object using the factory
mboxManager = factory.createMBoxManager();
} catch (FactoryException factoryException) {
throw new TuplesException("Unable to create a new mbox manager.", factoryException);
}
}
try {
// Get the mbox for our contentStream
mbox = mboxManager.getMBox(content);
} catch (ModelException modelException) {
if (content.getURI() == null) {
throw new TuplesException("Failed to create/retrieve MBox with content type " +
content.getContentType(), modelException);
}
throw new TuplesException("Failed to create/retrieve MBox for URI " +
content.getURI().toString(), modelException);
}
try {
// Perform the parsing and prepare for reading triples
mbox.start();
} catch (ModelException modelException) {
if (content.getURI() == null) {
throw new TuplesException("Failed to parse mbox data of type: " + content.getContentType(), modelException);
}
throw new TuplesException("Failed to parse mbox file: " + content.getURI().toString(), modelException);
} catch (InvalidMBoxException invalidMBoxException) {
logger.warn("MBox '" + content.getURI().toString() + "' was an invalid mbox file.", invalidMBoxException);
try {
// Remove the mbox from the cache
mboxManager.delete(mbox);
} catch (ModelException modelException) {
// With the current implementation this shouldn't happen, but we
// should still throw an exception just in case
throw new TuplesException("Failed to delete invalid mbox from manager.", modelException);
}
// Since we can't use the file, throw a tuples exception and stop parsing
throw new TuplesException("MBox '" + content.getURI().toString() +
"' was an invalid mbox file.", invalidMBoxException);
} catch (VocabularyException vocabularyException) {
throw new TuplesException("Unable to set up vocabulary for mbox parsing.", vocabularyException);
}
try {
// Initialize the metadata now that we know the statements
rowCount = mbox.getGraph().getNumberOfTriples();
} catch (GraphException graphException) {
throw new TuplesException("Unable to retrieve number of triples in graph.",graphException);
}
if (logger.isDebugEnabled()) logger.debug("Parsed MBox");
}
//
// Methods implementing Statements
//
/**
* Retrieves the value contained in the subject column for the current triple.
*
* @return The subject value for the current triple
*
* @throws TuplesException
*/
public long getSubject() throws TuplesException {
return getColumnValue(SUBJECT);
}
/**
* Retrieves the value contained in the predicate column for the current triple.
*
* @return The predicate value for the current triple
*
* @throws TuplesException
*/
public long getPredicate() throws TuplesException {
return getColumnValue(PREDICATE);
}
/**
* Retrieves the value contained in the object column for the current triple.
*
* @return The object value for the current triple
*
* @throws TuplesException
*/
public long getObject() throws TuplesException {
return getColumnValue(OBJECT);
}
//
// Methods implementing AbstractTuples
//
/**
* Resets the counter for triples to be the first.
*
* @param prefix The prefix to use
* @param suffixTruncation The truncation of suffixes to use
*
* @throws TuplesException
*/
public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
try {
// Reset the mbox
mbox.reset();
} catch (ModelException modelException) {
throw new TuplesException("Unable to reset the MBox graph.", modelException);
}
if (logger.isDebugEnabled()) logger.debug("-- Getting the before first value");
}
public Object clone() {
MBoxStatements cloned = (MBoxStatements)super.clone();
// Copy immutable fields by reference
cloned.resolverSession = resolverSession;
cloned.rowCount = rowCount;
cloned.tripleStatement = tripleStatement;
cloned.url = url;
return cloned;
}
/**
* Close the RDF/XML formatted input stream.
*/
public void close() throws TuplesException {
resolverSession = null;
tripleStatement = null;
url = null;
}
/**
* @param column 0 for the subject, 1 for the predicate, 2 for the object
*/
public long getColumnValue(int column) throws TuplesException {
// Pull the appropriate field from the current triple as a JRDF Node
Node node = null;
switch (column) {
case SUBJECT:
// Try creating the node with a URI reference
node = tripleStatement.getSubject();
break;
case PREDICATE:
// Try to create a URI reference node to represent the predicate
node = tripleStatement.getPredicate();
break;
case OBJECT:
// Create a literal node with the value for objects
node = tripleStatement.getObject();
break;
default:
throw new TuplesException("No such column " + column);
}
assert node != null;
// Container for our result
Long result = null;
if (blankNodeMap.containsKey(node)) {
// If the node is already mapped then get the value
result = blankNodeMap.get(node);
} else {
// If we haven't mapped the node already then create a new value and store it
// Localize the node and store the long object value
try {
result = new Long(resolverSession.localize(node));
} catch (LocalizeException e) {
throw new TuplesException("Couldn't get column " + column + " value", e);
}
// Store the new mapping
blankNodeMap.put(node, result);
}
if (column == SUBJECT && logger.isInfoEnabled()) logger.info("!! Using node value of: " + result.longValue());
return result.longValue();
}
public List<Tuples> getOperands() {
return Collections.emptyList();
}
public long getRowCount() throws TuplesException {
// Since we don't know how many messages and how many triples within a
// message we can't accurately determine the number of messages so we just
// give the highest number we can to cater for large messages and mboxes
return Long.MAX_VALUE;
}
public long getRowUpperBound() throws TuplesException {
return getRowCount();
}
public long getRowExpectedCount() throws TuplesException {
return getRowExpectedCount();
}
public boolean hasNoDuplicates() throws TuplesException {
return false;
}
public boolean isColumnEverUnbound(int column) throws TuplesException {
switch (column) {
case 0:
case 1:
case 2:
return false;
default:
throw new TuplesException("No such column " + column);
}
}
public boolean next() throws TuplesException {
try {
// Get the next statement in the iterator
tripleStatement = mbox.nextTriple();
} catch (ModelException modelException) {
throw new TuplesException("Failed to read next triple from mbox", modelException);
}
if (tripleStatement != null) {
if (logger.isInfoEnabled()) logger.info("-- Getting next statement: " + tripleStatement.toString());
// If there is a value for the triple then we have more data
return true;
} else {
// There is no more data to navigate
return false;
}
}
}