/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.content.n3;
// Java 2 standard packages
import java.io.InputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
// Third party packages
import antlr.collections.AST; // ANTLR compiler-compiler
import com.hp.hpl.jena.n3.N3Parser; // Jena
import com.hp.hpl.jena.n3.N3ParserEventHandler;
import org.apache.log4j.Logger; // Apache Log4J
import org.jrdf.graph.BlankNode; // JRDF
import org.jrdf.graph.Node;
import org.jrdf.graph.ObjectNode;
import org.jrdf.graph.PredicateNode;
import org.jrdf.graph.SubjectNode;
import org.jrdf.graph.Triple;
import org.jrdf.graph.URIReference;
import org.jrdf.vocabulary.OWL;
import org.jrdf.vocabulary.RDF;
// Locally written packages
import org.mulgara.content.Content;
import org.mulgara.content.NotModifiedException;
import org.mulgara.parser.MulgaraParserException;
import org.mulgara.query.TuplesException;
import org.mulgara.query.rdf.BlankNodeImpl;
import org.mulgara.query.rdf.LiteralImpl;
import org.mulgara.query.rdf.Mulgara;
import org.mulgara.query.rdf.TripleImpl;
import org.mulgara.query.rdf.URIReferenceImpl;
import org.mulgara.resolver.spi.LocalizeException;
import org.mulgara.resolver.spi.ResolverSession;
import org.mulgara.util.IntFile;
import org.mulgara.util.NumberUtil;
import org.mulgara.util.StringToLongMap;
import org.mulgara.util.TempDir;
/**
* nd
* <p>This class parses N3 data. It is implemented as a {@link Runnable} to allow it to be running in
* the background filling a queue, while a consumer thread drains the queue.</p>
*
* <p>Because ResolverSession (and the underlying StringPoolSession) may not be accessed
* concurrently from multiple threads, there is some extra complication when creating blank nodes,
* whereby blank-node instances are created in the parser thread but their id's are allocated later
* in the app-thread.</p>
*
* @created 2004-04-02
* @author <a href="http://staff.pisoftware.com/anewman">Andrew Newman</a>
* @author <a href="http://staff.pisoftware.com/davidm">David Makepeace</a>
* @author <a href="http://staff.pisoftware.com/raboczi">Simon Raboczi</a>
* @copyright © 2004 <a href="http://www.PIsoftware.com/">Plugged In Software Pty Ltd</a>
*/
class Parser extends Thread implements N3ParserEventHandler {
/** Logger. */
private static final Logger logger = Logger.getLogger(Parser.class.getName());
private static final String ANON_TAG = "_:";
private static final String LOCAL_ANON_TAG = ANON_TAG + "node";
/**
* Maximum size that the {@link #triples} buffer can attain without the
* parser deliberately blocking and waiting for it to drain.
*/
private final long MAX_TRIPLES = 1000;
/** Mapping between parsed blank node IDs and local node numbers. */
private IntFile blankNodeIdMap;
/** Mapping between blank node rdf:nodeIDs and local node numbers. */
private StringToLongMap blankNodeNameMap;
/** Mapping between blank node IDs and blank-node instances. */
private Map<Long, BlankNodeImpl> unallocBlankNodeIdMap = new HashMap<Long, BlankNodeImpl>();
/** Mapping between blank node rdf:nodeIDs and blank-node instances. */
private Map<String, BlankNodeImpl> unallocBlankNodeNameMap = new HashMap<String, BlankNodeImpl>();
/** The resolverSession to create new internal identifiers for blank nodes. */
private ResolverSession resolverSession;
/** The stream containing the data to be parsed. */
private InputStream inputStream;
/** The queue of triples generated by the Notation-3 parser. */
private LinkedList<Triple> triples = new LinkedList<Triple>();
/**
* The number of statements parsed so far.
*
* When {@link #complete} is <code>true</code>, this will be the number of
* statements in the Notation-3 document.
*/
private long statementCount = 0;
/**
* <code>true</code> if statementCount is the count of the total number of statements in
* the entire file because the parser has reached the end of the file without error.
*/
private boolean statementCountIsTotal = false;
/** Flag used to indicate that the end of the N3 file has been reached. */
private boolean complete = false;
/** The exception which interrupted parsing, or <code>null</code> is parsing is successful. */
private Throwable exception = null;
/**
* The base URI from which the {@link #inputStream} came and where any
* relative URI references within the stream should be resolved to absolute
* form.
*
* This field may be <code>null</code> if the origin of the stream is
* unknown, although in that case all URI references within the stream must
* be absolute.
*/
private URI baseURI;
/**
* Map of <code>prefix</code> directives.
*
* Keys are {@link String}s of the form <code>p3p:</code>.
* Values are also {@link String}s, and of the form
* <code>http://www.example.org/meeting_organization#</code>.
*/
private final Map<String,String> prefixMap = new HashMap<String,String>();
//
// Constructor
//
/**
* Sole constructor.
*/
Parser(Content content, ResolverSession resolverSession) throws NotModifiedException, TuplesException {
// Validate parameters
if (content == null) throw new IllegalArgumentException("Null \"content\" parameter");
if (resolverSession == null) throw new IllegalArgumentException("Null \"resolverSession\" parameter");
// Initialize fields
this.resolverSession = resolverSession;
this.baseURI = content.getURI() != null ? content.getURI() : URI.create(Mulgara.NAMESPACE);
try {
this.blankNodeIdMap = IntFile.open(TempDir.createTempFile("n3idmap", null), true);
this.blankNodeNameMap = new StringToLongMap();
this.inputStream = content.newInputStream();
} catch (IOException e) {
throw new TuplesException("Unable to obtain input stream from " + baseURI, e);
}
}
/**
* @return the number of statements parsed so far
*/
synchronized long getStatementCount() throws TuplesException {
checkForException();
return statementCount;
}
/**
* @return the total number of statements in the file
*/
synchronized long waitForStatementTotal() throws TuplesException {
while (!complete) {
checkForException();
// Keep the LinkedList drained.
triples.clear();
unallocBlankNodeIdMap.clear();
unallocBlankNodeNameMap.clear();
notifyAll();
try {
wait();
} catch (InterruptedException ex) {
throw new TuplesException("Abort");
}
}
checkForException();
assert statementCountIsTotal;
return statementCount;
}
/**
* Returns true if getStatementCount() would return the total number
* of statements in the file.
*/
synchronized boolean isStatementCountTotal() throws TuplesException {
checkForException();
return statementCountIsTotal;
}
//
// Method implementing Runnable
//
public void run() {
Throwable t = null;
try {
(new N3Parser(inputStream, this)).parse();
if (logger.isDebugEnabled()) logger.debug("Parsed Notation-3");
return;
} catch (Throwable th) {
t = th;
} finally {
synchronized (this) {
if (t != null) {
exception = t;
} else if (exception == null) {
// End of file has been reached without error.
statementCountIsTotal = true;
}
complete = true;
notifyAll();
}
}
if (logger.isDebugEnabled()) logger.debug("Exception while parsing N3", exception);
}
//
// Methods implementing N3ParserEventHandler
//
public void startDocument() {
if (logger.isDebugEnabled()) logger.debug("Start N3 document");
prefixMap.clear();
}
public void endDocument() {
if (logger.isDebugEnabled()) logger.debug("End N3 document");
}
public void error(Exception ex, String message) {
if (logger.isDebugEnabled()) logger.debug(message, ex);
}
public void startFormula(int line, String context) {
if (logger.isDebugEnabled()) logger.debug("Start formula " + context);
}
public void endFormula(int line, String context) {
if (logger.isDebugEnabled()) logger.debug("End formula " + context);
}
public void quad(int line,
AST subj,
AST pred,
AST obj,
String context) {
if (logger.isDebugEnabled()) {
logger.debug("Parsing " + subj + " " + pred + " " + obj + " from " + baseURI);
}
// convert the triple components to JRDF Nodes
SubjectNode subjectNode = null;
PredicateNode predicateNode = null;
ObjectNode objectNode = null;
try {
subjectNode = (SubjectNode) toNode(subj);
predicateNode = (PredicateNode) toNode(pred);
objectNode = (ObjectNode) toNode(obj);
} catch (MulgaraParserException e) {
logger.error("Unable to parse at line " + line + ": " + e.getMessage());
return;
}
if (logger.isDebugEnabled()) {
logger.debug("Parsed " + subjectNode + " " + predicateNode + " " + objectNode + " from " + baseURI);
}
synchronized (this) {
// Wait for the triples buffer to drain if it's too full
while (triples.size() >= MAX_TRIPLES) {
try {
wait();
} catch (InterruptedException ex) {
throw new RuntimeException("Abort");
}
}
// Buffer the statement
triples.addLast(new TripleImpl(subjectNode, predicateNode, objectNode));
statementCount++;
notifyAll();
}
}
public void directive(int line, AST directive, AST[] args, String context) {
switch (directive.getType()) {
case N3Parser.AT_PREFIX:
assert args.length == 2;
assert args[0].getType() == N3Parser.QNAME;
assert args[1].getType() == N3Parser.URIREF;
prefixMap.put(args[0].toString(), args[1].toString());
return;
default:
logger.warn(
"Ignoring directive at line " + line +
": directive=" + directive + " (type " + directive.getType() + ") " +
"args=" + Arrays.asList(args) + " (type " + args[0].getType() + ") " +
"context=" + context
);
}
}
//
// Internal methods
//
/**
* Convert and validate an AST object into a node.
*
* @param ast The AST object to convert.
* @return a {@link Node} matching the AST object.
* @throws MulgaraParserException An unhandled element was encountered.
*/
private Node toNode(AST ast) throws MulgaraParserException {
if (ast == null) throw new IllegalArgumentException("Unable to load NULL nodes");
switch (ast.getType()) {
case N3Parser.LITERAL:
// check if this is a literal type
URI datatype = null;
String lang = null;
// get any modifiers
AST a1 = ast.getNextSibling();
AST a2 = (a1 == null ? null : a1.getNextSibling());
// find the language
lang = getLang(a1);
if (lang == null) lang = getLang(a2);
if (lang == null) lang = "";
// find the datatype
datatype = getDatatype(a1);
if (datatype == null) datatype = getDatatype(a2);
if (datatype == null) {
return new LiteralImpl(ast.toString(), lang);
} else {
return new LiteralImpl(ast.toString(), datatype);
}
case N3Parser.NUMBER:
datatype = NumberUtil.getXSD(NumberUtil.parseNumber(ast.toString()));
return new LiteralImpl(ast.toString(), datatype);
case N3Parser.ANON:
return getBlankNode(ast);
case N3Parser.QNAME:
String s = ast.toString();
if (isAnonymous(ast)) {
return getBlankNode(ast);
} else {
int colonIndex = s.indexOf(':');
assert colonIndex != -1;
String qnamePrefix = s.substring(0, colonIndex + 1);
String uriPrefix = prefixMap.get(qnamePrefix);
if (uriPrefix == null) throw new RuntimeException("No @prefix for " + s);
return toURIReference(uriPrefix + s.substring(colonIndex + 1));
}
case N3Parser.URIREF:
return toURIReference(ast.toString());
case N3Parser.KW_A:
return toURIReference(RDF.TYPE);
case N3Parser.TK_LIST_FIRST:
return toURIReference(RDF.FIRST);
case N3Parser.TK_LIST_REST:
return toURIReference(RDF.REST);
case N3Parser.TK_LIST_NIL:
return toURIReference(RDF.NIL);
case N3Parser.TK_LIST:
return toURIReference(RDF.LIST);
case N3Parser.EQUAL:
return toURIReference(OWL.SAME_AS);
case N3Parser.FORMULA:
throw new MulgaraParserException("Formulas are not supported");
default:
throw new Error("Unsupported N3 parser token type: " + ast.getType());
}
}
private URIReference toURIReference(String string) {
try {
return toURIReference(new URI(string));
} catch (URISyntaxException e) {
throw new RuntimeException("Invalid URI reference generated", e);
}
}
private URIReference toURIReference(URI u) {
if (!u.isAbsolute() && baseURI != null) u = baseURI.resolve(u);
return new URIReferenceImpl(u);
}
/**
* Tests if a node is anonymous.
*
* This is done by looking for the {@link #ANON_TAG} prefix.
*
* @param node The node to test.
* @return <code>true</code> if the node is anonymous.
*/
private boolean isAnonymous(AST node) {
String idStr = node.toString();
return idStr.startsWith(ANON_TAG);
}
/**
* Create a blank node from an AST object.
*
* @param n The AST node to convert to an anonymous node.
* @return An anonymous node that the AST node maps to.
*/
private BlankNode getBlankNode(AST n) {
// this is anonymous, so parse its ID
long anonId = parseAnonId(n);
String anonIdStr = null;
try {
synchronized (this) {
// look up the id in the blank node maps
long resourceNodeId;
if (anonId >= 0) {
resourceNodeId = blankNodeIdMap.getLong(anonId);
} else {
// don't expect to use this map
anonIdStr = n.toString();
resourceNodeId = blankNodeNameMap.get(anonIdStr);
}
// check if the node was found
BlankNodeImpl blankNode;
if (resourceNodeId == 0) {
if (anonId >= 0) {
blankNode = unallocBlankNodeIdMap.get(anonId);
} else {
blankNode = unallocBlankNodeNameMap.get(n.toString());
}
} else {
// Found the ID, so need to recreate the anonymous resource for it
blankNode = new BlankNodeImpl(resourceNodeId);
}
// check if the node was found
if (blankNode == null) {
// need a new anonymous node for this ID
blankNode = new BlankNodeImpl();
// need to put this node into a map
if (anonId >= 0) {
unallocBlankNodeIdMap.put(anonId, blankNode);
} else {
unallocBlankNodeNameMap.put(anonIdStr, blankNode);
}
}
return blankNode;
}
} catch (IOException e) {
throw new RuntimeException("Couldn't generate anonymous resource", e);
}
}
/**
* Parse out the node ID used by a blank node.
*
* @param node The node to get the ID from.
* @return The number part of the node.
*/
private long parseAnonId(AST node) {
String str = node.toString();
if (!str.startsWith(ANON_TAG)) return -1;
try {
int startPoint = node.toString().startsWith(LOCAL_ANON_TAG) ? LOCAL_ANON_TAG.length() : ANON_TAG.length();
return Long.parseLong(node.toString().substring(startPoint));
} catch (NumberFormatException nfe) {
return -1;
}
}
/**
* Get the language of a node.
*
* @param a node to test for language. May be null.
* @return The string representing the language, or <code>null</code> if this
* is not available.
*/
private String getLang(AST a) {
// empty nodes have no info
if (a == null) return null;
return a.getType() == N3Parser.AT_LANG ? a.getText().substring(1) : null;
}
/**
* Get the type of a node.
*
* @param a node to test for type. May be null.
* @return The URI representing the type, or <code>null</code> if this is not
* available.
*/
private URI getDatatype(AST a) {
// empty nodes have no info
if (a == null) return null;
// check if this is a datatype node
if (a.getType() != N3Parser.DATATYPE) return null;
// get the datatype details
AST dt = a.getFirstChild();
try {
if (dt == null) return null;
String uri = dt.toString();
// check for QName
int colonIndex = uri.indexOf(':');
// relative URI, so just return
if (colonIndex == -1) return new URI(uri);
// look for possible prefix
String qnamePrefix = uri.substring(0, colonIndex + 1);
String uriPrefix = prefixMap.get(qnamePrefix);
// if known prefix, then use it, otherwise just return the string as a URI
return uriPrefix == null ? new URI(uri) : new URI(uriPrefix + uri.substring(colonIndex + 1));
} catch (URISyntaxException e) {
logger.warn("Error parsing N3 datatype: " + dt.toString(), e);
return null;
}
}
/**
* If an exception occurred in the parser, throws a TuplesException that
* wraps the exception.
*/
private void checkForException() throws TuplesException {
if (exception != null) {
throw new TuplesException("Exception while reading " + baseURI, exception);
}
}
/**
* Returns a new triple from the queue or null if there are no more triples.
*/
synchronized Triple getTriple() throws TuplesException {
while (triples.isEmpty()) {
checkForException();
if (complete) {
// No more triples.
return null;
}
// Wait for more triples.
try {
wait();
} catch (InterruptedException ex) {
throw new TuplesException("Abort");
}
}
checkForException();
allocateBlankNodes();
notifyAll();
return triples.removeFirst();
}
/**
* Allocate the ids for the new blank nodes.
*/
private synchronized void allocateBlankNodes() {
try {
for (Map.Entry<Long, BlankNodeImpl> entry : unallocBlankNodeIdMap.entrySet()) {
resolverSession.localize(entry.getValue()); // This sets and returns the node ID
blankNodeIdMap.putLong(entry.getKey(), entry.getValue().getNodeId());
}
unallocBlankNodeIdMap.clear();
for (Map.Entry<String, BlankNodeImpl> entry : unallocBlankNodeNameMap.entrySet()) {
resolverSession.localize(entry.getValue()); // This sets and returns the node ID
blankNodeNameMap.put(entry.getKey(), entry.getValue().getNodeId());
}
unallocBlankNodeNameMap.clear();
} catch (LocalizeException le) {
throw new RuntimeException("Unable to create blank node", le);
} catch (IOException ioe) {
throw new RuntimeException("Unable to create blank node", ioe);
}
}
/**
* Stops the thread.
*/
synchronized void abort() {
interrupt();
// Clear the triples list and notify in case ARP uses an internal thread
// which has become blocked on the list being MAX_TRIPLES in size.
triples.clear();
notifyAll();
}
}