/*
* Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
*
* Licensed under the Aduna BSD-style license.
*/
package org.openrdf.rio.ntriples;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.helpers.RDFParserBase;
/**
* RDF parser for N-Triples files. A specification of NTriples can be found in
* <a href="http://www.w3.org/TR/rdf-testcases/#ntriples">this section</a> of
* the RDF Test Cases document. This parser is not thread-safe, therefore its
* public methods are synchronized.
*
* @author Arjohn Kampman
*/
public class NTriplesParser extends RDFParserBase {

	/*-----------*
	 * Variables *
	 *-----------*/

	/**
	 * Source of the characters being parsed. Only set while a call to
	 * {@link #parse(Reader, String)} is in progress.
	 */
	private Reader reader;

	/** 1-based number of the line currently being read; used in reports. */
	private int lineNo;

	/** Subject of the triple that is currently being parsed. */
	private Resource subject;

	/** Predicate of the triple that is currently being parsed. */
	private URI predicate;

	/** Object of the triple that is currently being parsed. */
	private Value object;

	/*--------------*
	 * Constructors *
	 *--------------*/

	/**
	 * Creates a new NTriplesParser that will use a {@link ValueFactoryImpl} to
	 * create objects for resources, bNodes and literals.
	 */
	public NTriplesParser() {
		super();
	}

	/**
	 * Creates a new NTriplesParser that will use the supplied
	 * <tt>ValueFactory</tt> to create RDF model objects.
	 *
	 * @param valueFactory
	 *        A ValueFactory.
	 */
	public NTriplesParser(ValueFactory valueFactory) {
		super(valueFactory);
	}

	/*---------*
	 * Methods *
	 *---------*/

	// implements RDFParser.getRDFFormat()
	public final RDFFormat getRDFFormat() {
		return RDFFormat.NTRIPLES;
	}

	/**
	 * Implementation of the <tt>parse(InputStream, String)</tt> method defined
	 * in the RDFParser interface. The stream is decoded as US-ASCII and handed
	 * to {@link #parse(Reader, String)}.
	 *
	 * @param in
	 *        The InputStream from which to read the data, must not be
	 *        <tt>null</tt>. The InputStream is supposed to contain 7-bit
	 *        US-ASCII characters, as per the N-Triples specification.
	 * @param baseURI
	 *        The URI associated with the data in the InputStream, must not be
	 *        <tt>null</tt>.
	 * @throws IOException
	 *         If an I/O error occurred while data was read from the InputStream.
	 * @throws RDFParseException
	 *         If the parser has found an unrecoverable parse error.
	 * @throws RDFHandlerException
	 *         If the configured statement handler encountered an unrecoverable
	 *         error.
	 * @throws IllegalArgumentException
	 *         If the supplied input stream or base URI is <tt>null</tt>.
	 */
	public synchronized void parse(InputStream in, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (in == null) {
			throw new IllegalArgumentException("Input stream can not be 'null'");
		}
		// Note: baseURI will be checked in parse(Reader, String)

		try {
			parse(new InputStreamReader(in, "US-ASCII"), baseURI);
		}
		catch (UnsupportedEncodingException e) {
			// Every platform should support the US-ASCII encoding...
			throw new RuntimeException(e);
		}
	}

	/**
	 * Implementation of the <tt>parse(Reader, String)</tt> method defined in
	 * the RDFParser interface. Comment lines (starting with '#') and empty
	 * lines are skipped; every other line is parsed as a single triple and
	 * reported to the configured RDF handler.
	 *
	 * @param reader
	 *        The Reader from which to read the data, must not be <tt>null</tt>.
	 * @param baseURI
	 *        The URI associated with the data in the Reader, must not be
	 *        <tt>null</tt>.
	 * @throws IOException
	 *         If an I/O error occurred while data was read from the Reader.
	 * @throws RDFParseException
	 *         If the parser has found an unrecoverable parse error.
	 * @throws RDFHandlerException
	 *         If the configured statement handler encountered an unrecoverable
	 *         error.
	 * @throws IllegalArgumentException
	 *         If the supplied reader or base URI is <tt>null</tt>.
	 */
	public synchronized void parse(Reader reader, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (reader == null) {
			throw new IllegalArgumentException("Reader can not be 'null'");
		}
		if (baseURI == null) {
			throw new IllegalArgumentException("base URI can not be 'null'");
		}

		rdfHandler.startRDF();

		this.reader = reader;
		lineNo = 1;

		reportLocation(lineNo, 1);

		try {
			int c = reader.read();
			c = skipWhitespace(c);

			while (c != -1) {
				if (c == '#') {
					// Comment, ignore
					c = skipLine(c);
				}
				else if (c == '\r' || c == '\n') {
					// Empty line, ignore
					c = skipLine(c);
				}
				else {
					c = parseTriple(c);
				}

				c = skipWhitespace(c);
			}
		}
		finally {
			// clear() is not defined in this class; presumably it resets the
			// parse state kept by the base class.
			clear();

			// Do not keep references to the caller's reader or to the values
			// of a triple that was only partially parsed when an error
			// aborted the parse.
			this.reader = null;
			subject = null;
			predicate = null;
			object = null;
		}

		// Only reached when the complete input was parsed without an exception
		rdfHandler.endRDF();
	}

	/**
	 * Reads characters from reader until it finds a character that is not a
	 * space or tab, and returns this last character. In case the end of the
	 * character stream has been reached, -1 is returned.
	 *
	 * @param c
	 *        The character that was read last.
	 * @return The first character, starting with <tt>c</tt>, that is not a
	 *         space or tab, or -1 at the end of the stream.
	 */
	private int skipWhitespace(int c)
		throws IOException
	{
		while (c == ' ' || c == '\t') {
			c = reader.read();
		}

		return c;
	}

	/**
	 * Reads characters from reader until the first EOL has been read. The first
	 * character after the EOL is returned. In case the end of the character
	 * stream has been reached, -1 is returned. The line counter is incremented
	 * and the new location is reported when an EOL was consumed.
	 *
	 * @param c
	 *        The character that was read last.
	 * @return The first character of the next line, or -1 at the end of the
	 *         stream.
	 */
	private int skipLine(int c)
		throws IOException
	{
		while (c != -1 && c != '\r' && c != '\n') {
			c = reader.read();
		}

		// c is equal to -1, \r or \n. In case of a \r, we should
		// check whether it is followed by a \n.

		if (c == '\n') {
			c = reader.read();

			lineNo++;

			reportLocation(lineNo, 1);
		}
		else if (c == '\r') {
			c = reader.read();

			if (c == '\n') {
				// \r\n counts as a single line break
				c = reader.read();
			}

			lineNo++;

			reportLocation(lineNo, 1);
		}

		return c;
	}

	/**
	 * Parses a single triple (subject, predicate, object and the terminating
	 * '.'), skips the remainder of the line and reports the resulting
	 * statement to the RDF handler.
	 *
	 * @param c
	 *        The first character of the triple.
	 * @return The first character of the next line, or -1 at the end of the
	 *         stream.
	 */
	private int parseTriple(int c)
		throws IOException, RDFParseException, RDFHandlerException
	{
		c = parseSubject(c);

		c = skipWhitespace(c);

		c = parsePredicate(c);

		c = skipWhitespace(c);

		c = parseObject(c);

		c = skipWhitespace(c);

		if (c == -1) {
			throwEOFException();
		}
		else if (c != '.') {
			reportFatalError("Expected '.', found: " + (char)c);
		}

		// Anything that follows the '.' on the same line is ignored
		c = skipLine(c);

		Statement st = createStatement(subject, predicate, object);
		rdfHandler.handleStatement(st);

		subject = null;
		predicate = null;
		object = null;

		return c;
	}

	/**
	 * Parses the subject of a triple and stores it in {@link #subject}.
	 *
	 * @param c
	 *        The first character of the subject.
	 * @return The first character after the subject.
	 */
	private int parseSubject(int c)
		throws IOException, RDFParseException
	{
		StringBuilder sb = new StringBuilder(100);

		// subject is either an uriref (<foo://bar>) or a nodeID (_:node1)
		if (c == '<') {
			// subject is an uriref
			c = parseUriRef(c, sb);
			subject = createURI(sb.toString());
		}
		else if (c == '_') {
			// subject is a bNode
			c = parseNodeID(c, sb);
			subject = createBNode(sb.toString());
		}
		else if (c == -1) {
			throwEOFException();
		}
		else {
			reportFatalError("Expected '<' or '_', found: " + (char)c);
		}

		return c;
	}

	/**
	 * Parses the predicate of a triple and stores it in {@link #predicate}.
	 *
	 * @param c
	 *        The first character of the predicate.
	 * @return The first character after the predicate.
	 */
	private int parsePredicate(int c)
		throws IOException, RDFParseException
	{
		StringBuilder sb = new StringBuilder(100);

		// predicate must be an uriref (<foo://bar>)
		if (c == '<') {
			// predicate is an uriref
			c = parseUriRef(c, sb);
			predicate = createURI(sb.toString());
		}
		else if (c == -1) {
			throwEOFException();
		}
		else {
			reportFatalError("Expected '<', found: " + (char)c);
		}

		return c;
	}

	/**
	 * Parses the object of a triple and stores it in {@link #object}.
	 *
	 * @param c
	 *        The first character of the object.
	 * @return The first character after the object.
	 */
	private int parseObject(int c)
		throws IOException, RDFParseException
	{
		StringBuilder sb = new StringBuilder(100);

		// object is either an uriref (<foo://bar>), a nodeID (_:node1) or a
		// literal ("foo"-en or "1"^^<xsd:integer>).
		if (c == '<') {
			// object is an uriref
			c = parseUriRef(c, sb);
			object = createURI(sb.toString());
		}
		else if (c == '_') {
			// object is a bNode
			c = parseNodeID(c, sb);
			object = createBNode(sb.toString());
		}
		else if (c == '"') {
			// object is a literal
			StringBuilder lang = new StringBuilder(8);
			StringBuilder datatype = new StringBuilder(40);
			c = parseLiteral(c, sb, lang, datatype);
			object = createLiteral(sb.toString(), lang.toString(), datatype.toString());
		}
		else if (c == -1) {
			throwEOFException();
		}
		else {
			reportFatalError("Expected '<', '_' or '\"', found: " + (char)c);
		}

		return c;
	}

	/**
	 * Reads an uriref (<tt>&lt;...&gt;</tt>) and appends everything between
	 * the angle brackets, still in escaped form, to <tt>uriRef</tt>.
	 *
	 * @param c
	 *        The opening '&lt;' character.
	 * @param uriRef
	 *        Receives the characters between the angle brackets.
	 * @return The first character after the closing '&gt;'.
	 */
	private int parseUriRef(int c, StringBuilder uriRef)
		throws IOException, RDFParseException
	{
		assert c == '<' : "Supplied char should be a '<', is: " + c;

		// Read up to the next '>' character
		c = reader.read();
		while (c != '>') {
			if (c == -1) {
				throwEOFException();
			}
			uriRef.append((char)c);
			c = reader.read();
		}

		// c == '>', read next char
		c = reader.read();

		return c;
	}

	/**
	 * Reads a nodeID (<tt>_:name</tt>) and appends the name part to
	 * <tt>name</tt>.
	 *
	 * @param c
	 *        The leading '_' character.
	 * @param name
	 *        Receives the name of the node.
	 * @return The first character after the node name.
	 */
	private int parseNodeID(int c, StringBuilder name)
		throws IOException, RDFParseException
	{
		assert c == '_' : "Supplied char should be a '_', is: " + c;

		c = reader.read();
		if (c == -1) {
			throwEOFException();
		}
		else if (c != ':') {
			// NOTE(review): reportError (not reportFatalError) is used here, so
			// parsing presumably continues when the base class does not throw.
			reportError("Expected ':', found: " + (char)c);
		}

		c = reader.read();
		if (c == -1) {
			throwEOFException();
		}
		else if (!NTriplesUtil.isLetter(c)) {
			reportError("Expected a letter, found: " + (char)c);
		}
		name.append((char)c);

		// Read all following letter and numbers, they are part of the name
		c = reader.read();
		while (c != -1 && NTriplesUtil.isLetterOrNumber(c)) {
			name.append((char)c);
			c = reader.read();
		}

		return c;
	}

	/**
	 * Reads a literal: a quoted label that is optionally followed by a
	 * language tag (<tt>@lang</tt>) or a datatype (<tt>^^&lt;uri&gt;</tt>).
	 * The label is stored with its escape sequences intact; unescaping is done
	 * by {@link #createLiteral(String, String, String)}.
	 *
	 * @param c
	 *        The opening '"' character.
	 * @param value
	 *        Receives the (still escaped) label.
	 * @param lang
	 *        Receives the language tag, if any.
	 * @param datatype
	 *        Receives the (still escaped) datatype URI, if any.
	 * @return The first character after the literal.
	 */
	private int parseLiteral(int c, StringBuilder value, StringBuilder lang, StringBuilder datatype)
		throws IOException, RDFParseException
	{
		assert c == '"' : "Supplied char should be a '\"', is: " + c;

		// Read up to the next '"' character
		c = reader.read();
		while (c != '"') {
			if (c == -1) {
				throwEOFException();
			}
			value.append((char)c);

			if (c == '\\') {
				// This escapes the next character, which might be a double quote
				c = reader.read();
				if (c == -1) {
					throwEOFException();
				}
				value.append((char)c);
			}

			c = reader.read();
		}

		// c == '"', read next char
		c = reader.read();

		if (c == '@') {
			// Read language. A line break also ends the tag: it can never be
			// part of one, and consuming it here would make the tag swallow
			// the start of the next line and leave the line counter behind.
			c = reader.read();
			while (c != -1 && c != '.' && c != '^' && c != ' ' && c != '\t' && c != '\r' && c != '\n') {
				lang.append((char)c);
				c = reader.read();
			}
		}
		else if (c == '^') {
			// Read datatype
			c = reader.read();

			// c should be another '^'
			if (c == -1) {
				throwEOFException();
			}
			else if (c != '^') {
				reportError("Expected '^', found: " + (char)c);
			}

			c = reader.read();

			// c should be a '<'
			if (c == -1) {
				throwEOFException();
			}
			else if (c != '<') {
				reportError("Expected '<', found: " + (char)c);
			}

			c = parseUriRef(c, datatype);
		}

		return c;
	}

	/**
	 * Unescapes the supplied N-Triples encoded URI string and creates a URI
	 * for it. When unescaping fails the problem is reported as an error and,
	 * if that report does not throw, the URI is created from the string as it
	 * was supplied.
	 */
	@Override
	protected URI createURI(String uri)
		throws RDFParseException
	{
		try {
			uri = NTriplesUtil.unescapeString(uri);
		}
		catch (IllegalArgumentException e) {
			reportError(e.getMessage());
		}

		return super.createURI(uri);
	}

	/**
	 * Unescapes the supplied N-Triples encoded label and creates a literal for
	 * it.
	 *
	 * @param label
	 *        The escaped label of the literal.
	 * @param lang
	 *        The language tag; an empty string is treated as "no language".
	 * @param datatype
	 *        The escaped datatype URI; an empty string is treated as "no
	 *        datatype".
	 * @return The literal created by the base class.
	 */
	protected Literal createLiteral(String label, String lang, String datatype)
		throws RDFParseException
	{
		try {
			label = NTriplesUtil.unescapeString(label);
		}
		catch (IllegalArgumentException e) {
			reportError(e.getMessage());
		}

		if (lang.length() == 0) {
			lang = null;
		}

		if (datatype.length() == 0) {
			datatype = null;
		}

		URI dtURI = null;
		if (datatype != null) {
			dtURI = createURI(datatype);
		}

		return super.createLiteral(label, lang, dtURI);
	}

	/**
	 * Overrides {@link RDFParserBase#reportWarning(String)}, adding line number
	 * information to the error.
	 */
	@Override
	protected void reportWarning(String msg)
	{
		reportWarning(msg, lineNo, -1);
	}

	/**
	 * Overrides {@link RDFParserBase#reportError(String)}, adding line number
	 * information to the error.
	 */
	@Override
	protected void reportError(String msg)
		throws RDFParseException
	{
		reportError(msg, lineNo, -1);
	}

	/**
	 * Overrides {@link RDFParserBase#reportFatalError(String)}, adding line
	 * number information to the error.
	 */
	@Override
	protected void reportFatalError(String msg)
		throws RDFParseException
	{
		reportFatalError(msg, lineNo, -1);
	}

	/**
	 * Overrides {@link RDFParserBase#reportFatalError(Exception)}, adding line
	 * number information to the error.
	 */
	@Override
	protected void reportFatalError(Exception e)
		throws RDFParseException
	{
		reportFatalError(e, lineNo, -1);
	}

	/**
	 * Signals that the input ended in the middle of a triple. This method
	 * never returns normally.
	 *
	 * @throws RDFParseException
	 *         Always.
	 */
	private void throwEOFException()
		throws RDFParseException
	{
		final String msg = "Unexpected end of file";

		// Report through the same channel as every other fatal error in this
		// class, so that the current line number is attached to the report.
		reportFatalError(msg);

		// reportFatalError is expected to throw. This explicit throw keeps the
		// "never returns" contract that callers rely on even if it does not.
		throw new RDFParseException(msg);
	}
}