/**
* Copyright 2005-2014 Restlet
*
* The contents of this file are subject to the terms of one of the following
* open source licenses: Apache 2.0 or or EPL 1.0 (the "Licenses"). You can
* select the license that you prefer but you may not use this file except in
* compliance with one of these Licenses.
*
* You can obtain a copy of the Apache 2.0 license at
* http://www.opensource.org/licenses/apache-2.0
*
* You can obtain a copy of the EPL 1.0 license at
* http://www.opensource.org/licenses/eclipse-1.0
*
* See the Licenses for the specific language governing permissions and
* limitations under the Licenses.
*
* Alternatively, you can obtain a royalty free commercial license with less
* limitations, transferable or non-transferable, directly at
* http://restlet.com/products/restlet-framework
*
* Restlet is a registered trademark of Restlet S.A.S.
*/
package org.restlet.ext.rdf.internal.turtle;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.restlet.data.Reference;
import org.restlet.ext.rdf.Graph;
import org.restlet.ext.rdf.GraphHandler;
import org.restlet.ext.rdf.Literal;
import org.restlet.ext.rdf.internal.RdfConstants;
import org.restlet.ext.rdf.internal.ntriples.RdfNTriplesReader;
import org.restlet.representation.Representation;
/**
* Handler of RDF content according to the RDF Turtle notation.
*
* @author Thierry Boileau
*/
public class RdfTurtleReader extends RdfNTriplesReader {
/**
* Counter used to identify inner blank nodes. Its current value is appended
* to the "#_bn" prefix, then incremented, each time
* {@link #newBlankNodeId()} is called.
*/
private int blankNodeId = 0;
/**
* The current context object. It is created by the constructor, which
* registers the default keywords in it, and it is handed over to the
* directive and statement parsers as well as to the tokens built while
* parsing blank nodes and lists.
*/
private Context context;
/**
* Constructor.
*
* @param rdfRepresentation
* The representation to read.
* @param graphHandler
* The graph handler invoked during the parsing.
* @throws IOException
*/
public RdfTurtleReader(Representation rdfN3Representation,
GraphHandler graphHandler) throws IOException {
super(rdfN3Representation, graphHandler);
this.context = new Context();
context.getKeywords().addAll(
Arrays.asList("a", "is", "of", "this", "has"));
}
/**
 * Loops over the given list of lexical units and generates the adequate
 * calls to the link* methods.
 * <p>
 * The units are consumed with a three-position counter: position 1 is the
 * subject, position 2 the predicate and position 3 the object. The
 * following units alter that sequence:
 * <ul>
 * <li>"," in first position keeps the current subject and predicate; the
 * next unit is read as an object.</li>
 * <li>";" in first position keeps the current subject; the next unit is
 * read as a predicate.</li>
 * <li>"is" in second position is skipped and makes the next link swap its
 * subject and object; "of" in third position is skipped.</li>
 * <li>"has" in second position is skipped.</li>
 * <li>"a" in second position stands for
 * {@link RdfConstants#PREDICATE_TYPE}.</li>
 * <li>"!" in second position links the current subject, through the
 * predicate that follows, to a new blank node which becomes the current
 * subject.</li>
 * <li>"^" in second position links a new blank node, through the predicate
 * that follows, to the current subject; the blank node becomes the current
 * subject.</li>
 * </ul>
 * A "!" or "^" operator that is the last unit of the list (malformed
 * statement) is reported with a warning instead of raising an
 * {@link IndexOutOfBoundsException}.
 *
 * @see GraphHandler#link(Graph, Reference, Reference)
 * @see GraphHandler#link(Reference, Reference, Literal)
 * @see GraphHandler#link(Reference, Reference, Reference)
 * @param lexicalUnits
 *            The list of lexical units used to generate the links.
 */
protected void generateLinks(List<LexicalUnit> lexicalUnits) {
    Object currentSubject = null;
    Reference currentPredicate = null;
    Object currentObject = null;
    // Position of the current unit inside the triple being built
    // (1 = subject, 2 = predicate, 3 = object).
    int nbTokens = 0;
    boolean swapSubjectObject = false;

    for (int i = 0; i < lexicalUnits.size(); i++) {
        LexicalUnit lexicalUnit = lexicalUnits.get(i);
        String value = lexicalUnit.getValue();
        nbTokens++;

        switch (nbTokens) {
        case 1:
            if (",".equals(value)) {
                // Same subject and predicate: the next unit is an object.
                nbTokens++;
            } else if (!";".equals(value)) {
                currentSubject = lexicalUnit.resolve();
            }
            // After ";" the subject is kept and a predicate is expected.
            break;
        case 2:
            if ("is".equalsIgnoreCase(value)) {
                // Skip the keyword, the predicate is still to come.
                nbTokens--;
                swapSubjectObject = true;
            } else if ("has".equalsIgnoreCase(value)) {
                // Skip the keyword, the predicate is still to come.
                nbTokens--;
            } else if ("a".equalsIgnoreCase(value)) {
                currentPredicate = RdfConstants.PREDICATE_TYPE;
            } else if ("!".equalsIgnoreCase(value)) {
                if (i + 1 >= lexicalUnits.size()) {
                    // Nothing follows the operator: no predicate to read.
                    warnDanglingPathOperator(value);
                    break;
                }
                currentObject = new BlankNodeToken(newBlankNodeId())
                        .resolve();
                currentPredicate = getPredicate(lexicalUnits.get(++i));
                this.link(currentSubject, currentPredicate, currentObject);
                // The new blank node is the subject of what follows.
                currentSubject = currentObject;
                nbTokens = 1;
            } else if ("^".equalsIgnoreCase(value)) {
                if (i + 1 >= lexicalUnits.size()) {
                    // Nothing follows the operator: no predicate to read.
                    warnDanglingPathOperator(value);
                    break;
                }
                currentObject = currentSubject;
                currentPredicate = getPredicate(lexicalUnits.get(++i));
                currentSubject = new BlankNodeToken(newBlankNodeId())
                        .resolve();
                this.link(currentSubject, currentPredicate, currentObject);
                nbTokens = 1;
            } else {
                currentPredicate = getPredicate(lexicalUnit);
            }
            break;
        case 3:
            if ("of".equalsIgnoreCase(value)) {
                // Skip the keyword, the object is still to come.
                nbTokens--;
            } else {
                if (swapSubjectObject) {
                    this.link(lexicalUnit.resolve(), currentPredicate,
                            currentSubject);
                } else {
                    currentObject = lexicalUnit.resolve();
                    this.link(currentSubject, currentPredicate,
                            currentObject);
                }
                // The triple is complete, start a new one.
                nbTokens = 0;
                swapSubjectObject = false;
            }
            break;
        default:
            break;
        }
    }
}

/**
 * Logs a warning about a path operator which ends a statement without
 * being followed by a predicate.
 *
 * @param operator
 *            The path operator found ("!" or "^").
 */
private void warnDanglingPathOperator(String operator) {
    org.restlet.Context.getCurrentLogger().warning(
            "The RDF Turtle document contains a path operator \""
                    + operator
                    + "\" which is not followed by a predicate.");
    org.restlet.Context.getCurrentLogger().warning(getParsingMessage());
}
/**
 * Gives access to the context currently used by this reader.
 *
 * @return The current context.
 */
protected Context getContext() {
    return this.context;
}
/**
 * Resolves the given lexical unit and converts the outcome into a
 * predicate reference.
 *
 * @param lexicalUnit
 *            The lexical unit to get as a predicate.
 * @return The resolved value when it is a {@link Reference}, a new
 *         {@link Reference} built from it when it is a String, null
 *         otherwise.
 */
private Reference getPredicate(LexicalUnit lexicalUnit) {
    final Object resolved = lexicalUnit.resolve();

    if (resolved instanceof Reference) {
        return (Reference) resolved;
    }
    if (resolved instanceof String) {
        return new Reference((String) resolved);
    }
    return null;
}
/**
 * Indicates whether the given character ends a token: either a white space
 * or one of the characters ^ ! = &lt; " [ ] ( ) . ; , @
 *
 * @param c
 *            The given character to check.
 * @return true if the given character is a delimiter.
 */
@Override
protected boolean isDelimiter(int c) {
    if (isWhiteSpace(c)) {
        return true;
    }

    switch (c) {
    case '^':
    case '!':
    case '=':
    case '<':
    case '"':
    case '[':
    case ']':
    case '(':
    case ')':
    case '.':
    case ';':
    case ',':
    case '@':
        return true;
    default:
        return false;
    }
}
/**
 * Forwards a parsed link to the graph handler, choosing the handler method
 * that matches the runtime types of the source and of the target.
 * <p>
 * A source which is neither a {@link Reference} nor a {@link Graph} is
 * ignored. A target which is neither a {@link Reference} nor a
 * {@link Literal} is reported with two warnings (the issue, then the
 * parsing message) and no link is generated.
 *
 * @param source
 *            The source or subject of the link.
 * @param typeRef
 *            The type reference of the link.
 * @param target
 *            The target or object of the link.
 */
protected void link(Object source, Reference typeRef, Object target) {
    final boolean fromReference = source instanceof Reference;
    if (!fromReference && !(source instanceof Graph)) {
        // Any other kind of source is dropped without notice.
        return;
    }

    if (target instanceof Reference) {
        final Reference object = (Reference) target;
        if (fromReference) {
            getGraphHandler().link((Reference) source, typeRef, object);
        } else {
            getGraphHandler().link((Graph) source, typeRef, object);
        }
    } else if (target instanceof Literal) {
        final Literal object = (Literal) target;
        if (fromReference) {
            getGraphHandler().link((Reference) source, typeRef, object);
        } else {
            getGraphHandler().link((Graph) source, typeRef, object);
        }
    } else {
        org.restlet.Context
                .getCurrentLogger()
                .warning(
                        "The RDF Turtle document contains an object which is neither a Reference nor a literal: "
                                + target);
        org.restlet.Context.getCurrentLogger().warning(getParsingMessage());
    }
}
/**
 * Builds the identifier of a new blank node from the "#_bn" prefix and the
 * current value of the blank node counter, which is then incremented.
 *
 * @return The identifier of a new blank node.
 */
protected String newBlankNodeId() {
    final int id = this.blankNodeId;
    this.blankNodeId = id + 1;
    return "#_bn" + id;
}
/**
 * Parses the current representation. After each run of white spaces, the
 * current character selects the action: '@' starts a directive, '#' a
 * comment, '.' is stepped over and anything else starts a statement. The
 * loop stops once the end of the file is reached.
 *
 * @throws IOException
 */
@Override
public void parse() throws IOException {
    // Init the reading.
    step();

    do {
        consumeWhiteSpaces();

        final int current = getChar();
        if (current == '@') {
            parseDirective(this.context);
        } else if (current == '#') {
            parseComment();
        } else if (current == '.') {
            step();
        } else {
            parseStatement(this.context);
        }
    } while (!isEndOfFile(getChar()));
}
/**
 * Parses the content of the given blank node. Lexical units are collected
 * into the blank node until either the closing ']' or the end of the file
 * is met; a closing ']' is then stepped over.
 *
 * @param blankNode
 *            The blank node to parse.
 * @throws IOException
 */
protected void parseBlankNode(BlankNodeToken blankNode) throws IOException {
    step();

    do {
        consumeWhiteSpaces();

        final int current = getChar();
        LexicalUnit unit = null;
        switch (current) {
        case '(':
            unit = new ListToken(this, this.context);
            break;
        case '<':
            stepBack();
            unit = new UriToken(this, this.context);
            break;
        case '_':
            unit = new BlankNodeToken(this.parseToken());
            break;
        case '"':
            unit = new StringToken(this, this.context);
            break;
        case '[':
            unit = new BlankNodeToken(this, this.context);
            break;
        case ']':
            // End of the blank node, handled after the loop.
            break;
        case '.':
            step();
            break;
        default:
            if (!isEndOfFile(current)) {
                unit = new Token(this, this.context);
            }
            break;
        }

        if (unit != null) {
            blankNode.getLexicalUnits().add(unit);
        }
    } while (!(isEndOfFile(getChar()) || getChar() == ']'));

    if (getChar() == ']') {
        // Set the cursor at the right of the blank node token.
        step();
    }
}
/**
 * Parses the current directive and updates the given context accordingly:
 * "base" sets or extends the base reference, "prefix" registers a prefix
 * and its URI, "keywords" replaces the list of keywords. Any other
 * directive is reported with a warning. In every case the statement is
 * consumed afterwards.
 *
 * @param context
 *            The context to update.
 * @throws IOException
 */
protected void parseDirective(Context context) throws IOException {
    // Remove the leading '@' character.
    step();
    discard();

    final String directive = parseToken();

    if ("base".equalsIgnoreCase(directive)) {
        consumeWhiteSpaces();
        final String baseUri = parseUri();
        final Reference baseRef = new Reference(baseUri);
        if (!baseRef.isRelative()) {
            context.setBase(baseRef);
        } else {
            // A relative base is appended to the current one.
            context.getBase().addSegment(baseUri);
        }
    } else if ("prefix".equalsIgnoreCase(directive)) {
        consumeWhiteSpaces();
        final String prefix = parseToken();
        consumeWhiteSpaces();
        final String namespaceUri = parseUri();
        context.getPrefixes().put(prefix, namespaceUri);
    } else if ("keywords".equalsIgnoreCase(directive)) {
        consumeWhiteSpaces();
        // Read up to the '.' character or the end of the file.
        int current = step();
        while (!isEndOfFile(current) && current != '.') {
            current = step();
        }
        final String declaration = getCurrentToken();
        final String[] declaredKeywords = declaration.split(",");
        context.getKeywords().clear();
        for (int k = 0; k < declaredKeywords.length; k++) {
            context.getKeywords().add(declaredKeywords[k].trim());
        }
    } else {
        org.restlet.Context.getCurrentLogger().warning(
                "@" + directive + " directive is not supported.");
    }

    consumeStatement();
}
/**
 * Parses the content of the given list token. Lexical units are collected
 * into the list until either the closing ')' or the end of the file is
 * met; a closing ')' is then stepped over.
 *
 * @param listToken
 *            The list token to parse.
 * @throws IOException
 */
protected void parseList(ListToken listToken) throws IOException {
    step();

    do {
        consumeWhiteSpaces();

        final int current = getChar();
        LexicalUnit item = null;
        switch (current) {
        case '(':
            item = new ListToken(this, this.context);
            break;
        case '<':
            stepBack();
            item = new UriToken(this, this.context);
            break;
        case '_':
            item = new BlankNodeToken(parseToken());
            break;
        case '"':
            item = new StringToken(this, this.context);
            break;
        case '[':
            item = new BlankNodeToken(this, this.context);
            break;
        case ')':
            // End of the list, handled after the loop.
            break;
        default:
            if (!isEndOfFile(current)) {
                item = new Token(this, this.context);
            }
            break;
        }

        if (item != null) {
            listToken.getLexicalUnits().add(item);
        }
    } while (!(isEndOfFile(getChar()) || getChar() == ')'));

    if (getChar() == ')') {
        // Set the cursor at the right of the list token.
        step();
    }
}
/**
* Reads the current statement until its end, and parses it.
* <p>
* Lexical units are collected in a list until the current character is
* either '.' or the end of the file, then the list is handed over to
* {@link #generateLinks(List)}. The final '.' is not consumed here; the
* main loop of {@link #parse()} steps over it.
*
* @param context
*            The current context, passed to the tokens created while
*            reading the statement.
* @throws IOException
*/
protected void parseStatement(Context context) throws IOException {
List<LexicalUnit> lexicalUnits = new ArrayList<LexicalUnit>();
do {
consumeWhiteSpaces();
// The current character tells which kind of lexical unit starts here.
switch (getChar()) {
case '(':
// Start of a list.
lexicalUnits.add(new ListToken(this, context));
break;
case '<':
// Start of a URI.
// NOTE(review): the cursor is moved back before the URI token is
// created, presumably because the URI parsing expects to start ahead
// of the '<' character - confirm in UriToken / parseUri().
stepBack();
lexicalUnits.add(new UriToken(this, context));
break;
case '_':
// Labelled blank node: its name is the next raw token.
lexicalUnits.add(new BlankNodeToken(parseToken()));
break;
case '"':
// Start of a string literal.
lexicalUnits.add(new StringToken(this, context));
break;
case '[':
// Start of an anonymous blank node.
lexicalUnits.add(new BlankNodeToken(this, context));
break;
case '!':
// Path operator, recorded as a plain token and then skipped.
lexicalUnits.add(new Token("!"));
step();
discard();
break;
case '^':
// Path operator, recorded as a plain token and then skipped.
lexicalUnits.add(new Token("^"));
step();
discard();
break;
case '@':
// Remove the leading '@' character.
step();
discard();
lexicalUnits.add(new Token(this, context));
discard();
break;
case ';':
// Separator, skipped and then recorded as a plain token.
step();
discard();
lexicalUnits.add(new Token(";"));
break;
case ',':
// Separator, skipped and then recorded as a plain token.
step();
discard();
lexicalUnits.add(new Token(","));
break;
case '#':
parseComment();
break;
case '.':
// End of the statement: nothing to add, the loop condition stops.
break;
default:
// Any other character starts a plain token, unless the end of the
// file has been reached.
if (!isEndOfFile(getChar())) {
lexicalUnits.add(new Token(this, context));
}
break;
}
} while (!isEndOfFile(getChar()) && getChar() != '.');
// Generate the links
generateLinks(lexicalUnits);
}
/**
 * Parses the given String token. The two characters that follow the
 * opening quote tell whether the string is delimited by three quotes
 * (multi lines) or by a single one. Once the value is read, an optional
 * language ("@") or type ("^^") suffix is parsed.
 *
 * @param stringToken
 *            The String token to parse.
 * @throws IOException
 */
protected void parseString(StringToken stringToken) throws IOException {
    // Answer the question : is it multi lines or not?
    // That is to say, is it delimited by 3 quotes or not?
    final int first = step();
    final int second = step();
    final boolean tripleQuoted = (first == '"') && (second == '"');

    if (tripleQuoted) {
        stringToken.setMultiLines(true);
        step();
        discard();

        // Number of consecutive '"' characters met so far.
        int quoteRun = 0;
        for (int ch = getChar(); !isEndOfFile(ch); ch = step()) {
            quoteRun = (ch == '"') ? quoteRun + 1 : 0;
            if (quoteRun == 3) {
                // End of the string reached.
                stepBack(2);
                stringToken.setValue(getCurrentToken());
                step(3);
                discard();
                break;
            }
        }
    } else {
        stringToken.setMultiLines(false);
        stepBack(1);
        discard();

        // Move forward to the closing quote or the end of the file.
        int ch = getChar();
        while (!(isEndOfFile(ch) || ch == '"')) {
            ch = step();
        }
        stringToken.setValue(getCurrentToken());
        step();
        discard();
    }

    // Parse the type and language of literals
    switch (getChar()) {
    case '@':
        stringToken.setLanguage(parseToken());
        break;
    case '^':
        if (step() == '^') {
            stringToken.setType(parseToken());
        } else {
            // Single '^': not a type marker, restore the cursor.
            stepBack();
        }
        break;
    default:
        break;
    }
}
/**
 * Parses the given token: steps forward until a delimiter or the end of
 * the file is met, then stores the current token text as the value of the
 * token.
 *
 * @param token
 *            The token to parse.
 * @throws IOException
 */
protected void parseToken(Token token) throws IOException {
    int current = step();
    while (!(isEndOfFile(current) || isDelimiter(current))) {
        current = step();
    }
    token.setValue(getCurrentToken());
}
/**
 * Parses the given URI token: reads a URI from the current position and
 * stores it as the value of the token.
 *
 * @param uriToken
 *            The URI token to parse.
 * @throws IOException
 */
protected void parseUri(UriToken uriToken) throws IOException {
    final String uri = parseUri();
    uriToken.setValue(uri);
}
}