/*
* Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2006.
*
* Licensed under the Aduna BSD-style license.
*/
package org.openrdf.rio.ntriples;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
/**
 * Utility methods for N-Triples encoding/decoding.
 * <p>
 * The parse methods turn the textual N-Triples form of a value
 * (<tt>&lt;uri&gt;</tt>, <tt>_:id</tt> or <tt>"label"</tt> with an optional
 * <tt>@lang</tt> or <tt>^^&lt;datatype&gt;</tt> suffix) into model objects
 * created by a supplied ValueFactory; the <tt>toNTriplesString</tt> methods
 * perform the reverse conversion. String escaping works on Unicode code
 * points, so characters outside the Basic Multilingual Plane are written
 * and read as a single <tt>\Uxxxxxxxx</tt> escape.
 */
public class NTriplesUtil {

    /**
     * Parses an N-Triples value, creates an object for it using the
     * supplied ValueFactory and returns this object.
     *
     * @param nTriplesValue The N-Triples value to parse.
     * @param valueFactory The ValueFactory to use for creating the
     * object.
     * @return An object representing the parsed value.
     * @throws IllegalArgumentException If the supplied value could not be
     * parsed correctly.
     */
    public static Value parseValue(String nTriplesValue, ValueFactory valueFactory)
        throws IllegalArgumentException
    {
        // The first character(s) identify the kind of value.
        if (nTriplesValue.startsWith("<")) {
            return parseURI(nTriplesValue, valueFactory);
        }
        if (nTriplesValue.startsWith("_:")) {
            return parseBNode(nTriplesValue, valueFactory);
        }
        if (nTriplesValue.startsWith("\"")) {
            return parseLiteral(nTriplesValue, valueFactory);
        }
        throw new IllegalArgumentException("Not a legal N-Triples value: " + nTriplesValue);
    }

    /**
     * Parses an N-Triples resource, creates an object for it using
     * the supplied ValueFactory and returns this object.
     *
     * @param nTriplesResource The N-Triples resource to parse.
     * @param valueFactory The ValueFactory to use for creating the
     * object.
     * @return An object representing the parsed resource.
     * @throws IllegalArgumentException If the supplied resource could not be
     * parsed correctly.
     */
    public static Resource parseResource(String nTriplesResource, ValueFactory valueFactory)
        throws IllegalArgumentException
    {
        if (nTriplesResource.startsWith("<")) {
            return parseURI(nTriplesResource, valueFactory);
        }
        if (nTriplesResource.startsWith("_:")) {
            return parseBNode(nTriplesResource, valueFactory);
        }
        throw new IllegalArgumentException(
                "Not a legal N-Triples resource: " + nTriplesResource);
    }

    /**
     * Parses an N-Triples URI, creates an object for it using the
     * supplied ValueFactory and returns this object.
     *
     * @param nTriplesURI The N-Triples URI to parse, including the
     * enclosing angle brackets.
     * @param valueFactory The ValueFactory to use for creating the
     * object.
     * @return An object representing the parsed URI.
     * @throws IllegalArgumentException If the supplied URI could not be
     * parsed correctly.
     */
    public static URI parseURI(String nTriplesURI, ValueFactory valueFactory)
        throws IllegalArgumentException
    {
        if (nTriplesURI.startsWith("<") && nTriplesURI.endsWith(">")) {
            // Strip the angle brackets and decode any escape sequences
            String uri = nTriplesURI.substring(1, nTriplesURI.length() - 1);
            uri = unescapeString(uri);
            return valueFactory.createURI(uri);
        }
        throw new IllegalArgumentException("Not a legal N-Triples URI: " + nTriplesURI);
    }

    /**
     * Parses an N-Triples bNode, creates an object for it using the
     * supplied ValueFactory and returns this object.
     *
     * @param nTriplesBNode The N-Triples bNode to parse, including the
     * leading <tt>_:</tt>.
     * @param valueFactory The ValueFactory to use for creating the
     * object.
     * @return An object representing the parsed bNode.
     * @throws IllegalArgumentException If the supplied bNode could not be
     * parsed correctly.
     */
    public static BNode parseBNode(String nTriplesBNode, ValueFactory valueFactory)
        throws IllegalArgumentException
    {
        if (nTriplesBNode.startsWith("_:")) {
            return valueFactory.createBNode(nTriplesBNode.substring(2));
        }
        throw new IllegalArgumentException("Not a legal N-Triples bNode: " + nTriplesBNode);
    }

    /**
     * Parses an N-Triples literal, creates an object for it using the
     * supplied ValueFactory and returns this object.
     *
     * @param nTriplesLiteral The N-Triples literal to parse.
     * @param valueFactory The ValueFactory to use for creating the
     * object.
     * @return An object representing the parsed literal.
     * @throws IllegalArgumentException If the supplied literal could not be
     * parsed correctly.
     */
    public static Literal parseLiteral(String nTriplesLiteral, ValueFactory valueFactory)
        throws IllegalArgumentException
    {
        if (nTriplesLiteral.startsWith("\"")) {
            // Find string separation points
            int endLabelIdx = findEndOfLabel(nTriplesLiteral);

            if (endLabelIdx != -1) {
                int startLangIdx = nTriplesLiteral.indexOf("@", endLabelIdx);
                int startDtIdx = nTriplesLiteral.indexOf("^^", endLabelIdx);

                if (startLangIdx != -1 && startDtIdx != -1) {
                    if (startDtIdx < startLangIdx) {
                        // The '@' follows the datatype marker, so it is part
                        // of the datatype URI (e.g. <http://user@host/dt>)
                        // and not the start of a language tag.
                        startLangIdx = -1;
                    }
                    else {
                        throw new IllegalArgumentException(
                                "Literals can not have both a language and a datatype");
                    }
                }

                // Get label
                String label = nTriplesLiteral.substring(1, endLabelIdx);
                label = unescapeString(label);

                if (startLangIdx != -1) {
                    // Get language
                    String language = nTriplesLiteral.substring(startLangIdx + 1);
                    return valueFactory.createLiteral(label, language);
                }
                else if (startDtIdx != -1) {
                    // Get datatype
                    String datatype = nTriplesLiteral.substring(startDtIdx + 2);
                    URI dtURI = parseURI(datatype, valueFactory);
                    return valueFactory.createLiteral(label, dtURI);
                }
                else {
                    return valueFactory.createLiteral(label);
                }
            }
        }

        throw new IllegalArgumentException("Not a legal N-Triples literal: " + nTriplesLiteral);
    }

    /**
     * Finds the end of the label in a literal string. This method
     * takes into account that characters can be escaped using
     * backslashes.
     *
     * @param nTriplesLiteral A literal string whose first character is the
     * opening double quote.
     * @return The index of the double quote ending the label, or
     * <tt>-1</tt> if it could not be found.
     */
    private static int findEndOfLabel(String nTriplesLiteral) {
        // First character of literal is guaranteed to be a double
        // quote, start search at second character.
        boolean previousWasBackslash = false;

        for (int i = 1; i < nTriplesLiteral.length(); i++) {
            char c = nTriplesLiteral.charAt(i);

            if (previousWasBackslash) {
                // c was escaped, whatever it is
                previousWasBackslash = false;
            }
            else if (c == '"') {
                return i;
            }
            else if (c == '\\') {
                // start of escape
                previousWasBackslash = true;
            }
        }

        return -1;
    }

    /**
     * Creates an N-Triples string for the supplied value.
     *
     * @param value The value to serialize; must be a Resource or a Literal.
     * @return The N-Triples representation of the value.
     * @throws IllegalArgumentException If the value is neither a Resource
     * nor a Literal.
     */
    public static String toNTriplesString(Value value) {
        if (value instanceof Resource) {
            return toNTriplesString((Resource)value);
        }
        else if (value instanceof Literal) {
            return toNTriplesString((Literal)value);
        }
        else {
            throw new IllegalArgumentException("Unknown value type: " + value.getClass());
        }
    }

    /**
     * Creates an N-Triples string for the supplied resource.
     *
     * @param resource The resource to serialize; must be a URI or a BNode.
     * @return The N-Triples representation of the resource.
     * @throws IllegalArgumentException If the resource is neither a URI
     * nor a BNode.
     */
    public static String toNTriplesString(Resource resource) {
        if (resource instanceof URI) {
            return toNTriplesString((URI)resource);
        }
        else if (resource instanceof BNode) {
            return toNTriplesString((BNode)resource);
        }
        else {
            throw new IllegalArgumentException("Unknown resource type: " + resource.getClass());
        }
    }

    /**
     * Creates an N-Triples string for the supplied URI: the escaped URI
     * string enclosed in angle brackets.
     */
    public static String toNTriplesString(URI uri) {
        return "<" + escapeString(uri.toString()) + ">";
    }

    /**
     * Creates an N-Triples string for the supplied bNode: the bNode's ID
     * prefixed with <tt>_:</tt>.
     */
    public static String toNTriplesString(BNode bNode) {
        return "_:" + bNode.getID();
    }

    /**
     * Creates an N-Triples string for the supplied literal: the escaped,
     * quoted label followed by either the datatype or the language, if
     * present. A datatype takes precedence over a language.
     */
    public static String toNTriplesString(Literal lit) {
        // Do some character escaping on the label:
        StringBuilder sb = new StringBuilder(128);
        sb.append("\"");
        sb.append(escapeString(lit.getLabel()));
        sb.append("\"");

        if (lit.getDatatype() != null) {
            // Append the literal's datatype
            sb.append("^^");
            sb.append(toNTriplesString(lit.getDatatype()));
        }
        else if (lit.getLanguage() != null) {
            // Append the literal's language
            sb.append("@");
            sb.append(lit.getLanguage());
        }

        return sb.toString();
    }

    /**
     * Checks whether the supplied character is a letter or number
     * according to the N-Triples specification.
     * @see #isLetter
     * @see #isNumber
     */
    public static boolean isLetterOrNumber(int c) {
        return isLetter(c) || isNumber(c);
    }

    /**
     * Checks whether the supplied character is a letter according to
     * the N-Triples specification. N-Triples letters are A - Z and a - z.
     */
    public static boolean isLetter(int c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /**
     * Checks whether the supplied character is a number according to
     * the N-Triples specification. N-Triples numbers are 0 - 9.
     */
    public static boolean isNumber(int c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Escapes a Unicode string to an all-ASCII character sequence. Any special
     * characters are escaped using backslashes (<tt>"</tt> becomes <tt>\"</tt>,
     * etc.), and non-ascii/non-printable characters are escaped using Unicode
     * escapes (<tt>\uxxxx</tt> and <tt>\Uxxxxxxxx</tt>).
     * <p>
     * The string is processed per Unicode code point, so a surrogate pair
     * is written as one <tt>\Uxxxxxxxx</tt> escape. An unpaired surrogate
     * is written as a <tt>\uxxxx</tt> escape of its own value.
     *
     * @param label The string to escape.
     * @return The escaped, all-ASCII string.
     */
    public static String escapeString(String label) {
        int labelLength = label.length();
        StringBuilder sb = new StringBuilder(2 * labelLength);

        int i = 0;
        while (i < labelLength) {
            // Read a full code point so that surrogate pairs stay together
            int codePoint = label.codePointAt(i);
            i += Character.charCount(codePoint);

            switch (codePoint) {
                case '\\':
                    sb.append("\\\\");
                    break;
                case '"':
                    sb.append("\\\"");
                    break;
                case '\n':
                    sb.append("\\n");
                    break;
                case '\r':
                    sb.append("\\r");
                    break;
                case '\t':
                    sb.append("\\t");
                    break;
                default:
                    if (codePoint >= 0x20 && codePoint <= 0x7E) {
                        // Printable ASCII is written as-is
                        sb.append((char)codePoint);
                    }
                    else if (codePoint <= 0xFFFF) {
                        sb.append("\\u");
                        sb.append(toHexString(codePoint, 4));
                    }
                    else {
                        sb.append("\\U");
                        sb.append(toHexString(codePoint, 8));
                    }
                    break;
            }
        }

        return sb.toString();
    }

    /**
     * Unescapes an escaped Unicode string. Any Unicode sequences
     * (<tt>\uxxxx</tt> and <tt>\Uxxxxxxxx</tt>) are restored to the
     * value indicated by the hexadecimal argument and any backslash-escapes
     * (<tt>\"</tt>, <tt>\\</tt>, etc.) are decoded to their original form.
     * A <tt>\Uxxxxxxxx</tt> sequence denoting a character above U+FFFF is
     * restored as a surrogate pair.
     *
     * @param s An escaped Unicode string.
     * @return The unescaped string.
     * @throws IllegalArgumentException If the supplied string is not a
     * correctly escaped N-Triples string.
     */
    public static String unescapeString(String s) {
        int backSlashIdx = s.indexOf('\\');

        if (backSlashIdx == -1) {
            // No escaped characters found
            return s;
        }

        int startIdx = 0;
        int sLength = s.length();
        StringBuilder sb = new StringBuilder(sLength);

        while (backSlashIdx != -1) {
            // Copy the unescaped text preceding the backslash
            sb.append(s, startIdx, backSlashIdx);

            if (backSlashIdx + 1 >= sLength) {
                throw new IllegalArgumentException("Unescaped backslash in: " + s);
            }

            char c = s.charAt(backSlashIdx + 1);

            switch (c) {
                case 't':
                    sb.append('\t');
                    startIdx = backSlashIdx + 2;
                    break;
                case 'r':
                    sb.append('\r');
                    startIdx = backSlashIdx + 2;
                    break;
                case 'n':
                    sb.append('\n');
                    startIdx = backSlashIdx + 2;
                    break;
                case '"':
                    sb.append('"');
                    startIdx = backSlashIdx + 2;
                    break;
                case '\\':
                    sb.append('\\');
                    startIdx = backSlashIdx + 2;
                    break;
                case 'u':
                    // \\uxxxx
                    startIdx = appendUnicodeEscape(s, backSlashIdx, 4, sb);
                    break;
                case 'U':
                    // \\Uxxxxxxxx
                    startIdx = appendUnicodeEscape(s, backSlashIdx, 8, sb);
                    break;
                default:
                    throw new IllegalArgumentException("Unescaped backslash in: " + s);
            }

            backSlashIdx = s.indexOf('\\', startIdx);
        }

        sb.append(s, startIdx, sLength);

        return sb.toString();
    }

    /**
     * Decodes the Unicode escape sequence that starts at the specified
     * backslash and appends the resulting character(s) to the supplied
     * StringBuilder.
     *
     * @param s The escaped string being decoded.
     * @param backSlashIdx The index of the backslash starting the escape;
     * the character after it is the <tt>u</tt> or <tt>U</tt> marker.
     * @param nofDigits The number of hexadecimal digits the escape must
     * have (4 or 8).
     * @param sb The StringBuilder to append the decoded code point to.
     * @return The index of the first character after the escape sequence.
     * @throws IllegalArgumentException If the sequence is incomplete,
     * contains a non-hexadecimal character or denotes a value that is not
     * a valid Unicode code point.
     */
    private static int appendUnicodeEscape(String s, int backSlashIdx, int nofDigits,
            StringBuilder sb)
    {
        int hexStartIdx = backSlashIdx + 2;
        int hexEndIdx = hexStartIdx + nofDigits;

        if (hexEndIdx > s.length()) {
            throw new IllegalArgumentException(
                    "Incomplete Unicode escape sequence in: " + s);
        }

        // Digits are checked one by one; a sign character, which
        // Integer.parseInt would accept, is rejected here.
        int codePoint = 0;
        boolean legal = true;
        for (int i = hexStartIdx; i < hexEndIdx && legal; i++) {
            int digit = Character.digit(s.charAt(i), 16);
            if (digit < 0) {
                legal = false;
            }
            else {
                codePoint = (codePoint << 4) | digit;
            }
        }

        // Eight hex digits can denote values beyond U+10FFFF (or wrap to a
        // negative int); isValidCodePoint rejects both.
        if (!legal || !Character.isValidCodePoint(codePoint)) {
            throw new IllegalArgumentException(
                    "Illegal Unicode escape sequence '\\" + s.charAt(backSlashIdx + 1)
                    + s.substring(hexStartIdx, hexEndIdx) + "' in: " + s);
        }

        // Appends a surrogate pair for code points above U+FFFF
        sb.appendCodePoint(codePoint);

        return hexEndIdx;
    }

    /**
     * Converts a decimal value to an upper-case hexadecimal string
     * representation of the specified length. Values whose hexadecimal
     * form is shorter than the requested length are left-padded with
     * zeros; longer values are returned unshortened.
     *
     * @param decimal A decimal value.
     * @param stringLength The minimum length of the resulting string.
     * @return The zero-padded hexadecimal representation of the value.
     */
    public static String toHexString(int decimal, int stringLength) {
        StringBuilder sb = new StringBuilder(stringLength);

        String hexVal = Integer.toHexString(decimal).toUpperCase();

        // insert zeros if hexVal has less than stringLength characters:
        int nofZeros = stringLength - hexVal.length();
        for (int i = 0; i < nofZeros; i++) {
            sb.append('0');
        }

        sb.append(hexVal);

        return sb.toString();
    }
}