/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.rdf.model.impl;

import org.apache.jena.graph.GraphEvents ;
import org.apache.jena.rdf.model.* ;
import org.apache.jena.shared.* ;
import org.apache.jena.util.FileUtils ;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.URL;
import java.io.*;
import java.util.*;

/**
 * N-Triple Reader.
 *
 * <p>A hand-written, character-at-a-time parser that reads statements of the
 * form {@code subject predicate object .} and adds them to a {@link Model}.
 * Problems are reported through the configured {@link RDFErrorHandler}; a
 * statement that fails to parse is skipped up to the next newline and counted,
 * and a parse that counted at least one such statement ends with a
 * {@link SyntaxError}.
 */
public class NTripleReader extends Object implements RDFReader {
    static final Logger log = LoggerFactory.getLogger(NTripleReader.class);

    /** Initial capacity of the buffers used to accumulate URIs, names and literals. */
    private static final int sbLength = 200;

    /** Model that parsed statements are added to; set by each {@code read} call. */
    private Model model = null;

    /** Blank node label to resource map; kept for the lifetime of this reader. */
    private final Hashtable<String, Resource> anons = new Hashtable<>();

    /** Character source of the parse in progress. */
    private IStream in = null;

    /** Set when the statement currently being parsed has hit an error. */
    private boolean inErr = false;

    /** Number of statements skipped because of errors in the current parse. */
    private int errCount = 0;

    private RDFErrorHandler errorHandler = new RDFDefaultErrorHandler();

    /**
     * Already with ": " at end for error messages.
     */
    private String base;

    NTripleReader() {
    }

    /**
     * Reads N-Triples from a byte stream, decoding it as UTF-8.
     *
     * @param model the model to add the statements to
     * @param in    the byte stream to parse
     * @param base  label used as a prefix of error messages; may be {@code null}
     */
    @Override
    public void read(Model model, InputStream in, String base) {
        // N-Triples must be in ASCII, we permit UTF-8.
        read(model, FileUtils.asUTF8(in), base);
    }

    /**
     * Reads N-Triples from a character stream.
     *
     * @param model  the model to add the statements to
     * @param reader the characters to parse; wrapped in a {@link BufferedReader}
     *               if it is not one already
     * @param base   label used as a prefix of error messages; may be {@code null}
     * @throws SyntaxError if at least one statement had to be skipped
     */
    @Override
    public void read(Model model, Reader reader, String base) {
        if (!(reader instanceof BufferedReader)) {
            reader = new BufferedReader(reader);
        }
        this.model = model;
        this.base = base == null ? "" : (base + ": ");
        // Start from a clean count so that errors from an earlier parse with
        // this reader do not make a later, well-formed document fail.
        errCount = 0;
        in = new IStream(reader);
        readRDF();
        if (errCount != 0) {
            throw new SyntaxError( "unknown" );
        }
    }

    /**
     * Opens {@code url}, reads N-Triples from it as UTF-8, and closes the stream.
     *
     * @param model the model to add the statements to
     * @param url   the location to read; also used as the error-message prefix
     * @throws SyntaxError   if the parse failed after at least one statement was skipped
     * @throws JenaException wrapping any other failure (bad URL, I/O error, ...)
     */
    @Override
    public void read(Model model, String url) {
        // try-with-resources guarantees the URL stream is closed; delegating to
        // the InputStream overload gives the same UTF-8 decoding as every other
        // byte-based entry point instead of the platform default charset.
        try (InputStream stream = new URL(url).openStream()) {
            read(model, stream, url);
        } catch (Exception e) {
            // Same outcome as before, without throwing from a finally block:
            // syntax errors win, anything else is wrapped.
            if (errCount != 0) {
                throw new SyntaxError( "unknown" );
            }
            throw new JenaException(e);
        }
    }

    /**
     * This reader has no configurable properties: every call reports an
     * {@link UnknownPropertyException} to the error handler.
     *
     * @return always {@code null}
     */
    @Override
    public Object setProperty(String propName, Object propValue) {
        errorHandler.error(new UnknownPropertyException( propName ));
        return null;
    }

    /**
     * Installs a new error handler.
     *
     * @param errHandler the handler to use from now on
     * @return the handler that was installed before
     */
    @Override
    public RDFErrorHandler setErrorHandler(RDFErrorHandler errHandler) {
        RDFErrorHandler old = this.errorHandler;
        this.errorHandler = errHandler;
        return old;
    }

    /**
     * Runs the parse bracketed by {@code startRead}/{@code finishRead} model
     * events; {@code finishRead} is sent even if the parse throws.
     */
    protected void readRDF() {
        try {
            model.notifyEvent( GraphEvents.startRead );
            unwrappedReadRDF();
        } finally {
            model.notifyEvent( GraphEvents.finishRead );
        }
    }

    /**
     * The statement loop. The inner loop parses statements until one fails;
     * the outer loop then counts the error, discards input up to and including
     * the next newline, and resumes.
     */
    protected final void unwrappedReadRDF() {
        Resource subject;
        Property predicate = null;
        RDFNode object;

        while (!in.eof()) {
            while (!in.eof()) {
                inErr = false;

                skipWhiteSpace();
                if (in.eof()) {
                    return;
                }

                subject = readResource();
                if (inErr) break;

                skipWhiteSpace();
                try {
                    Resource r = readResource();
                    if (inErr) break;
                    predicate = model.createProperty(r.getURI());
                } catch (Exception e1) {
                    errorHandler.fatalError(e1);
                    // A handler that returns instead of throwing must not let
                    // the statement be added with a stale or null predicate.
                    inErr = true;
                }
                if (inErr) break;

                skipWhiteSpace();
                object = readNode();
                if (inErr) break;

                skipWhiteSpace();
                if (badEOF()) break;

                if (!expect(".")) break;

                try {
                    model.add(subject, predicate, object);
                } catch (Exception e2) {
                    errorHandler.fatalError(e2);
                }
            }
            if (inErr) {
                errCount++;
                // Error recovery: drop the rest of the offending line.
                while (!in.eof() && in.readChar() != '\n') {
                }
            }
        }
    }

    /**
     * Reads a blank node ({@code _:label}) or a URI reference ({@code <uri>}).
     *
     * @return the resource, or {@code null} after an error has been reported
     */
    public Resource readResource() {
        char inChar = in.readChar();
        if (badEOF()) {
            // The error handler has already been told; this is diagnostics only.
            log.debug("Bad EOF while reading a resource");
            return null;
        }

        if (inChar == '_') { // anon resource
            if (!expect(":")) return null;
            String name = readName();
            if (name == null) {
                syntaxError("expected bNode label");
                return null;
            }
            return lookupResource(name);
        } else if (inChar == '<') { // uri
            String uri = readURI();
            if (uri == null) {
                inErr = true;
                return null;
            }
            inChar = in.readChar();
            if (inChar != '>') {
                syntaxError("expected '>'");
                return null;
            }
            return model.createResource(uri);
        } else {
            syntaxError("unexpected input");
            return null;
        }
    }

    /**
     * Reads an object term, choosing literal or resource from the next character.
     *
     * @return the node, or {@code null} after an error has been reported
     */
    public RDFNode readNode() {
        skipWhiteSpace();
        switch (in.nextChar()) {
            case '"' :
                return readLiteral();
            case '<' :
            case '_' :
                return readResource();
            default :
                syntaxError("unexpected input");
                return null;
        }
    }

    /**
     * Reads a quoted literal with its optional language tag ({@code @lang}, or
     * the deprecated {@code -lang}) and optional datatype ({@code ^^<uri>}).
     * Recognised escapes are {@code \n \r \t \\ \"} and {@code \}{@code uXXXX}.
     *
     * @return the literal, or {@code null} after an error has been reported
     */
    protected Literal readLiteral() {
        StringBuilder lit = new StringBuilder(sbLength);

        if (!expect("\"")) return null;

        while (true) {
            char inChar = in.readChar();
            if (badEOF()) return null;

            if (inChar == '\\') {
                char c = in.readChar();
                if (in.eof()) {
                    inErr = true;
                    return null;
                }
                if (c == 'n') {
                    inChar = '\n';
                } else if (c == 'r') {
                    inChar = '\r';
                } else if (c == 't') {
                    inChar = '\t';
                } else if (c == '\\' || c == '"') {
                    inChar = c;
                } else if (c == 'u') {
                    inChar = readUnicode4Escape();
                    if (inErr) return null;
                } else {
                    syntaxError("illegal escape sequence '" + c + "'");
                    return null;
                }
            } else if (inChar == '"') {
                // Closing quote: pick up language tag and/or datatype.
                String lang;
                if ('@' == in.nextChar()) {
                    expect("@");
                    lang = readLang();
                } else if ('-' == in.nextChar()) {
                    expect("-");
                    deprecated("Language tags should be introduced with @ not -.");
                    lang = readLang();
                } else {
                    lang = "";
                }

                if ('^' == in.nextChar()) {
                    String datatypeURI = null;
                    if (!expect("^^<")) {
                        syntaxError("ill-formed datatype");
                        return null;
                    }
                    datatypeURI = readURI();
                    if (datatypeURI == null || !expect(">")) return null;
                    if ( lang.length() > 0 )
                        deprecated("Language tags are not permitted on typed literals.");

                    return model.createTypedLiteral( lit.toString(), datatypeURI);
                } else {
                    return model.createLiteral(lit.toString(), lang);
                }
            }
            // Test for some raw characters
            else if ( inChar == '\n' || inChar == '\r' ) {
                // Reported as an error (which sets inErr) so that the caller
                // skips the statement instead of adding a null object.
                syntaxError("Raw NL or CR not permitted in N-Triples data");
                return null;
            }
            lit.append(inChar);
        }
    }

    /**
     * Reads the four hex digits of a unicode escape (the introducing backslash
     * and {@code u} have already been consumed).
     *
     * @return the decoded character, or 0 after an error has been reported
     */
    private char readUnicode4Escape() {
        char[] buf = new char[] {
            in.readChar(), in.readChar(), in.readChar(), in.readChar()};
        if (badEOF()) {
            return 0;
        }
        try {
            return (char) Integer.parseInt(new String(buf), 16);
        } catch (NumberFormatException e) {
            syntaxError("bad unicode escape sequence");
            return 0;
        }
    }

    /** Sends a warning to the error handler; does not mark the statement as bad. */
    private void deprecated(String s) {
        errorHandler.warning(
            new SyntaxError(
                syntaxErrorMessage(
                    "Deprecation warning",
                    s,
                    in.getLinepos(),
                    in.getCharpos())));
    }

    /** Sends an error to the error handler and marks the current statement as bad. */
    private void syntaxError(String s) {
        errorHandler.error(
            new SyntaxError(
                syntaxErrorMessage(
                    "Syntax error",
                    s,
                    in.getLinepos(),
                    in.getCharpos())));
        inErr = true;
    }

    /**
     * Reads a language tag: everything up to, but not including, the next
     * whitespace, {@code '.'} or {@code '^'}, or the end of input.
     */
    private String readLang() {
        StringBuilder lang = new StringBuilder(15);

        // The eof test matters: at end of input nextChar()/readChar() keep
        // returning '\000', which is not a terminator, so without it a
        // document ending right after the tag would never leave this loop.
        while (!in.eof()) {
            char inChar = in.nextChar();
            if (Character.isWhitespace(inChar)
                || inChar == '.'
                || inChar == '^') {
                break;
            }
            lang.append(in.readChar());
        }
        return lang.toString();
    }

    /**
     * Reports "premature end of file" if the input is exhausted.
     *
     * @return the current error flag, so it is also {@code true} when an
     *         earlier error is still pending
     */
    private boolean badEOF() {
        if (in.eof()) {
            syntaxError("premature end of file");
        }
        return inErr;
    }

    /**
     * Reads URI characters up to, but not including, the closing {@code '>'},
     * decoding unicode escapes.
     *
     * @return the URI text, or {@code null} after an error has been reported
     */
    protected String readURI() {
        StringBuilder uri = new StringBuilder(sbLength);

        while (in.nextChar() != '>') {
            char inChar = in.readChar();

            if (inChar == '\\') {
                // A failed expect() sets inErr, which badEOF() below returns.
                expect("u");
                inChar = readUnicode4Escape();
            }
            if (badEOF()) {
                return null;
            }
            uri.append(inChar);
        }
        return uri.toString();
    }

    /**
     * Reads a blank node label: a run of letters, digits and {@code '-'}.
     *
     * @return the label (possibly empty), or {@code null} if the input ended
     */
    protected String readName() {
        StringBuilder name = new StringBuilder(sbLength);
        char nextChar;

        while (Character.isLetterOrDigit(nextChar = in.nextChar())
               || '-' == nextChar) {
            name.append(in.readChar());
            if (badEOF()) return null;
        }
        return name.toString();
    }

    /**
     * Consumes {@code str} from the input, reporting a syntax error at the
     * first character that does not match.
     *
     * @return {@code true} if all of {@code str} was read
     */
    private boolean expect(String str) {
        for (int i = 0; i < str.length(); i++) {
            char want = str.charAt(i);

            if (badEOF()) return false;

            char inChar = in.readChar();

            if (inChar != want) {
                syntaxError("expected \"" + str + "\"");
                return false;
            }
        }
        return true;
    }

    /** Skips whitespace and {@code #} comments, which run to the end of the line. */
    protected void skipWhiteSpace() {
        while (Character.isWhitespace(in.nextChar()) || in.nextChar() == '#') {
            char inChar = in.readChar();
            if (in.eof()) {
                return;
            }
            if (inChar == '#') {
                while (inChar != '\n') {
                    inChar = in.readChar();
                    if (in.eof()) {
                        return;
                    }
                }
            }
        }
    }

    /**
     * Returns the blank node registered under {@code name}, creating and
     * registering a fresh one on first use.
     */
    protected Resource lookupResource(String name) {
        Resource r;
        r = anons.get(name);
        if (r == null) {
            r = model.createResource();
            anons.put(name, r);
        }
        return r;
    }

    /**
     * Builds the text of a diagnostic message.
     *
     * @param sort    kind of message, e.g. "Syntax error"
     * @param msg     the detail text
     * @param linepos line number reported by the input stream
     * @param charpos column reported by the input stream
     */
    protected String syntaxErrorMessage(
        String sort,
        String msg,
        int linepos,
        int charpos) {
        return base
            + sort
            + " at line "
            + linepos
            + " position "
            + charpos
            + ": "
            + msg;
    }

}

/**
 * Simple input stream handler: a reader with one character of look-ahead
 * that keeps track of line and column. Any {@link IOException} from the
 * underlying reader is rethrown as a {@link JenaException}.
 */
class IStream {

    // simple input stream handler

    Reader in;
    /** The look-ahead character; only meaningful while {@code eof} is false. */
    char[] thisChar = new char[1];
    boolean eof;
    // NOTE(review): the column starts at 1 here but restarts at 0 after a
    // newline, so columns on line 1 are counted one higher than on later
    // lines. Left unchanged because it alters error-message text.
    int charpos = 1;
    int linepos = 1;

    /** Wraps {@code in} and loads the first look-ahead character. */
    protected IStream(Reader in) {
        try {
            this.in = in;
            eof = (in.read(thisChar, 0, 1) == -1);
        } catch (IOException e) {
            throw new JenaException(e);
        }
    }

    /**
     * Consumes and returns the look-ahead character, then loads the next one.
     * Note that {@link #eof()} therefore becomes true as soon as the last
     * character has been returned.
     *
     * @return the character, or {@code '\000'} if the input is exhausted
     */
    protected char readChar() {
        try {
            if (eof)
                return '\000';
            char rv = thisChar[0];
            eof = (in.read(thisChar, 0, 1) == -1);
            if (rv == '\n') {
                linepos++;
                charpos = 0;
            } else {
                charpos++;
            }
            return rv;
        } catch (java.io.IOException e) {
            throw new JenaException(e);
        }
    }

    /** Returns the look-ahead character without consuming it, or {@code '\000'} at end of input. */
    protected char nextChar() {
        return eof ? '\000' : thisChar[0];
    }

    /** Returns {@code true} once there is no look-ahead character left. */
    protected boolean eof() {
        return eof;
    }

    protected int getLinepos() {
        return linepos;
    }

    protected int getCharpos() {
        return charpos;
    }
}