/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.rdfxml.xmlinput; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.URL; import java.net.URLConnection; import java.util.Locale ; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; import org.apache.jena.datatypes.RDFDatatype ; import org.apache.jena.datatypes.TypeMapper ; import org.apache.jena.graph.* ; import org.apache.jena.iri.IRIFactory; import org.apache.jena.rdf.model.Model ; import org.apache.jena.rdf.model.RDFErrorHandler ; import org.apache.jena.rdf.model.RDFReader ; import org.apache.jena.rdf.model.impl.RDFDefaultErrorHandler ; import org.apache.jena.rdfxml.xmlinput.impl.RDFXMLParser ; import org.apache.jena.shared.DoesNotExistException ; import org.apache.jena.shared.JenaException ; import org.apache.jena.shared.UnknownPropertyException ; import org.apache.jena.shared.WrappedIOException ; /** * Interface between Jena and ARP. */ public class JenaReader implements RDFReader, ARPErrorNumbers { static private final String saxFeaturesURL = "http://xml.org/sax/features/"; static private final String saxPropertiesURL = "http://xml.org/sax/properties/"; static private final String apacheFeaturesURL = "http://apache.org/xml/features/"; static private final String apachePropertiesURL = "http://apache.org/xml/properties/"; static final String arpPropertiesURL = "http://jena.hpl.hp.com/arp/properties/"; static final int arpPropertiesURLLength = arpPropertiesURL.length(); /** * Creates new JenaReader */ public JenaReader() { arpf = RDFXMLParser.create(); } final private RDFXMLParser arpf; private Model model; /** * Reads from url, using url as base, adding triples to model. * Uses content negotiation to ask for application/rdf+xml, if available. * * @param m * A model to add triples to. * @param url * The URL of the RDF/XML document. */ @Override public void read(Model m, String url) throws JenaException { try { URLConnection conn = new URL(url).openConnection(); conn.setRequestProperty("accept", "application/rdf+xml, application/xml; q=0.8, text/xml; q=0.7, application/rss+xml; q=0.3, */*; q=0.2"); String encoding = conn.getContentEncoding(); if (encoding == null) read(m, conn.getInputStream(), url); else read(m, new InputStreamReader(conn.getInputStream(), encoding), url); } catch (FileNotFoundException e) { throw new DoesNotExistException(url); } catch (IOException e) { throw new JenaException(e); } } private static Node convert(ALiteral lit) { String dtURI = lit.getDatatypeURI(); if (dtURI == null) return NodeFactory.createLiteral(lit.toString(), lit.getLang()); if (lit.isWellFormedXML()) { return NodeFactory.createLiteral(lit.toString(), null, true); } RDFDatatype dt = TypeMapper.getInstance().getSafeTypeByName(dtURI); return NodeFactory.createLiteral(lit.toString(), dt); } private static Node convert(AResource r) { if (!r.isAnonymous()) return NodeFactory.createURI(r.getURI()); // String id = r.getAnonymousID(); Node rr = (Node) r.getUserData(); if (rr == null) { rr = NodeFactory.createBlankNode(); r.setUserData(rr); } return rr; } static Triple convert(AResource s, AResource p, AResource o) { return Triple.create(convert(s), convert(p), convert(o)); } static Triple convert(AResource s, AResource p, ALiteral o) { return Triple.create(convert(s), convert(p), convert(o)); } /** * Reads from reader, using base URI xmlbase, adding triples to model. If * xmlbase is "" then relative URIs may be added to model. * * @param m * A model to add triples to. * @param reader * The RDF/XML document. * @param xmlBase * The base URI of the document or "". */ private void read(Model m, InputSource inputS, String xmlBase) throws JenaException { model = m; read(model.getGraph(), inputS, xmlBase, model); } private JenaHandler handler; synchronized private void read(final Graph g, InputSource inputS, String xmlBase, Model m) { try { g.getEventManager().notifyEvent(g, GraphEvents.startRead); inputS.setSystemId(xmlBase); handler = new JenaHandler(g, m, errorHandler); handler.useWith(arpf.getHandlers()); arpf.parse(inputS, xmlBase); } catch (IOException e) { throw new WrappedIOException(e); } catch (SAXException e) { throw new JenaException(e); } finally { g.getEventManager().notifyEvent(g, GraphEvents.finishRead); handler = null; } } /** * Reads from reader, using base URI xmlbase, adding triples to model. If * xmlbase is "" then relative URIs may be added to model. * * @param m * A model to add triples to. * @param reader * The RDF/XML document. * @param xmlBase * The base URI of the document or "". */ @Override public void read(final Model m, Reader reader, String xmlBase) throws JenaException { read(m, new InputSource(reader), xmlBase); } /** * Reads from reader, using base URI xmlbase, adding triples to graph. If * xmlbase is "" then relative URIs may be added to graph. * * @param g * A graph to add triples to. * @param reader * The RDF/XML document. * @param xmlBase * The base URI of the document or "". */ public void read(Graph g, Reader reader, String xmlBase) throws JenaException { read(g, new InputSource(reader), xmlBase, null); } /** * Reads from inputStream, using base URI xmlbase, adding triples to model. * If xmlbase is "" then relative URIs may be added to model. * * @param m * A model to add triples to. * @param in * The RDF/XML document stream. * @param xmlBase * The base URI of the document or "". */ @Override public void read(final Model m, InputStream in, String xmlBase) throws JenaException { read(m, new InputSource(in), xmlBase); } /** * Reads from inputStream, using base URI xmlbase, adding triples to graph. * If xmlbase is "" then relative URIs may be added to graph. * * @param g * A graph to add triples to. * @param in * The RDF/XML document stream. * @param xmlBase * The base URI of the document or "". */ public void read(Graph g, InputStream in, String xmlBase) { read(g, new InputSource(in), xmlBase, null); } private RDFErrorHandler errorHandler = new RDFDefaultErrorHandler(); /** * Change the error handler. * <p> * Note that errors of class {@link ParseException}can be promoted using * the {@link ParseException#promote}method. See ARP documentation for * {@link org.xml.sax.ErrorHandler}for the details of error promotion. * * @param errHandler * The new error handler. * @return The old error handler. */ @Override public RDFErrorHandler setErrorHandler(RDFErrorHandler errHandler) { RDFErrorHandler old = this.errorHandler; this.errorHandler = errHandler; JenaHandler h = handler; if (h != null) { h.setErrorHandler(errHandler); } return old; } /** * * Change a property of the RDF or XML parser. * <p> * I do not believe that many of the XML features or properties are in fact * useful for ARP users. The ARP properties allow fine-grained control over * error reporting. * <p> * This interface can be used to set and get: * <dl> * <dt>SAX2 features</dt> * <dd>See <a href="http://xml.apache.org/xerces-j/features.html">Xerces * features </a>. Value should be given as a String "true" or "false" or a * Boolean.</dd> * <dt>SAX2 properties</dt> * <dd>See <a href="http://xml.apache.org/xerces-j/properties.html">Xerces * properties </a>.</dd> * <dt>Xerces features</dt> * <dd>See <a href="http://xml.apache.org/xerces-j/features.html">Xerces * features </a>. Value should be given as a String "true" or "false" or a * Boolean.</dd> * <dt>Xerces properties</dt> * <dd>See <a href="http://xml.apache.org/xerces-j/properties.html">Xerces * properties </a>.</dd> * <dt>ARP properties</dt> * <dd>These are referred to either by their property name, (see below) or * by an absolute URL of the form * <code>http://jena.hpl.hp.com/arp/properties/<PropertyName></code>. * The value should be a String, an Integer or a Boolean depending on the * property. <br> * ARP property names and string values are case insensitive. <br> * <TABLE BORDER="1" CELLPADDING="3" CELLSPACING="0"> * <TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"> * <TD COLSPAN=4><FONT SIZE="+2"> <B>ARP Properties </B> </FONT></TD> * </TR> * <tr BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor"> * <th>Property Name</th> * <th>Description</th> * <th>Value class</th> * <th>Legal Values</th> * </tr> * <tr BGCOLOR="white" CLASS="TableRowColor"> * <td><CODE>error-mode</CODE></td> * <td>{@link ARPOptions#setDefaultErrorMode}<br> * {@link ARPOptions#setLaxErrorMode}<br> * {@link ARPOptions#setStrictErrorMode()}<br> * {@link ARPOptions#setStrictErrorMode(int)}<br> * </td> * <td>String</td> * <td><CODE>default</CODE><br> * <CODE>lax</CODE><br> * <CODE>strict</CODE><br> * <CODE>strict-ignore</CODE><br> * <CODE>strict-warning</CODE><br> * <CODE>strict-error</CODE><br> * <CODE>strict.error</CODE><br> * </td> * </tr> * <tr BGCOLOR="white" CLASS="TableRowColor"> * <td><CODE>embedding</CODE></td> * <td>{@link ARPOptions#setEmbedding}</td> * <td>String or Boolean</td> * <td><CODE>true</CODE> or <CODE>false</CODE></td> * </tr> * <tr BGCOLOR="white" CLASS="TableRowColor"> * <td><code>ERR_<XXX></code><br> * <code>WARN_<XXX></code><br> * <code>IGN_<XXX></code></td> * <td>{@link ARPErrorNumbers}<br> * Any of the error condition numbers listed. <br> * {@link ARPOptions#setErrorMode(int, int)}</td> * <td>String or Integer</td> * <td>{@link ARPErrorNumbers#EM_IGNORE EM_IGNORE}<br> * {@link ARPErrorNumbers#EM_WARNING EM_WARNING}<br> * {@link ARPErrorNumbers#EM_ERROR EM_ERROR}<br> * {@link ARPErrorNumbers#EM_FATAL EM_FATAL}<br> * </td> * </tr> * </table></dd> * </dl> * * @param str * The property to set. * @param value * The new value; values of class String will be converted into * appropriate classes. Values of class Boolean or Integer will * be used for appropriate properties. * @throws JenaException * For bad values. * @return The old value, or null if none, or old value is inaccesible. */ @Override public Object setProperty(String str, Object value) throws JenaException { Object obj = value; if (str.startsWith("http:")) { if (str.startsWith(arpPropertiesURL)) { return setArpProperty(str.substring(arpPropertiesURLLength), obj); } if (str.startsWith(saxPropertiesURL) || str.startsWith(apachePropertiesURL)) { Object old; try { old = arpf.getSAXParser().getProperty(str); } catch (SAXNotSupportedException ns) { old = null; } catch (SAXNotRecognizedException nr) { errorHandler.error(new UnknownPropertyException(str)); return null; } try { arpf.getSAXParser().setProperty(str, obj); } catch (SAXNotSupportedException ns) { errorHandler.error(new JenaException(ns)); } catch (SAXNotRecognizedException nr) { errorHandler.error(new UnknownPropertyException(str)); return null; } return old; } if (str.startsWith(saxFeaturesURL) || str.startsWith(apacheFeaturesURL)) { Boolean old; try { old = arpf.getSAXParser().getFeature( str ); } catch (SAXNotSupportedException ns) { old = null; } catch (SAXNotRecognizedException nr) { errorHandler.error(new UnknownPropertyException(str)); return null; } try { arpf.getSAXParser().setFeature(str, ((Boolean) obj).booleanValue()); } catch (SAXNotSupportedException ns) { errorHandler.error(new JenaException(ns)); } catch (SAXNotRecognizedException nr) { errorHandler.error(new UnknownPropertyException(str)); return null; } catch (ClassCastException cc) { errorHandler.error(new JenaException( new SAXNotSupportedException("Feature: '" + str + "' can only have a boolean value."))); } return old; } } return setArpProperty(str, obj); } private Object setArpProperty(String str, Object v) { return processArpOptions(getOptions(), str, v, errorHandler); } public ARPOptions getOptions() { return arpf.getOptions(); } public void setOptionsWith(ARPOptions opts) { arpf.setOptionsWith(opts); } /** * Supported properties: * error-mode (String) default, lax, strict, * strict-ignore, strict-warning, strict-error, strict.error <br/> * embedding (String/Boolean) true, false<br/> * ERR_* (String/Integer) em_warning, em.error, em_ignore, em_error<br/> * IGN_* ditto<br/> * WARN_* ditto<br/> * iri-rules (String), "Jena", "IRI", "strict", "lax" */ @SuppressWarnings("deprecation") static Object processArpOptions(ARPOptions options, String str, Object v, RDFErrorHandler eh) { // ARPOptions options = arpf.getOptions(); str = str.toUpperCase(); if (v == null) v = ""; if (v instanceof String) { v = ((String) v).toUpperCase(Locale.ENGLISH); } if (str.equals("ERROR-MODE")) { if (v instanceof String) { String val = (String) v; if (val.equals("LAX")) { options.setLaxErrorMode(); return null; } if (val.equals("DEFAULT")) { options.setDefaultErrorMode(); return null; } if (val.equals("STRICT")) { options.setStrictErrorMode(); return null; } if (val.equals("STRICT-WARNING")) { options.setStrictErrorMode(EM_WARNING); return null; } if (val.equals("STRICT-FATAL")) { options.setStrictErrorMode(EM_FATAL); return null; } if (val.equals("STRICT-IGNORE")) { options.setStrictErrorMode(EM_IGNORE); return null; } if (val.equals("STRICT-ERROR")) { options.setStrictErrorMode(EM_ERROR); return null; } } eh.error(new IllegalArgumentException( "Property \"ERROR-MODE\" takes the following values: " + "\"default\", \"lax\", \"strict\", \"strict-ignore\", \"strict-warning\", \"strict-error\", \"strict.error\".")) ; return null; } if (str.equals("EMBEDDING")) { if (v instanceof String) { v = Boolean.valueOf((String) v); } if ((v instanceof Boolean)) return options.setEmbedding( ( (Boolean) v ).booleanValue() ); // Illegal value. eh.error(new IllegalArgumentException( "Property \"EMBEDDING\" requires a boolean value.")); boolean old = options.setEmbedding(false); options.setEmbedding(old); return old; } if (str.startsWith("ERR_") || str.startsWith("IGN_") || str.startsWith("WARN_")) { int cond = ParseException.errorCode(str); if (cond == -1) { // error, see end of function. } else { if (v instanceof String) { if (!((String) v).startsWith("EM_")) { // error, see below. } else { int val = ParseException.errorCode((String) v); if (val == -1) { // error, see below. } else { int rslt = options.setErrorMode(cond, val); return rslt; } } } else if (v instanceof Integer) { int val = ((Integer) v).intValue(); switch (val) { case EM_IGNORE: case EM_WARNING: case EM_ERROR: case EM_FATAL: int rslt = options.setErrorMode(cond, val); return rslt; default: // error, see below. } } // Illegal value. eh.error(new IllegalArgumentException("Property \"" + str + "\" cannot have value: " + v.toString())); int old = options.setErrorMode(cond, EM_ERROR); options.setErrorMode(cond, old); return old; } } if ( str.equals("IRI-RULES") ) { IRIFactory old = options.getIRIFactory() ; if ( v.equals("STRICT") ) { options.setIRIFactory(IRIFactory.semanticWebImplementation()) ; } else if ( v.equals("IRI") ) { options.setIRIFactory(IRIFactory.iriImplementation()) ; } else if ( v.equals("LAX") ) { options.setIRIFactory(IRIFactory.jenaImplementation()) ; } else eh.error(new IllegalArgumentException( "Property \"IRI-RULES\" requires one of 'STRICT', 'IRI' or 'LAX'")); return old ; } eh.error(new UnknownPropertyException(str)); return null; } }