/* (c) Copyright 2007 Hewlett-Packard Development Company, LP [See end of file] $Id: GRDDLReaderBase.java 1393 2007-05-25 12:21:58Z jeremy_carroll $ */ package com.hp.hpl.jena.grddl.impl; import java.net.URLConnection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.Map.Entry; import javax.xml.transform.ErrorListener; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import net.sf.saxon.TransformerFactoryImpl; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; import org.xml.sax.XMLReader; import com.hp.hpl.jena.grddl.GRDDLSecurityException; import com.hp.hpl.jena.rdf.arp.impl.ARPSaxErrorHandler; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFErrorHandler; import com.hp.hpl.jena.rdf.model.RDFReader; import com.hp.hpl.jena.rdf.model.impl.RDFDefaultErrorHandler; import com.hp.hpl.jena.shared.BrokenException; import com.hp.hpl.jena.shared.JenaException; /** * GRDDLReaderBase * * @author Jeremy J. Carroll */ public class GRDDLReaderBase { // static public class ProhibitUnparsedText { // public ProhibitUnparsedText() { // throw new GRDDLSecurityException("unparsed-text() not permitted in this // implementation"); // } // } // static { // Entry unparsedText = StandardFunction.getFunction("unparsed-text", 1); // unparsedText.implementationClass = ProhibitUnparsedText.class; // unparsedText = StandardFunction.getFunction("unparsed-text-available", // 1); // unparsedText.implementationClass = ProhibitUnparsedText.class; // // } XMLReader tidyParser = new org.cyberneko.html.parsers.SAXParser(); XMLReader saxParser = new org.apache.xerces.parsers.SAXParser(); RDFReader rdfxml, n3; boolean disabled; private boolean rdfa; Set<String> xmlXforms = null; Set<String> htmlXforms = null; Map<String, String> headers = new HashMap<String, String>(); private RDFErrorHandler eHandler = new RDFDefaultErrorHandler() { public void error(Exception e) { super.error(w(e)); } public void fatalError(Exception e) { super.fatalError(w(e)); } public void warning(Exception e) { super.warning(w(e)); } }; final TransformerFactory xformFactory = new TransformerFactoryImpl(); { // headers.put("negotiate", "*"); // System.err.println(xformFactory.getClass()); xformFactory.setErrorListener(new ErrorListener() { public void error(TransformerException e) throws TransformerException { if (e.getCause() instanceof SeenEnoughExpectedException) { throw e; } checkException(e); eHandler.error(e); } public void fatalError(TransformerException e) throws TransformerException { checkException(e); eHandler.error(e); throw e; } public void warning(TransformerException e) throws TransformerException { eHandler.warning(e); } }); xformFactory.setAttribute( net.sf.saxon.FeatureKeys.ALLOW_EXTERNAL_FUNCTIONS, Boolean.FALSE); try { xformFactory.setFeature( "http://javax.xml.XMLConstants/feature/secure-processing", true); } catch (TransformerConfigurationException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } xformFactory.setAttribute( "http://saxon.sf.net/feature/version-warning", Boolean.FALSE); // xformFactory.setURIResolver(new SafeURIResolver(xformFactory // .getURIResolver())); Model m = ModelFactory.createDefaultModel(); rdfxml = m.getReader("RDF/XML"); n3 = m.getReader("N3"); rdfxml.setErrorHandler(eHandler); n3.setErrorHandler(eHandler); setProperty("http://cyberneko.org/html/features/insert-namespaces", "true"); setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); ARPSaxErrorHandler saxErrorHandler = new ARPSaxErrorHandler(eHandler); tidyParser.setErrorHandler(saxErrorHandler); saxParser.setErrorHandler(saxErrorHandler); try { saxParser.setFeature( "http://xml.org/sax/features/use-entity-resolver2", false); saxParser.setFeature( "http://xml.org/sax/features/external-general-entities" , false); saxParser.setFeature( "http://xml.org/sax/features/external-parameter-entities" , false); // saxParser.setFeature( // "http://apache.org/xml/features/nonvalidating/load-external-dtd" // // , false); } catch (SAXNotRecognizedException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (SAXNotSupportedException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } public RDFErrorHandler setErrorHandler(RDFErrorHandler errHandler) { RDFErrorHandler old = eHandler; eHandler = errHandler; rdfxml.setErrorHandler(eHandler); n3.setErrorHandler(eHandler); ((ARPSaxErrorHandler) tidyParser.getErrorHandler()) .setErrorHandler(eHandler); return old; } protected Exception w(Exception e) { if (e instanceof TransformerException) { System.err .println(((TransformerException) e).getLocationAsString()); } return e; } protected RDFErrorHandler eHandler() { return eHandler; } /** * There are currently no properties specific to the GRDDL reader. * Properties starting <code>"http://cyberneko.org/"</code> modify the * behaviour of the HTML parser, as documented; and other properties modify * the behaviour of the RDF/XML parser. * * @param propName * A property name. * @param propValue * The new value of the property. * @return Thge old value of the property. */ public Object setProperty(String propName, Object propValue) { Object old; String propNameLC = propName.toLowerCase(); // "http://apache.org/xml/features/xinclude" // "http://apache.org/xml/features/nonvalidating/load-external-dtd" if (propNameLC.startsWith("grddl.")) { String p = propName.substring(6).toLowerCase(); if (p.equals("xml-xform")) { old = xmlXforms; xmlXforms = addXform(propValue, xmlXforms ); } else if (p.equals("html-xform")) { old = htmlXforms; htmlXforms = addXform(propValue, htmlXforms ); } else if (p.equals("disable")) { old = new Boolean(disabled); disabled = toBoolean(propValue, disabled); } else if (p.equals("rdfa")) { old = new Boolean(rdfa); rdfa = toBoolean(propValue, rdfa); if (rdfa) { disabled = true; htmlXforms = addXform("http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML_v_0_8.xsl", htmlXforms ); } } else { error(propName); old = null; } return old; } if (propNameLC.startsWith("header.")) { String hdr = propName.substring(7).toLowerCase(); old = headers.get(hdr); headers.put(hdr, (String) propValue); return old; } if (propName.startsWith("http://cyberneko.org/")) { try { return setSAXFeatureOrProperty(propName, propValue, tidyParser); } catch (SAXNotRecognizedException e) { eHandler.error(e); return null; } catch (SAXNotSupportedException e) { eHandler.error(e); return null; } } if (propName.startsWith("http://apache.org/")) { try { old = setSAXFeatureOrProperty(propName, propValue, saxParser); } catch (SAXNotRecognizedException e) { eHandler.error(e); return null; } catch (SAXNotSupportedException e) { eHandler.error(e); return null; } try { setSAXFeatureOrProperty(propName, propValue, tidyParser); } catch (SAXNotRecognizedException e) { // ignore errors. } catch (SAXNotSupportedException e) { } rdfxml.setProperty(propName, propValue); return old; } if (propName.startsWith("http://saxon.sf.net/")) { int b = toBoolean(propValue); switch (b) { case 0: case 1: try { boolean oldb = xformFactory.getFeature(propName); xformFactory.setFeature(propName, b == 1); return new Boolean(oldb); } catch (Exception e) { // fall through } case -1: try { old = xformFactory.getAttribute(propName); xformFactory.setAttribute(propName, propValue); return old; } catch (Exception e) { eHandler.error(e); return null; } } } return rdfxml.setProperty(propName, propValue); } private Set<String> addXform(Object propValue, Set<String> xf) { if (propValue != null) { if (xf == null) { xf = new HashSet<String>(); } xf.add(propValue.toString()); return xf; } else { return null; } } private void error(String msg) { eHandler.error(new JenaException("unrecognised option: " + msg)); } private boolean toBoolean(Object propValue, boolean def) { switch (toBoolean(propValue)) { case 1: return true; case 0: return false; case -1: default: error("illegal value for boolean option: " + propValue); return def; } } /** * * @param propValue * @return 0 if propValue is false or variant, 1 if propValue is true, -1 if * non-Boolean */ private int toBoolean(Object propValue) { if (propValue instanceof Boolean) { return ((Boolean) propValue).booleanValue() ? 1 : 0; } else if (propValue instanceof String) { if ("true".equalsIgnoreCase((String) propValue)) return 1; if ("false".equalsIgnoreCase((String) propValue)) return 0; } return -1; } private Object setSAXFeatureOrProperty(String propName, Object propValue, XMLReader parser) throws SAXNotRecognizedException, SAXNotSupportedException { int b = toBoolean(propValue); switch (b) { case 0: case 1: boolean oldb = parser.getFeature(propName); parser.setFeature(propName, b == 1); return new Boolean(oldb); case -1: Object old = parser.getProperty(propName); parser.setProperty(propName, propValue); return old; } throw new BrokenException("impossible"); } GRDDLSecurityException lastSecurityException; void checkException(Exception e) { if (e.getCause() instanceof GRDDLSecurityException) { lastSecurityException = (GRDDLSecurityException) e.getCause(); throw lastSecurityException; } String msg = e.getMessage(); if (msg.contains("result-document") || msg.contains("disabled") || msg.contains("extension")) { lastSecurityException = new GRDDLSecurityException(e); throw lastSecurityException; } } void setHeaders(URLConnection conn) { Iterator<Entry<String, String>> i = headers.entrySet().iterator(); while (i.hasNext()) { Entry<String, String> e = i.next(); conn.setRequestProperty(e.getKey(), e.getValue()); // System.err.println(e.getKey() + ": "+ e.getValue()); } } } /* * (c) Copyright 2007 Hewlett-Packard Development Company, LP All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */