/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.rdfxml.xmlinput.impl; import java.util.Iterator ; import java.util.regex.Pattern ; import org.apache.jena.iri.IRI ; import org.apache.jena.iri.IRIComponents ; import org.apache.jena.iri.Violation ; import org.apache.jena.iri.ViolationCodes ; import org.apache.jena.rdfxml.xmlinput.ARPErrorNumbers ; import org.xml.sax.SAXParseException ; public abstract class AbsXMLContext implements ARPErrorNumbers { // protected static String truncateXMLBase(String rslt) { // if (rslt == null) // return null; // int hash = rslt.indexOf('#'); // if (hash != -1) { // return rslt.substring(0, hash); // } // return rslt; // } protected final String lang; protected final Taint langTaint; final Taint baseTaint; protected final IRI uri; protected final AbsXMLContext document; protected AbsXMLContext(boolean useDoc, AbsXMLContext document, IRI uri, Taint baseT, String lang, Taint langT) { // this.base=base; this.lang = lang; langTaint = langT; baseTaint = baseT; this.uri = uri; this.document = useDoc ? (document == null ? this : document) : null; } protected static Taint initTaint(XMLHandler h, IRI base) throws SAXParseException { Taint rslt = new TaintImpl(); checkURI(h, rslt, base); return rslt; } // protected AbsXMLContext withBase(XMLHandler forErrors, String b) // throws SAXParseException { // TaintImpl taintB = new TaintImpl(); // IRI newB = resolveAsURI(forErrors, taintB, b, false); // // TO DO update MALFORMED_CONTEXT // if (newB.isVeryBad()) // return new XMLBaselessContext(forErrors, // ERR_RESOLVING_AGAINST_MALFORMED_BASE, b); // return new XMLContext(keepDocument(forErrors), document, newB // .create(""), taintB, lang, langTaint); // } public AbsXMLContext withBase(XMLHandler forErrors, String b) throws SAXParseException { TaintImpl taintB = new TaintImpl(); IRI newB = resolveAsURI(forErrors, taintB, b, false); if (newB.isRelative()) return new XMLBaselessContext(forErrors,ERR_RESOLVING_AGAINST_RELATIVE_BASE, newB.create("")); if (newB.hasViolation(false)) return new XMLBaselessContext(forErrors, ERR_RESOLVING_AGAINST_MALFORMED_BASE, newB); return new XMLContext(keepDocument(forErrors), document, newB .create(""), taintB, lang, langTaint); } abstract boolean keepDocument(XMLHandler forErrors); protected AbsXMLContext withLang(XMLHandler forErrors, String l) throws SAXParseException { Taint taint = new TaintImpl(); checkXMLLang(forErrors, taint, l); return clone(uri, baseTaint, l, taint); } abstract AbsXMLContext clone(IRI base, Taint baseT, String l, Taint langT); public String getLang(Taint taint) { if (langTaint.isTainted()) taint.taint(); return lang; } // protected RDFURIReference getURI(XMLHandler forErrors, Taint taintMe, // String relUri) throws SAXParseException { // baseUsed(forErrors, taintMe, relUri, null); // if (baseTaint.isTainted()) // taintMe.taint(); // return uri; // } final IRI resolveAsURI(XMLHandler forErrors, Taint taintMe, String relUri) throws SAXParseException { return resolveAsURI(forErrors, taintMe, relUri, true); } final IRI resolveAsURI(XMLHandler forErrors, Taint taintMe, String relUri, boolean checkBaseUse) throws SAXParseException { IRI rslt = uri.create(relUri); if (checkBaseUse) checkBaseUse(forErrors, taintMe, relUri, rslt); checkURI(forErrors, taintMe, rslt); return rslt; } abstract void checkBaseUse(XMLHandler forErrors, Taint taintMe, String relUri, IRI rslt) throws SAXParseException; // abstract void baseUsed(XMLHandler forErrors, Taint taintMe, String // relUri, // String string) throws SAXParseException; protected static void checkURI(XMLHandler forErrors, Taint taintMe, IRI rslt) throws SAXParseException { if (rslt.hasViolation(false)) { Iterator<Violation> it = rslt.violations(false); while (it.hasNext()) { Violation irie = it.next(); // if (irie.getViolationCode() == // ViolationCodes.REQUIRED_COMPONENT_MISSING) String msg = irie.getShortMessage(); if (irie.getViolationCode() == ViolationCodes.REQUIRED_COMPONENT_MISSING && irie.getComponent() == IRIComponents.SCHEME) { if (!forErrors.allowRelativeURIs()) forErrors.warning(taintMe, WARN_RELATIVE_URI, "Relative URIs are not permitted in RDF: specifically <"+rslt.toString()+">"); } else forErrors.warning(taintMe, WARN_MALFORMED_URI, "Bad URI: " + msg); } } } public String resolve(XMLHandler forErrors, Taint taintMe, String u) throws SAXParseException { return resolveAsURI(forErrors, taintMe, u, true).toString(); } private static Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*") ; /* This is just a light syntactic check of the language tag. * See JENA-827. * Jena, when parsing RDF/XML, used to check the syntax and against (an encoded copy of) the IANA registry. * Elsewhere, Turtle et al and SPARQL, Jena has always only performed this syntax check. */ private void checkXMLLang(XMLHandler arp, Taint taintMe, String newLang) throws SAXParseException { if (newLang.equals("")) return; if (newLang.equalsIgnoreCase("und") ) arp.warning(taintMe, WARN_BAD_XMLLANG, "Bad language tag: "+newLang+" (not allowed)") ; if ( ! langPattern.matcher(newLang).matches() ) arp.warning(taintMe, WARN_BAD_XMLLANG, "Bad language tag: "+newLang) ; } }