/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * S.java
 *
 * Created on July 15, 2001, 7:13 AM
 */

package org.apache.jena.rdfxml.xmlinput.impl;

import java.util.HashMap ;
import java.util.Map ;

import org.apache.jena.iri.IRI ;
import org.apache.jena.rdfxml.xmlinput.ARPErrorNumbers ;
import org.apache.xerces.util.XML11Char ;
import org.apache.xerces.util.XMLChar ;
import org.xml.sax.SAXParseException ;

/**
 * Shared validation and reporting helpers for the RDF/XML parser.
 * <p>
 * Every check in this class reports problems through the owning
 * {@link XMLHandler}; whether a report ends up as a warning or an error is
 * decided by the handler, not here.
 */
public class ParserSupport implements ARPErrorNumbers, Names {

    /** Handler that receives warnings and triples and owns the rdf:ID tracking state. */
    protected final XMLHandler arp;

    /** XML context supplied at construction time. */
    public final AbsXMLContext xml;

    // protected void checkBadURI(Taint taintMe,RDFURIReference uri) throws SAXParseException {
    //     arp.checkBadURI(taintMe,uri);
    // }

    /**
     * Creates a support object bound to a handler and an XML context.
     *
     * @param arp the handler used for reporting and ID tracking
     * @param xml the XML context exposed through {@link #getXMLContext()}
     */
    protected ParserSupport(XMLHandler arp, AbsXMLContext xml) {
        this.arp = arp;
        this.xml = xml;
    }

    /**
     * Returns the handler's table of IDs already seen, keyed by base IRI and
     * then by ID.
     *
     * @return the handler's table; {@code null} once tracking has been
     *         switched off by {@link #checkIdSymbol}
     */
    Map<IRI, Map<String, ARPLocation>> idsUsed() {
        return arp.idsUsed;
    }

    /**
     * Checks an rdf:ID value: reports reuse of the same ID under the same
     * base IRI, then validates it as an XML name and checks its encoding.
     * <p>
     * Reuse tracking stops (with a {@code WARN_BIG_FILE} report) once more
     * than 10000 distinct IDs have been recorded.
     *
     * @param taintMe taint object passed through to the handler's reporting
     * @param ctxt    context whose {@code uri} is the base the ID is recorded under
     * @param str     the ID to check. NOTE(review): the previous documentation
     *                described this as "the fully expanded URI", yet the value
     *                is validated as an NCName below - confirm against callers.
     * @throws SAXParseException if the handler escalates a report
     */
    protected void checkIdSymbol(Taint taintMe, AbsXMLContext ctxt, String str)
            throws SAXParseException {
        if (arp.idsUsed != null) {
            IRI base = ctxt.uri;
            Map<String, ARPLocation> idsForBase = arp.idsUsed.get(base);
            if (idsForBase == null) {
                idsForBase = new HashMap<>();
                arp.idsUsed.put(base, idsForBase);
            }

            ARPLocation previous = idsForBase.get(str);
            if (previous == null) {
                idsForBase.put(str, arp.location());
                arp.idsUsedCount++;
                if (arp.idsUsedCount > 10000) {
                    // Drop the table before reporting, so tracking is
                    // already off when the handler processes the warning.
                    arp.idsUsed = null;
                    arp.warning(taintMe, WARN_BIG_FILE,
                            "Input is large. Switching off checking for illegal reuse of rdf:ID's.");
                }
            } else {
                // Two reports: one at the current location, one pointing
                // at the location of the earlier definition.
                arp.warning(taintMe, WARN_REDEFINITION_OF_ID,
                        "Redefinition of ID: " + str);
                arp.warning(taintMe, WARN_REDEFINITION_OF_ID, previous,
                        "Previous definition of '" + str + "'.");
            }
        }

        checkID_XMLName(taintMe, str);
        checkEncoding(taintMe, str);
    }

    /**
     * Reports {@code WARN_BAD_NAME} when an rdf:nodeID value is not a valid
     * NCName according to Xerces' {@link XMLChar}.
     *
     * @param taintMe taint object passed through to the handler's reporting
     * @param str     the value to validate
     * @throws SAXParseException if the handler escalates the report
     */
    protected void checkNodeID_XMLName(Taint taintMe, String str) throws SAXParseException {
        if (!XMLChar.isValidNCName(str)) {
            warning(taintMe, WARN_BAD_NAME, "Not an XML Name: '" + str + "'");
        }
    }

    /**
     * Reports {@code WARN_BAD_NAME} when an rdf:ID value is not a valid
     * NCName under the XML 1.1 character rules.
     *
     * @param taintMe taint object passed through to the handler's reporting
     * @param str     the value to validate
     * @throws SAXParseException if the handler escalates the report
     */
    protected void checkID_XMLName(Taint taintMe, String str) throws SAXParseException {
        // Was called "checkXMLName" and same code as checkNodeID_XMLName until Jena 3.1.0.
        // See JENA-1071
        // Java and xerces are XML 1.0 4th edition.
        // XML 1.0 5th edition and XML 1.1 allow a wider range of characters in an NCName.
        // rdf:about="..." is any string but rdf:ID="..." is an XML NCName.
        // This operation here should allow the wider range to make
        // it compatible with rdf:about="...full URI..."
        //if (!XMLChar.isValidNCName(str)) {
        if (!XML11Char.isXML11ValidNCName(str)) {
            warning(taintMe, WARN_BAD_NAME, "Not an XML Name: '" + str + "'");
        }
    }

    /**
     * Runs the string checks in order: Unicode Normal Form C, encoding, and
     * leading composing character.
     *
     * @param taintMe taint object passed through to the handler's reporting
     * @param t       the string to check
     * @throws SAXParseException if the handler escalates a report
     */
    public void checkString(Taint taintMe, String t) throws SAXParseException {
        if (!CharacterModel.isNormalFormC(t)) {
            warning(taintMe, WARN_STRING_NOT_NORMAL_FORM_C,
                    "String not in Unicode Normal Form C: \"" + t + "\"");
        }
        checkEncoding(taintMe, t);
        checkComposingChar(taintMe, t);
    }

    /**
     * Reports {@code WARN_STRING_COMPOSING_CHAR} when the string starts with
     * a composing character, as decided by {@code CharacterModel}.
     *
     * @param taintMe taint object passed through to the handler's reporting
     * @param t       the string to check
     * @throws SAXParseException if the handler escalates the report
     */
    void checkComposingChar(Taint taintMe, String t) throws SAXParseException {
        if (CharacterModel.startsWithComposingCharacter(t)) {
            warning(taintMe, WARN_STRING_COMPOSING_CHAR,
                    "String is not legal in XML 1.1; starts with composing char: \""
                            + t + "\" (" + ((int) t.charAt(0)) + ")");
        }
    }

    /**
     * Reports {@code WARN_STRING_COMPOSING_CHAR} when a non-empty character
     * range starts with a composing character.
     *
     * @param taintMe taint object passed through to the handler's reporting
     * @param ch      backing character array
     * @param st      index of the first character of the range
     * @param ln      number of characters in the range; nothing is checked when it is 0 or less
     * @throws SAXParseException if the handler escalates the report
     */
    public void checkComposingChar(Taint taintMe, char ch[], int st, int ln)
            throws SAXParseException {
        if (ln > 0 && CharacterModel.isComposingChar(ch[st])) {
            warning(taintMe, WARN_STRING_COMPOSING_CHAR,
                    "String is not legal in XML 1.1; starts with composing char: \""
                            + new String(ch, st, ln) + "\" (" + (int) ch[st] + ")");
        }
    }

    // Disabled language-tag check, kept as found in the original file.
    //
    // public void checkXMLLang(Taint taintMe, String lang) throws SAXParseException {
    //     if (lang.equals(""))
    //         return;
    //     try {
    //         LanguageTag tag = new LanguageTag(lang);
    //         int tagType = tag.tagType();
    //         if (tagType == LT_ILLEGAL) {
    //             warning(taintMe,
    //                 WARN_BAD_XMLLANG,
    //                 tag.errorMessage());
    //         }
    //         if ((tagType & LT_UNDETERMINED) == LT_UNDETERMINED) {
    //             warning(taintMe,
    //                 WARN_BAD_XMLLANG,
    //                 "Unnecessary use of language tag \"und\" prohibited by RFC3066");
    //         }
    //         if ((tagType & LT_IANA_DEPRECATED) == LT_IANA_DEPRECATED) {
    //             warning(taintMe,
    //                 WARN_DEPRECATED_XMLLANG,
    //                 "Use of deprecated language tag \"" + lang + "\".");
    //         }
    //         if ((tagType & LT_PRIVATE_USE) == LT_PRIVATE_USE) {
    //             warning(taintMe,
    //                 IGN_PRIVATE_XMLLANG,
    //                 "Use of (IANA) private language tag \"" + lang + "\".");
    //         } else if ((tagType & LT_LOCAL_USE) == LT_LOCAL_USE) {
    //             warning(taintMe,
    //                 IGN_PRIVATE_XMLLANG,
    //                 "Use of (ISO639-2) local use language tag \""
    //                     + lang
    //                     + "\".");
    //         } else if ((tagType & LT_EXTRA) == LT_EXTRA) {
    //             warning(taintMe,
    //                 IGN_PRIVATE_XMLLANG,
    //                 "Use of additional private subtags on language \""
    //                     + lang
    //                     + "\".");
    //         }
    //     } catch (LanguageTagSyntaxException e) {
    //         warning(taintMe,
    //             WARN_MALFORMED_XMLLANG,
    //             e.getMessage());
    //     }
    // }

    /**
     * Reports {@code ERR_ENCODING_MISMATCH} once if the string contains any
     * character above 127. The check only runs while the handler's
     * {@code encodingProblems} flag is set.
     *
     * @param taintMe taint object passed through to the handler's reporting
     * @param s       the string to scan
     * @throws SAXParseException if the handler escalates the report
     */
    public void checkEncoding(Taint taintMe, String s) throws SAXParseException {
        if (!arp.encodingProblems) {
            return;
        }
        for (int i = s.length() - 1; i >= 0; i--) {
            // char is unsigned, so only the upper bound needs testing.
            if (s.charAt(i) > 127) {
                warning(taintMe, ERR_ENCODING_MISMATCH,
                        "Encoding error with non-ascii characters.");
                return;
            }
        }
    }

    /**
     * Forwards a report to the handler. Whether it is treated as a warning
     * or an error is determined later.
     *
     * @param taintMe taint object passed through to the handler
     * @param i       the report code (one of the {@code ARPErrorNumbers} constants)
     * @param msg     the message text
     * @throws SAXParseException if the handler escalates the report
     */
    protected void warning(Taint taintMe, int i, String msg) throws SAXParseException {
        arp.warning(taintMe, i, msg);
    }

    /**
     * Tests whether a character range consists solely of newline, carriage
     * return, tab and space characters.
     *
     * @param ch backing character array
     * @param st index of the first character of the range
     * @param ln number of characters in the range
     * @return {@code true} if every character in the range is whitespace
     *         (also for an empty range)
     */
    protected boolean isWhite(char ch[], int st, int ln) {
        for (int i = 0; i < ln; i++) {
            if (!isWhite(ch[st + i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether a buffer consists solely of newline, carriage return,
     * tab and space characters.
     *
     * @param buf the buffer to inspect
     * @return {@code true} if every character is whitespace (also for an
     *         empty buffer)
     */
    protected boolean isWhite(StringBuffer buf) {
        for (int i = buf.length() - 1; i >= 0; i--) {
            if (!isWhite(buf.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Returns {@code true} for newline, carriage return, tab or space. */
    private boolean isWhite(char c) {
        switch (c) {
            case '\n':
            case '\r':
            case '\t':
            case ' ':
                return true;
            default:
                return false;
        }
    }

    /**
     * Passes a triple to the handler.
     *
     * @param a first node of the triple
     * @param b second node of the triple
     * @param c third node of the triple
     */
    protected void triple(ANode a, ANode b, ANode c) {
        arp.triple(a, b, c);
    }

    /** @return the XML context supplied at construction time */
    public AbsXMLContext getXMLContext() {
        return xml;
    }

    /** @return the handler supplied at construction time */
    public XMLHandler getXMLHandler() {
        return arp;
    }

    /**
     * Resolves a URI reference through the given context and returns the
     * result as a string.
     *
     * @param taintMe taint object passed through to the resolver
     * @param x       context used to resolve the reference
     * @param uri     the URI reference to resolve
     * @return the string form of the resolved IRI
     * @throws SAXParseException if resolution reports a problem that the handler escalates
     */
    protected String resolve(Taint taintMe, AbsXMLContext x, String uri)
            throws SAXParseException {
        IRI ref = x.resolveAsURI(arp, taintMe, uri);
        // checkBadURI(taintMe,ref);
        return ref.toString();
    }
}