/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.riot.checker ;

import java.util.Objects ;
import java.util.regex.Pattern ;

import org.apache.jena.JenaRuntime ;
import org.apache.jena.datatypes.RDFDatatype ;
import org.apache.jena.datatypes.xsd.impl.XSDAbstractDateTimeType ;
import org.apache.jena.datatypes.xsd.impl.XSDBaseNumericType ;
import org.apache.jena.datatypes.xsd.impl.XSDDouble ;
import org.apache.jena.datatypes.xsd.impl.XSDFloat ;
import org.apache.jena.graph.Node ;
import org.apache.jena.riot.SysRIOT ;
import org.apache.jena.riot.system.ErrorHandler ;
import org.apache.jena.sparql.graph.NodeConst ;
import org.apache.xerces.impl.dv.InvalidDatatypeValueException ;
import org.apache.xerces.impl.dv.ValidatedInfo ;
import org.apache.xerces.impl.dv.ValidationContext ;
import org.apache.xerces.impl.dv.XSSimpleType ;
import org.apache.xerces.impl.validation.ValidationState ;

/**
 * A {@link NodeChecker} for literals. It checks the combination of language
 * tag and datatype, the syntax of the language tag, and whether the lexical
 * form is valid for the datatype. Problems are reported to an
 * {@link ErrorHandler}; the boolean results say whether the literal passed.
 */
public class CheckerLiterals implements NodeChecker {
    /**
     * A flag to enable the test suite to read bad data. When {@code false},
     * the {@code checkLiteral} overloads taking a lexical form accept every
     * literal without inspecting it.
     */
    public static boolean WarnOnBadLiterals = true ;

    /** Destination for errors and warnings raised by {@link #check}. */
    private final ErrorHandler handler ;

    /**
     * Create a checker that reports to the given handler.
     *
     * @param handler receiver of errors and warnings
     */
    public CheckerLiterals(ErrorHandler handler) {
        this.handler = handler ;
    }

    /**
     * Check a node as a literal.
     *
     * @param node the node to check
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code false} if the node is not a literal (nothing is reported
     *         in that case) or if the literal fails the checks
     */
    @Override
    public boolean check(Node node, long line, long col) {
        return node.isLiteral() && checkLiteral(node, handler, line, col) ;
    }

    /** Approximate shape of a language tag -- not a perfect test. */
    final static private Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*") ;

    /**
     * Check a literal node using its lexical form, language and datatype.
     *
     * @param node the node to check
     * @param handler receiver of errors and warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if the node passed; a non-literal node is reported
     *         as an error and yields {@code false}
     */
    public static boolean checkLiteral(Node node, ErrorHandler handler, long line, long col) {
        if ( !node.isLiteral() ) {
            handler.error("Not a literal: " + node, line, col) ;
            return false ;
        }
        return checkLiteral(node.getLiteralLexicalForm(), node.getLiteralLanguage(), node.getLiteralDatatype(),
                            handler, line, col) ;
    }

    /**
     * Check a literal given as a lexical form and a datatype, with no language.
     *
     * @param lexicalForm the lexical form
     * @param datatype the datatype, may be {@code null}
     * @param handler receiver of errors and warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if the literal passed the checks
     */
    public static boolean checkLiteral(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line,
                                       long col) {
        return checkLiteral(lexicalForm, null, datatype, handler, line, col) ;
    }

    /**
     * Check a literal given as a lexical form and a language, with no datatype.
     *
     * @param lexicalForm the lexical form
     * @param lang the language tag, may be {@code null} or empty
     * @param handler receiver of errors and warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if the literal passed the checks
     */
    public static boolean checkLiteral(String lexicalForm, String lang, ErrorHandler handler, long line, long col) {
        return checkLiteral(lexicalForm, lang, null, handler, line, col) ;
    }

    /**
     * Check a literal given as its lexical form, language and datatype.
     * <p>
     * Without a language, the lexical form is validated against the datatype
     * if there is one. With a language, the datatype must be absent or, when
     * {@code JenaRuntime.isRDF11} is set, may also be rdf:langString; the
     * language tag is then matched against a simple pattern.
     *
     * @param lexicalForm the lexical form
     * @param lang the language tag; {@code null} or empty means no language
     * @param datatype the datatype, may be {@code null}
     * @param handler receiver of errors and warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if the literal passed the checks, or if
     *         {@link #WarnOnBadLiterals} is {@code false}
     */
    public static boolean checkLiteral(String lexicalForm, String lang, RDFDatatype datatype, ErrorHandler handler,
                                       long line, long col) {
        if ( !WarnOnBadLiterals )
            return true ;

        boolean hasLang = lang != null && !lang.equals("") ;
        if ( !hasLang ) {
            // Datatype check (and RDF 1.0 simple literals are always well formed)
            if ( datatype != null )
                return validateByDatatype(lexicalForm, datatype, handler, line, col) ;
            return true ;
        }

        // Has a language.
        if ( JenaRuntime.isRDF11 ) {
            if ( datatype != null && !Objects.equals(datatype.getURI(), NodeConst.rdfLangString.getURI()) ) {
                handler.error("Literal has language but wrong datatype", line, col) ;
                return false ;
            }
        } else {
            if ( datatype != null ) {
                handler.error("Literal has datatype and language", line, col) ;
                return false ;
            }
        }

        // Test language tag format -- not a perfect test.
        // lang is known to be non-null and non-empty here (hasLang is true).
        if ( !langPattern.matcher(lang).matches() ) {
            handler.warning("Language not valid: " + lang, line, col) ;
            return false ;
        }
        return true ;
    }

    /**
     * Validate a lexical form against a datatype. When
     * {@code SysRIOT.StrictXSDLexicialForms} is set, numeric and date/time
     * datatypes get the stricter checks; everything else is validated by the
     * datatype itself.
     *
     * @param lexicalForm the lexical form
     * @param datatype the datatype, not {@code null}
     * @param handler receiver of warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if the lexical form was accepted
     */
    protected static boolean validateByDatatype(String lexicalForm, RDFDatatype datatype, ErrorHandler handler,
                                                long line, long col) {
        if ( SysRIOT.StrictXSDLexicialForms ) {
            if ( datatype instanceof XSDBaseNumericType || datatype instanceof XSDFloat
                 || datatype instanceof XSDDouble )
                return validateByDatatypeNumeric(lexicalForm, datatype, handler, line, col) ;
            if ( datatype instanceof XSDAbstractDateTimeType )
                return validateByDatatypeDateTime(lexicalForm, datatype, handler, line, col) ;
        }
        return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
    }

    /**
     * Validate using {@code datatype.isValid}, warning if the lexical form is
     * rejected.
     *
     * @param lexicalForm the lexical form
     * @param datatype the datatype, not {@code null}
     * @param handler receiver of warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return the result of {@code datatype.isValid(lexicalForm)}
     */
    protected static boolean validateByDatatypeJena(String lexicalForm, RDFDatatype datatype, ErrorHandler handler,
                                                    long line, long col) {
        if ( datatype.isValid(lexicalForm) )
            return true ;
        handler.warning("Lexical form '" + lexicalForm + "' not valid for datatype " + datatype.getURI(), line, col) ;
        return false ;
    }

    /**
     * Strict check for date/time literals: reject a lexical form containing a
     * space, newline or carriage return, then validate by the datatype.
     *
     * @param lexicalForm the lexical form
     * @param datatype the datatype, not {@code null}
     * @param handler receiver of warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if the lexical form was accepted
     */
    protected static boolean validateByDatatypeDateTime(String lexicalForm, RDFDatatype datatype,
                                                        ErrorHandler handler, long line, long col) {
        if ( !checkNoWhitespace(lexicalForm, "XSD date or time literal", handler, line, col) )
            return false ;
        // Jena is already strict for the rest of the date/time syntax.
        return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
    }

    /**
     * Strict check for numeric literals: reject a lexical form containing a
     * space, newline or carriage return, then validate it with the Xerces
     * simple type obtained from {@code datatype.extendedTypeDefinition()}.
     *
     * @param lexicalForm the lexical form
     * @param datatype the datatype; its extended type definition is cast to
     *        {@link XSSimpleType}
     * @param handler receiver of warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if the lexical form was accepted
     */
    protected static boolean validateByDatatypeNumeric(String lexicalForm, RDFDatatype datatype,
                                                       ErrorHandler handler, long line, long col) {
        // Do a white space check as well for numerics.
        if ( !checkNoWhitespace(lexicalForm, "numeric XSD literal", handler, line, col) )
            return false ;

        // From Jena 2.6.3, XSDDatatype.parse
        XSSimpleType typeDeclaration = (XSSimpleType)datatype.extendedTypeDefinition() ;
        try {
            ValidationContext context = new ValidationState() ;
            ValidatedInfo resultInfo = new ValidatedInfo() ;
            // Only the success or failure matters; the parsed value is not needed.
            typeDeclaration.validate(lexicalForm, context, resultInfo) ;
            return true ;
        } catch (InvalidDatatypeValueException e) {
            handler.warning("Lexical form '" + lexicalForm + "' not valid for datatype " + datatype.getURI(), line,
                            col) ;
            return false ;
        }
    }

    /**
     * Warn about the first of space, newline, carriage return (tested in that
     * order) found in the lexical form. No other characters are tested.
     *
     * @param lexicalForm the lexical form
     * @param literalKind description of the literal used in the warning text
     * @param handler receiver of warnings
     * @param line line number passed through to the handler
     * @param col column number passed through to the handler
     * @return {@code true} if none of the three characters occurs
     */
    private static boolean checkNoWhitespace(String lexicalForm, String literalKind, ErrorHandler handler, long line,
                                             long col) {
        String problem ;
        if ( lexicalForm.contains(" ") )
            problem = "Whitespace" ;
        else if ( lexicalForm.contains("\n") )
            problem = "Newline" ;
        else if ( lexicalForm.contains("\r") )
            problem = "Carriage return" ;
        else
            return true ;
        handler.warning(problem + " in " + literalKind + ": '" + lexicalForm + "'", line, col) ;
        return false ;
    }
}