/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.riot.process.normalize;

import java.math.BigDecimal ;
import java.math.BigInteger ;
import java.text.DecimalFormat ;
import java.text.DecimalFormatSymbols ;
import java.text.NumberFormat ;
import java.util.Locale ;

import javax.xml.datatype.XMLGregorianCalendar ;

import org.apache.jena.datatypes.RDFDatatype ;
import org.apache.jena.datatypes.xsd.XSDDatatype ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.sparql.expr.NodeValue ;
import org.apache.jena.sparql.graph.NodeConst ;
import org.apache.jena.sparql.util.DateTimeStruct ;

/**
 * Operation to convert the given Node to a normalized form.
 * <p>
 * Each handler receives the literal node together with its lexical form and
 * datatype, and returns either the input node itself (when the lexical form
 * is already in the normalized form) or a newly created literal.
 */
class NormalizeValue
{
    /** Handler that makes no changes and returns the input node */
    private static DatatypeHandler identity = (Node node, String lexicalForm, RDFDatatype datatype) -> node ;

    // What about whitespace for
    //   hexBinary, base64Binary.

    // Auxiliary class of datatype handlers, placed here to avoid static initialization
    // ordering problems (if in CanonicalizeLiteral, all this low-level machinery would
    // need to be in the file before the external API, which I consider bad style). It
    // is a source of obscure bugs.

    // See Normalizevalue2 for "faster" versions (less parsing overhead).

    /** Map the lexical forms "1" and "0" to the true/false constants; anything else is returned unchanged. */
    static DatatypeHandler dtBoolean = (Node node, String lexicalForm, RDFDatatype datatype) -> {
        if ( lexicalForm.equals("1") )
            return NodeConst.nodeTrue ;
        if ( lexicalForm.equals("0") )
            return NodeConst.nodeFalse ;
        return node ;
    } ;

    /**
     * Strip trailing zeros from the fractional seconds of a date/time lexical form,
     * dropping the '.' as well when the fraction is all zeros.
     * The input node is returned when there is nothing to strip.
     */
    static DatatypeHandler dtAnyDateTime = (Node node, String lexicalForm, RDFDatatype datatype) -> {
        // Fast test:
        if ( lexicalForm.indexOf('.') < 0 )
            // No fractional seconds.
            return node ;

        // Could use XMLGregorianCalendar but still need to canonicalize fractional seconds.
        // Record for history.
        if ( false ) {
            XMLGregorianCalendar xcal = NodeValue.xmlDatatypeFactory.newXMLGregorianCalendar(lexicalForm) ;
            if ( xcal.getFractionalSecond() != null ) {
                if ( xcal.getFractionalSecond().compareTo(BigDecimal.ZERO) == 0 )
                    xcal.setFractionalSecond(null) ;
                else
                    // stripTrailingZeros does the right thing on fractional values.
                    xcal.setFractionalSecond(xcal.getFractionalSecond().stripTrailingZeros()) ;
            }
            String lex2 = xcal.toXMLFormat() ;
            if ( lex2.equals(lexicalForm) )
                return node ;
            return NodeFactory.createLiteral(lex2, datatype) ;
        }

        // The only variability for a valid date/dateTime/g* type is:
        //   Second part can have fractional seconds
        //   '.' s+ (if present) represents the fractional seconds;
        DateTimeStruct dts = DateTimeStruct.parseDateTime(lexicalForm) ;
        String seconds = dts.second ;
        int idx = ( seconds == null ) ? -1 : seconds.indexOf('.') ;
        if ( idx < 0 )
            // The '.' was not in the seconds field (not a valid lexical form):
            // nothing this handler can normalize, so leave the node alone.
            return node ;

        // Scan backwards over trailing zeros; i ends on the last character to keep
        // (or on the '.' itself when every fractional digit is zero).
        int i = seconds.length()-1 ;
        for ( ; i > idx ; i-- ) {
            if ( seconds.charAt(i) != '0' )
                break ;
        }

        if ( i == idx )
            // All trailing zeros, drop the '.' as well.
            dts.second = seconds.substring(0, idx) ;
        else if ( i == seconds.length()-1 )
            // No trailing zeros were found: already normalized, keep the same node.
            return node ;
        else
            dts.second = seconds.substring(0, i+1) ;

        String lex2 = dts.toString() ;
        // No need to compare lex2 with lexicalForm here: dts.second was changed.
        return NodeFactory.createLiteral(lex2, datatype) ;
    } ;

    /** xsd:dateTime uses the general date/time handler. */
    static DatatypeHandler dtDateTime = dtAnyDateTime ;

    /**
     * Normalize an integer lexical form: remove a leading '+' and leading zeros.
     * The result always has datatype xsd:integer, so a literal of a derived
     * datatype is replaced by an xsd:integer literal.
     * Throws NumberFormatException if the lexical form is not a parsable integer.
     */
    static DatatypeHandler dtInteger = (Node node, String lexicalForm, RDFDatatype datatype) -> {
        if ( lexicalForm.isEmpty() )
            // Illegal lexical form.
            return node ;

        // If valid and one char, it must be legal.
        // If valid, and two chars and not leading 0, it must be valid.
        String lex2 = lexicalForm ;
        if ( lex2.startsWith("+") )
            lex2 = lex2.substring(1) ;

        if ( lex2.length() > 8 )
            // Maybe larger than an int so do carefully.
            lex2 = new BigInteger(lexicalForm).toString() ;
        else {
            // Avoid object churn.
            int x = Integer.parseInt(lex2) ;
            lex2 = Integer.toString(x) ;
        }

        // If it's a subtype of integer, then output a new node of datatype integer.
        if ( datatype.equals(XSDDatatype.XSDinteger) && lex2.equals(lexicalForm) )
            return node ;
        return NodeFactory.createLiteral(lex2, XSDDatatype.XSDinteger) ;
    } ;

    /**
     * Normalize a decimal lexical form: plain (non-exponent) notation, no trailing
     * zeros, and always containing a decimal point (".0" is appended to whole numbers).
     * Throws NumberFormatException if the lexical form is not a parsable decimal.
     */
    static DatatypeHandler dtDecimal = (Node node, String lexicalForm, RDFDatatype datatype) -> {
        BigDecimal bd = new BigDecimal(lexicalForm).stripTrailingZeros() ;
        String lex2 = bd.toPlainString() ;

        // Ensure there is a "."
        //if ( bd.scale() <= 0 )
        if ( lex2.indexOf('.') == -1 )
            // Must contain .0
            lex2 = lex2+".0" ;
        if ( lex2.equals(lexicalForm) )
            return node ;
        return NodeFactory.createLiteral(lex2, datatype) ;
    } ;

    private static final DecimalFormatSymbols decimalNumberSymbols = new DecimalFormatSymbols(Locale.ROOT) ;

    // DecimalFormat is documented as not synchronized, and the handlers are shared
    // statics, so each thread gets its own formatter instance.
    private static final ThreadLocal<NumberFormat> fmtFloatingPoint =
        ThreadLocal.withInitial(() -> new DecimalFormat("0.0#################E0", decimalNumberSymbols)) ;

    /**
     * Return the normalized lexical form if the argument is one of the XSD
     * special values (INF, -INF, NaN), otherwise null.
     * These are handled before parsing because the Java parsers do not accept "INF".
     * "+INF" (permitted by XSD 1.1) is mapped to "INF".
     */
    private static String specialValueLexicalForm(String lexicalForm) {
        switch ( lexicalForm ) {
            case "INF" :
            case "+INF" :
                return "INF" ;
            case "-INF" :
                return "-INF" ;
            case "NaN" :
                return "NaN" ;
            default :
                return null ;
        }
    }

    /**
     * Format a value in the canonical mantissa/exponent form.
     * Non-finite values are written with the XSD names because DecimalFormat
     * would render them using its own symbols (e.g. the infinity sign).
     */
    private static String formatFloatingPoint(double d) {
        if ( Double.isNaN(d) )
            return "NaN" ;
        if ( Double.isInfinite(d) )
            // Reached when a finite lexical form overflows the value space.
            return ( d > 0 ) ? "INF" : "-INF" ;
        return fmtFloatingPoint.get().format(d) ;
    }

    /* http://www.w3.org/TR/xmlschema-2/#double-canonical-representation */
    /*
     * The canonical representation for double is defined by prohibiting certain
     * options from the Lexical representation (§3.2.5.1). Specifically, the
     * exponent must be indicated by "E". Leading zeroes and the preceding
     * optional "+" sign are prohibited in the exponent. If the exponent is
     * zero, it must be indicated by "E0". For the mantissa, the preceding
     * optional "+" sign is prohibited and the decimal point is required.
     * Leading and trailing zeroes are prohibited subject to the following:
     * number representations must be normalized such that there is a single
     * digit which is non-zero to the left of the decimal point and at least a
     * single digit to the right of the decimal point unless the value being
     * represented is zero. The canonical representation for zero is 0.0E0.
     */

    /**
     * Normalize an xsd:double lexical form to mantissa/exponent form.
     * Throws NumberFormatException if the lexical form is neither a special
     * value nor parsable by {@link Double#parseDouble}.
     */
    static DatatypeHandler dtDouble = (Node node, String lexicalForm, RDFDatatype datatype) -> {
        String lex2 = specialValueLexicalForm(lexicalForm) ;
        if ( lex2 == null ) {
            double d = Double.parseDouble(lexicalForm) ;
            lex2 = formatFloatingPoint(d) ;
        }
        if ( lex2.equals(lexicalForm) )
            return node ;
        return NodeFactory.createLiteral(lex2, datatype) ;
    } ;

    /**
     * Normalize an xsd:float lexical form to mantissa/exponent form.
     * Throws NumberFormatException if the lexical form is neither a special
     * value nor parsable by {@link Float#parseFloat}.
     */
    static DatatypeHandler dtFloat = (Node node, String lexicalForm, RDFDatatype datatype) -> {
        String lex2 = specialValueLexicalForm(lexicalForm) ;
        if ( lex2 == null ) {
            float f = Float.parseFloat(lexicalForm) ;
            // Go via the float's own decimal string. Widening the float straight to
            // double exposes binary noise digits (0.1f would print as
            // 1.0000000149011612E-1 rather than 1.0E-1).
            double d = Double.parseDouble(Float.toString(f)) ;
            lex2 = formatFloatingPoint(d) ;
        }
        if ( lex2.equals(lexicalForm) )
            return node ;
        return NodeFactory.createLiteral(lex2, datatype) ;
    } ;

    /** Convert xsd:string to simple literal */
    static DatatypeHandler dtXSDString = (Node node, String lexicalForm, RDFDatatype datatype) -> NodeFactory.createLiteral(lexicalForm) ;

    /** Convert simple literal to xsd:string */
    static DatatypeHandler dtSimpleLiteral = (Node node, String lexicalForm, RDFDatatype datatype) -> NodeFactory.createLiteral(lexicalForm, datatype) ;

    /** rdf:langString */
    static DatatypeHandler dtLangString = identity ;

    /**
     * rdf:PlainLiteral: split the lexical form at the last '@' into string and
     * language tag. An empty tag gives a literal without language; a lexical
     * form with no '@' is returned unchanged.
     */
    static DatatypeHandler dtPlainLiteral = (Node node, String lexicalForm, RDFDatatype datatype) -> {
        int idx = lexicalForm.lastIndexOf('@') ;
        if ( idx == -1 )
            // Bad rdf:PlainLiteral
            return node ;

        String lex = lexicalForm.substring(0, idx) ;
        if ( idx == lexicalForm.length()-1 )
            return NodeFactory.createLiteral(lex) ;
        String lang = lexicalForm.substring(idx+1) ;
        return NodeFactory.createLiteral(lex, lang) ;
    } ;
}