/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.datatypes.xsd;
import java.io.Reader ;
import java.math.BigDecimal ;
import java.math.BigInteger ;
import java.net.URI ;
import java.util.ArrayList ;
import java.util.List ;
import org.apache.jena.datatypes.BaseDatatype ;
import org.apache.jena.datatypes.DatatypeFormatException ;
import org.apache.jena.datatypes.RDFDatatype ;
import org.apache.jena.datatypes.TypeMapper ;
import org.apache.jena.datatypes.xsd.impl.* ;
import org.apache.jena.graph.impl.LiteralLabel ;
import org.apache.xerces.impl.dv.* ;
import org.apache.xerces.impl.dv.util.Base64 ;
import org.apache.xerces.impl.dv.util.HexBin ;
import org.apache.xerces.impl.dv.xs.DecimalDV ;
import org.apache.xerces.impl.dv.xs.XSSimpleTypeDecl ;
import org.apache.xerces.impl.validation.ValidationState ;
import org.apache.xerces.parsers.XMLGrammarPreparser ;
import org.apache.xerces.util.SymbolHash ;
import org.apache.xerces.xni.grammars.XMLGrammarDescription ;
import org.apache.xerces.xni.grammars.XSGrammar ;
import org.apache.xerces.xni.parser.XMLInputSource ;
import org.apache.xerces.xs.XSConstants ;
import org.apache.xerces.xs.XSNamedMap ;
import org.apache.xerces.xs.XSTypeDefinition ;
/**
 * Representation of an XSD datatype based on the Xerces-2
 * XSD implementation.
 * <p>
 * Each instance wraps a Xerces {@link XSSimpleType} declaration and uses it
 * to validate lexical forms. One shared instance per built-in XSD type is
 * exposed through the {@code XSD...} static constants below; user defined
 * types can be loaded from a schema document with the
 * {@code loadUserDefined} methods.
 */
public class XSDDatatype extends BaseDatatype {

    //=======================================================================
    // Global statics - define single instance for each import XSD type

    /** The xsd namespace */
    public static final String XSD = "http://www.w3.org/2001/XMLSchema";

    /** Datatype representing xsd:float */
    public static final XSDDatatype XSDfloat = new XSDFloat("float", Float.class);

    /** Datatype representing xsd:double */
    public static final XSDDatatype XSDdouble = new XSDDouble("double", Double.class);

    /** Datatype representing xsd:int */
    public static final XSDDatatype XSDint = new XSDBaseNumericType("int", Integer.class);

    /** Datatype representing xsd:long */
    public static final XSDDatatype XSDlong = new XSDBaseNumericType("long", Long.class);

    /** Datatype representing xsd:short */
    public static final XSDDatatype XSDshort = new XSDBaseNumericType("short", Short.class);

    /** Datatype representing xsd:byte */
    public static final XSDDatatype XSDbyte = new XSDByteType("byte", Byte.class);

    /** Datatype representing xsd:unsignedByte */
    public static final XSDDatatype XSDunsignedByte = new XSDBaseNumericType("unsignedByte");

    /** Datatype representing xsd:unsignedShort */
    public static final XSDDatatype XSDunsignedShort = new XSDBaseNumericType("unsignedShort");

    /** Datatype representing xsd:unsignedInt */
    public static final XSDDatatype XSDunsignedInt = new XSDBaseNumericType("unsignedInt");

    /** Datatype representing xsd:unsignedLong */
    public static final XSDDatatype XSDunsignedLong = new XSDBaseNumericType("unsignedLong");

    /** Datatype representing xsd:decimal */
    public static final XSDDatatype XSDdecimal = new XSDBaseNumericType("decimal", BigDecimal.class);

    /** Datatype representing xsd:integer */
    public static final XSDDatatype XSDinteger = new XSDBaseNumericType("integer", BigInteger.class);

    /** Datatype representing xsd:nonPositiveInteger */
    public static final XSDDatatype XSDnonPositiveInteger = new XSDBaseNumericType("nonPositiveInteger");

    /** Datatype representing xsd:nonNegativeInteger */
    public static final XSDDatatype XSDnonNegativeInteger = new XSDBaseNumericType("nonNegativeInteger");

    /** Datatype representing xsd:positiveInteger */
    public static final XSDDatatype XSDpositiveInteger = new XSDBaseNumericType("positiveInteger");

    /** Datatype representing xsd:negativeInteger */
    public static final XSDDatatype XSDnegativeInteger = new XSDBaseNumericType("negativeInteger");

    /** Datatype representing xsd:boolean */
    public static final XSDDatatype XSDboolean = new XSDDatatype("boolean", Boolean.class);

    /** Datatype representing xsd:string */
    public static final XSDDatatype XSDstring = new XSDBaseStringType("string", String.class);

    /** Datatype representing xsd:normalizedString */
    public static final XSDDatatype XSDnormalizedString = new XSDBaseStringType("normalizedString", String.class);

    /** Datatype representing xsd:anyURI */
    public static final XSDDatatype XSDanyURI = new XSDPlainType("anyURI", URI.class);

    /** Datatype representing xsd:token */
    public static final XSDDatatype XSDtoken = new XSDBaseStringType("token");

    /** Datatype representing xsd:Name */
    public static final XSDDatatype XSDName = new XSDBaseStringType("Name");

    /** Datatype representing xsd:QName */
    public static final XSDDatatype XSDQName = new XSDPlainType("QName");

    /** Datatype representing xsd:language */
    public static final XSDDatatype XSDlanguage = new XSDBaseStringType("language");

    /** Datatype representing xsd:NMTOKEN */
    public static final XSDDatatype XSDNMTOKEN = new XSDBaseStringType("NMTOKEN");

    /** Datatype representing xsd:ENTITY */
    public static final XSDDatatype XSDENTITY = new XSDBaseStringType("ENTITY");

    /** Datatype representing xsd:ID */
    public static final XSDDatatype XSDID = new XSDBaseStringType("ID");

    /** Datatype representing xsd:NCName */
    public static final XSDDatatype XSDNCName = new XSDBaseStringType("NCName");

    /** Datatype representing xsd:IDREF */
    public static final XSDDatatype XSDIDREF = new XSDPlainType("IDREF");

    /** Datatype representing xsd:NOTATION */
    public static final XSDDatatype XSDNOTATION = new XSDPlainType("NOTATION");

    /** Datatype representing xsd:hexBinary */
    public static final XSDDatatype XSDhexBinary = new XSDhexBinary("hexBinary");

    /** Datatype representing xsd:base64Binary */
    public static final XSDDatatype XSDbase64Binary = new XSDbase64Binary("base64Binary");

    /** Datatype representing xsd:date */
    public static final XSDDatatype XSDdate = new XSDDateType("date");

    /** Datatype representing xsd:time */
    public static final XSDDatatype XSDtime = new XSDTimeType("time");

    /** Datatype representing xsd:dateTime */
    public static final XSDDatatype XSDdateTime = new XSDDateTimeType("dateTime");

    /** Datatype representing xsd:dateTimeStamp */
    public static final XSDDatatype XSDdateTimeStamp = new XSDDateTimeStampType("dateTimeStamp");

    /** Datatype representing xsd:duration */
    public static final XSDDatatype XSDduration = new XSDDurationType();

    /** Datatype representing xsd:dayTimeDuration */
    public static final XSDDatatype XSDdayTimeDuration = new XSDDayTimeDurationType();

    /** Datatype representing xsd:yearMonthDuration */
    public static final XSDDatatype XSDyearMonthDuration = new XSDYearMonthDurationType();

    /** Datatype representing xsd:gDay */
    public static final XSDDatatype XSDgDay = new XSDDayType("gDay");

    /** Datatype representing xsd:gMonth */
    public static final XSDDatatype XSDgMonth = new XSDMonthType("gMonth");

    /** Datatype representing xsd:gYear */
    public static final XSDDatatype XSDgYear = new XSDYearType("gYear");

    /** Datatype representing xsd:gYearMonth */
    public static final XSDDatatype XSDgYearMonth = new XSDYearMonthType("gYearMonth");

    /** Datatype representing xsd:gMonthDay */
    public static final XSDDatatype XSDgMonthDay = new XSDMonthDayType("gMonthDay");

    // The following are list rather than simple types and are omitted for now

    //    /** Datatype representing xsd:ENTITIES */
    //    public static final XSDDatatype XSDENTITIES = new XSDBaseStringType("ENTITIES");
    //
    //    /** Datatype representing xsd:NMTOKENS */
    //    public static final XSDDatatype XSDNMTOKENS = new XSDBaseStringType("NMTOKENS");
    //
    //    /** Datatype representing xsd:IDREFS */
    //    public static final XSDDatatype XSDIDREFS = new XSDBaseStringType("IDREFS");

    //=======================================================================
    // local variables

    /** the Xerces internal type declaration */
    XSSimpleType typeDeclaration;

    /** the corresponding java primitive class, if any */
    protected Class<?> javaClass = null;

    /** Used to access the values and facets of any of the decimal numeric types */
    static final DecimalDV decimalDV = new DecimalDV();

    //=======================================================================
    // Methods

    /**
     * Constructor.
     * @param typeName the name of the XSD type to be instantiated, this is
     * used to lookup a type definition from the Xerces schema factory.
     */
    public XSDDatatype(String typeName) {
        super("");
        typeDeclaration = SchemaDVFactory.getInstance().getBuiltInType(typeName);
        // The datatype URI is the declaration's namespace and local name joined by '#'.
        uri = typeDeclaration.getNamespace() + "#" + typeDeclaration.getName();
    }

    /**
     * Constructor.
     * @param typeName the name of the XSD type to be instantiated, this is
     * used to lookup a type definition from the Xerces schema factory.
     * @param javaClass the java class for which this xsd type is to be
     * treated as the canonical representation
     */
    public XSDDatatype(String typeName, Class<?> javaClass) {
        this(typeName);
        this.javaClass = javaClass;
    }

    /**
     * Constructor used when loading in external user defined XSD types via
     * {@code loadUserDefined}.
     *
     * @param xstype the XSSimpleType definition to be wrapped
     * @param namespace the namespace for the type (used because the grammar loading doesn't seem to keep that)
     */
    protected XSDDatatype(XSSimpleType xstype, String namespace) {
        super("");
        typeDeclaration = xstype;
        this.uri = namespace + "#" + typeDeclaration.getName();
    }

    /**
     * Parse a lexical form of this datatype to a value.
     * The lexical form is validated against the wrapped Xerces type declaration
     * and the validation result is then converted to a java value by
     * {@link #convertValidatedDataValue(ValidatedInfo)}.
     *
     * @param lexicalForm the lexical form to parse
     * @return the java value corresponding to the lexical form
     * @throws DatatypeFormatException if the lexical form is not legal
     */
    @Override
    public Object parse(String lexicalForm) throws DatatypeFormatException {
        try {
            ValidationContext context = new ValidationState();
            ValidatedInfo resultInfo = new ValidatedInfo();
            typeDeclaration.validate(lexicalForm, context, resultInfo);
            return convertValidatedDataValue(resultInfo);
        } catch (InvalidDatatypeValueException e) {
            throw new DatatypeFormatException(lexicalForm, this, "during parse -" + e);
        }
    }

    /**
     * Convert a value of this datatype out
     * to lexical form. This default implementation uses the
     * value's {@code toString()}.
     *
     * @param value the value to convert
     * @return the lexical form of the value
     */
    @Override
    public String unparse(Object value) {
        return value.toString();
    }

    /**
     * Compares two instances of values of the given datatype.
     * The comparison of the two underlying values is delegated to the
     * wrapped Xerces type declaration.
     */
    @Override
    public boolean isEqual(LiteralLabel value1, LiteralLabel value2) {
        return typeDeclaration.isEqual(value1.getValue(), value2.getValue());
    }

    /**
     * If this datatype is used as the canonical representation
     * for a particular java datatype then return that java type,
     * otherwise returns null.
     */
    @Override
    public Class< ? > getJavaClass() {
        return javaClass;
    }

    /**
     * Returns the Xerces datatype representation for this type, this
     * is an XSSimpleType, in fact an XSSimpleTypeDecl.
     */
    @Override
    public Object extendedTypeDefinition() {
        return typeDeclaration;
    }

    /**
     * Create and register a set of types specified in a user schema file.
     * We use the (illegal) DAML+OIL approach that the uriref of the type
     * is the url of the schema file with fragment ID corresponding the
     * the name of the type.
     *
     * @param uri the absolute uri of the schema file to be loaded
     * @param reader the Reader stream onto the file (useful if you wish to load a cached copy of the schema file)
     * @param encoding the encoding of the source file (can be null)
     * @param tm the type mapper into which to load the definitions
     * @return a List of strings giving the uri's of the newly defined datatypes
     * @throws DatatypeFormatException if there is a problem during load (note that we use Xerces
     * in default mode for load which may provide diagnostic output direct to stderr)
     */
    public static List<String> loadUserDefined(String uri, Reader reader, String encoding, TypeMapper tm) throws DatatypeFormatException {
        return loadUserDefined(new XMLInputSource(null, uri, uri, reader, encoding), tm);
    }

    /**
     * Create and register a set of types specified in a user schema file.
     * We use the (illegal) DAML+OIL approach that the uriref of the type
     * is the url of the schema file with fragment ID corresponding the
     * the name of the type.
     *
     * @param uri the absolute uri of the schema file to be loaded, this should be a resolvable URL
     * @param encoding the encoding of the source file (can be null)
     * @param tm the type mapper into which to load the definitions
     * @return a List of strings giving the uri's of the newly defined datatypes
     * @throws DatatypeFormatException if there is a problem during load (note that we use Xerces
     * in default mode for load which may provide diagnostic output direct to stderr)
     */
    public static List<String> loadUserDefined(String uri, String encoding, TypeMapper tm) throws DatatypeFormatException {
        XMLInputSource source = new XMLInputSource(null, uri, uri);
        // The three-argument XMLInputSource constructor has no encoding slot, so a
        // caller-supplied encoding must be set explicitly or it is silently dropped.
        if (encoding != null) {
            source.setEncoding(encoding);
        }
        return loadUserDefined(source, tm);
    }

    /**
     * Internal implementation of loadUserDefined.
     * Preparses the schema, then wraps and registers every simple type in the
     * resulting model whose namespace is not the XSD namespace.
     *
     * @param source the Xerces input source describing the schema document; its
     * system id is used as the namespace of the created datatypes
     * @param tm the type mapper into which to load the definitions
     * @return a List of strings giving the uri's of the newly defined datatypes
     * @throws DatatypeFormatException if there is a problem during load (note that we use Xerces
     * in default mode for load which may provide diagnostic output direct to stderr)
     */
    private static List<String> loadUserDefined(XMLInputSource source, TypeMapper tm) throws DatatypeFormatException {
        XMLGrammarPreparser parser = new XMLGrammarPreparser();
        parser.registerPreparser(XMLGrammarDescription.XML_SCHEMA, null);
        try {
            XSGrammar xsg = (XSGrammar) parser.preparseGrammar(XMLGrammarDescription.XML_SCHEMA, source);
            org.apache.xerces.xs.XSModel xsm = xsg.toXSModel();
            XSNamedMap map = xsm.getComponents(XSTypeDefinition.SIMPLE_TYPE);
            int numDefs = map.getLength();
            List<String> names = new ArrayList<>(numDefs);
            for (int i = 0; i < numDefs; i++) {
                XSSimpleType xstype = (XSSimpleType) map.item(i);
                // Filter built in types - only needed for 2.6.0
                if ( ! XSD.equals(xstype.getNamespace()) ) {
                    XSDDatatype definedType = new XSDGenericType(xstype, source.getSystemId());
                    tm.registerDatatype(definedType);
                    names.add(definedType.getURI());
                }
            }
            return names;
        } catch (Exception e) {
            // Report the failure through the exception only (no stderr dump) and
            // keep the original exception as the cause so the stack trace survives.
            DatatypeFormatException failure = new DatatypeFormatException(e.toString());
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Convert a validated xerces data value into the corresponding java data value.
     * This function is currently the most blatantly xerces-version dependent part
     * of this subsystem. In many cases it also involves reparsing data which has
     * already been parsed as part of the validation.
     *
     * @param validatedInfo a fully populated Xerces data validation context
     * @return the appropriate java wrapper type
     */
    Object convertValidatedDataValue(ValidatedInfo validatedInfo) throws DatatypeFormatException {
        switch (validatedInfo.actualValueType) {
            case XSConstants.BASE64BINARY_DT:
                return Base64.decode(validatedInfo.normalizedValue);

            case XSConstants.BOOLEAN_DT:
                return validatedInfo.actualValue;

            case XSConstants.HEXBINARY_DT:
                return HexBin.decode(validatedInfo.normalizedValue);

            // Types that are always returned as an Integer.
            case XSConstants.UNSIGNEDSHORT_DT:
            case XSConstants.INT_DT:
            case XSConstants.UNSIGNEDBYTE_DT:
            case XSConstants.SHORT_DT:
            case XSConstants.BYTE_DT:
                return Integer.valueOf(trimPlus(validatedInfo.normalizedValue));

            // Types that are returned as an Integer or a Long depending on magnitude.
            case XSConstants.UNSIGNEDINT_DT:
            case XSConstants.LONG_DT:
                return suitableInteger( trimPlus(validatedInfo.normalizedValue) );

            case XSConstants.UNSIGNEDLONG_DT:
            case XSConstants.INTEGER_DT:
            case XSConstants.NONNEGATIVEINTEGER_DT:
            case XSConstants.NONPOSITIVEINTEGER_DT:
            case XSConstants.POSITIVEINTEGER_DT:
            case XSConstants.NEGATIVEINTEGER_DT:
            case XSConstants.DECIMAL_DT: {
                Object xsdValue = validatedInfo.actualValue;
                if (decimalDV.getTotalDigits(xsdValue) == 0) {
                    return Integer.valueOf(0);
                }
                if (decimalDV.getFractionDigits(xsdValue) >= 1) {
                    BigDecimal value = new BigDecimal(trimPlus(validatedInfo.normalizedValue));
                    return XSDdecimal.cannonicalise( value );
                }
                // Can have 0 fractionDigits but still have a trailing .000
                String lexical = trimPlus(validatedInfo.normalizedValue);
                int dotx = lexical.indexOf('.');
                if (dotx != -1) {
                    lexical = lexical.substring(0, dotx);
                }
                // More than 18 digits is handed to BigInteger rather than parsed as a long.
                if (decimalDV.getTotalDigits(xsdValue) > 18) {
                    return new BigInteger(lexical);
                } else {
                    return suitableInteger( lexical );
                }
            }

            default:
                return parseValidated(validatedInfo.normalizedValue);
        }
    }

    /**
     * Parse the lexical form as a long and return it in the narrowest of
     * Integer or Long that can hold it.
     *
     * @param lexical the lexical form of an integer, parsed with {@link Long#parseLong(String)}
     * @return an Integer if the value is within int range, otherwise a Long
     */
    protected Number suitableInteger( String lexical )
    {
        long number = Long.parseLong( lexical );
        return suitableInteger( number );
    }

    /**
     * Return the given number in the narrowest of Integer or Long that can hold it.
     *
     * @param number the value to wrap
     * @return an Integer if the value is within int range, otherwise a Long
     */
    protected static Number suitableInteger( long number )
    {
        if (number > Integer.MAX_VALUE || number < Integer.MIN_VALUE)
            return Long.valueOf( number );
        else
            return Integer.valueOf( (int) number );
    }

    /**
     * Parse a validated lexical form. Subclasses which use the default
     * parse implementation and are not covered by the explicit convertValidatedData
     * cases should override this. This default implementation returns the
     * lexical form unchanged.
     */
    public Object parseValidated(String lexical) {
        return lexical;
    }

    /**
     * Test whether the given LiteralLabel is a valid instance
     * of this datatype. This takes into account typing information
     * as well as lexical form - for example an xsd:string is
     * never considered valid as an xsd:integer (even if it is
     * lexically legal like "1").
     */
    @Override
    public boolean isValidLiteral(LiteralLabel lit) {
        return isBaseTypeCompatible(lit) && isValid(lit.getLexicalForm());
    }

    /**
     * Test if the given typed value is in the right partition of the XSD type space.
     * If this test passes then if the typed value has a legal lexical form for
     * this type then it is a legal instance.
     */
    public boolean isBaseTypeCompatible(LiteralLabel lit) {
        XSTypeDefinition base = getFoundingType();
        RDFDatatype litDT = lit.getDatatype();
        if (litDT instanceof XSDDatatype) {
            XSTypeDefinition litBase = ((XSDDatatype)litDT).getFoundingType();
            return base.equals(litBase);
        } else if (litDT == null && lit.language().equals("")) {
            // Special RDF case, a plain literal is type compatible with an xsd:string-based type
            return base.equals(XSDstring.typeDeclaration);
        } else {
            return false;
        }
    }

    /**
     * Return the most specific type below xsd:anySimpleType that this type is derived from.
     */
    private XSTypeDefinition getFoundingType() {
        XSTypeDefinition founding = typeDeclaration;
        XSTypeDefinition parent = founding.getBaseType();
        if ( parent == null )
            // it is xsd:anySimpleType
            return founding;
        // Walk up the base type chain, stopping one step below the root
        // (the type whose own base type is null).
        while (parent.getBaseType() != null) {
            founding = parent;
            parent = parent.getBaseType();
        }
        return founding;
    }

    /**
     * Helper function to return the substring of a validated number string
     * omitting any leading + sign.
     * <p>
     * Only a '+' that is preceded by nothing other than whitespace is treated
     * as a sign; any text up to and including it is dropped. A '+' occurring
     * later in the string (for example in an exponent such as "1E+5") is left
     * untouched and the string is returned unchanged.
     *
     * @param str the number string, must not be null
     * @return the string without its leading + sign
     */
    public static String trimPlus(String str) {
        int i = str.indexOf('+');
        if (i == -1) {
            return str;
        }
        // Anything other than whitespace before the '+' means it is not a leading sign.
        for (int k = 0; k < i; k++) {
            if (!Character.isWhitespace(str.charAt(k))) {
                return str;
            }
        }
        return str.substring(i+1);
    }

    /**
     * Add all of the XSD pre-defined simple types to the given
     * type mapper registry.
     *
     * @param tm the type mapper in which to register the types
     */
    public static void loadXSDSimpleTypes(TypeMapper tm) {
        tm.registerDatatype(new XSDDatatype("anySimpleType"));

        tm.registerDatatype(XSDdecimal);
        tm.registerDatatype(XSDinteger);
        tm.registerDatatype(XSDnonPositiveInteger);
        tm.registerDatatype(XSDnonNegativeInteger);
        tm.registerDatatype(XSDpositiveInteger);
        tm.registerDatatype(XSDnegativeInteger);

        tm.registerDatatype(XSDbyte);
        tm.registerDatatype(XSDunsignedByte);
        tm.registerDatatype(XSDdouble);
        tm.registerDatatype(XSDfloat);
        tm.registerDatatype(XSDlong);
        tm.registerDatatype(XSDunsignedInt);
        tm.registerDatatype(XSDunsignedShort);
        tm.registerDatatype(XSDunsignedLong);
        tm.registerDatatype(XSDint);
        tm.registerDatatype(XSDshort);

        tm.registerDatatype(XSDboolean);
        tm.registerDatatype(XSDbase64Binary);
        tm.registerDatatype(XSDhexBinary);

        tm.registerDatatype(XSDdate);
        tm.registerDatatype(XSDtime);
        tm.registerDatatype(XSDdateTime);
        tm.registerDatatype(XSDdateTimeStamp);
        tm.registerDatatype(XSDduration);
        tm.registerDatatype(XSDyearMonthDuration);
        tm.registerDatatype(XSDdayTimeDuration) ;
        tm.registerDatatype(XSDgYearMonth);
        tm.registerDatatype(XSDgMonthDay);
        tm.registerDatatype(XSDgMonth);
        tm.registerDatatype(XSDgDay);
        tm.registerDatatype(XSDgYear);

        tm.registerDatatype(XSDnormalizedString);
        tm.registerDatatype(XSDstring);
        tm.registerDatatype(XSDanyURI);

        tm.registerDatatype(XSDtoken);
        tm.registerDatatype(XSDName);
        tm.registerDatatype(XSDlanguage);
        tm.registerDatatype(XSDQName);
        tm.registerDatatype(XSDNMTOKEN);
        tm.registerDatatype(XSDID);
        tm.registerDatatype(XSDENTITY);
        tm.registerDatatype(XSDNCName);
        tm.registerDatatype(XSDNOTATION);
        tm.registerDatatype(XSDIDREF);

        //        tm.registerDatatype(XSDIDREFS);
        //        tm.registerDatatype(XSDENTITIES);
        //        tm.registerDatatype(XSDNMTOKENS);
    }

    /**
     * Generic XML Schema datatype (outside the xsd: namespace)
     * <p>
     * Datatype template that adapts any response back from Xerces type parsing
     * to an appropriate java representation. This is primarily used in creating
     * user defined types - the built in types have a fixed mapping.
     */
    public static class XSDGenericType extends XSDDatatype {

        /**
         * Hidden constructor used when loading in external user defined XSD
         * types
         *
         * @param xstype
         *            the XSSimpleType definition to be wrapped
         * @param namespace
         *            the namespace for the type (used because the grammar
         *            loading doesn't seem to keep that)
         */
        XSDGenericType(XSSimpleType xstype, String namespace) {
            super(xstype, namespace);
        }
    }

    /**
     * Used to bootstrap the above initialization code: prints a
     * {@code tm.registerDatatype(...)} line for every built-in type known to
     * the Xerces schema factory that is an XSSimpleTypeDecl, and a " - "
     * prefixed line for any other entry.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SymbolHash types = SchemaDVFactory.getInstance().getBuiltInTypes();
        int len = types.getLength();
        Object[] values = new Object[len];
        types.getValues(values, 0);
        for ( Object value : values ) {
            if ( value instanceof XSSimpleTypeDecl ) {
                XSSimpleTypeDecl decl = (XSSimpleTypeDecl)value ;
                System.out.println("tm.registerDatatype(new XSDDatatype(\"" + decl.getName() + "\"));") ;
            } else {
                System.out.println(" - " + value) ;
            }
        }
    }
}