/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.graph.impl;

import java.util.Locale ;
import java.util.Objects ;

import org.apache.jena.JenaRuntime ;
import org.apache.jena.datatypes.* ;
import org.apache.jena.datatypes.xsd.* ;
import org.apache.jena.datatypes.xsd.impl.* ;
import org.apache.jena.rdf.model.impl.Util ;
import org.apache.jena.shared.JenaException ;
import org.apache.jena.shared.impl.JenaParameters ;
import org.apache.jena.vocabulary.RDF ;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Represents the "contents" of a Node_Literal.
 * These contents comprise a lexical form, an optional language tag,
 * an optional datatype structure and a value.
 */
final /*public*/ class LiteralLabelImpl implements LiteralLabel {

    private static final Logger log = LoggerFactory.getLogger( LiteralLabelImpl.class );

    /**
     * The lexical form of the literal, may be null if the literal was
     * created programmatically and has not yet been serialized.
     * It is computed lazily by {@link #getLexicalForm()}.
     */
    private String lexicalForm;

    /**
     * The value form of the literal. It will be null only if the value
     * has not been parsed or if it is an illegal value.
     * For plain literals and xsd:string literals
     * the value is the same as the lexicalForm.
     */
    private Object value;

    /**
     * The type of the literal. A null type indicates a classic "plain" literal.
     * The type of a literal is fixed when it is created.
     */
    private RDFDatatype dtype;

    /**
     * The xml:lang tag. For xsd literals this is ignored and not part of
     * equality. For plain literals it is not ignored. The lang of a
     * literal is fixed when it is created. Never null once a constructor
     * has completed: a null argument is stored as the empty string.
     */
    /*final*/ private String lang;

    /**
     * Indicates whether this is a legal literal. The working group requires
     * ill-formed literals to be treated as syntactically correct so instead
     * of only storing well-formed literals we hack around it this way.
     * N.B. This applies to any literal, not just XML well-formed literals.
     */
    private boolean wellformed = true;

    /**
     * Keeps the message provided by the DatatypeFormatException
     * if parsing failed, for the delayed exception thrown in getValue().
     */
    private String exceptionMsg = null; // Suggested by Andreas Langegger

    //=======================================================================
    // Constructors

    /**
     * Build a typed literal label from its lexical form. The
     * lexical form will be parsed now and the value stored. If
     * the form is not legal and eager literal validation
     * ({@code JenaParameters.enableEagerLiteralValidation}) is enabled,
     * this will throw an exception; otherwise the label is created and
     * flagged as ill-formed, and the failure is reported by {@link #getValue()}.
     *
     * @param lex the lexical form of the literal
     * @param lang the optional language tag, only relevant for plain literals
     * @param dtype the type of the literal, null for old style "plain" literals
     * @throws DatatypeFormatException if lex is not a legal form of dtype
     *         and eager validation is on
     */
    LiteralLabelImpl(String lex, String lang, RDFDatatype dtype) throws DatatypeFormatException {
        setLiteralLabel_1(lex, lang, dtype) ;
    }

    /**
     * Initialise this label from a lexical form: store the fields, parse the
     * lexical form when a datatype is given, then normalize the datatype.
     */
    private void setLiteralLabel_1(String lex, String lang, RDFDatatype dtype) throws DatatypeFormatException {
        this.lexicalForm = lex;
        this.dtype = dtype;
        this.lang = (lang == null ? "" : lang);
        if (dtype == null) {
            // Plain literal: the value is the lexical form itself.
            value = lex;
        } else {
            setValue(lex);
        }
        normalize();
    }

    /**
     * Build a typed literal label from its value form. If the value is a string we
     * assume this is intended to be a lexical form after all.
     *
     * @param value the value of the literal
     * @param lang the optional language tag, only relevant for plain literals
     * @param dtype the type of the literal, null for old style "plain" literals
     * @throws DatatypeFormatException if the value is not acceptable for dtype
     *         and eager validation is on
     */
    LiteralLabelImpl(Object value, String lang, RDFDatatype dtype) throws DatatypeFormatException {
        setLiteralLabel_2(value, lang, dtype) ;
    }

    /**
     * Build a typed literal label from its value form using
     * whatever datatype is currently registered as the default
     * representation for this java class. No language tag is supplied.
     * If no datatype is registered for the value, an ad hoc datatype is
     * invented and registered for the value's class.
     *
     * @param value the literal value to encapsulate
     */
    LiteralLabelImpl( Object value ) {
        RDFDatatype dt = TypeMapper.getInstance().getTypeByValue( value );
        if (dt == null) {
            setWithNewDatatypeForValueClass(value);
        } else {
            setLiteralLabel_2( value, "", dt );
        }
    }

    /**
     * Create and register an {@code AdhocDatatype} for the class of the value,
     * and initialise this label with that datatype, an empty language tag
     * and {@code value.toString()} as the lexical form.
     */
    private void setWithNewDatatypeForValueClass( Object value ) {
        Class<?> c = value.getClass();
        log.warn( "inventing a datatype for {}", c );
        RDFDatatype dt = new AdhocDatatype( c );
        TypeMapper.getInstance().registerDatatype( dt );
        this.lang = "";
        this.dtype = dt;
        this.value = value;
        this.lexicalForm = value.toString();
    }

    /**
     * Initialise this label from a value object. A String value is treated
     * as a lexical form; any other value is canonicalised by the datatype
     * (when there is one) and its validity for the datatype is recorded.
     */
    private void setLiteralLabel_2(Object value, String language, RDFDatatype dtype) throws DatatypeFormatException {
        // Constructor extraction: Preparation for moving into Node_Literal.
        this.dtype = dtype;
        this.lang = (language == null ? "" : language);
        if (value instanceof String) {
            String lex = (String)value;
            lexicalForm = lex;
            if (dtype == null) {
                this.value = lex;
            } else {
                setValue(lex);
            }
        } else {
            this.value = (dtype == null) ? value : dtype.cannonicalise( value );
        }

        normalize();

        if (dtype != null && lexicalForm == null) {
            // We are creating a literal from a java object, check the lexical form of the object is acceptable
            // Done here and uses this.dtype so it can use the normalized type
            wellformed = this.dtype.isValidValue( value );
            if (JenaParameters.enableEagerLiteralValidation && !wellformed) {
                throw new DatatypeFormatException(value.toString(), dtype, "in literal creation");
            }
        }
    }

    /**
     * Old style constructor. Creates either a plain literal or an
     * XMLLiteral.
     *
     * @param s the lexical form
     * @param lang the optional language tag; null is stored as the empty string
     * @param xml If true then s is exclusive canonical XML of type rdf:XMLLiteral, and no checking will be invoked.
     */
    LiteralLabelImpl(String s, String lang, boolean xml) {
        setLiteralLabel_3(s, lang, xml) ;
    }

    /**
     * Initialise this label as either an XML literal (no parsing or checking
     * is done; it is marked well-formed) or a plain literal.
     */
    private void setLiteralLabel_3(String s, String lang, boolean xml) {
        // Constructor extraction: Preparation for moving into Node_Literal.
        this.lexicalForm = s;
        this.lang = (lang == null ? "" : lang);
        if (xml) {
            // XML Literal
            this.dtype = XMLLiteralType.theXMLLiteralType;
            value = s;
            wellformed = true;
        } else {
            // Plain literal
            this.value = s;
            this.dtype = null;
        }
    }

    /**
     * Internal function to set the object value from the lexical form.
     * Requires datatype to be set. When parsing fails and eager checking is
     * off, the label is flagged ill-formed and the exception message is kept
     * for a later {@link #getValue()} call.
     *
     * @throws DatatypeFormatException if the value is ill-formed and
     * eager checking is on.
     */
    private void setValue(String lex) throws DatatypeFormatException {
        try {
            value = dtype.parse(lex);
            wellformed = true;
        } catch (DatatypeFormatException e) {
            if (JenaParameters.enableEagerLiteralValidation) {
                e.fillInStackTrace();
                throw e;
            } else {
                wellformed = false;
                exceptionMsg = e.getMessage();
            }
        }
    }

    /**
     * Normalize the literal. If the value is narrower than the current data type
     * (e.g. value is xsd:date but the type is xsd:datetime) it will narrow
     * the type. If the type is narrower than the value then it may normalize
     * the value (e.g. set the mask of an XSDDateTime).
     * Does nothing when there is no datatype or no value.
     */
    protected void normalize() {
        if (dtype != null && value != null) {
            dtype = dtype.normalizeSubType(value, dtype);
        }
    }

    //=======================================================================
    // Methods

    /**
     * Answer true iff this is a well-formed XML literal.
     */
    @Override
    public boolean isXML() {
        return dtype == XMLLiteralType.theXMLLiteralType && this.wellformed;
    }

    /**
     * Answer true iff this is a well-formed literal with a datatype.
     * A literal without a datatype always answers false here;
     * see {@link #isWellFormedRaw()} for the unqualified flag.
     */
    @Override
    public boolean isWellFormed() {
        return dtype != null && this.wellformed;
    }

    /**
     * Answer the raw well-formedness flag, regardless of whether a datatype is set.
     */
    @Override
    public boolean isWellFormedRaw() {
        return wellformed;
    }

    /**
     * Answer a human-acceptable representation of this literal value.
     * This is NOT intended for a machine-processed result.
     * Double quotes inside the lexical form are backslash-escaped whether or
     * not quoting is requested. A language tag, if present, is appended as
     * {@code @lang}; otherwise the datatype URI is appended as {@code ^^uri},
     * except for xsd:string when running in RDF 1.1 mode.
     *
     * @param quoting whether to wrap the lexical form in double quotes
     */
    @Override
    public String toString(boolean quoting) {
        StringBuilder b = new StringBuilder() ;
        if ( quoting ) {
            b.append('"') ;
        }
        String lex = getLexicalForm() ;
        lex = Util.replace(lex, "\"", "\\\"") ;
        b.append(lex) ;
        if ( quoting ) {
            b.append('"') ;
        }
        if ( lang != null && !lang.equals("") ) {
            b.append("@").append(lang) ;
        } else if ( dtype != null ) {
            if ( ! ( JenaRuntime.isRDF11 && dtype.equals(XSDDatatype.XSDstring) ) ) {
                b.append("^^").append(dtype.getURI()) ;
            }
        }
        return b.toString() ;
    }

    /**
     * Answer the unquoted human-readable form; same as {@code toString(false)}.
     */
    @Override
    public String toString() {
        return toString(false);
    }

    /**
     * Answer the lexical form of this literal, constructing it on-the-fly
     * (and remembering it) if necessary.
     */
    @Override
    public String getLexicalForm() {
        if (lexicalForm == null) {
            lexicalForm = (dtype == null ? value.toString() : dtype.unparse(value));
        }
        return lexicalForm;
    }

    /**
     * Answer the value used to index this literal.
     * In order of precedence: this label itself for a well-formed XML literal;
     * the lexical form plus {@code "@"} plus the lower-cased language tag for a
     * language-tagged literal; the value for a well-formed literal; otherwise
     * the lexical form.
     * TODO Consider pushing indexing decisions down to the datatype
     */
    @Override
    public Object getIndexingValue() {
        return
            isXML() ? this
            : !lang.equals( "" ) ? getLexicalForm() + "@" + lang.toLowerCase(Locale.ROOT)
            : wellformed ? getValue()
            : getLexicalForm()
            ;
    }

    /**
     * Answer the language associated with this literal (the empty string if
     * there's no language).
     */
    @Override
    public String language() {
        return lang;
    }

    /**
     * Answer a suitable instance of a Java class representing this literal's
     * value.
     *
     * @throws DatatypeFormatException if the literal is ill-formed; the
     *         exception carries the message recorded when parsing failed
     */
    @Override
    public Object getValue() throws DatatypeFormatException {
        if (wellformed) {
            return value;
        } else {
            throw new DatatypeFormatException(
                lexicalForm,
                dtype,
                exceptionMsg);
        }
    }

    /**
     * Answer the datatype of this literal, null if it is untyped.
     */
    @Override
    public RDFDatatype getDatatype() {
        return dtype;
    }

    /**
     * Answer the datatype URI of this literal, null if it is untyped.
     */
    @Override
    public String getDatatypeURI() {
        if (dtype == null) {
            return null;
        }
        return dtype.getURI();
    }

    /**
     * Answer true iff this literal is syntactically equal to <code>other</code>:
     * same datatype, same lexical form and same (case-sensitive) language tag.
     * Note: this is <i>not</i> <code>sameValueAs</code>.
     */
    @Override
    public boolean equals(Object other) {
        if ( this == other ) {
            return true ;
        }
        // instanceof is false for null, so no separate null check is needed.
        if ( !(other instanceof LiteralLabel) ) {
            return false;
        }
        LiteralLabel otherLiteral = (LiteralLabel) other;

        boolean typeEquals = Objects.equals(dtype, otherLiteral.getDatatype()) ;
        if ( !typeEquals ) {
            return false ;
        }

        // Don't just use this.lexicalForm -- need to force delayed calculation from values.
        boolean lexEquals = Objects.equals(getLexicalForm(), otherLiteral.getLexicalForm());
        if ( ! lexEquals ) {
            return false ;
        }

        boolean langEquals = Objects.equals(lang, otherLiteral.language()) ;
        if ( ! langEquals ) {
            return false ;
        }
        // Ignore xml flag as it is calculated from the lexical form + datatype
        // Ignore value as lexical form + datatype -> value is a function.
        return true ;
    }

    /**
     * Answer true iff this literal represents the same (abstract) value as the
     * other one.
     *
     * @throws NullPointerException if other is null
     */
    @Override
    public boolean sameValueAs( LiteralLabel other ) {
        return sameValueAs(this, other) ;
    }

    /**
     * Two literal labels are the "same value" if they are the same string,
     * or same language string or same value-by-datatype or .equals (= Same RDF Term).
     *
     * @param lit1 the first literal label, must not be null
     * @param lit2 the second literal label, must not be null
     * @return true iff the two labels denote the same value
     * @throws NullPointerException if either argument is null
     */
    private static boolean sameValueAs(LiteralLabel lit1, LiteralLabel lit2) {
        //return  lit1.sameValueAs(lit2) ;
        Objects.requireNonNull(lit1, "lit1") ;
        Objects.requireNonNull(lit2, "lit2") ;

        // Strings.
        if ( isStringValue(lit1) && isStringValue(lit2) ) {
            // Complete compatibility mode.
            if ( JenaParameters.enablePlainLiteralSameAsString ) {
                return lit1.getLexicalForm().equals(lit2.getLexicalForm()) ;
            } else {
                return lit1.getLexicalForm().equals(lit2.getLexicalForm()) &&
                       Objects.equals(lit1.getDatatype(), lit2.getDatatype()) ;
            }
        }

        if ( isStringValue(lit1) ) {
            return false ;
        }
        if ( isStringValue(lit2) ) {
            return false ;
        }

        // Language tag strings
        if ( isLangString(lit1) && isLangString(lit2) ) {
            String lex1 = lit1.getLexicalForm() ;
            String lex2 = lit2.getLexicalForm() ;
            // Language tags compare case-insensitively for value equality.
            return lex1.equals(lex2) && lit1.language().equalsIgnoreCase(lit2.language()) ;
        }

        if ( isLangString(lit1) ) {
            return false ;
        }
        if ( isLangString(lit2) ) {
            return false ;
        }

        // Both not strings, not lang strings.
        // Datatype set.
        if ( lit1.isWellFormedRaw() && lit2.isWellFormedRaw() ) {
            // Both well-formed.
            return lit1.getDatatype().isEqual(lit1, lit2) ;
        }
        if ( ! lit1.isWellFormedRaw() && ! lit2.isWellFormedRaw() ) {
            // Both ill-formed: fall back to syntactic equality.
            return lit1.equals(lit2) ;
        }
        // One is well formed, the other is not.
        return false ;
    }

    /**
     * Return true if the literal label is a string value (RDF 1.0 and RDF 1.1):
     * either it has no datatype and is not a language string, or its datatype
     * is xsd:string.
     */
    private static boolean isStringValue(LiteralLabel lit) {
        if ( lit.getDatatype() == null ) {
            // RDF 1.0
            return ! isLangString(lit) ;
        }
        if ( lit.getDatatype().equals(XSDDatatype.XSDstring) ) {
            return true;
        }
        return false ;
    }

    /**
     * Return true if the literal label is a language string (RDF 1.0 and RDF 1.1),
     * i.e. it has a non-null, non-empty language tag.
     *
     * @throws JenaException in RDF 1.1 mode, if the label has a language tag
     *         but its datatype is not rdf:langString
     */
    public static boolean isLangString(LiteralLabel lit) {
        String lang = lit.language() ;
        if ( lang == null ) {
            return false ;
        }
        // Check.
        if ( lang.equals("") ) {
            return false ;
        }
        // This is an additional check.
        if ( JenaRuntime.isRDF11 ) {
            if ( ! Objects.equals(lit.getDatatype(), RDF.dtLangString) ) {
                throw new JenaException("Literal with language string which is not rdf:langString: "+lit) ;
            }
        }
        return true ;
    }

    /** Cached hash code; 0 means "not computed yet". */
    private int hash = 0 ;

    /**
     * Answer the hashcode of this literal, derived from its value if it's
     * well-formed and otherwise its lexical form. The datatype, when present,
     * decides how the hash is computed. The result is cached.
     */
    @Override
    public int hashCode() {
        // Literal labels are immutable.
        if ( hash == 0 ) {
            hash = (dtype == null ? getDefaultHashcode() : dtype.getHashCode( this ));
        }
        return hash ;
    }

    /**
     * Answer the default hash value, suitable for datatypes which have values
     * which support hashCode() naturally: it is derived from its value if it is
     * well-formed and otherwise from its lexical form.
     */
    @Override
    public int getDefaultHashcode() {
        return (wellformed ? value : getLexicalForm()).hashCode();
    }
}