/* * eXist Open Source Native XML Database * Copyright (C) 2001-2007 The eXist Project * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * $Id$ */ package org.exist.xquery.value; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.text.Collator; import java.util.BitSet; import org.exist.xmldb.XmldbURI; import org.exist.xquery.Constants; import org.exist.xquery.XPathException; import org.exist.xquery.functions.FunEscapeURI; /** * @author Wolfgang Meier (wolfgang@exist-db.org) */ public class AnyURIValue extends AtomicValue { static BitSet needEncoding; static final int caseDiff = ('a' - 'A'); static { needEncoding = new BitSet(128); int i; for (i = 0x00; i <= 0x1F; i++) { needEncoding.set(i); } needEncoding.set(0x7F); needEncoding.set(0x20); needEncoding.set('<'); needEncoding.set('>'); needEncoding.set('"'); needEncoding.set('{'); needEncoding.set('}'); needEncoding.set('|'); needEncoding.set('\\'); needEncoding.set('^'); needEncoding.set('`'); } public static final AnyURIValue EMPTY_URI = new AnyURIValue(); /* Very important - this string does not need to be a valid uri. * * From XML Linking (see below for link), with some wording changes: * The value of the [anyURI] must be a URI reference as defined in * [IETF RFC 2396], or must result in a URI reference after the escaping * procedure described below is applied. The procedure is applied when * passing the URI reference to a URI resolver. * * Some characters are disallowed in URI references, even if they are * allowed in XML; the disallowed characters include all non-ASCII * characters, plus the excluded characters listed in Section 2.4 of * [IETF RFC 2396], except for the number sign (#) and percent sign (%) * and the square bracket characters re-allowed in [IETF RFC 2732]. * Disallowed characters must be escaped as follows: * 1. Each disallowed character is converted to UTF-8 [IETF RFC 2279] * as one or more bytes. * 2. Any bytes corresponding to a disallowed character are escaped * with the URI escaping mechanism (that is, converted to %HH, * where HH is the hexadecimal notation of the byte value). * 3. The original character is replaced by the resulting character * sequence. * * See Section 5.4 of XML Linking: * http://www.w3.org/TR/2000/PR-xlink-20001220/#link-locators */ private String uri; //TODO: save escaped(URI) version? AnyURIValue() { this.uri = ""; } public AnyURIValue(URI uri) { this.uri = uri.toString(); } public AnyURIValue(XmldbURI uri) { this.uri = uri.toString(); } public AnyURIValue(String s) throws XPathException { String escapedString = escape(StringValue.trimWhitespace(s)); try { new URI(escapedString); } catch (URISyntaxException e) { try { XmldbURI.xmldbUriFor(escapedString); } catch (URISyntaxException ex) { throw new XPathException( "Type error: the given string '" + s + "' cannot be cast to " + Type.getTypeName(getType())); } } /* The URI value is whitespace normalized according to the rules for the xs:anyURI type in [XML Schema]. <xs:simpleType name="anyURI" id="anyURI"> ... <xs:restriction base="xs:anySimpleType"> <xs:whiteSpace fixed="true" value="collapse" id="anyURI.whiteSpace"/> </xs:restriction> </xs:simpleType> */ //TODO : find a way to perform the 3 operations at the same time //s = StringValue.expand(s); //Should we have character entities s = StringValue.normalizeWhitespace(s); //Should we have TABs, new lines... this.uri = StringValue.collapseWhitespace(s); } /** * This function accepts a String representation of an xs:anyURI and applies * the escaping method described in Section 5.4 of XML Linking (http://www.w3.org/TR/2000/PR-xlink-20001220/#link-locators) * to turn it into a valid URI * * @see <a href="http://www.w3.org/TR/2000/PR-xlink-20001220/#link-locators">http://www.w3.org/TR/2000/PR-xlink-20001220/#link-locators</A> * @param uri The xs:anyURI to escape into a valid URI * @return An escaped string representation of the provided xs:anyURI */ public static String escape(String uri) { return FunEscapeURI.escape(uri, false); //TODO: TEST TEST TEST! // // basically copied from URLEncoder.encode // try { // boolean needToChange = false; // boolean wroteUnencodedChar = false; // int maxBytesPerChar = 10; // rather arbitrary limit, but safe for now // StringBuffer out = new StringBuffer(uri.length()); // ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar); // // OutputStreamWriter writer = new OutputStreamWriter(buf, "UTF-8"); // // for (int i = 0; i < uri.length(); i++) { // int c = (int) uri.charAt(i); // if (c>127 || needEncoding.get(c)) { // try { // if (wroteUnencodedChar) { // Fix for 4407610 // writer = new OutputStreamWriter(buf, "UTF-8"); // wroteUnencodedChar = false; // } // writer.write(c); // /* // * If this character represents the start of a Unicode // * surrogate pair, then pass in two characters. It's not // * clear what should be done if a bytes reserved in the // * surrogate pairs range occurs outside of a legal // * surrogate pair. For now, just treat it as if it were // * any other character. // */ // if (c >= 0xD800 && c <= 0xDBFF) { // /* // System.out.println(Integer.toHexString(c) // + " is high surrogate"); // */ // if ( (i+1) < uri.length()) { // int d = (int) uri.charAt(i+1); // /* // System.out.println("\tExamining " // + Integer.toHexString(d)); // */ // if (d >= 0xDC00 && d <= 0xDFFF) { // /* // System.out.println("\t" // + Integer.toHexString(d) // + " is low surrogate"); // */ // writer.write(d); // i++; // } // } // } // writer.flush(); // } catch(IOException e) { // buf.reset(); // continue; // } // byte[] ba = buf.toByteArray(); // for (int j = 0; j < ba.length; j++) { // out.append('%'); // char ch = Character.forDigit((ba[j] >> 4) & 0xF, 16); // // converting to use uppercase letter as part of // // the hex value if ch is a letter. // if (Character.isLetter(ch)) { // ch -= caseDiff; // } // out.append(ch); // ch = Character.forDigit(ba[j] & 0xF, 16); // if (Character.isLetter(ch)) { // ch -= caseDiff; // } // out.append(ch); // } // buf.reset(); // needToChange = true; // } else { // out.append((char)c); // wroteUnencodedChar = true; // } // } // // return (needToChange? out.toString() : uri); // } catch(UnsupportedEncodingException e) { // throw new RuntimeException(e); // } } /* (non-Javadoc) * @see org.exist.xquery.value.AtomicValue#getType() */ public int getType() { return Type.ANY_URI; } /* (non-Javadoc) * @see org.exist.xquery.value.Sequence#getStringValue() */ public String getStringValue() throws XPathException { return uri; } public boolean effectiveBooleanValue() throws XPathException { // If its operand is a singleton value of type xs:string, xs:anyURI, xs:untypedAtomic, //or a type derived from one of these, fn:boolean returns false if the operand value has zero length; otherwise it returns true. return uri.length() > 0; } /* (non-Javadoc) * @see org.exist.xquery.value.Sequence#convertTo(int) */ public AtomicValue convertTo(int requiredType) throws XPathException { switch (requiredType) { case Type.ITEM : case Type.ATOMIC : case Type.ANY_URI : return this; case Type.STRING : return new StringValue(uri); case Type.UNTYPED_ATOMIC : return new UntypedAtomicValue(getStringValue()); default : throw new XPathException( "Type error: cannot cast xs:anyURI to " + Type.getTypeName(requiredType)); } } /* (non-Javadoc) * @see org.exist.xquery.value.AtomicValue#compareTo(int, org.exist.xquery.value.AtomicValue) */ public boolean compareTo(Collator collator, int operator, AtomicValue other) throws XPathException { if (other.getType() == Type.ANY_URI) { String otherURI = other.getStringValue(); int cmp = uri.compareTo(otherURI); switch (operator) { case Constants.EQ : return cmp == 0; case Constants.NEQ : return cmp != 0; case Constants.GT : return cmp > 0; case Constants.GTEQ : return cmp >= 0; case Constants.LT : return cmp < 0; case Constants.LTEQ : return cmp <= 0; default : throw new XPathException( "XPTY0004: cannot apply operator " + Constants.OPS[operator] + " to xs:anyURI"); } } else return compareTo(collator, operator, other.convertTo(Type.ANY_URI)); } /* (non-Javadoc) * @see org.exist.xquery.value.AtomicValue#compareTo(org.exist.xquery.value.AtomicValue) */ public int compareTo(Collator collator, AtomicValue other) throws XPathException { if (other.getType() == Type.ANY_URI) { String otherURI = other.getStringValue(); return uri.compareTo(otherURI); } else { return compareTo(collator, other.convertTo(Type.ANY_URI)); } } /* (non-Javadoc) * @see org.exist.xquery.value.AtomicValue#max(org.exist.xquery.value.AtomicValue) */ public AtomicValue max(Collator collator, AtomicValue other) throws XPathException { throw new XPathException("max is not supported for values of type xs:anyURI"); } /* (non-Javadoc) * @see org.exist.xquery.value.AtomicValue#min(org.exist.xquery.value.AtomicValue) */ public AtomicValue min(Collator collator, AtomicValue other) throws XPathException { throw new XPathException("min is not supported for values of type xs:anyURI"); } /* (non-Javadoc) * @see org.exist.xquery.value.Item#conversionPreference(java.lang.Class) */ public int conversionPreference(Class javaClass) { if (javaClass.isAssignableFrom(AnyURIValue.class)) return 0; if (javaClass == XmldbURI.class) return 1; if (javaClass == URI.class) return 2; if (javaClass == URL.class) return 3; if (javaClass == String.class || javaClass == CharSequence.class) return 4; if (javaClass == Object.class) return 20; return Integer.MAX_VALUE; } /* (non-Javadoc) * @see org.exist.xquery.value.Item#toJavaObject(java.lang.Class) */ public Object toJavaObject(Class target) throws XPathException { if (target.isAssignableFrom(AnyURIValue.class)) { return this; } else if (target == XmldbURI.class) { return toXmldbURI(); } else if (target == URI.class) { return toURI(); } else if (target == URL.class) { try { return new URL(uri); } catch (MalformedURLException e) { throw new XPathException( "failed to convert " + uri + " into a Java URL: " + e.getMessage(), e); } } else if (target == String.class || target == CharSequence.class) return uri; else if (target == Object.class) return uri; throw new XPathException( "cannot convert value of type " + Type.getTypeName(getType()) + " to Java object of type " + target.getName()); } public XmldbURI toXmldbURI() throws XPathException { try { return XmldbURI.xmldbUriFor(uri, false); } catch (URISyntaxException e) { throw new XPathException( "failed to convert " + uri + " into an XmldbURI: " + e.getMessage(), e); } } public URI toURI() throws XPathException { try { return new URI(escape(uri)); } catch (URISyntaxException e) { throw new XPathException( "failed to convert " + uri + " into an URI: " + e.getMessage(), e); } } }