/* * @(#)$Id: AnyURIType.java,v 1.22 2002/10/08 22:01:26 kk122374 Exp $ * * Copyright 2001 Sun Microsystems, Inc. All Rights Reserved. * * This software is the proprietary information of Sun Microsystems, Inc. * Use is subject to license terms. * */ package com.sun.msv.datatype.xsd; import java.io.ByteArrayInputStream; import com.sun.msv.datatype.SerializationContext; import java.util.regex.Pattern; import org.relaxng.datatype.ValidationContext; /** * "anyURI" type. * * See http://www.w3.org/TR/xmlschema-2/#anyURI for the spec. * type of the value object is <code>java.lang.String</code>. * * @author <a href="mailto:kohsuke.kawaguchi@eng.sun.com">Kohsuke KAWAGUCHI</a> */ public class AnyURIType extends BuiltinAtomicType implements Discrete { public static final AnyURIType theInstance = new AnyURIType(); private AnyURIType() { super("anyURI"); } protected boolean checkFormat( String content, ValidationContext context ) { return regexp.matcher(escape(content)).matches(); } private static void appendHex( StringBuffer buf, int hex ) { if( hex<10 ) buf.append( (char)(hex+'0') ); else buf.append( (char)(hex-10+'A') ); } private static void appendByte( StringBuffer buf, int ch ) { buf.append('%'); appendHex( buf, ch/16 ); appendHex( buf, ch%16 ); } /** convert one 'char' in BMP to UTF-8 encoding. */ private static void appendEscaped( StringBuffer buf, char ch ) { if( ch<0x7F ) { appendByte(buf,(int)ch); return; } if( ch<0x7FF ) { appendByte(buf, 0xC0 + (ch>>6)); appendByte(buf, 0x80 + (ch%64)); return; } if( ch<0xFFFF ) { appendByte(buf, 0xE0 + (ch>>12) ); appendByte(buf, 0x80 + ((ch>>6)%64) ); appendByte(buf, 0x80 + (ch%64) ); } } /** convert one surrogate pair to UTF-8 encoding. */ private static void appendEscaped( StringBuffer buf, char ch1, char ch2 ) { int ucs = (((int)(ch1&0x3FF))<<10) + (ch2&0x3FF); appendByte(buf, 0xF0 + (ucs>>18) ); appendByte(buf, 0x80 + ((ucs>>12)%64) ); appendByte(buf, 0x80 + ((ucs>> 6)%64) ); appendByte(buf, 0x80 + (ucs%64) ); } /** * a table that indicates whether a particular character has to be * escaped or not. false indicates it has to be escaped. * this table is of length 128. */ private static final boolean[] isUric = createUricMap(); private static boolean[] createUricMap() { boolean r[] = new boolean[128]; for( int i='a'; i<='z'; i++ ) r[i] = true; for( int i='A'; i<='Z'; i++ ) r[i] = true; for( int i='0'; i<='9'; i++ ) r[i] = true; char[] mark = new char[]{'-','_','.','!','~','*','\'','(',')','#','%','[',']'}; for( int i=0; i<mark.length; i++ ) r[mark[i]] = true; char[] reserved = new char[]{';','/','?',':','@','&','=','+','$',','}; for( int i=0; i<reserved.length; i++ ) r[reserved[i]] = true; return r; } /** escape non-ASCII characters in URL */ public static String escape( String content ) { StringBuffer escaped = new StringBuffer(content.length()); for( int i=0; i<content.length(); i++ ) { char ch = content.charAt(i); if( ch<128 && isUric[ch]) escaped.append(ch); else { // escape it if( 0xD800 <= ch && ch < 0xDC00 ) // surrogate pair appendEscaped( escaped, ch, content.charAt(++i) ); else // other characters. appendEscaped( escaped, ch ); } } return new String(escaped); } final static Pattern regexp = createRegExp(); static Pattern createRegExp() { String alpha = "[a-zA-Z]"; String alphanum = "[0-9a-zA-Z]"; String hex = "[0-9a-fA-F]"; String escaped = "%"+hex+"{2}"; String mark = "[\\-_\\.!~\\*'\\(\\)]"; String unreserved = "("+alphanum +"|"+ mark+")"; String reserved = "[;/\\?:@&=\\+$,\\[\\]]"; String uric = "("+reserved+"|"+unreserved+"|"+escaped+")"; String fragment = uric+"*"; String query = uric+"*"; String pchar = "("+unreserved+"|"+escaped+"|[:@&=\\+$,])"; String param = pchar+"*"; String segment = "("+param + "(;"+param+")*)"; String pathSegments = "("+segment+"(/"+segment+")*)"; String port = "[0-9]*"; String __upTo3digits= "[0-9]{1,3}"; String IPv4address = __upTo3digits+"\\."+__upTo3digits+"\\."+__upTo3digits+"\\."+__upTo3digits; String hex4 = hex+"{1,4}"; String hexseq = hex4+"(:"+hex4+")*"; String hexpart = "(("+hexseq+"(::("+hexseq+")?)?)|(::("+hexseq+")?))"; String IPv6address = "(("+hexpart+"(:"+IPv4address+")?)|(::"+IPv4address+"))"; String IPv6reference= "\\["+IPv6address+"\\]"; String domainlabel = alphanum+"([0-9A-Za-z\\-]*"+alphanum+")?"; String toplabel = alpha+"([0-9A-Za-z\\-]*"+alphanum+")?"; String hostname = "("+domainlabel+"\\.)*"+toplabel + "(\\.)?"; String host = "(("+hostname+")|("+IPv4address+")|("+IPv6reference+"))"; String hostport = host+"(:"+port+")?"; String userinfo = "("+unreserved+"|"+escaped+"|[;:&=\\+$,])*"; String server = "(("+userinfo+"@)?"+hostport+")?"; String regName = "("+unreserved+"|"+escaped+"|[$,;:@&=\\+])+"; String authority = "(("+server+")|("+regName+"))"; String scheme = alpha+"[A-Za-z0-9\\+\\-\\.]*"; String relSegment = "("+unreserved+"|"+escaped+"|[;@&=\\+$,])+"; String absPath = "/"+pathSegments; String relPath = relSegment+"("+absPath+")?"; String netPath = "//"+authority+"("+absPath+")?"; String uricNoSlash = "("+unreserved+"|"+escaped+"|[;\\?:@&=\\+$,])"; String opaquePart = uricNoSlash+"("+uric+")*"; String hierPart = "(("+netPath+")|("+absPath+"))(\\?"+query+")?"; String path = "(("+absPath+")|("+opaquePart+"))?"; String relativeURI = "(("+netPath+")|("+absPath+")|("+relPath+"))(\\?"+query+")?"; String absoluteURI = scheme+":(("+hierPart+")|("+opaquePart+"))"; String uriRef = "("+absoluteURI+"|"+relativeURI+")?(#"+fragment+")?"; return Pattern.compile(uriRef); } public Object _createValue( final String content, ValidationContext context ) { // we can't use java.net.URL (for example, it cannot handle IPv6.) if(!regexp.matcher(escape(content)).matches()) return null; // the value space and the lexical space is the same. // escaped characters are only used for validation. return content; } public String convertToLexicalValue( Object value, SerializationContext context ) { if( value instanceof String ) return (String)value; else throw new IllegalArgumentException(); } public final int isFacetApplicable( String facetName ) { if( facetName.equals(FACET_LENGTH) || facetName.equals(FACET_MINLENGTH) || facetName.equals(FACET_MAXLENGTH) || facetName.equals(FACET_PATTERN) || facetName.equals(FACET_WHITESPACE) || facetName.equals(FACET_ENUMERATION) ) return APPLICABLE; else return NOT_ALLOWED; } public final int countLength( Object value ) { // the spec does not define this. // TODO: check the update of the spec and modify this if necessary. return UnicodeUtil.countLength( (String)value ); } public Class getJavaObjectType() { return String.class; } public XSDatatype getBaseType() { return SimpleURType.theInstance; } }