/*
 *  eXist Open Source Native XML Database
 *  Copyright (C) 2001-2007 The eXist Project
 *  http://exist-db.org
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software Foundation
 *  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 *  $Id$
 */
package org.exist.xquery.value;

import java.text.Collator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.exist.dom.QName;
import org.exist.util.Collations;
import org.exist.util.UTF8;
import org.exist.util.XMLChar;
import org.exist.xquery.Constants;
import org.exist.xquery.XPathException;

/**
 * Atomic value holding a string together with the type code it was created
 * with ({@link Type#STRING} unless another type is passed to the constructor).
 * The class also hosts the static whitespace and entity-expansion helpers
 * used when such values are built.
 */
public class StringValue extends AtomicValue {

    public final static StringValue EMPTY_STRING = new StringValue("");

    //http://www.w3.org/TR/xmlschema-2/#language
    //The lexical space of language is the set of all strings that conform
    //to the pattern [a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})* .
    private final static String langRegex =
        "[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*";
    //Old definition : not sure where it comes from
    //"/(([a-z]|[A-Z])([a-z]|[A-Z])|" // ISO639Code
    //+ "([iI]-([a-z]|[A-Z])+)|"     // IanaCode
    //+ "([xX]-([a-z]|[A-Z])+))"     // UserCode
    //+ "(-([a-z]|[A-Z])+)*/";       // Subcode"

    private final static Pattern langPattern = Pattern.compile(langRegex);

    protected int type = Type.STRING;

    protected String value;

    /**
     * Creates a value of the given type, expanding entity and character
     * references in <code>string</code> first.
     *
     * @param string the lexical value
     * @param type the type code of the new value
     * @throws XPathException if expansion fails or the string is not valid for the type
     */
    public StringValue(String string, int type) throws XPathException {
        this(string, type, true);
    }

    /**
     * Creates a value of the given type.
     *
     * For {@link Type#STRING} the text is stored as is, for
     * {@link Type#NORMALIZED_STRING} it goes through
     * {@link #normalizeWhitespace(CharSequence)}, and for every other type it
     * goes through {@link #collapseWhitespace(CharSequence)} and is then
     * validated against the type.
     *
     * @param string the lexical value
     * @param type the type code of the new value
     * @param expand whether to run {@link #expand(CharSequence)} on the input first
     * @throws XPathException if expansion fails or the string is not valid for the type
     */
    public StringValue(String string, int type, boolean expand) throws XPathException {
        this.type = type;
        if (expand) {
            string = StringValue.expand(string); //Should we have character entities
        }
        if (type == Type.STRING) {
            this.value = string;
        } else if (type == Type.NORMALIZED_STRING) {
            this.value = normalizeWhitespace(string);
        } else {
            this.value = collapseWhitespace(string);
            checkType();
        }
    }

    /**
     * Creates a value of the default type holding <code>string</code> unchanged
     * (no expansion, no whitespace processing).
     *
     * @param string the value to wrap
     */
    public StringValue(String string) {
        //string = StringValue.expand(string); //Should we have character entities
        value = string;
    }

    /**
     * Replaces the stored text with its expanded form (see
     * {@link #expand(CharSequence)}) and returns this instance.
     *
     * @return this value, modified in place
     * @throws XPathException if the stored text contains an invalid reference
     */
    public StringValue expand() throws XPathException {
        value = expand(value);
        return this;
    }

    /**
     * Validates the stored text against the lexical rules of {@link #type}.
     * Types without a case below are accepted without any check.
     *
     * @throws XPathException if the text is not valid for the type
     */
    private void checkType() throws XPathException {
        switch (type) {
            case Type.NORMALIZED_STRING:
            case Type.TOKEN:
                return;
            case Type.LANGUAGE: {
                Matcher matcher = langPattern.matcher(value);
                if (!matcher.matches()) {
                    throw new XPathException(
                        "Type error: string "
                        + value
                        + " is not valid for type xs:language");
                }
                return;
            }
            case Type.NAME:
                if (!QName.isQName(value)) {
                    throw new XPathException("Type error: string " + value + " is not a valid xs:Name");
                }
                return;
            case Type.NCNAME:
            case Type.ID:
            case Type.IDREF:
            case Type.ENTITY:
                if (!XMLChar.isValidNCName(value)) {
                    throw new XPathException("Type error: string " + value + " is not a valid " + Type.getTypeName(type));
                }
                // Explicit return: this case used to fall through into the NMTOKEN check.
                return;
            case Type.NMTOKEN:
                if (!XMLChar.isValidNmtoken(value)) {
                    throw new XPathException("Type error: string " + value + " is not a valid xs:NMTOKEN");
                }
                return;
        }
    }

    /**
     * Returns the type code this value was created with.
     *
     * @see org.exist.xquery.value.AtomicValue#getType()
     */
    public int getType() {
        return type;
    }

    /**
     * Returns the stored text unchanged; same as <code>getStringValue(false)</code>.
     *
     * @see org.exist.xquery.value.Item#getStringValue()
     */
    public String getStringValue() {
        return getStringValue(false);
    }

    /**
     * Returns the stored text, optionally replacing each surrogate pair with a
     * decimal character reference (<code>&amp;#NNNN;</code>).
     *
     * @param bmpCheck if <code>true</code>, surrogate pairs are rewritten as
     *        character references; if <code>false</code>, the text is returned as is
     * @return the (possibly rewritten) text
     */
    public String getStringValue(boolean bmpCheck) {
        if (!bmpCheck) {
            return value;
        }
        final int length = value.length();
        StringBuilder buf = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final char ch = value.charAt(i);
            // A surrogate in the last position has no partner to combine with; it is
            // copied through unchanged instead of reading past the end of the string.
            if (XMLChar.isSurrogate(ch) && i + 1 < length) {
                // Compose supplemental from high and low surrogate
                int suppChar = XMLChar.supplemental(ch, value.charAt(++i));
                buf.append("&#");
                buf.append(Integer.toString(suppChar));
                buf.append(";");
            } else {
                buf.append(ch);
            }
        }
        return buf.toString();
    }

    /**
     * Returns this value for position 0 and <code>null</code> for any other position.
     *
     * @param pos the requested position
     */
    public Item itemAt(int pos) {
        return pos == 0 ? this : null;
    }

    /**
     * Converts this value to the requested type by handing the stored text to
     * the constructor of the matching value class.
     *
     * @param requiredType the target type code
     * @return the converted value; this instance for ATOMIC, ITEM and STRING
     * @throws XPathException if the text cannot be converted or the target
     *         type is not supported
     * @see org.exist.xquery.value.AtomicValue#convertTo(int)
     */
    public AtomicValue convertTo(int requiredType) throws XPathException {
        switch (requiredType) {
            //TODO : should we allow these 2 type under-promotions ?
            case Type.ATOMIC :
            case Type.ITEM :
            case Type.STRING :
                return this;
            case Type.NORMALIZED_STRING:
            case Type.TOKEN:
            case Type.LANGUAGE:
            case Type.NMTOKEN:
            case Type.NAME:
            case Type.NCNAME:
            case Type.ID:
            case Type.IDREF:
            case Type.ENTITY:
                return new StringValue(value, requiredType);
            case Type.ANY_URI :
                return new AnyURIValue(value);
            case Type.BOOLEAN : {
                String trimmed = trimWhitespace(value);
                if (trimmed.equals("0") || trimmed.equals("false")) {
                    return BooleanValue.FALSE;
                } else if (trimmed.equals("1") || trimmed.equals("true")) {
                    return BooleanValue.TRUE;
                } else {
                    throw new XPathException(
                        "cannot convert string '" + value + "' to boolean");
                }
            }
            case Type.FLOAT :
                return new FloatValue(value);
            case Type.DOUBLE :
            case Type.NUMBER :
                return new DoubleValue(this);
            case Type.DECIMAL :
                return new DecimalValue(value);
            case Type.INTEGER :
            case Type.NON_POSITIVE_INTEGER :
            case Type.NEGATIVE_INTEGER :
            case Type.POSITIVE_INTEGER :
            case Type.LONG :
            case Type.INT :
            case Type.SHORT :
            case Type.BYTE :
            case Type.NON_NEGATIVE_INTEGER :
            case Type.UNSIGNED_LONG :
            case Type.UNSIGNED_INT :
            case Type.UNSIGNED_SHORT :
            case Type.UNSIGNED_BYTE :
                return new IntegerValue(value, requiredType);
            case Type.BASE64_BINARY :
                return new Base64Binary(value);
            case Type.HEX_BINARY :
                return new HexBinary(value);
            case Type.DATE_TIME :
                return new DateTimeValue(value);
            case Type.TIME :
                return new TimeValue(value);
            case Type.DATE :
                return new DateValue(value);
            case Type.DURATION :
                return new DurationValue(value);
            case Type.YEAR_MONTH_DURATION :
                return new YearMonthDurationValue(value);
            case Type.DAY_TIME_DURATION :
                return new DayTimeDurationValue(value);
            case Type.GYEAR :
                return new GYearValue(value);
            case Type.GMONTH :
                return new GMonthValue(value);
            case Type.GDAY :
                return new GDayValue(value);
            case Type.GYEARMONTH :
                return new GYearMonthValue(value);
            case Type.GMONTHDAY :
                return new GMonthDayValue(value);
            case Type.UNTYPED_ATOMIC :
                return new UntypedAtomicValue(getStringValue());
            case Type.QNAME :
                return new QNameValue(null, new QName(value));
            default :
                throw new XPathException("FORG0001: cannot cast '" +
                    Type.getTypeName(this.getItemType()) + "(\"" + getStringValue() + "\")' to " +
                    Type.getTypeName(requiredType));
        }
    }

    /**
     * Ranks how well this value maps to the given Java class; a lower number
     * means a better match and <code>Integer.MAX_VALUE</code> means no mapping.
     *
     * @param javaClass the candidate target class
     * @return the preference rank
     */
    public int conversionPreference(Class javaClass) {
        if (javaClass.isAssignableFrom(StringValue.class)) return 0;
        if (javaClass == String.class || javaClass == CharSequence.class) return 1;
        if (javaClass == Character.class || javaClass == char.class) return 2;
        if (javaClass == Double.class || javaClass == double.class) return 10;
        if (javaClass == Float.class || javaClass == float.class) return 11;
        if (javaClass == Long.class || javaClass == long.class) return 12;
        if (javaClass == Integer.class || javaClass == int.class) return 13;
        if (javaClass == Short.class || javaClass == short.class) return 14;
        if (javaClass == Byte.class || javaClass == byte.class) return 15;
        if (javaClass == Boolean.class || javaClass == boolean.class) return 16;
        if (javaClass == Object.class) return 20;
        return Integer.MAX_VALUE;
    }

    /**
     * Converts this value to an instance of the given Java class.
     *
     * Numeric targets go through {@link #convertTo(int)}; boolean targets use
     * {@link #effectiveBooleanValue()}; character targets require a string of
     * length exactly 1.
     *
     * @param target the requested Java class
     * @return the converted object
     * @throws XPathException if the conversion is not possible
     */
    public Object toJavaObject(Class target) throws XPathException {
        if (target.isAssignableFrom(StringValue.class)) {
            return this;
        } else if (target == Object.class
                || target == String.class
                || target == CharSequence.class) {
            return value;
        } else if (target == double.class || target == Double.class) {
            DoubleValue v = (DoubleValue) convertTo(Type.DOUBLE);
            return Double.valueOf(v.getValue());
        } else if (target == float.class || target == Float.class) {
            FloatValue v = (FloatValue) convertTo(Type.FLOAT);
            return Float.valueOf(v.value);
        } else if (target == long.class || target == Long.class) {
            IntegerValue v = (IntegerValue) convertTo(Type.LONG);
            // getLong(), not getInt(): the latter drops the upper 32 bits.
            return Long.valueOf(v.getLong());
        } else if (target == int.class || target == Integer.class) {
            IntegerValue v = (IntegerValue) convertTo(Type.INT);
            return Integer.valueOf(v.getInt());
        } else if (target == short.class || target == Short.class) {
            IntegerValue v = (IntegerValue) convertTo(Type.SHORT);
            return Short.valueOf((short) v.getInt());
        } else if (target == byte.class || target == Byte.class) {
            IntegerValue v = (IntegerValue) convertTo(Type.BYTE);
            return Byte.valueOf((byte) v.getInt());
        } else if (target == boolean.class || target == Boolean.class) {
            return Boolean.valueOf(effectiveBooleanValue());
        } else if (target == char.class || target == Character.class) {
            if (value.length() != 1) {
                throw new XPathException("cannot convert string with length = 0 or length > 1 to Java character");
            }
            return Character.valueOf(value.charAt(0));
        }

        throw new XPathException(
            "cannot convert value of type "
                + Type.getTypeName(type)
                + " to Java object of type "
                + target.getName());
    }

    /**
     * Compares this value with <code>other</code> using the given operator.
     *
     * @param collator the collator handed to {@link Collations#compare}
     * @param operator one of the comparison constants in {@link Constants}
     * @param other the value to compare with
     * @return the result of the comparison; <code>false</code> if
     *         <code>other</code> is empty
     * @throws XPathException if <code>other</code> is not a string (or anyURI)
     *         or the operator is not a comparison operator
     * @see org.exist.xquery.value.AtomicValue#compareTo(Collator, int, AtomicValue)
     */
    public boolean compareTo(Collator collator, int operator, AtomicValue other) throws XPathException {
        if (other.isEmpty()) {
            return false;
        }
        //A value of type xs:anyURI (or any type derived by restriction from xs:anyURI)
        //can be promoted to the type xs:string.
        //The result of this promotion is created by casting the original value to the type xs:string.
        if (Type.subTypeOf(other.getType(), Type.ANY_URI)) {
            other = other.convertTo(Type.STRING);
        }
        if (Type.subTypeOf(other.getType(), Type.STRING)) {
            int cmp = Collations.compare(collator, value, other.getStringValue());
            switch (operator) {
                case Constants.EQ :
                    return cmp == 0;
                case Constants.NEQ :
                    return cmp != 0;
                case Constants.LT :
                    return cmp < 0;
                case Constants.LTEQ :
                    return cmp <= 0;
                case Constants.GT :
                    return cmp > 0;
                case Constants.GTEQ :
                    return cmp >= 0;
                default :
                    throw new XPathException("Type error: cannot apply operand to string value");
            }
        }
        throw new XPathException(
            "XPTY0004: can not compare xs:string('" + value + "') with " +
            Type.getTypeName(other.getType()) + "('" + other.getStringValue() + "')");
    }

    /**
     * Orders this value relative to <code>other</code> by comparing the two
     * string values with the given collator.
     *
     * @param collator the collator handed to {@link Collations#compare}
     * @param other the value to compare with
     * @return the comparison result, or {@link Constants#INFERIOR} when
     *         <code>other</code> is a numeric NaN or infinity
     * @throws XPathException if the string value of <code>other</code> cannot be obtained
     * @see org.exist.xquery.value.AtomicValue#compareTo(Collator, AtomicValue)
     */
    public int compareTo(Collator collator, AtomicValue other) throws XPathException {
        if (Type.subTypeOf(other.getType(), Type.NUMBER)) {
            //No possible comparisons
            if (((NumericValue) other).isNaN()) {
                return Constants.INFERIOR;
            }
            if (((NumericValue) other).isInfinite()) {
                return Constants.INFERIOR;
            }
        }
        return Collations.compare(collator, value, other.getStringValue());
    }

    /**
     * Delegates to {@link Collations#startsWith} with this text and the string
     * value of <code>other</code>.
     *
     * @see org.exist.xquery.value.AtomicValue#startsWith(Collator, AtomicValue)
     */
    public boolean startsWith(Collator collator, AtomicValue other) throws XPathException {
        return Collations.startsWith(collator, value, other.getStringValue());
    }

    /**
     * Delegates to {@link Collations#endsWith} with this text and the string
     * value of <code>other</code>.
     *
     * @see org.exist.xquery.value.AtomicValue#endsWith(Collator, AtomicValue)
     */
    public boolean endsWith(Collator collator, AtomicValue other) throws XPathException {
        return Collations.endsWith(collator, value, other.getStringValue());
    }

    /**
     * Returns whether {@link Collations#indexOf} finds the string value of
     * <code>other</code> inside this text.
     *
     * @see org.exist.xquery.value.AtomicValue#contains(Collator, AtomicValue)
     */
    public boolean contains(Collator collator, AtomicValue other) throws XPathException {
        return Collations.indexOf(collator, value, other.getStringValue()) != Constants.STRING_NOT_FOUND;
    }

    /**
     * Returns <code>true</code> if the stored text has at least one character.
     *
     * @see org.exist.xquery.value.AtomicValue#effectiveBooleanValue()
     */
    public boolean effectiveBooleanValue() throws XPathException {
        // If its operand is a singleton value of type xs:string, xs:anyURI, xs:untypedAtomic,
        //or a type derived from one of these, fn:boolean returns false if the operand value has zero length; otherwise it returns true.
        return value.length() > 0;
    }

    /**
     * Returns the stored text.
     *
     * @see java.lang.Object#toString()
     */
    public String toString() {
        return value;
    }

    /**
     * Replaces every line feed, carriage return and tab in the input with a
     * single space; all other characters are copied unchanged.
     *
     * @param seq the input, may be <code>null</code>
     * @return the normalized text, or the empty string for <code>null</code> input
     */
    public final static String normalizeWhitespace(CharSequence seq) {
        if (seq == null) {
            return "";
        }
        StringBuilder copy = new StringBuilder(seq.length());
        char ch;
        for (int i = 0; i < seq.length(); i++) {
            ch = seq.charAt(i);
            switch (ch) {
                case '\n' :
                case '\r' :
                case '\t' :
                    copy.append(' ');
                    break;
                default :
                    copy.append(ch);
            }
        }
        return copy.toString();
    }

    /**
     * Collapses all sequences of adjacent whitespace chars in the input string
     * into a single space.
     *
     * If the input contains no run of two or more whitespace characters it is
     * returned unchanged. Otherwise every whitespace run from the first such
     * run onwards becomes one space and a space left at the very end is
     * removed. A run at the very start of the input is reduced to one space,
     * not removed.
     *
     * @param in the input, may be <code>null</code>
     * @return the collapsed text, or the empty string for <code>null</code> input
     */
    public static String collapseWhitespace(CharSequence in) {
        if (in == null) {
            return "";
        }
        final int length = in.length();
        if (length == 0) {
            return in.toString();
        }
        int i = 0;
        // this method is performance critical, so first test if we need to collapse at all
        for (; i < length; i++) {
            if (XMLChar.isSpace(in.charAt(i))
                    && i + 1 < length
                    && XMLChar.isSpace(in.charAt(i + 1))) {
                break;
            }
        }
        if (i == length) {
            // no whitespace to collapse, just return
            return in.toString();
        }

        // start to collapse whitespace
        StringBuilder sb = new StringBuilder(length);
        sb.append(in.subSequence(0, i).toString());
        // The first run is emitted as ' ' like every later run. The character found at
        // position i used to be copied verbatim, so a tab or line break could survive
        // and also escape the trailing-space removal below.
        sb.append(' ');
        boolean inWhitespace = true;
        for (i++; i < length; i++) {
            char c = in.charAt(i);
            if (XMLChar.isSpace(c)) {
                if (!inWhitespace) {
                    sb.append(' ');
                    inWhitespace = true;
                }
                // else: still inside a run, drop the character
            } else {
                sb.append(c);
                inWhitespace = false;
            }
        }
        if (sb.charAt(sb.length() - 1) == ' ') {
            sb.deleteCharAt(sb.length() - 1);
        }
        return sb.toString();
    }

    /**
     * Removes leading and trailing characters whose code is
     * <code>&lt;= 0x20</code>.
     *
     * @param in the input, may be <code>null</code>
     * @return the trimmed text; the empty string for <code>null</code> input or
     *         input consisting only of such characters
     */
    public final static String trimWhitespace(String in) {
        if (in == null) {
            return "";
        }
        if (in.length() == 0) {
            return in;
        }
        int first = 0;
        int last = in.length() - 1;
        while (in.charAt(first) <= 0x20) {
            if (first++ >= last) {
                return "";
            }
        }
        // A non-blank character exists at 'first', so this loop cannot run below it.
        while (in.charAt(last) <= 0x20) {
            last--;
        }
        return in.substring(first, last + 1);
    }

    /**
     * Expands the predefined entity references (<code>amp</code>,
     * <code>lt</code>, <code>gt</code>, <code>quot</code>, <code>apos</code>)
     * and decimal/hexadecimal character references in the input, and
     * normalizes line ends: a carriage return followed by a line feed is
     * dropped, any other carriage return becomes a line feed.
     *
     * @param seq the input, may be <code>null</code>
     * @return the expanded text, or the empty string for <code>null</code> input
     * @throws XPathException if an ampersand does not start a well-formed,
     *         known reference
     */
    public final static String expand(CharSequence seq) throws XPathException {
        if (seq == null) {
            return "";
        }
        final int length = seq.length();
        StringBuilder buf = new StringBuilder(length);
        StringBuilder entityRef = null;
        char ch;
        for (int i = 0; i < length; i++) {
            ch = seq.charAt(i);
            switch (ch) {
                case '&' :
                    if (entityRef == null) {
                        entityRef = new StringBuilder();
                    } else {
                        entityRef.setLength(0);
                    }
                    if ((i + 1) == length) {
                        throw new XPathException("XPST0003 : Ampersands (&) must be escaped.");
                    }
                    if ((i + 2) == length) {
                        throw new XPathException("XPST0003 : Ampersands (&) must be escaped (missing ;).");
                    }
                    ch = seq.charAt(i + 1);
                    if (ch != '#') {
                        // named entity reference
                        if (!Character.isLetter(ch)) {
                            throw new XPathException("XPST0003 : Ampersands (&) must be escaped (following character was not a name start character).");
                        }
                        entityRef.append(ch);
                        boolean found = false;
                        for (int j = i + 2; j < length; j++) {
                            ch = seq.charAt(j);
                            if (ch == ';') {
                                found = true;
                                i = j; // resume the outer loop after the ';'
                                break;
                            } else if (ch == '.' || ch == '_' || ch == '-' || Character.isLetterOrDigit(ch)) {
                                entityRef.append(ch);
                            } else {
                                break;
                            }
                        }
                        if (found) {
                            buf.append((char) expandEntity(entityRef.toString()));
                        } else {
                            throw new XPathException("XPST0003 : Invalid character in entity name (" + ch + ") or missing ;");
                        }
                    } else {
                        // character reference; the '#' (and 'x') stay in entityRef so that
                        // expandEntity/expandCharRef can tell the forms apart
                        entityRef.append(ch);
                        ch = seq.charAt(i + 2);
                        boolean found = false;
                        if (ch == 'x') {
                            entityRef.append(ch);
                            // hex number
                            for (int j = i + 3; j < length; j++) {
                                ch = seq.charAt(j);
                                if (ch == ';') {
                                    found = true;
                                    i = j;
                                    break;
                                } else if (isHexDigit(ch)) {
                                    entityRef.append(ch);
                                } else {
                                    break;
                                }
                            }
                        } else {
                            // decimal number
                            for (int j = i + 2; j < length; j++) {
                                ch = seq.charAt(j);
                                if (ch == ';') {
                                    found = true;
                                    i = j;
                                    break;
                                } else if (ch >= '0' && ch <= '9') {
                                    entityRef.append(ch);
                                } else {
                                    break;
                                }
                            }
                        }
                        if (found) {
                            int charref = expandEntity(entityRef.toString());
                            if (XMLChar.isSupplemental(charref)) {
                                buf.append(XMLChar.highSurrogate(charref));
                                buf.append(XMLChar.lowSurrogate(charref));
                            } else {
                                buf.append((char) charref);
                            }
                        } else {
                            throw new XPathException("XPST0003 : Invalid character in character reference (" + ch + ") or missing ;");
                        }
                    }
                    break;
                case '\r':
                    // CR LF: drop the CR, the LF is appended on the next iteration.
                    // Any other CR, including one at the very end of the input, becomes LF.
                    if ((i + 1) == length || seq.charAt(i + 1) != '\n') {
                        buf.append('\n');
                    }
                    break;
                default :
                    buf.append(ch);
            }
        }
        return buf.toString();
    }

    /**
     * Tells whether <code>ch</code> is one of <code>0-9</code>,
     * <code>a-f</code> or <code>A-F</code>.
     */
    private static boolean isHexDigit(char ch) {
        return (ch >= '0' && ch <= '9')
            || (ch >= 'a' && ch <= 'f')
            || (ch >= 'A' && ch <= 'F');
    }

    /**
     * Resolves the body of a reference (the text between <code>&amp;</code>
     * and <code>;</code>) to a character code.
     *
     * @param buf the reference body: a predefined entity name, or
     *        <code>#</code> followed by a decimal or <code>x</code>-prefixed
     *        hexadecimal number
     * @return the character code
     * @exception XPathException if the reference is unknown or malformed
     */
    private final static int expandEntity(String buf) throws XPathException {
        if (buf.equals("amp")) {
            return '&';
        } else if (buf.equals("lt")) {
            return '<';
        } else if (buf.equals("gt")) {
            return '>';
        } else if (buf.equals("quot")) {
            return '"';
        } else if (buf.equals("apos")) {
            return '\'';
        } else if (buf.length() > 1 && buf.charAt(0) == '#') {
            return expandCharRef(buf.substring(1));
        } else {
            throw new XPathException("Unknown entity reference: " + buf);
        }
    }

    /**
     * Parses the numeric part of a character reference.
     *
     * @param buf decimal digits, or <code>x</code> followed by hexadecimal digits
     * @return the character code
     * @exception XPathException if the number cannot be parsed, is zero, or
     *            lies above U+10FFFF
     */
    private final static int expandCharRef(String buf) throws XPathException {
        try {
            int charNumber;
            if (buf.length() > 1 && buf.charAt(0) == 'x') {
                // Hex
                charNumber = Integer.parseInt(buf.substring(1), 16);
            } else {
                charNumber = Integer.parseInt(buf);
            }
            if (charNumber == 0) {
                throw new XPathException("XQST0090 : Character number zero (0) is not allowed.");
            }
            // Without this check a too-large number would be silently truncated
            // by the (char) cast in expand().
            if (!Character.isValidCodePoint(charNumber)) {
                throw new XPathException("XQST0090 : Character reference " + buf + " does not identify a valid character.");
            }
            return charNumber;
        } catch (NumberFormatException e) {
            throw new XPathException("Unknown character reference: " + buf);
        }
    }

    /**
     * Returns whichever of this value and <code>other</code> compares greater
     * under the collator; <code>other</code> wins ties. A non-string
     * <code>other</code> is converted to this value's type for the comparison
     * only; the original <code>other</code> is what gets returned.
     *
     * @see org.exist.xquery.value.AtomicValue#max(Collator, AtomicValue)
     */
    public AtomicValue max(Collator collator, AtomicValue other) throws XPathException {
        if (Type.subTypeOf(other.getType(), Type.STRING)) {
            return Collations.compare(collator, value, ((StringValue) other).value) > 0 ? this : other;
        } else {
            return Collations.compare(collator, value, ((StringValue) other.convertTo(getType())).value) > 0
                ? this
                : other;
        }
    }

    /**
     * Returns whichever of this value and <code>other</code> compares smaller
     * under the collator; <code>other</code> wins ties. A non-string
     * <code>other</code> is converted to this value's type for the comparison
     * only; the original <code>other</code> is what gets returned.
     *
     * @see org.exist.xquery.value.AtomicValue#min(Collator, AtomicValue)
     */
    public AtomicValue min(Collator collator, AtomicValue other) throws XPathException {
        if (Type.subTypeOf(other.getType(), Type.STRING)) {
            return Collations.compare(collator, value, ((StringValue) other).value) < 0 ? this : other;
        } else {
            return Collations.compare(collator, value, ((StringValue) other.convertTo(getType())).value) < 0
                ? this
                : other;
        }
    }

    /**
     * Orders values of the same type code by {@link String#compareTo(String)}
     * on their text, and values of different type codes by the numeric type code.
     *
     * @param o the value to compare with; must be an {@link AtomicValue}
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    public int compareTo(Object o) {
        AtomicValue other = (AtomicValue) o;
        // if(Type.subTypeOf(other.getType(), Type.STRING))
        if (getType() == other.getType()) {
            return value.compareTo(((StringValue) other).value);
        } else {
            return getType() > other.getType() ? 1 : -1;
        }
    }

    /**
     * Serialize for the persistent storage.
     *
     * The returned array leaves the first <code>offset</code> bytes untouched,
     * stores the type code in the byte at <code>offset</code>, and writes the
     * UTF-8 encoded text after it.
     *
     * @param offset number of bytes to reserve at the start of the array
     * @param caseSensitive if <code>false</code>, the text is lower-cased before encoding
     * @return the serialized value
     */
    public byte[] serializeValue(int offset, boolean caseSensitive) {
        final String val = caseSensitive ? value : value.toLowerCase();
        final byte[] data = new byte[offset + 1 + UTF8.encoded(val)];
        data[offset] = (byte) type; // TODO: cast to byte is not safe
        UTF8.encode(val, data, offset + 1);
        return data;
    }
}