package client.net.sf.saxon.ce.om; import client.net.sf.saxon.ce.expr.z.IntRangeSet; import client.net.sf.saxon.ce.regex.GeneralUnicodeString; import client.net.sf.saxon.ce.regex.UnicodeString; import client.net.sf.saxon.ce.trans.Err; import client.net.sf.saxon.ce.trans.XPathException; /** * NameChecker is a utility class containing static methods to perform validation and analysis of XML names, * as defined in XML 1.1 or XML 1.0 5th edition. * The class also handles validation of characters against the XML 1.1 rules. */ public abstract class NameChecker { /** * Validate whether a given string constitutes a valid QName, as defined in XML Namespaces. * Note that this does not test whether the prefix is actually declared. * * @param name the name to be tested * @return true if the name is a lexically-valid QName */ public static boolean isQName(String name) { int colon = name.indexOf(':'); if (colon < 0) { return isValidNCName(name); } return colon != 0 && colon != name.length() - 1 && isValidNCName(name.substring(0, colon)) && isValidNCName(name.substring(colon + 1)); } /** * Extract the prefix from a QName. Note, the QName is assumed to be valid. * * @param qname The lexical QName whose prefix is required * @return the prefix, that is the part before the colon. Returns an empty * string if there is no prefix */ public static String getPrefix(String qname) { int colon = qname.indexOf(':'); if (colon < 0) { return ""; } return qname.substring(0, colon); } /** * Validate a QName, and return the prefix and local name. The local name is checked * to ensure it is a valid NCName. The prefix is not checked, on the theory that the caller * will look up the prefix to find a URI, and if the prefix is invalid, then no URI will * be found. * * @param qname the lexical QName whose parts are required. Note that leading and trailing * whitespace is not permitted * @return an array of two strings, the prefix and the local name. The first * item is a zero-length string if there is no prefix. * @throws QNameException if not a valid QName. */ public static String[] getQNameParts(CharSequence qname) throws QNameException { String[] parts = new String[2]; int colon = -1; int len = qname.length(); for (int i = 0; i < len; i++) { if (qname.charAt(i) == ':') { colon = i; break; } } if (colon < 0) { parts[0] = ""; parts[1] = qname.toString(); if (!isValidNCName(parts[1])) { throw new QNameException("Invalid QName " + Err.wrap(qname)); } } else { if (colon == 0) { throw new QNameException("QName cannot start with colon: " + Err.wrap(qname)); } if (colon == len - 1) { throw new QNameException("QName cannot end with colon: " + Err.wrap(qname)); } parts[0] = qname.subSequence(0, colon).toString(); parts[1] = qname.subSequence(colon + 1, len).toString(); if (!isValidNCName(parts[1])) { if (!isValidNCName(parts[0])) { throw new QNameException("Both the prefix " + Err.wrap(parts[0]) + " and the local part " + Err.wrap(parts[1]) + " are invalid"); } throw new QNameException("Invalid QName local part " + Err.wrap(parts[1])); } } return parts; } /** * Validate a QName, and return the prefix and local name. Both parts are checked * to ensure they are valid NCNames. * <p/> * <p><i>Used from compiled code</i></p> * * @param qname the lexical QName whose parts are required. Note that leading and trailing * whitespace is not permitted * @return an array of two strings, the prefix and the local name. The first * item is a zero-length string if there is no prefix. * @throws XPathException if not a valid QName. */ public static String[] checkQNameParts(CharSequence qname) throws XPathException { try { String[] parts = getQNameParts(qname); if (parts[0].length() > 0 && !isValidNCName(parts[0])) { throw new XPathException("Invalid QName prefix " + Err.wrap(parts[0])); } return parts; } catch (QNameException e) { XPathException err = new XPathException(e.getMessage()); err.setErrorCode("FORG0001"); throw err; } } // Both XML 1.0e5 and XML1.1e2 have // [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | // [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | // [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] // [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] // XML Namespaces removes the ":" option // In the regexes below, instead of matching all non-BMP characters, we match all UCS-16 codepoints in the surrogate // pair range #xD800-#xDFFF. This allows through the private use characters in planes 15 and 16 (above xEFFFF), but // we can live with that. private static String ncNameStartChar = "[A-Za-z_\\xd8-\\xf6\\xf8-\\u02ff\\u0370-\\u037f-\\u1fff\\u200c\\u200d" + "\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\udfff\\uf900-\\ufdcf\\ufdf0-\\ufffd]"; private static int[] nameStartRangeStartPoints = { 'A', '_', 'a', 0xc0, 0xd8, 0xf8, 0x370, 0x37f, 0x200c, 0x2070, 0x2c00, 0x3001, 0xf900, 0xfdf0, 0x10000 }; private static int[] nameStartRangeEndPoints = { 'Z', '_', 'z', 0xd6, 0xf6, 0x2ff, 0x37d, 0x1fff, 0x200d, 0x218f, 0x2fef, 0xd7ff, 0xfdcf, 0xfffd, 0xeffff }; private static IntRangeSet ncNameStartChars = new IntRangeSet(nameStartRangeStartPoints, nameStartRangeEndPoints); private static int[] nameRangeStartPoints = { '-', '.', '0', 0xb7, 0x300, 0x203f }; private static int[] nameRangeEndPoints = { '-', '.', '9', 0xb7, 0x36f, 0x2040 }; private static IntRangeSet ncNameChars = new IntRangeSet(nameRangeStartPoints, nameRangeEndPoints); // private static String ncNameChar = "[A-Za-z0-9_\\-\\.\\xb7\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d" + // "\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\udfff\\uf900-\\ufdcf\\ufdf0-\\ufffd]"; public static boolean isNCNameStartChar(int c) { return ncNameStartChars.contains(c); } public static boolean isNCNameChar(int c) { return ncNameStartChars.contains(c) || ncNameChars.contains(c); } //private static RegExp ncNamePattern = RegExp.compile("^" + ncNameStartChar + ncNameChar + "*$"); /** * Validate whether a given string constitutes a valid NCName, as defined in XML Namespaces. * * @param ncName the name to be tested. Any whitespace trimming must have already been applied. * @return true if the name is a lexically-valid QName */ public static boolean isValidNCName(CharSequence ncName) { int len = ncName.length(); if (len==0) { return false; } UnicodeString us = GeneralUnicodeString.makeUnicodeString(ncName); if (!isNCNameStartChar(us.charAt(0))) { return false; } for (int i=1; i<len; i++) { if (!isNCNameChar(us.charAt(i))) { return false; } } return true; } /** * Test whether a character is a valid XML character * * @param ch the character to be tested * @return true if this is a valid character in XML 1.1 */ public static boolean isValidChar(int ch) { // from XML 1.0 fifth edition // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // from XML 1.1 second edition // Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] return (ch >= 1 && ch <= 0xd7ff) || (ch >= 0xe000 && ch <= 0xfffd) || (ch >= 0x10000 && ch <= 0x10ffff); } } // This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.