package client.net.sf.saxon.ce.value; import client.net.sf.saxon.ce.tree.util.FastStringBuffer; /** * This class provides helper methods and constants for handling whitespace */ public class Whitespace { private Whitespace() {} /** * The values PRESERVE, REPLACE, and COLLAPSE represent the three options for whitespace * normalization. They are deliberately chosen in ascending strength order; given a number * of whitespace facets, only the strongest needs to be carried out. */ public static final int PRESERVE = 0; public static final int REPLACE = 1; public static final int COLLAPSE = 2; /** * The values NONE, IGNORABLE, and ALL identify which kinds of whitespace text node * should be stripped when building a source tree. UNSPECIFIED indicates that no * particular request has been made. XSLT indicates that whitespace should be stripped * as defined by the xsl:strip-space and xsl:preserve-space declarations in the stylesheet */ public static final int NONE = 0; public static final int IGNORABLE = 1; public static final int ALL = 2; public static final int UNSPECIFIED = 3; public static final int XSLT = 4; /** * Test whether a character is whitespace * @param ch the character (Unicode codepoint) to be tested * @return true if the character is one of tab, newline, carriage return, or space */ public static boolean isWhitespace(int ch) { switch (ch) { case 9: case 10: case 13: case 32: return true; default: return false; } } /** * Remove all whitespace characters from a string * @param value the string from which whitespace is to be removed * @return the string without its whitespace. This may be the original value * if it contained no whitespace */ public static CharSequence removeAllWhitespace(CharSequence value) { if (containsWhitespace(value)) { FastStringBuffer sb = new FastStringBuffer(value.length()); for (int i=0; i<value.length(); i++) { char c = value.charAt(i); if (c > 32 || !C0WHITE[c]) { sb.append(c); } } return sb; } else { return value; } } /** * Remove leading whitespace characters from a string * @param value the string whose leading whitespace is to be removed * @return the string with leading whitespace removed. This may be the * original string if there was no leading whitespace */ public static CharSequence removeLeadingWhitespace(CharSequence value) { int start = -1; final int len = value.length(); for (int i=0; i<len; i++) { char c = value.charAt(i); if (c > 32 || !C0WHITE[c]) { start = i; break; } } if (start == 0) { return value; } else if (start < 0 || start == len - 1) { return ""; } else { return value.subSequence(start, len); } } /** * Determine if a string contains any whitespace * @param value the string to be tested * @return true if the string contains a character that is XML whitespace, that is * tab, newline, carriage return, or space */ public static boolean containsWhitespace(CharSequence value) { final int len = value.length(); for (int i=0; i<len; ) { char c = value.charAt(i++); if (c <= 32 && C0WHITE[c]) { return true; } } return false; } /** * Determine if a string is all-whitespace * * @param content the string to be tested * @return true if the supplied string contains no non-whitespace * characters */ public static boolean isWhite(CharSequence content) { final int len = content.length(); for (int i=0; i<len;) { // all valid XML 1.0 whitespace characters, and only whitespace characters, are <= 0x20 // But XML 1.1 allows non-white characters that are also < 0x20, so we need a specific test for these char c = content.charAt(i++); if (c > 32 || !C0WHITE[c]) { return false; } } return true; } private static boolean[] C0WHITE = { false, false, false, false, false, false, false, false, // 0-7 false, true, true, false, false, true, false, false, // 8-15 false, false, false, false, false, false, false, false, // 16-23 false, false, false, false, false, false, false, false, // 24-31 true // 32 }; /** * Collapse whitespace as defined in XML Schema. This is equivalent to the * XPath normalize-space() function * @param in the string whose whitespace is to be collapsed * @return the string with any leading or trailing whitespace removed, and any * internal sequence of whitespace characters replaced with a single space character. */ public static CharSequence collapseWhitespace(CharSequence in) { int len = in.length(); if (len==0 || !containsWhitespace(in)) { return in; } FastStringBuffer sb = new FastStringBuffer(len); boolean inWhitespace = true; int i = 0; for (; i<len; i++) { char c = in.charAt(i); switch (c) { case '\n': case '\r': case '\t': case ' ': if (inWhitespace) { // remove the whitespace } else { sb.append(' '); inWhitespace = true; } break; default: sb.append(c); inWhitespace = false; break; } } int nlen = sb.length(); if (nlen>0 && sb.charAt(nlen-1)==' ') { sb.setLength(nlen-1); } return sb; } /** * Remove leading and trailing whitespace. This has the same effect as collapseWhitespace, * but is cheaper, for use by data types that do not allow internal whitespace. * @param in the input string whose whitespace is to be removed * @return the result of removing excess whitespace */ public static CharSequence trimWhitespace(CharSequence in) { if (in.length()==0) { return in; } int first = 0; int last = in.length()-1; while (true) { final char x = in.charAt(first); if (x > 32 || !C0WHITE[x]) { break; } if (first++ >= last) { return ""; } } while (true) { final char x = in.charAt(last); if (x > 32 || !C0WHITE[x]) { break; } last--; } if (first == 0 && last == in.length()-1) { return in; } else { return in.subSequence(first, last+1); } } /** * Trim leading and trailing whitespace from a string, returning a string. * This differs from the Java trim() method in that the only characters treated as * whitespace are space, \n, \r, and \t. The String#trim() method removes all C0 * control characters (which is not the same thing under XML 1.1). * @param s the string to be trimmed. If null is supplied, null is returned. * @return the string with leading and trailing whitespace removed. */ public static String trim(CharSequence s) { if (s == null) { return null; } return trimWhitespace(s).toString(); } } // This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.