/** * Copyright 2011 Alexandre Dutra * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * */ package fr.dutra.confluence2wordpress.util; /** * @author Alexandre Dutra * */ public class StringUtils { /** * Determines whether a text contains only whitespace, according to three different definitions: * - Java * - Unicode (category Z) * - W3C / HTML 4 spec * @see "http://stackoverflow.com/questions/1822772/java-regular-expression-to-match-all-whitespace-characters" * @see "http://www.w3.org/TR/html401/struct/text.html#h-9.1" * @param text * @return */ public static boolean isWhitespace(CharSequence text){ if(text == null || text.length() == 0) return true; for(int i = 0; i < text.length(); i++) { char c = text.charAt(i); switch(c) { // Java Whitespace case '\u0009':// HORIZONTAL TABULATION. case '\n': // LINE FEED. case '\u000B':// VERTICAL TABULATION. case '\u000C':// FORM FEED. case '\r': // CARRIAGE RETURN. case '\u001C':// FILE SEPARATOR. case '\u001D':// GROUP SEPARATOR. case '\u001E':// RECORD SEPARATOR. case '\u001F':// UNIT SEPARATOR. //Unicode Zs case '\u0020': // SPACE case '\u00A0': // NO-BREAK SPACE case '\u1680': // OGHAM SPACE MARK case '\u180E': // MONGOLIAN VOWEL SEPARATOR ᠎ case '\u2000': // EN QUAD   case '\u2001': // EM QUAD   case '\u2002': // EN SPACE   case '\u2003': // EM SPACE   case '\u2004': // THREE-PER-EM SPACE   case '\u2005': // FOUR-PER-EM SPACE   case '\u2006': // SIX-PER-EM SPACE   case '\u2007': // FIGURE SPACE   case '\u2008': // PUNCTUATION SPACE   case '\u2009': // THIN SPACE   case '\u200A': // HAIR SPACE   case '\u202F': // NARROW NO-BREAK SPACE   case '\u205F': // MEDIUM MATHEMATICAL SPACE   case '\u3000': // IDEOGRAPHIC SPACE //Unicode Zl case '\u2028': // LINE SEPARATOR //Unicode Zp case '\u2029': // PARAGRAPH SEPARATOR //HTML whitespace case '\u200B': //Zero-width space (​) break; default: return false; } } //this is equivalent: //return text.matches("[\\p{Z}\\p{javaWhitespace}case \u200B]+"); return true; } }