/* * Copyright 2006-2010 Daniel Henninger. All rights reserved. * * This software is published under the terms of the GNU Public License (GPL), * a copy of which is included in this distribution. */ package net.sf.kraken.util; import java.util.List; import java.util.regex.Pattern; /** * A simple class to perform various string related functions. * * @author Daniel Henninger */ public class StringUtils { /** * Convenience routine to perform a string join for groups in the database. * @param array Array of strings to join together. * @param delim Delimiter to separate strings with. * @return Joined string */ public static String join( List<String> array, String delim ) { StringBuffer sb = join(array, delim, new StringBuffer()); return sb.toString(); } /** * Helper function for primary use join function. * @param array Array of strings to join together. * @param delim Delimiter to separate strings with. * @param sb String buffer instance to work from. * @return String buffer instance. */ static StringBuffer join( List<String> array, String delim, StringBuffer sb ) { Boolean first = true; for (String s : array) { if (!first) { sb.append(delim); } else { first = false; } sb.append(s); } return sb; } /** * Regular Expressions */ /* HTML looking tags */ private static final Pattern htmlRE = Pattern.compile("<[^>]*>"); /* Newlines */ private static final Pattern newlineRE = Pattern.compile("<br/?>", Pattern.CASE_INSENSITIVE); /** * Strips HTML tags fairly loosely, trusting that html tags will look like * <whatever>. Before stripping these tags, it tries to convert known tags * to text versions, such as newlines. * * @param str the string from which to strip HTML tags * @return the given string with HTML tags removed */ public static String convertFromHtml(String str) { str = newlineRE.matcher(str).replaceAll("\\\n"); str = htmlRE.matcher(str).replaceAll(""); str = org.jivesoftware.util.StringUtils.unescapeFromXML(str); return str; } /** * This method ensures that the output String has only valid XML unicode characters as specified by the * XML 1.0 standard. For reference, please see the * standard. This method will return an empty String if the input is null or empty. * * @author Donoiu Cristian, GPL * @param s The String whose non-valid characters we want to remove. * @return The in String, stripped of non-valid characters. */ public static String removeInvalidXMLCharacters(String s) { if (s == null) { return ""; } StringBuilder out = new StringBuilder(); // Used to hold the output. int codePoint; // Used to reference the current character. int i=0; while(i<s.length()) { codePoint = s.codePointAt(i); // This is the unicode code of the character. if ((codePoint == 0x9) || // Consider testing larger ranges first to improve speed. (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { out.append(Character.toChars(codePoint)); } i+= Character.charCount(codePoint); // Increment with the number of code units(java chars) needed to represent a Unicode char. } return out.toString(); } }