package org.toobs.framework.util.string;

import java.text.MessageFormat;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;

/**
 * Static helpers for converting strings between plain text and HTML / XML
 * escaped forms, for quoting text embedded in JavaScript or attributes, and
 * for simple message formatting.
 *
 * <p>All methods are static and hold no state. Methods that accept a string
 * to convert treat {@code null} input as the empty string.
 */
public class StringResource {

    /** logger component */
    private static final Log log = LogFactory.getLog(StringResource.class);

    /**
     * Removes markup from an HTML fragment and returns its text content.
     *
     * <p>The input is first run through {@link #convertHTMLtoString(String)}
     * so that escaped markup (for example {@code &lt;b&gt;}) is turned into
     * real tags, and is then handed to the HTML parser's {@code StringBean}
     * to collect the text.
     *
     * @param str          the (possibly entity-escaped) HTML; {@code null} is
     *                     treated as the empty string
     * @param includeLinks passed to {@code StringBean.setLinks}; {@code null}
     *                     is treated as {@code false}
     * @return the extracted text, or the empty string if the HTML could not
     *         be parsed (the failure is logged)
     */
    public static String stripTags(String str, Boolean includeLinks) {
        String decodedString = convertHTMLtoString(str);
        Parser parser = Parser.createParser(decodedString, null);
        StringBean sb = new StringBean();
        // Guard the unboxing: a null flag used to fail with a NullPointerException.
        sb.setLinks(includeLinks != null && includeLinks.booleanValue());
        try {
            parser.visitAllNodesWith(sb);
            return sb.getStrings();
        } catch (ParserException e) {
            // Keep the cause so the parse failure can actually be diagnosed.
            log.error("Could not parse html:" + decodedString, e);
            return "";
        }
    }

    /**
     * Converts an HTML-escaped string to a readable string.
     *
     * <p>Decodes the named entities {@code amp}, {@code lt}, {@code gt},
     * {@code quot}, {@code apos} and {@code nbsp} (case-insensitively;
     * {@code nbsp} becomes a plain space) as well as decimal
     * ({@code &#65;}) and hexadecimal ({@code &#x41;}) character references.
     * Anything else that starts with {@code '&'} - an unknown entity name, a
     * malformed number, or an ampersand with no terminating {@code ';'} - is
     * copied to the output unchanged.
     *
     * @param str is the string to convert; {@code null} yields the empty string
     * @return is the converted string
     */
    public static String convertHTMLtoString(String str) {
        return decodeEntities(str, true);
    }

    /**
     * Prepares a string for output, optionally quoting it for use inside a
     * JavaScript string literal.
     *
     * <p>When {@code strUseJS} equals {@code "TRUE"} (ignoring case) every
     * single quote is emitted as a backslash followed by two single quotes and
     * every double quote is prefixed with a backslash. Otherwise every single
     * quote is doubled. All other characters, including non-ASCII ones, are
     * passed through unchanged.
     *
     * @param s        is the string to be converted; {@code null} yields the
     *                 empty string
     * @param strUseJS {@code "TRUE"} to convert for JavaScript; any other
     *                 value, including {@code null}, selects the plain mode
     * @return the converted string
     */
    public static String extendedAsHTML(String s, String strUseJS) {
        if (s == null) {
            return "";
        }
        // Constant-first comparison so a null flag no longer throws.
        boolean forJavaScript = "TRUE".equalsIgnoreCase(strUseJS);
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            if (forJavaScript) {
                if (ch == '\'') {
                    // Escaped and doubled: together with the append below this yields \''
                    out.append('\\').append('\'');
                } else if (ch == '"') {
                    out.append('\\');
                }
            } else if (ch == '\'') {
                // Plain mode doubles single quotes.
                out.append('\'');
            }
            out.append(ch);
        }
        return out.toString();
    }

    /**
     * Searches the string for double quotes and inserts a backslash before
     * each of them.
     *
     * @param str input string; {@code null} yields the empty string
     * @return processed result
     */
    public static String protectQuotes(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder result = new StringBuilder(str.length() + 8);
        for (int i = 0; i < str.length(); ++i) {
            char ch = str.charAt(i);
            if (ch == '"') {
                result.append('\\');
            }
            result.append(ch);
        }
        return result.toString();
    }

    /**
     * Escapes the characters that are not allowed verbatim inside an HTML
     * attribute value: {@code <}, {@code >}, {@code &} and {@code "} are
     * replaced by {@code &lt;}, {@code &gt;}, {@code &amp;} and
     * {@code &quot;}. All other characters, including non-ASCII ones, are
     * passed through unchanged.
     *
     * @param s input string; {@code null} yields the empty string
     * @return processed result
     */
    public static String formatHTMLAttribute(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            switch (ch) {
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '&':
                    out.append("&amp;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                default:
                    out.append(ch);
                    break;
            }
        }
        return out.toString();
    }

    /**
     * Reverses {@link #formatHTMLAttribute(String)}: converts an escaped HTML
     * attribute value back to a readable string.
     *
     * <p>Decodes the named entities {@code amp}, {@code lt}, {@code gt},
     * {@code quot} and {@code apos} (case-insensitively) as well as decimal
     * and hexadecimal character references. Unlike
     * {@link #convertHTMLtoString(String)}, {@code &nbsp;} is not decoded.
     * Unknown or malformed references are copied to the output unchanged.
     *
     * @param str is the string to convert; {@code null} yields the empty string
     * @return is the converted string
     */
    public static String formatHTMLAttributetoString(String str) {
        return decodeEntities(str, false);
    }

    /**
     * Utility method to format a string with {@link MessageFormat}.
     *
     * @param pattern   the {@code MessageFormat} pattern
     * @param arguments the values substituted into the pattern
     * @return the formatted string, or {@code null} if either argument is
     *         {@code null}
     */
    public static String formatString(String pattern, String[] arguments) {
        if (pattern == null || arguments == null) {
            return null;
        }
        return MessageFormat.format(pattern, (Object[]) arguments);
    }

    /**
     * Shared scanner behind both decoders: copies {@code str} to the output,
     * replacing every recognised {@code &...;} reference by its character.
     */
    private static String decodeEntities(String str, boolean decodeNbsp) {
        if (str == null) {
            return "";
        }
        int len = str.length();
        StringBuilder out = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            char ch = str.charAt(i);
            if (ch == '&') {
                int end = str.indexOf(';', i + 1);
                if (end != -1 && appendDecoded(out, str.substring(i + 1, end), decodeNbsp)) {
                    i = end + 1;
                    continue;
                }
                // Not a reference we understand: fall through and keep the '&' literally,
                // so that stray ampersands never swallow the text up to the next ';'.
            }
            out.append(ch);
            i++;
        }
        return out.toString();
    }

    /**
     * Appends the character denoted by a reference body (the text between
     * {@code '&'} and {@code ';'}); returns {@code false} and appends nothing
     * if the body is not recognised.
     */
    private static boolean appendDecoded(StringBuilder out, String body, boolean decodeNbsp) {
        if (body.startsWith("#")) {
            int codePoint = parseNumericReference(body.substring(1));
            if (codePoint < 0) {
                return false;
            }
            // appendCodePoint also handles characters outside the BMP,
            // which a plain (char) cast would silently truncate.
            out.appendCodePoint(codePoint);
            return true;
        }
        if (body.equalsIgnoreCase("amp")) {
            out.append('&');
        } else if (body.equalsIgnoreCase("lt")) {
            out.append('<');
        } else if (body.equalsIgnoreCase("gt")) {
            out.append('>');
        } else if (body.equalsIgnoreCase("quot")) {
            out.append('"');
        } else if (body.equalsIgnoreCase("apos")) {
            out.append('\'');
        } else if (decodeNbsp && body.equalsIgnoreCase("nbsp")) {
            out.append(' ');
        } else {
            return false;
        }
        return true;
    }

    /**
     * Parses the numeric part of a character reference (decimal, or
     * hexadecimal when prefixed with {@code x}/{@code X}); returns -1 if it
     * is not a number or not a valid Unicode code point.
     */
    private static int parseNumericReference(String digits) {
        try {
            int value;
            if (digits.startsWith("x") || digits.startsWith("X")) {
                value = Integer.parseInt(digits.substring(1), 16);
            } else {
                value = Integer.parseInt(digits, 10);
            }
            return Character.isValidCodePoint(value) ? value : -1;
        } catch (NumberFormatException e) {
            // Malformed reference such as "&#abc;": the caller keeps the text as-is.
            return -1;
        }
    }
}