/*
You may freely copy, distribute, modify and use this class as long
as the original author attribution remains intact. See message
below.
Copyright (C) 2004 Christian Pesch. All Rights Reserved.
*/
package slash.metamusic.util;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
/**
* Some useful functions for
* <ul>
* <li>strings
* </ul>
*
* @author Christian Pesch
* @version $Id: StringHelper.java 961 2007-03-25 16:46:20Z cpesch $
*/
public class StringHelper {

    /** Replacement emitted for a character reference that cannot be decoded. */
    private static final String UNDECODABLE_ENTITY = "?";

    /**
     * Replaces every whitespace character (as defined by
     * {@link Character#isWhitespace(char)}) with a plain space and then
     * strips leading and trailing whitespace via {@link String#trim()}.
     *
     * @param string the string to trim, may be <code>null</code>
     * @return the trimmed string or <code>null</code> if the given string
     *         was <code>null</code>
     */
    public static String trim(String string) {
        if (string == null)
            return null;
        return blankOutWhitespace(string, false);
    }

    /**
     * Like {@link #trim(String)}, but carriage returns and line feeds
     * inside the string are left untouched. Leading and trailing line
     * breaks are still removed by the final {@link String#trim()}.
     *
     * @param string the string to trim, may be <code>null</code>
     * @return the trimmed string or <code>null</code> if the given string
     *         was <code>null</code>
     */
    public static String trimButKeepLineFeeds(String string) {
        if (string == null)
            return null;
        return blankOutWhitespace(string, true);
    }

    /**
     * Replaces whitespace characters by spaces and trims the result.
     *
     * @param string        the string to process, must not be <code>null</code>
     * @param keepLineFeeds if <code>true</code>, CR and LF are not replaced
     * @return the processed string
     */
    private static String blankOutWhitespace(String string, boolean keepLineFeeds) {
        StringBuilder builder = new StringBuilder(string);
        for (int i = 0; i < builder.length(); i++) {
            char c = builder.charAt(i);
            boolean lineBreak = c == '\r' || c == '\n';
            if (Character.isWhitespace(c) && !(keepLineFeeds && lineBreak))
                builder.setCharAt(i, ' ');
        }
        return builder.toString().trim();
    }

    /** Maps entity names (without the ampersand and semicolon) to their replacement text. */
    private static final Map<String, String> entities = new HashMap<String, String>();

    static {
        // Quotation mark
        entities.put("quot", "\"");
        // Ampersand
        entities.put("amp", "\u0026");
        // Less than
        entities.put("lt", "\u003C");
        // Greater than
        entities.put("gt", "\u003E");
        // Nonbreaking space
        entities.put("nbsp", "\u00A0");
        // Inverted exclamation point
        entities.put("iexcl", "\u00A1");
        // Cent sign
        entities.put("cent", "\u00A2");
        // Pound sign
        entities.put("pound", "\u00A3");
        // General currency sign
        entities.put("curren", "\u00A4");
        // Yen sign
        entities.put("yen", "\u00A5");
        // Broken vertical bar
        entities.put("brvbar", "\u00A6");
        // Section sign
        entities.put("sect", "\u00A7");
        // Umlaut
        entities.put("uml", "\u00A8");
        // Copyright
        entities.put("copy", "\u00A9");
        // Feminine ordinal
        entities.put("ordf", "\u00AA");
        // Left angle quote
        entities.put("laquo", "\u00AB");
        // Not sign
        entities.put("not", "\u00AC");
        // Soft hyphen
        entities.put("shy", "\u00AD");
        // Registered trademark
        entities.put("reg", "\u00AE");
        // Macron accent
        entities.put("macr", "\u00AF");
        // Degree sign
        entities.put("deg", "\u00B0");
        // Plus or minus
        entities.put("plusmn", "\u00B1");
        // Superscript 2
        entities.put("sup2", "\u00B2");
        // Superscript 3
        entities.put("sup3", "\u00B3");
        // Acute accent
        entities.put("acute", "\u00B4");
        // Micro sign (Greek mu)
        entities.put("micro", "\u00B5");
        // Paragraph sign
        entities.put("para", "\u00B6");
        // Middle dot
        entities.put("middot", "\u00B7");
        // Cedilla
        entities.put("cedil", "\u00B8");
        // Superscript 1
        entities.put("sup1", "\u00B9");
        // Masculine ordinal
        entities.put("ordm", "\u00BA");
        // Right angle quote
        entities.put("raquo", "\u00BB");
        // Fraction one-fourth
        entities.put("frac14", "\u00BC");
        // Fraction one-half
        entities.put("frac12", "\u00BD");
        // Fraction three-fourths
        entities.put("frac34", "\u00BE");
        // Inverted question mark
        entities.put("iquest", "\u00BF");
        // Capital A, grave accent
        entities.put("Agrave", "\u00C0");
        // Capital A, acute accent
        entities.put("Aacute", "\u00C1");
        // Capital A, circumflex accent
        entities.put("Acirc", "\u00C2");
        // Capital A, tilde
        entities.put("Atilde", "\u00C3");
        // Capital A, umlaut
        entities.put("Auml", "\u00C4");
        // Capital A, ring
        entities.put("Aring", "\u00C5");
        // Capital AE ligature
        entities.put("AElig", "\u00C6");
        // Capital C, cedilla
        entities.put("Ccedil", "\u00C7");
        // Capital E, grave accent
        entities.put("Egrave", "\u00C8");
        // Capital E, acute accent
        entities.put("Eacute", "\u00C9");
        // Capital E, circumflex accent
        entities.put("Ecirc", "\u00CA");
        // Capital E, umlaut
        entities.put("Euml", "\u00CB");
        // Capital I, grave accent
        entities.put("Igrave", "\u00CC");
        // Capital I, acute accent
        entities.put("Iacute", "\u00CD");
        // Capital I, circumflex accent
        entities.put("Icirc", "\u00CE");
        // Capital I, umlaut
        entities.put("Iuml", "\u00CF");
        // Capital eth, Icelandic
        entities.put("ETH", "\u00D0");
        // Capital N, tilde
        entities.put("Ntilde", "\u00D1");
        // Capital O, grave accent
        entities.put("Ograve", "\u00D2");
        // Capital O, acute accent
        entities.put("Oacute", "\u00D3");
        // Capital O, circumflex accent
        entities.put("Ocirc", "\u00D4");
        // Capital O, tilde
        entities.put("Otilde", "\u00D5");
        // Capital O, umlaut
        entities.put("Ouml", "\u00D6");
        // Multiply sign
        entities.put("times", "\u00D7");
        // Capital O, slash
        entities.put("Oslash", "\u00D8");
        // Capital U, grave accent
        entities.put("Ugrave", "\u00D9");
        // Capital U, acute accent
        entities.put("Uacute", "\u00DA");
        // Capital U, circumflex accent
        entities.put("Ucirc", "\u00DB");
        // Capital U, umlaut
        entities.put("Uuml", "\u00DC");
        // Capital Y, acute accent
        entities.put("Yacute", "\u00DD");
        // Capital thorn, Icelandic
        entities.put("THORN", "\u00DE");
        // Small sz ligature, German
        entities.put("szlig", "\u00DF");
        // Small a, grave accent
        entities.put("agrave", "\u00E0");
        // Small a, acute accent
        entities.put("aacute", "\u00E1");
        // Small a, circumflex accent
        entities.put("acirc", "\u00E2");
        // Small a, tilde
        entities.put("atilde", "\u00E3");
        // Small a, umlaut
        entities.put("auml", "\u00E4");
        // Small a, ring
        entities.put("aring", "\u00E5");
        // Small ae ligature
        entities.put("aelig", "\u00E6");
        // double low-9 quotation mark
        entities.put("bdquo", "\u201E");
        // Small c, cedilla
        entities.put("ccedil", "\u00E7");
        // Small e, grave accent
        entities.put("egrave", "\u00E8");
        // Small e, acute accent
        entities.put("eacute", "\u00E9");
        // Small e, circumflex accent
        entities.put("ecirc", "\u00EA");
        // Small e, umlaut
        entities.put("euml", "\u00EB");
        // Small i, grave accent
        entities.put("igrave", "\u00EC");
        // Small i, acute accent
        entities.put("iacute", "\u00ED");
        // Small i, circumflex accent
        entities.put("icirc", "\u00EE");
        // Small i, umlaut
        entities.put("iuml", "\u00EF");
        // Small eth, Icelandic
        entities.put("eth", "\u00F0");
        // Small n, tilde
        entities.put("ntilde", "\u00F1");
        // Small o, grave accent
        entities.put("ograve", "\u00F2");
        // Small o, acute accent
        entities.put("oacute", "\u00F3");
        // Small o, circumflex accent
        entities.put("ocirc", "\u00F4");
        // Small o, tilde
        entities.put("otilde", "\u00F5");
        // Small o, umlaut
        entities.put("ouml", "\u00F6");
        // Division sign
        entities.put("divide", "\u00F7");
        // Small o, slash
        entities.put("oslash", "\u00F8");
        // Small u, grave accent
        entities.put("ugrave", "\u00F9");
        // Small u, acute accent
        entities.put("uacute", "\u00FA");
        // Small u, circumflex accent
        entities.put("ucirc", "\u00FB");
        // Small u, umlaut
        entities.put("uuml", "\u00FC");
        // Small y, acute accent
        entities.put("yacute", "\u00FD");
        // Small thorn, Icelandic
        entities.put("thorn", "\u00FE");
        // Small y, umlaut
        entities.put("yuml", "\u00FF");
    }

    /**
     * Decodes named ({@code &amp;uuml;}), decimal ({@code &amp;#252;}) and
     * hexadecimal ({@code &amp;#xFC;}) character references in a string.
     * <p>
     * Everything between an ampersand and the next semicolon is treated as
     * one reference. A reference that is unknown, empty or not a valid
     * number is replaced by a question mark. An ampersand without any
     * following semicolon is copied unchanged together with the rest of
     * the string.
     *
     * @param str the string to decode, may be <code>null</code>
     * @return the decoded string or <code>null</code> if the given string
     *         was <code>null</code>
     */
    public static String decodeEntities(String str) {
        if (str == null)
            return null;

        StringBuilder builder = new StringBuilder(str.length());
        int position = 0;
        while (position < str.length()) {
            int ampersandIndex = str.indexOf('&', position);
            if (ampersandIndex == -1) {
                builder.append(str.substring(position));
                break;
            }
            builder.append(str.substring(position, ampersandIndex));

            int semicolonIndex = str.indexOf(';', ampersandIndex);
            if (semicolonIndex == -1) {
                // unterminated reference: keep the remaining text verbatim
                builder.append(str.substring(ampersandIndex));
                break;
            }

            builder.append(decodeEntity(str.substring(ampersandIndex + 1, semicolonIndex)));
            position = semicolonIndex + 1;
        }
        return builder.toString();
    }

    /**
     * Decodes the text between the ampersand and the semicolon of a single
     * character reference.
     *
     * @param reference the reference without ampersand and semicolon,
     *                  possibly empty
     * @return the replacement text, a question mark if the reference
     *         cannot be decoded
     */
    private static String decodeEntity(String reference) {
        // guards against "&;" which previously failed on charAt(0)
        if (reference.length() == 0)
            return UNDECODABLE_ENTITY;

        if (reference.charAt(0) != '#') {
            String replacement = entities.get(reference);
            return replacement != null ? replacement : UNDECODABLE_ENTITY;
        }

        String digits = reference.substring(1);
        int radix = 10;
        if (digits.length() > 0 && (digits.charAt(0) == 'x' || digits.charAt(0) == 'X')) {
            radix = 16;
            digits = digits.substring(1);
        }
        try {
            // an empty or malformed digit string ends up in the catch block
            int codePoint = Integer.parseInt(digits, radix);
            if (!Character.isValidCodePoint(codePoint))
                return UNDECODABLE_ENTITY;
            // toChars() yields a surrogate pair for supplementary characters
            // instead of truncating the value to 16 bits
            return new String(Character.toChars(codePoint));
        } catch (NumberFormatException e) {
            return UNDECODABLE_ENTITY;
        }
    }

    /**
     * Creates a mixed mode string out of a string. Each space or underscore
     * separated substring will have an uppercase first letter and a
     * lowercase rest. The separators themselves are preserved.
     *
     * @param string the string to create a mixed mode string from
     * @return a mixed mode string out of a string. Each space or underscore
     *         separated substring will have an uppercase first letter and
     *         a lowercase rest
     */
    public static String toMixedCase(String string) {
        StringBuilder builder = new StringBuilder(string.length());
        StringTokenizer tokenizer = new StringTokenizer(string, "_ ", true);
        while (tokenizer.hasMoreTokens()) {
            // tokens are never empty; one-character words are capitalized too
            // and the delimiter tokens are not affected by the case conversion
            String token = tokenizer.nextToken();
            builder.append(token.substring(0, 1).toUpperCase());
            builder.append(token.substring(1).toLowerCase());
        }
        return builder.toString();
    }

    /**
     * Replaces every whitespace character (as defined by
     * {@link Character#isWhitespace(char)}) with an underscore.
     *
     * @param string the string to process
     * @return the string with underscores instead of whitespace
     */
    public static String replaceWhitespaces(String string) {
        StringBuilder builder = new StringBuilder(string);
        for (int i = 0; i < builder.length(); i++) {
            if (Character.isWhitespace(builder.charAt(i)))
                builder.setCharAt(i, '_');
        }
        return builder.toString();
    }

    /**
     * Collapses every run of consecutive underscores into a single
     * underscore.
     *
     * @param string the string to process
     * @return the string without adjacent underscores
     */
    public static String removeAdjacentUnderscores(String string) {
        StringBuilder builder = new StringBuilder(string.length());
        for (int i = 0; i < string.length(); i++) {
            char c = string.charAt(i);
            boolean previousIsUnderscore = builder.length() > 0
                    && builder.charAt(builder.length() - 1) == '_';
            if (c == '_' && previousIsUnderscore)
                continue;
            builder.append(c);
        }
        return builder.toString();
    }

    /**
     * Removes every character that is neither a letter, a digit nor
     * whitespace; this includes apostrophes and other punctuation.
     *
     * @param string the string to process
     * @return the string reduced to letters, digits and whitespace
     */
    public static String removeNonLetterOrDigits(String string) {
        StringBuilder builder = new StringBuilder(string.length());
        for (int i = 0; i < string.length(); i++) {
            char c = string.charAt(i);
            if (Character.isLetterOrDigit(c) || Character.isWhitespace(c))
                builder.append(c);
        }
        return builder.toString();
    }

    /**
     * Formats a number with leading zeros up to the given number of digits.
     *
     * @param number the number to format
     * @param digits the minimum length of the result
     * @return the number as a string, left padded with zeros
     */
    public static String formatNumber(long number, int digits) {
        return formatNumber(number, digits, '0');
    }

    /**
     * Formats a number, left padded with the given fill character up to the
     * given length.
     *
     * @param number the number to format
     * @param length the minimum length of the result
     * @param fill   the character to pad with
     * @return the number as a string, left padded with the fill character
     */
    public static String formatNumber(long number, int length, char fill) {
        return formatString(Long.toString(number), length, fill, true);
    }

    /**
     * Pads a string with a fill character until it has at least the given
     * length. Strings that are already long enough are returned unchanged.
     *
     * @param string       the string to pad
     * @param length       the minimum length of the result
     * @param fill         the character to pad with
     * @param rightAligned if <code>true</code> the padding is put in front
     *                     of the string, otherwise behind it
     * @return the padded string
     */
    public static String formatString(String string, int length, char fill, boolean rightAligned) {
        if (string.length() >= length)
            return string;

        int padding = length - string.length();
        StringBuilder builder = new StringBuilder(length);
        if (!rightAligned)
            builder.append(string);
        for (int i = 0; i < padding; i++)
            builder.append(fill);
        if (rightAligned)
            builder.append(string);
        return builder.toString();
    }

    /**
     * Cuts the given number of characters off the end of a string, but
     * never shortens it below the given minimum length. Strings that are
     * shorter than the minimum length are returned unchanged. Negative
     * arguments are treated as zero.
     *
     * @param string            the string to shorten
     * @param minimumLength     the length the result keeps at least, as
     *                          far as the string is that long
     * @param lengthToShortenBy the number of characters to cut off the end
     * @return the shortened string
     */
    public static String shortenString(String string, int minimumLength, int lengthToShortenBy) {
        int length = string.length();
        // clamp both bounds into [0, length] so that substring() cannot fail
        int minIndex = Math.max(0, Math.min(length, minimumLength));
        int maxIndex = length - Math.max(0, lengthToShortenBy);
        return string.substring(0, Math.max(minIndex, maxIndex));
    }

    /**
     * Checks whether a string can be parsed by {@link Long#parseLong(String)}.
     *
     * @param string the string to check, may be <code>null</code>
     * @return <code>true</code> if the string is a valid long value
     */
    public static boolean isANumber(String string) {
        try {
            Long.parseLong(string);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Reduces a string to letters, digits and underscores: surrounding
     * whitespace is trimmed, all characters except letters, digits and
     * whitespace are removed, whitespace is turned into underscores and
     * runs of underscores are collapsed.
     *
     * @param string the string to process
     * @return the processed string
     */
    public static String replaceForURI(String string) {
        string = string.trim();
        string = removeNonLetterOrDigits(string);
        string = replaceWhitespaces(string);
        string = removeAdjacentUnderscores(string);
        return string.trim();
    }
}