/**
* $RCSfile$
* $Revision: 11823 $
* $Date: 2010-08-15 08:20:48 -0500 (Sun, 15 Aug 2010) $
*
* Copyright 2003-2007 Jive Software.
*
* All rights reserved. Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jivesoftware.smack.util;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
import java.util.TimeZone;
/**
* A collection of utility methods for String objects.
*/
public class StringUtils {
/**
* Date format as defined in XEP-0082 - XMPP Date and Time Profiles. The
* time zone is set to UTC.
* <p>
* Date formats are not synchronized. Since multiple threads access the
* format concurrently, it must be synchronized externally or you can use
* the convenience methods {@link #parseXEP0082Date(String)} and
* {@link #formatXEP0082Date(Date)}.
*/
public static final DateFormat XEP_0082_UTC_FORMAT = new SimpleDateFormat(
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
static {
XEP_0082_UTC_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
}
private static final char[] QUOTE_ENCODE = """.toCharArray();
private static final char[] APOS_ENCODE = "'".toCharArray();
private static final char[] AMP_ENCODE = "&".toCharArray();
private static final char[] LT_ENCODE = "<".toCharArray();
private static final char[] GT_ENCODE = ">".toCharArray();
/**
* Used by the hash method.
*/
private static MessageDigest digest = null;
/**
* Pseudo-random number generator object for use with randomString(). The
* Random class is not considered to be cryptographically secure, so only
* use these random Strings for low to medium security applications.
*/
private static Random randGen = new Random();
/**
* Array of numbers and letters of mixed case. Numbers appear in the list
* twice so that there is a more equal chance that a number will be picked.
* We can use the array to get a random number or letter by picking a random
* array index.
*/
private static char[] numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz"
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
/**
* Decodes a base64 String.
*
* @param data
* a base64 encoded String to decode.
* @return the decoded String.
*/
public static byte[] decodeBase64(String data) {
return Base64.decode(data);
}
/**
* Encodes a byte array into a base64 String.
*
* @param data
* a byte array to encode.
* @return a base64 encode String.
*/
public static String encodeBase64(byte[] data) {
return encodeBase64(data, false);
}
/**
* Encodes a byte array into a bse64 String.
*
* @param data
* The byte arry to encode.
* @param lineBreaks
* True if the encoding should contain line breaks and false if
* it should not.
* @return A base64 encoded String.
*/
public static String encodeBase64(byte[] data, boolean lineBreaks) {
return encodeBase64(data, 0, data.length, lineBreaks);
}
/**
* Encodes a byte array into a bse64 String.
*
* @param data
* The byte arry to encode.
* @param offset
* the offset of the bytearray to begin encoding at.
* @param len
* the length of bytes to encode.
* @param lineBreaks
* True if the encoding should contain line breaks and false if
* it should not.
* @return A base64 encoded String.
*/
public static String encodeBase64(byte[] data, int offset, int len,
boolean lineBreaks) {
return Base64.encodeBytes(data, offset, len,
(lineBreaks ? Base64.NO_OPTIONS : Base64.DONT_BREAK_LINES));
}
/**
* Encodes a String as a base64 String.
*
* @param data
* a String to encode.
* @return a base64 encoded String.
*/
public static String encodeBase64(String data) {
byte[] bytes = null;
try {
bytes = data.getBytes("ISO-8859-1");
} catch (final UnsupportedEncodingException uee) {
uee.printStackTrace();
}
return encodeBase64(bytes);
}
/**
* Encodes an array of bytes as String representation of hexadecimal.
*
* @param bytes
* an array of bytes to convert to a hex string.
* @return generated hex string.
*/
public static String encodeHex(byte[] bytes) {
final StringBuilder hex = new StringBuilder(bytes.length * 2);
for (final byte aByte : bytes) {
if ((aByte & 0xff) < 0x10) {
hex.append("0");
}
hex.append(Integer.toString(aByte & 0xff, 16));
}
return hex.toString();
}
/**
* Escapes all necessary characters in the String so that it can be used in
* an XML doc.
*
* @param string
* the string to escape.
* @return the string with appropriate characters escaped.
*/
public static String escapeForXML(String string) {
if (string == null) {
return null;
}
char ch;
int i = 0;
int last = 0;
final char[] input = string.toCharArray();
final int len = input.length;
final StringBuilder out = new StringBuilder((int) (len * 1.3));
for (; i < len; i++) {
ch = input[i];
if (ch > '>') {
} else if (ch == '<') {
if (i > last) {
out.append(input, last, i - last);
}
last = i + 1;
out.append(LT_ENCODE);
} else if (ch == '>') {
if (i > last) {
out.append(input, last, i - last);
}
last = i + 1;
out.append(GT_ENCODE);
}
else if (ch == '&') {
if (i > last) {
out.append(input, last, i - last);
}
// Do nothing if the string is of the form ë (unicode
// value)
if (!(len > i + 5 && input[i + 1] == '#'
&& Character.isDigit(input[i + 2])
&& Character.isDigit(input[i + 3])
&& Character.isDigit(input[i + 4]) && input[i + 5] == ';')) {
last = i + 1;
out.append(AMP_ENCODE);
}
} else if (ch == '"') {
if (i > last) {
out.append(input, last, i - last);
}
last = i + 1;
out.append(QUOTE_ENCODE);
} else if (ch == '\'') {
if (i > last) {
out.append(input, last, i - last);
}
last = i + 1;
out.append(APOS_ENCODE);
}
}
if (last == 0) {
return string;
}
if (i > last) {
out.append(input, last, i - last);
}
return out.toString();
}
/**
* Escapes the node portion of a JID according to "JID Escaping" (JEP-0106).
* Escaping replaces characters prohibited by node-prep with escape
* sequences, as follows:
* <p>
*
* <table border="1">
* <tr>
* <td><b>Unescaped Character</b></td>
* <td><b>Encoded Sequence</b></td>
* </tr>
* <tr>
* <td><space></td>
* <td>\20</td>
* </tr>
* <tr>
* <td>"</td>
* <td>\22</td>
* </tr>
* <tr>
* <td>&</td>
* <td>\26</td>
* </tr>
* <tr>
* <td>'</td>
* <td>\27</td>
* </tr>
* <tr>
* <td>/</td>
* <td>\2f</td>
* </tr>
* <tr>
* <td>:</td>
* <td>\3a</td>
* </tr>
* <tr>
* <td><</td>
* <td>\3c</td>
* </tr>
* <tr>
* <td>></td>
* <td>\3e</td>
* </tr>
* <tr>
* <td>@</td>
* <td>\40</td>
* </tr>
* <tr>
* <td>\</td>
* <td>\5c</td>
* </tr>
* </table>
* <p>
*
* This process is useful when the node comes from an external source that
* doesn't conform to nodeprep. For example, a username in LDAP may be
* "Joe Smith". Because the <space> character isn't a valid part of a
* node, the username should be escaped to "Joe\20Smith" before being made
* into a JID (e.g. "joe\20smith@example.com" after case-folding, etc. has
* been applied).
* <p>
*
* All node escaping and un-escaping must be performed manually at the
* appropriate time; the JID class will not escape or un-escape
* automatically.
*
* @param node
* the node.
* @return the escaped version of the node.
*/
public static String escapeNode(String node) {
if (node == null) {
return null;
}
final StringBuilder buf = new StringBuilder(node.length() + 8);
for (int i = 0, n = node.length(); i < n; i++) {
final char c = node.charAt(i);
switch (c) {
case '"':
buf.append("\\22");
break;
case '&':
buf.append("\\26");
break;
case '\'':
buf.append("\\27");
break;
case '/':
buf.append("\\2f");
break;
case ':':
buf.append("\\3a");
break;
case '<':
buf.append("\\3c");
break;
case '>':
buf.append("\\3e");
break;
case '@':
buf.append("\\40");
break;
case '\\':
buf.append("\\5c");
break;
default: {
if (Character.isWhitespace(c)) {
buf.append("\\20");
} else {
buf.append(c);
}
}
}
}
return buf.toString();
}
/**
* Formats a Date into a XEP-0082 - XMPP Date and Time Profiles string.
*
* @param date
* the time value to be formatted into a time string
* @return the formatted time string in XEP-0082 format
*/
public static String formatXEP0082Date(Date date) {
synchronized (XEP_0082_UTC_FORMAT) {
return XEP_0082_UTC_FORMAT.format(date);
}
}
/**
* Hashes a String using the SHA-1 algorithm and returns the result as a
* String of hexadecimal numbers. This method is synchronized to avoid
* excessive MessageDigest object creation. If calling this method becomes a
* bottleneck in your code, you may wish to maintain a pool of MessageDigest
* objects instead of using this method.
* <p>
* A hash is a one-way function -- that is, given an input, an output is
* easily computed. However, given the output, the input is almost
* impossible to compute. This is useful for passwords since we can store
* the hash and a hacker will then have a very hard time determining the
* original password.
*
* @param data
* the String to compute the hash of.
* @return a hashed version of the passed-in String
*/
public synchronized static String hash(String data) {
if (digest == null) {
try {
digest = MessageDigest.getInstance("SHA-1");
} catch (final NoSuchAlgorithmException nsae) {
System.err.println("Failed to load the SHA-1 MessageDigest. "
+ "Jive will be unable to function normally.");
}
}
// Now, compute hash.
try {
digest.update(data.getBytes("UTF-8"));
} catch (final UnsupportedEncodingException e) {
System.err.println(e);
}
return encodeHex(digest.digest());
}
/**
* Returns the XMPP address with any resource information removed. For
* example, for the address "matt@jivesoftware.com/Smack",
* "matt@jivesoftware.com" would be returned.
*
* @param XMPPAddress
* the XMPP address.
* @return the bare XMPP address without resource information.
*/
public static String parseBareAddress(String XMPPAddress) {
if (XMPPAddress == null) {
return null;
}
final int slashIndex = XMPPAddress.indexOf("/");
if (slashIndex < 0) {
return XMPPAddress;
} else if (slashIndex == 0) {
return "";
} else {
return XMPPAddress.substring(0, slashIndex);
}
}
/**
* Returns the name portion of a XMPP address. For example, for the address
* "matt@jivesoftware.com/Smack", "matt" would be returned. If no username
* is present in the address, the empty string will be returned.
*
* @param XMPPAddress
* the XMPP address.
* @return the name portion of the XMPP address.
*/
public static String parseName(String XMPPAddress) {
if (XMPPAddress == null) {
return null;
}
final int atIndex = XMPPAddress.lastIndexOf("@");
if (atIndex <= 0) {
return "";
} else {
return XMPPAddress.substring(0, atIndex);
}
}
/**
* Returns the resource portion of a XMPP address. For example, for the
* address "matt@jivesoftware.com/Smack", "Smack" would be returned. If no
* resource is present in the address, the empty string will be returned.
*
* @param XMPPAddress
* the XMPP address.
* @return the resource portion of the XMPP address.
*/
public static String parseResource(String XMPPAddress) {
if (XMPPAddress == null) {
return null;
}
final int slashIndex = XMPPAddress.indexOf("/");
if (slashIndex + 1 > XMPPAddress.length() || slashIndex < 0) {
return "";
} else {
return XMPPAddress.substring(slashIndex + 1);
}
}
/**
* Returns the server portion of a XMPP address. For example, for the
* address "matt@jivesoftware.com/Smack", "jivesoftware.com" would be
* returned. If no server is present in the address, the empty string will
* be returned.
*
* @param XMPPAddress
* the XMPP address.
* @return the server portion of the XMPP address.
*/
public static String parseServer(String XMPPAddress) {
if (XMPPAddress == null) {
return null;
}
final int atIndex = XMPPAddress.lastIndexOf("@");
// If the String ends with '@', return the empty string.
if (atIndex + 1 > XMPPAddress.length()) {
return "";
}
final int slashIndex = XMPPAddress.indexOf("/");
if (slashIndex > 0 && slashIndex > atIndex) {
return XMPPAddress.substring(atIndex + 1, slashIndex);
} else {
return XMPPAddress.substring(atIndex + 1);
}
}
/**
* Parses the given date string in the XEP-0082 - XMPP Date and Time
* Profiles format.
*
* @param dateString
* the date string to parse
* @return the parsed Date
* @throws ParseException
* if the specified string cannot be parsed
*/
public static Date parseXEP0082Date(String dateString)
throws ParseException {
synchronized (XEP_0082_UTC_FORMAT) {
return XEP_0082_UTC_FORMAT.parse(dateString);
}
}
/**
* Returns a random String of numbers and letters (lower and upper case) of
* the specified length. The method uses the Random class that is built-in
* to Java which is suitable for low to medium grade security uses. This
* means that the output is only pseudo random, i.e., each number is
* mathematically generated so is not truly random.
* <p>
*
* The specified length must be at least one. If not, the method will return
* null.
*
* @param length
* the desired length of the random String to return.
* @return a random String of numbers and letters of the specified length.
*/
public static String randomString(int length) {
if (length < 1) {
return null;
}
// Create a char buffer to put random letters and numbers in.
final char[] randBuffer = new char[length];
for (int i = 0; i < randBuffer.length; i++) {
randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
}
return new String(randBuffer);
}
/**
* Un-escapes the node portion of a JID according to "JID Escaping"
* (JEP-0106).
* <p>
* Escaping replaces characters prohibited by node-prep with escape
* sequences, as follows:
* <p>
*
* <table border="1">
* <tr>
* <td><b>Unescaped Character</b></td>
* <td><b>Encoded Sequence</b></td>
* </tr>
* <tr>
* <td><space></td>
* <td>\20</td>
* </tr>
* <tr>
* <td>"</td>
* <td>\22</td>
* </tr>
* <tr>
* <td>&</td>
* <td>\26</td>
* </tr>
* <tr>
* <td>'</td>
* <td>\27</td>
* </tr>
* <tr>
* <td>/</td>
* <td>\2f</td>
* </tr>
* <tr>
* <td>:</td>
* <td>\3a</td>
* </tr>
* <tr>
* <td><</td>
* <td>\3c</td>
* </tr>
* <tr>
* <td>></td>
* <td>\3e</td>
* </tr>
* <tr>
* <td>@</td>
* <td>\40</td>
* </tr>
* <tr>
* <td>\</td>
* <td>\5c</td>
* </tr>
* </table>
* <p>
*
* This process is useful when the node comes from an external source that
* doesn't conform to nodeprep. For example, a username in LDAP may be
* "Joe Smith". Because the <space> character isn't a valid part of a
* node, the username should be escaped to "Joe\20Smith" before being made
* into a JID (e.g. "joe\20smith@example.com" after case-folding, etc. has
* been applied).
* <p>
*
* All node escaping and un-escaping must be performed manually at the
* appropriate time; the JID class will not escape or un-escape
* automatically.
*
* @param node
* the escaped version of the node.
* @return the un-escaped version of the node.
*/
public static String unescapeNode(String node) {
if (node == null) {
return null;
}
final char[] nodeChars = node.toCharArray();
final StringBuilder buf = new StringBuilder(nodeChars.length);
for (int i = 0, n = nodeChars.length; i < n; i++) {
compare: {
final char c = node.charAt(i);
if (c == '\\' && i + 2 < n) {
final char c2 = nodeChars[i + 1];
final char c3 = nodeChars[i + 2];
if (c2 == '2') {
switch (c3) {
case '0':
buf.append(' ');
i += 2;
break compare;
case '2':
buf.append('"');
i += 2;
break compare;
case '6':
buf.append('&');
i += 2;
break compare;
case '7':
buf.append('\'');
i += 2;
break compare;
case 'f':
buf.append('/');
i += 2;
break compare;
}
} else if (c2 == '3') {
switch (c3) {
case 'a':
buf.append(':');
i += 2;
break compare;
case 'c':
buf.append('<');
i += 2;
break compare;
case 'e':
buf.append('>');
i += 2;
break compare;
}
} else if (c2 == '4') {
if (c3 == '0') {
buf.append("@");
i += 2;
break compare;
}
} else if (c2 == '5') {
if (c3 == 'c') {
buf.append("\\");
i += 2;
break compare;
}
}
}
buf.append(c);
}
}
return buf.toString();
}
private StringUtils() {
// Not instantiable.
}
}