/*
Copyright (C) 2002 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.mysql.jdbc;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
/**
 * Various utility methods for converting strings to/from byte arrays and for
 * simple, case-insensitive string inspection.
 *
 * @author Mark Matthews
 */
public class StringUtils {
    /** Number of distinct values a byte can take (256). */
    private static final int BYTE_RANGE = (1 + Byte.MAX_VALUE) - Byte.MIN_VALUE;

    /** Every byte value, in ascending signed order (index 0 is Byte.MIN_VALUE). */
    private static final byte[] allBytes = new byte[BYTE_RANGE];

    /**
     * The characters obtained by decoding {@link #allBytes} with the platform
     * default encoding, position by position.
     */
    private static final char[] byteToChars = new char[BYTE_RANGE];

    static {
        for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
            allBytes[i - Byte.MIN_VALUE] = (byte) i;
        }

        // Decode the complete table; the previous code stopped one byte
        // short and never mapped the last value.
        String allBytesString = new String(allBytes, 0, BYTE_RANGE);
        int allBytesStringLen = allBytesString.length();

        // The decoded string may be shorter than the table if the platform
        // encoding combines bytes, hence the second bound.
        for (int i = 0; (i < BYTE_RANGE) && (i < allBytesStringLen); i++) {
            byteToChars[i] = allBytesString.charAt(i);
        }
    }

    /**
     * Returns the byte[] representation of the given string using the given
     * encoding. A converter is looked up via
     * SingleByteCharsetConverter.getInstance(encoding) and, when one is
     * returned, used for the conversion.
     *
     * @param s the string to convert
     * @param encoding the character encoding to use
     *
     * @return byte[] representation of the string
     *
     * @throws UnsupportedEncodingException if an encoding unsupported by the
     *         JVM is supplied.
     */
    public static final byte[] getBytes(String s, String encoding)
        throws UnsupportedEncodingException {
        SingleByteCharsetConverter converter = SingleByteCharsetConverter
            .getInstance(encoding);

        return getBytes(s, converter, encoding);
    }

    /**
     * Returns the byte[] representation of the given string (re)using the
     * given charset converter, and the given encoding.
     *
     * <p>
     * If a converter is supplied it is used exclusively. Otherwise the string
     * is encoded with the named encoding (or the platform default when
     * <code>encoding</code> is null). Output in SJIS, BIG5 or GBK is
     * additionally passed through {@link #escapeSJISByteStream(byte[])}.
     * </p>
     *
     * @param s the string to convert
     * @param converter the converter to reuse, may be null
     * @param encoding the character encoding to use, may be null
     *
     * @return byte[] representation of the string
     *
     * @throws UnsupportedEncodingException if an encoding unsupported by the
     *         JVM is supplied.
     */
    public static final byte[] getBytes(String s,
        SingleByteCharsetConverter converter, String encoding)
        throws UnsupportedEncodingException {
        if (converter != null) {
            return converter.toBytes(s);
        }

        if (encoding == null) {
            return s.getBytes();
        }

        byte[] b = s.getBytes(encoding);

        if (needsBackslashEscaping(encoding)) {
            b = escapeSJISByteStream(b);
        }

        return b;
    }

    /**
     * Returns the byte[] representation of a region of the given string,
     * (re)using the given charset converter, and the given encoding.
     *
     * <p>
     * Without a converter, <code>offset</code> and <code>length</code> are
     * character positions in <code>s</code>: the region is cut out of the
     * string first and encoded afterwards, so multi-byte characters are never
     * split and the result may be longer than <code>length</code> bytes. With
     * a converter, both values are handed unchanged to
     * <code>converter.toBytes(s, offset, length)</code> (presumably also
     * character positions - confirm against SingleByteCharsetConverter).
     * </p>
     *
     * @param s the string to convert
     * @param converter the converter to reuse, may be null
     * @param encoding the character encoding to use, may be null for the
     *        platform default
     * @param offset index of the first character to convert
     * @param length number of characters to convert
     *
     * @return byte[] representation of the selected region
     *
     * @throws UnsupportedEncodingException if an encoding unsupported by the
     *         JVM is supplied.
     */
    public static final byte[] getBytes(String s,
        SingleByteCharsetConverter converter, String encoding, int offset,
        int length) throws UnsupportedEncodingException {
        if (converter != null) {
            return converter.toBytes(s, offset, length);
        }

        // Slice by characters before encoding. Slicing the encoded bytes
        // (as was done previously) returns the wrong data for any encoding
        // in which a character may occupy more than one byte.
        String region = s.substring(offset, offset + length);

        if (encoding == null) {
            return region.getBytes();
        }

        byte[] b = region.getBytes(encoding);

        if (needsBackslashEscaping(encoding)) {
            b = escapeSJISByteStream(b);
        }

        return b;
    }

    /**
     * Tells whether output in the given encoding has to be run through
     * {@link #escapeSJISByteStream(byte[])}.
     *
     * @param encoding the (non-null) encoding name
     *
     * @return true for SJIS, BIG5 and GBK, compared case-insensitively
     */
    private static boolean needsBackslashEscaping(String encoding) {
        return encoding.equalsIgnoreCase("SJIS")
            || encoding.equalsIgnoreCase("BIG5")
            || encoding.equalsIgnoreCase("GBK");
    }

    /**
     * Dumps the given bytes to STDOUT as a hex dump (up to length bytes),
     * eight bytes per row: the hex values first, followed by the same bytes
     * as characters. Bytes outside the range 33..126 are shown as '.'.
     *
     * @param byteBuffer the data to print as hex
     * @param length the number of bytes to print
     */
    public static final void dumpAsHex(byte[] byteBuffer, int length) {
        int p = 0;
        int rows = length / 8;

        // Complete rows of eight bytes
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < 8; j++) {
                System.out.print(hexCell(byteBuffer[p + j]));
            }

            System.out.print(" ");

            for (int j = 0; j < 8; j++) {
                System.out.print(asciiCell(byteBuffer[p + j]));
            }

            System.out.println();
            p += 8;
        }

        // Trailing partial row
        int n = 0;

        for (int i = p; i < length; i++) {
            System.out.print(hexCell(byteBuffer[i]));
            n++;
        }

        // Each hex cell is three characters wide ("xx "), so absent cells
        // must be padded by the same width to keep the text column aligned.
        for (int i = n; i < 8; i++) {
            System.out.print("   ");
        }

        System.out.print(" ");

        for (int i = p; i < length; i++) {
            System.out.print(asciiCell(byteBuffer[i]));
        }

        System.out.println();
    }

    /**
     * Formats one byte as two lower-case hex digits followed by a space.
     *
     * @param b the byte to format
     *
     * @return the three-character hex cell
     */
    private static String hexCell(byte b) {
        String hexVal = Integer.toHexString((int) b & 0xff);

        if (hexVal.length() == 1) {
            hexVal = "0" + hexVal;
        }

        return hexVal + " ";
    }

    /**
     * Formats one byte for the text column of the hex dump.
     *
     * @param b the byte to format
     *
     * @return the character followed by a space if the byte is in the range
     *         33..126, otherwise ". "
     */
    private static String asciiCell(byte b) {
        if ((b > 32) && (b < 127)) {
            return (char) b + " ";
        }

        return ". ";
    }

    /**
     * Returns the bytes as an ASCII String.
     *
     * @param buffer the bytes representing the string
     *
     * @return The ASCII String.
     */
    public static final String toAsciiString(byte[] buffer) {
        return toAsciiString(buffer, 0, buffer.length);
    }

    /**
     * Returns the bytes as an ASCII String. Every byte becomes exactly one
     * character whose code equals the unsigned value of the byte.
     *
     * @param buffer the bytes to convert
     * @param startPos the position to start converting
     * @param length the length of the string to convert
     *
     * @return the ASCII string
     */
    public static final String toAsciiString(byte[] buffer, int startPos,
        int length) {
        char[] charArray = new char[length];

        for (int i = 0; i < length; i++) {
            // Mask to avoid sign extension: a plain (char) cast would turn
            // bytes >= 0x80 into characters in the U+FF80..U+FFFF range.
            charArray[i] = (char) (buffer[startPos + i] & 0xff);
        }

        return new String(charArray);
    }

    /**
     * Unfortunately, SJIS has 0x5c as the second byte in some of its
     * double-byte characters, so we need to escape it.
     *
     * <p>
     * The input is copied byte by byte with two additions:
     * </p>
     *
     * <ul>
     * <li>
     * when a byte in 0x81-0x9F or 0xE0-0xFC is followed by 0x5c, the 0x5c is
     * written twice;
     * </li>
     * <li>
     * when a 0x5c outside such a pair is followed by 0x62, the sequence is
     * written as 0x5c 0x5c 0x62.
     * </li>
     * </ul>
     *
     * @param origBytes the original bytes in SJIS format
     *
     * @return byte[] with 0x5c escaped; the input itself if it is null or
     *         empty
     */
    public static byte[] escapeSJISByteStream(byte[] origBytes) {
        if ((origBytes == null) || (origBytes.length == 0)) {
            return origBytes;
        }

        int stringLen = origBytes.length;
        int bufIndex = 0;
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(stringLen);

        while (bufIndex < stringLen) {
            // Grab the first byte as an unsigned value
            int firstByte = origBytes[bufIndex] & 0xff;

            // We always write the first byte
            bytesOut.write(firstByte);

            boolean hasNext = bufIndex < (stringLen - 1);

            //
            // The codepage characters in question exist between
            // 0x81-0x9F and 0xE0-0xFC...
            //
            // See:
            //
            // http://www.microsoft.com/GLOBALDEV/Reference/dbcs/932.htm
            //
            // Problematic characters in GBK
            //
            // U+905C : CJK UNIFIED IDEOGRAPH
            //
            // Problematic characters in Big5
            //
            // B9F0 = U+5C62 : CJK UNIFIED IDEOGRAPH
            //
            if (((firstByte >= 0x81) && (firstByte <= 0x9F))
                    || ((firstByte >= 0xE0) && (firstByte <= 0xFC))) {
                if (hasNext) {
                    int secondByte = origBytes[bufIndex + 1] & 0xff;

                    // write the second byte here, and step over it
                    bytesOut.write(secondByte);
                    bufIndex++;

                    // escape 0x5c if necessary
                    if (secondByte == 0x5C) {
                        bytesOut.write(secondByte);
                    }
                }
            } else if (firstByte == 0x5c) {
                if (hasNext) {
                    int secondByte = origBytes[bufIndex + 1] & 0xff;

                    if (secondByte == 0x62) {
                        // we need to escape the 0x5c
                        bytesOut.write(0x5c);
                        bytesOut.write(0x62);
                        bufIndex++;
                    }
                }
            }

            bufIndex++;
        }

        return bytesOut.toByteArray();
    }

    /**
     * Returns the first non whitespace char, converted to upper case
     *
     * @param searchIn the string to search in
     *
     * @return the first non-whitespace character, upper cased, or 0 if the
     *         string is null or contains only whitespace.
     */
    public static char firstNonWsCharUc(String searchIn) {
        if (searchIn == null) {
            return 0;
        }

        int length = searchIn.length();

        for (int i = 0; i < length; i++) {
            char c = searchIn.charAt(i);

            if (!Character.isWhitespace(c)) {
                return Character.toUpperCase(c);
            }
        }

        return 0;
    }

    /**
     * Finds the first occurrence of 'searchFor' within 'searchIn', comparing
     * characters after converting both to upper case.
     *
     * @param searchIn the string to search in
     * @param searchFor the string to search for
     *
     * @return the index of the first match, or -1 if there is none, if
     *         either argument is null, or if 'searchFor' is empty
     */
    public static int indexOfIgnoreCase(String searchIn, String searchFor) {
        if ((searchIn == null) || (searchFor == null)) {
            return -1;
        }

        int patternLength = searchFor.length();
        int stringLength = searchIn.length();

        if (patternLength == 0) {
            return -1;
        }

        // Last index at which the whole pattern still fits; bounding the
        // scan here guarantees that charAt() never runs off the end of
        // searchIn (the previous implementation threw in that case).
        int lastStart = stringLength - patternLength;

        // Brute force string pattern matching
        nextCandidate:
        for (int i = 0; i <= lastStart; i++) {
            for (int k = 0; k < patternLength; k++) {
                if (Character.toUpperCase(searchIn.charAt(i + k)) != Character
                        .toUpperCase(searchFor.charAt(k))) {
                    continue nextCandidate;
                }
            }

            return i; // found entire pattern
        }

        return -1;
    }

    /**
     * Splits stringToSplit into a list, using the given delimitter. The
     * splitting is done by a StringTokenizer, so every character of
     * 'delimitter' acts as a separator on its own and empty tokens are not
     * reported.
     *
     * @param stringToSplit the string to split
     * @param delimitter the delimiter characters to split on
     * @param trim should the split strings be whitespace trimmed?
     *
     * @return the list of strings, split by delimitter; an empty list if
     *         stringToSplit is null
     *
     * @throws IllegalArgumentException if delimitter is null
     */
    public static final List split(String stringToSplit, String delimitter,
        boolean trim) {
        if (stringToSplit == null) {
            return new ArrayList();
        }

        if (delimitter == null) {
            throw new IllegalArgumentException();
        }

        StringTokenizer tokenizer = new StringTokenizer(stringToSplit,
                delimitter, false);

        List splitTokens = new ArrayList(tokenizer.countTokens());

        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();

            if (trim) {
                token = token.trim();
            }

            splitTokens.add(token);
        }

        return splitTokens;
    }

    /**
     * Determines whether or not the string 'searchIn' starts with the string
     * 'searchFor', disregarding case. Shorthand for a
     * String.regionMatches(...)
     *
     * @param searchIn the string to search in
     * @param searchFor the string to search for
     *
     * @return whether searchIn starts with searchFor, ignoring case
     */
    public static boolean startsWithIgnoreCase(String searchIn, String searchFor) {
        return startsWithIgnoreCase(searchIn, 0, searchFor);
    }

    /**
     * Determines whether or not the string 'searchIn' contains the string
     * 'searchFor' at position 'startAt', disregarding case. Shorthand for a
     * String.regionMatches(...)
     *
     * @param searchIn the string to search in
     * @param startAt the position in searchIn to start at
     * @param searchFor the string to search for
     *
     * @return whether the region of searchIn beginning at startAt starts
     *         with searchFor, ignoring case
     */
    public static boolean startsWithIgnoreCase(String searchIn, int startAt,
        String searchFor) {
        // startAt is an offset into searchIn; searchFor is always compared
        // from its beginning. (The two offsets used to be swapped, which
        // made every call with startAt > 0 return false.)
        return searchIn.regionMatches(true, startAt, searchFor, 0,
            searchFor.length());
    }

    /**
     * Determines whether or not the string 'searchIn' starts with the string
     * 'searchFor', disregarding case and leading whitespace
     *
     * @param searchIn the string to search in
     * @param searchFor the string to search for
     *
     * @return true if the string starts with 'searchFor' ignoring whitespace
     */
    public static boolean startsWithIgnoreCaseAndWs(String searchIn,
        String searchFor) {
        int inLength = searchIn.length();
        int beginPos = 0;

        // Skip over leading whitespace
        while ((beginPos < inLength)
                && Character.isWhitespace(searchIn.charAt(beginPos))) {
            beginPos++;
        }

        return startsWithIgnoreCase(searchIn, beginPos, searchFor);
    }
}