StringUtils.java example

Explorer
Flickr-FTPD-master
- FlickrFtpd.java
- com
  - mysql
    - jdbc
/*
   Copyright (C) 2002 MySQL AB

      This program is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
      the Free Software Foundation; either version 2 of the License, or
      (at your option) any later version.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      GNU General Public License for more details.

      You should have received a copy of the GNU General Public License
      along with this program; if not, write to the Free Software
      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */
package com.mysql.jdbc;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;


/**
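 * Various utility methods for converting strings to/from byte arrays (in the
 * platform encoding or a named encoding), plus helpers for case-insensitive
 * searching, prefix matching and splitting of strings.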
 *
 * @author Mark Matthews
 */
public class StringUtils {
    /** Number of distinct values a byte can take (256). */
    private static final int BYTE_RANGE = (1 + Byte.MAX_VALUE) - Byte.MIN_VALUE;

    /** Every byte value in ascending order, Byte.MIN_VALUE first. */
    private static byte[] allBytes = new byte[BYTE_RANGE];

    /**
     * Characters obtained by decoding {@link #allBytes} with the platform
     * default charset. Entries beyond the decoded length keep their default
     * (zero) value.
     */
    private static char[] byteToChars = new char[BYTE_RANGE];

    static {
        for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
            allBytes[i - Byte.MIN_VALUE] = (byte) i;
        }

        // Decode all BYTE_RANGE bytes (the previous bound stopped one short
        // and never covered the last byte value).
        String allBytesString = new String(allBytes, 0, BYTE_RANGE);

        // The decoded string may be shorter than the byte array when the
        // platform charset is multi-byte, so never read past its length.
        int charsToCopy = Math.min(BYTE_RANGE, allBytesString.length());

        for (int i = 0; i < charsToCopy; i++) {
            byteToChars[i] = allBytesString.charAt(i);
        }
    }

    /**
     * Returns the byte[] representation of the given string using the given
     * encoding.
     *
     * <p>
     * A converter is looked up via
     * <code>SingleByteCharsetConverter.getInstance(encoding)</code> and the
     * work is delegated to
     * {@link #getBytes(String, SingleByteCharsetConverter, String)}.
     * </p>
     *
     * @param s the string to convert
     * @param encoding the character encoding to use
     *
     * @return byte[] representation of the string
     *
     * @throws UnsupportedEncodingException if an encoding unsupported by the
     *         JVM is supplied.
     */
    public static final byte[] getBytes(String s, String encoding)
        throws UnsupportedEncodingException {
        SingleByteCharsetConverter converter = SingleByteCharsetConverter
            .getInstance(encoding);

        return getBytes(s, converter, encoding);
    }

    /**
     * Returns the byte[] representation of the given string (re)using the
     * given charset converter, and the given encoding.
     *
     * <p>
     * When <code>converter</code> is non-null it does the conversion and
     * <code>encoding</code> is ignored. Otherwise a null
     * <code>encoding</code> selects the platform default charset. For the
     * SJIS, BIG5 and GBK encodings the result is additionally passed through
     * {@link #escapeSJISByteStream(byte[])}.
     * </p>
     *
     * @param s the string to convert
     * @param converter the converter to reuse, or null to use
     *        <code>encoding</code>
     * @param encoding the character encoding to use, or null for the
     *        platform default
     *
     * @return byte[] representation of the string
     *
     * @throws UnsupportedEncodingException if an encoding unsupported by the
     *         JVM is supplied.
     */
    public static final byte[] getBytes(String s,
        SingleByteCharsetConverter converter, String encoding)
        throws UnsupportedEncodingException {
        if (converter != null) {
            return converter.toBytes(s);
        }

        if (encoding == null) {
            return s.getBytes();
        }

        byte[] b = s.getBytes(encoding);

        if (needsBackslashEscaping(encoding)) {
            b = escapeSJISByteStream(b);
        }

        return b;
    }

    /**
     * Returns the byte[] representation of a region of the given string
     * (re)using the given charset converter, and the given encoding.
     *
     * <p>
     * When no converter is supplied, <code>offset</code> and
     * <code>length</code> are character positions within <code>s</code>: the
     * region is cut out of the string first and then encoded, so multi-byte
     * encodings are handled correctly. (Encoding the whole string and slicing
     * the resulting bytes, as was done previously, mixes character and byte
     * indexes.) NOTE(review): the converter path passes the same values
     * straight to <code>converter.toBytes(s, offset, length)</code>, which is
     * presumed to interpret them as character positions as well - confirm
     * against SingleByteCharsetConverter.
     * </p>
     *
     * @param s the string to convert
     * @param converter the converter to reuse, or null to use
     *        <code>encoding</code>
     * @param encoding the character encoding to use, or null for the
     *        platform default
     * @param offset index of the first character of <code>s</code> to convert
     * @param length number of characters of <code>s</code> to convert
     *
     * @return byte[] representation of the selected region; for SJIS, BIG5
     *         and GBK the bytes have been passed through
     *         {@link #escapeSJISByteStream(byte[])}
     *
     * @throws UnsupportedEncodingException if an encoding unsupported by the
     *         JVM is supplied.
     */
    public static final byte[] getBytes(String s,
        SingleByteCharsetConverter converter, String encoding, int offset,
        int length) throws UnsupportedEncodingException {
        if (converter != null) {
            return converter.toBytes(s, offset, length);
        }

        String region = s.substring(offset, offset + length);

        if (encoding == null) {
            return region.getBytes();
        }

        byte[] b = region.getBytes(encoding);

        if (needsBackslashEscaping(encoding)) {
            b = escapeSJISByteStream(b);
        }

        return b;
    }

    /**
     * Tells whether bytes produced for the given encoding are run through
     * {@link #escapeSJISByteStream(byte[])} (SJIS, BIG5 and GBK, compared
     * case-insensitively).
     */
    private static boolean needsBackslashEscaping(String encoding) {
        return encoding.equalsIgnoreCase("SJIS")
            || encoding.equalsIgnoreCase("BIG5")
            || encoding.equalsIgnoreCase("GBK");
    }

    /**
     * Dumps the given bytes to STDOUT as a hex dump (up to length bytes).
     *
     * <p>
     * Output is written in rows of 8 bytes: the hex values, four spaces, then
     * one character per byte (the byte itself if it lies strictly between 32
     * and 127, otherwise a dot). A final, space-padded row holding the
     * remaining <code>length % 8</code> bytes is always printed, even when
     * there are none.
     * </p>
     *
     * @param byteBuffer the data to print as hex
     * @param length the number of bytes to print; must not exceed
     *        <code>byteBuffer.length</code>
     */
    public static final void dumpAsHex(byte[] byteBuffer, int length) {
        final int rowWidth = 8;
        int fullRows = length / rowWidth;
        int p = 0;

        for (int row = 0; row < fullRows; row++) {
            printHexColumn(byteBuffer, p, p + rowWidth);
            System.out.print("    ");
            printCharColumn(byteBuffer, p, p + rowWidth);
            System.out.println();

            p += rowWidth;
        }

        // Trailing partial row.
        int remaining = Math.max(0, length - p);

        printHexColumn(byteBuffer, p, length);

        // Pad the hex column so the character column lines up with full rows.
        for (int i = remaining; i < rowWidth; i++) {
            System.out.print("   ");
        }

        System.out.print("    ");
        printCharColumn(byteBuffer, p, length);
        System.out.println();
    }

    /** Prints bytes [from, to) as two-digit lower-case hex, each followed by a space. */
    private static void printHexColumn(byte[] byteBuffer, int from, int to) {
        for (int i = from; i < to; i++) {
            String hexVal = Integer.toHexString(byteBuffer[i] & 0xff);

            if (hexVal.length() == 1) {
                hexVal = "0" + hexVal;
            }

            System.out.print(hexVal + " ");
        }
    }

    /** Prints bytes [from, to) as characters (or "." when not in 33..126), each followed by a space. */
    private static void printCharColumn(byte[] byteBuffer, int from, int to) {
        for (int i = from; i < to; i++) {
            byte value = byteBuffer[i];

            if ((value > 32) && (value < 127)) {
                System.out.print((char) value + " ");
            } else {
                System.out.print(". ");
            }
        }
    }

    /**
     * Returns the bytes as an ASCII String.
     *
     * @param buffer the bytes representing the string
     *
     * @return The ASCII String.
     */
    public static final String toAsciiString(byte[] buffer) {
        return toAsciiString(buffer, 0, buffer.length);
    }

    /**
     * Returns the bytes as an ASCII String.
     *
     * <p>
     * Each byte is cast directly to a char, so only values 0..127 map to
     * their ASCII characters; negative bytes are sign-extended by the cast.
     * </p>
     *
     * @param buffer the bytes to convert
     * @param startPos the position to start converting
     * @param length the length of the string to convert
     *
     * @return the ASCII string
     */
    public static final String toAsciiString(byte[] buffer, int startPos,
        int length) {
        char[] chars = new char[length];

        for (int i = 0; i < length; i++) {
            chars[i] = (char) buffer[startPos + i];
        }

        return new String(chars);
    }

    /**
     * Unfortunately, SJIS has 0x5c (backslash) as the second byte of some of
     * its double-byte characters, so we need to escape it.
     *
     * <p>
     * The stream is scanned byte by byte. A byte in 0x81-0x9F or 0xE0-0xFC is
     * treated as the first byte of a double-byte character: the following
     * byte is copied with it and, if that following byte is 0x5c, it is
     * written a second time. Separately, a 0x5c that is immediately followed
     * by 0x62 is written as 0x5c 0x5c 0x62. All other bytes are copied
     * unchanged.
     * </p>
     *
     * @param origBytes the original bytes in SJIS format
     *
     * @return byte[] with 0x5c escaped; <code>origBytes</code> itself when it
     *         is null or empty
     */
    public static byte[] escapeSJISByteStream(byte[] origBytes) {
        if ((origBytes == null) || (origBytes.length == 0)) {
            return origBytes;
        }

        int stringLen = origBytes.length;
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(stringLen);

        for (int index = 0; index < stringLen; index++) {
            // Mask to get the unsigned value of the byte.
            int firstByte = origBytes[index] & 0xff;

            // We always write the first byte
            bytesOut.write(firstByte);

            if (index >= (stringLen - 1)) {
                // No following byte to inspect.
                continue;
            }

            int secondByte = origBytes[index + 1] & 0xff;

            //
            // The codepage characters in question exist between
            // 0x81-0x9F and 0xE0-0xFC...
            //
            // See:
            //
            // http://www.microsoft.com/GLOBALDEV/Reference/dbcs/932.htm
            //
            // Problematic characters in GBK
            //
            // U+905C : CJK UNIFIED IDEOGRAPH
            //
            // Problematic characters in Big5
            //
            // B9F0 = U+5C62 : CJK UNIFIED IDEOGRAPH
            //
            if (isDoubleByteLead(firstByte)) {
                // Copy the second byte and step over it.
                bytesOut.write(secondByte);
                index++;

                // escape 0x5c if necessary
                if (secondByte == 0x5c) {
                    bytesOut.write(secondByte);
                }
            } else if ((firstByte == 0x5c) && (secondByte == 0x62)) {
                // we need to escape the 0x5c
                bytesOut.write(0x5c);
                bytesOut.write(0x62);
                index++;
            }
        }

        return bytesOut.toByteArray();
    }

    /** Tells whether the unsigned byte value lies in 0x81-0x9F or 0xE0-0xFC. */
    private static boolean isDoubleByteLead(int unsignedByte) {
        return ((unsignedByte >= 0x81) && (unsignedByte <= 0x9F))
            || ((unsignedByte >= 0xE0) && (unsignedByte <= 0xFC));
    }

    /**
     * Returns the first non whitespace char, converted to upper case
     *
     * @param searchIn the string to search in
     *
     * @return the first non-whitespace character, upper cased, or 0 if
     *         <code>searchIn</code> is null or contains only whitespace.
     */
    public static char firstNonWsCharUc(String searchIn) {
        if (searchIn == null) {
            return 0;
        }

        for (int i = 0, n = searchIn.length(); i < n; i++) {
            char c = searchIn.charAt(i);

            if (!Character.isWhitespace(c)) {
                return Character.toUpperCase(c);
            }
        }

        return 0;
    }

    /**
     * Finds the first occurrence of 'searchFor' within 'searchIn', comparing
     * characters after converting both to upper case.
     *
     * @param searchIn the string to search in
     * @param searchFor the string to search for
     *
     * @return the index of the first match, or -1 if there is none, if either
     *         argument is null, or if <code>searchFor</code> is empty
     */
    public static int indexOfIgnoreCase(String searchIn, String searchFor) {
        if ((searchIn == null) || (searchFor == null)) {
            return -1;
        }

        int patternLength = searchFor.length();

        if (patternLength == 0) {
            return -1;
        }

        // Last start position at which the whole pattern still fits; keeps
        // every charAt() below inside the bounds of searchIn.
        int lastStart = searchIn.length() - patternLength;

        for (int start = 0; start <= lastStart; start++) {
            if (matchesIgnoreCaseAt(searchIn, start, searchFor)) {
                return start;
            }
        }

        return -1;
    }

    /**
     * Tells whether searchFor occurs in searchIn at position start, comparing
     * upper-cased characters. The caller guarantees the pattern fits.
     */
    private static boolean matchesIgnoreCaseAt(String searchIn, int start,
        String searchFor) {
        int patternLength = searchFor.length();

        for (int k = 0; k < patternLength; k++) {
            char actual = Character.toUpperCase(searchIn.charAt(start + k));
            char expected = Character.toUpperCase(searchFor.charAt(k));

            if (actual != expected) {
                return false;
            }
        }

        return true;
    }

    /**
     * Splits stringToSplit into a list, using the given delimitter
     *
     * <p>
     * Splitting is done with a {@link StringTokenizer}, so every character of
     * <code>delimitter</code> acts as a separator and empty tokens are not
     * returned.
     * </p>
     *
     * @param stringToSplit the string to split
     * @param delimitter the string to split on
     * @param trim should the split strings be whitespace trimmed?
     *
     * @return the list of strings, split by delimitter; an empty list when
     *         <code>stringToSplit</code> is null
     *
     * @throws IllegalArgumentException if <code>delimitter</code> is null
     *         (and <code>stringToSplit</code> is not)
     */
    public static final List split(String stringToSplit, String delimitter,
        boolean trim) {
        if (stringToSplit == null) {
            return new ArrayList();
        }

        if (delimitter == null) {
            throw new IllegalArgumentException();
        }

        StringTokenizer tokenizer = new StringTokenizer(stringToSplit,
                delimitter, false);

        List splitTokens = new ArrayList(tokenizer.countTokens());

        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();

            splitTokens.add(trim ? token.trim() : token);
        }

        return splitTokens;
    }

    /**
     * Determines whether or not the string 'searchIn' starts with the string
     * 'searchFor', dis-regarding case. Shorthand for a
     * String.regionMatches(...)
     *
     * @param searchIn the string to search in
     * @param searchFor the string to search for
     *
     * @return whether searchIn starts with searchFor, ignoring case
     */
    public static boolean startsWithIgnoreCase(String searchIn, String searchFor) {
        return startsWithIgnoreCase(searchIn, 0, searchFor);
    }

    /**
     * Determines whether or not the string 'searchIn' has the string
     * 'searchFor' at position 'startAt', dis-regarding case. Shorthand for a
     * String.regionMatches(...)
     *
     * @param searchIn the string to search in
     * @param startAt the position in searchIn to start at
     * @param searchFor the string to search for
     *
     * @return whether searchIn, from startAt on, starts with searchFor,
     *         ignoring case
     */
    public static boolean startsWithIgnoreCase(String searchIn, int startAt,
        String searchFor) {
        // startAt is an offset into searchIn; the whole of searchFor (from
        // its offset 0) must match there.
        return searchIn.regionMatches(true, startAt, searchFor, 0,
            searchFor.length());
    }

    /**
     * Determines whether or not the string 'searchIn' starts with the string
     * 'searchFor', dis-regarding case and leading whitespace
     *
     * @param searchIn the string to search in
     * @param searchFor the string to search for
     *
     * @return true if the string starts with 'searchFor' ignoring whitespace
     */
    public static boolean startsWithIgnoreCaseAndWs(String searchIn,
        String searchFor) {
        int inLength = searchIn.length();
        int beginPos = 0;

        // Skip leading whitespace.
        while ((beginPos < inLength)
                && Character.isWhitespace(searchIn.charAt(beginPos))) {
            beginPos++;
        }

        return startsWithIgnoreCase(searchIn, beginPos, searchFor);
    }
}