/*
* This file is a part of Alchemy OS project.
* Copyright (C) 2011-2013, Sergey Basalaev <sbasalaev@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package alchemy.util;
import java.io.UTFDataFormatException;
/**
 * Various utility functions to operate on strings.
 * <p>
 * Contains modified UTF-8 encoding and decoding, a minimal positional
 * formatter, splitting around a delimiter character and conversion of
 * objects and arrays to their string representations.
 *
 * @author Sergey Basalaev
 */
public final class Strings {

    /** Not instantiable, the class only hosts static helpers. */
    private Strings() { }

    private static final String ERR_END = "Incomplete character at the end";
    private static final String ERR_WRONG = "Wrong code at byte ";
    private static final String ERR_LONG = "Encoded string is too long";

    /**
     * Largest encoded length accepted by {@link #utfEncode(String)}.
     * This is the biggest value that fits in the unsigned 16-bit
     * length field used by the modified UTF-8 format.
     */
    private static final int MAX_UTF_LENGTH = 0xffff;

    /**
     * Decodes bytes to String using modified UTF-8 format.
     * <p>
     * One-, two- and three-byte sequences are accepted. The two-byte
     * sequence <code>0xC0 0x80</code> decodes to the null character.
     * Lead bytes in range <code>0x80..0xBF</code> and four-byte lead
     * bytes (<code>0xF0..0xFF</code>) are rejected. Overlong encodings
     * are not checked for and decode to the value they carry.
     *
     * @param b byte array
     * @return decoded string
     * @throws UTFDataFormatException if given byte sequence is not valid UTF:
     *         either a multi-byte character is cut off by the end of
     *         the array or a byte has unexpected high bits (the message
     *         then contains the offset of the offending byte)
     */
    public static String utfDecode(byte[] b) throws UTFDataFormatException {
        int len = b.length;
        // every character occupies at least one byte, so len chars always suffice
        char[] chars = new char[len];
        int count = 0; // count of chars decoded so far
        int ofs = 0;   // offset of the next unread byte
        int b1, b2, b3; // bytes the current character is compounded from
        while (ofs < len) {
            b1 = b[ofs] & 0xff;
            ofs++;
            if (b1 < 0x80) { // 0xxx xxxx
                chars[count] = (char)b1;
                count++;
                continue;
            }
            switch (b1 & 0xf0) {
                case 0xc0: // 1100 xxxx  10xx xxxx
                case 0xd0: // 1101 xxxx  10xx xxxx
                    if (ofs + 1 > len) throw new UTFDataFormatException(ERR_END);
                    // b2 stays sign-extended; only masked bits are used below
                    b2 = b[ofs];
                    ofs++;
                    if ((b2 & 0xc0) != 0x80) throw new UTFDataFormatException(ERR_WRONG+(ofs-1));
                    chars[count] = (char)( ((b1 & 0x1f) << 6) | (b2 & 0x3f) );
                    count++;
                    break;
                case 0xe0: // 1110 xxxx  10xx xxxx  10xx xxxx
                    if (ofs + 2 > len) throw new UTFDataFormatException(ERR_END);
                    b2 = b[ofs];
                    ofs++;
                    b3 = b[ofs];
                    ofs++;
                    if ((b2 & 0xc0) != 0x80) throw new UTFDataFormatException(ERR_WRONG+(ofs-2));
                    if ((b3 & 0xc0) != 0x80) throw new UTFDataFormatException(ERR_WRONG+(ofs-1));
                    chars[count] = (char)( ((b1 & 0x0f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3f) );
                    count++;
                    break;
                default:
                    // continuation byte in lead position or a four-byte lead byte
                    throw new UTFDataFormatException(ERR_WRONG+(ofs-1));
            }
        }
        return new String(chars, 0, count);
    }

    /**
     * Encodes string in byte array in modified UTF format.
     * <p>
     * The null character is written as the two bytes
     * <code>0xC0 0x80</code>, other characters below <code>0x80</code>
     * take one byte, characters below <code>0x800</code> take two bytes
     * and all remaining characters take three bytes. The returned array
     * holds only the encoded characters, no length prefix is written.
     *
     * @param str string to encode
     * @return byte array containing encoded string
     * @throws UTFDataFormatException if string is too long to encode,
     *         i.e. its encoded form is longer than 65535 bytes
     */
    public static byte[] utfEncode(String str) throws UTFDataFormatException {
        int len = str.length();
        int count = 0; // length of the encoded form in bytes
        char ch;       // current character

        // first pass: calculate byte count
        for (int i=0; i<len; i++) {
            ch = str.charAt(i);
            if (ch == 0) count += 2;
            else if (ch < 0x80) count++;
            else if (ch < 0x0800) count += 2;
            else count += 3;
        }
        // exactly MAX_UTF_LENGTH bytes is still a valid encoded length
        if (count > MAX_UTF_LENGTH)
            throw new UTFDataFormatException(ERR_LONG);

        // second pass: actual encoding
        byte[] bytes = new byte[count];
        int ofs = 0; // offset of the next byte to write
        for (int i=0; i<len; i++) {
            ch = str.charAt(i);
            if (ch == 0) {
                bytes[ofs] = (byte)0xc0;
                ofs++;
                bytes[ofs] = (byte)0x80;
                ofs++;
            } else if (ch < 0x80) {
                bytes[ofs] = (byte)ch;
                ofs++;
            } else if (ch < 0x800) {
                bytes[ofs] = (byte)((ch >> 6) | 0xc0);
                ofs++;
                bytes[ofs] = (byte)((ch & 0x3f) | 0x80);
                ofs++;
            } else {
                bytes[ofs] = (byte)((ch >> 12) | 0xe0);
                ofs++;
                bytes[ofs] = (byte)(((ch >> 6) & 0x3f) | 0x80);
                ofs++;
                bytes[ofs] = (byte)((ch & 0x3f) | 0x80);
                ofs++;
            }
        }
        return bytes;
    }

    /**
     * Returns formatted string using specified format string and arguments.
     * Format specifiers are substrings of form <code>%n</code> where
     * <code>n</code> is from 0 to 9. Each format specifier is substituted
     * with corresponding value from array <code>args</code> converted
     * by {@link #toString(Object)}. Specifier <code>%%</code> is
     * substituted with percent character.
     * <p>
     * A percent sign followed by any other character is replaced by
     * that character alone. A percent sign that is the very last
     * character of the format string is copied as is. Referencing
     * an argument index that is outside of <code>args</code> fails
     * with the usual array access exception.
     *
     * @param fmt format string
     * @param args arguments referenced by the format specifiers
     * @return a formatted string
     */
    public static String format(String fmt, Object[] args) {
        int len = fmt.length();
        StringBuffer buf = new StringBuffer();
        int pos = 0; // start of the part of fmt that is not yet processed
        while (true) {
            int index = fmt.indexOf('%', pos);
            if (index < 0 || index == len-1) {
                // no more specifiers: copy the rest (possibly with trailing '%')
                buf.append(fmt.substring(pos));
                break;
            }
            buf.append(fmt.substring(pos, index));
            char param = fmt.charAt(index+1);
            if (param >= '0' && param <= '9') {
                buf.append(toString(args[param-'0']));
            } else {
                buf.append(param);
            }
            pos = index+2;
        }
        return buf.toString();
    }

    /**
     * Splits specified string around given character.
     * <p>
     * A delimiter at the very end of the string does not produce
     * a trailing empty string, and an empty input string produces
     * an empty array.
     *
     * @param str string to split
     * @param ch delimiter character
     * @param skipEmpty if true, empty strings are excluded
     * @return the array of strings computed by splitting the string
     *         around given character
     */
    public static String[] split(String str, char ch, boolean skipEmpty) {
        ArrayList strings = new ArrayList();
        int len = str.length();
        int start = 0;
        while (start < len) {
            int end = str.indexOf(ch, start);
            if (end < 0) end = len;
            // the piece is empty exactly when end == start
            if (!skipEmpty || end > start) {
                strings.add(str.substring(start, end));
            }
            start = end+1;
        }
        String[] ret = new String[strings.size()];
        strings.copyInto(ret);
        return ret;
    }

    /**
     * Returns hexadecimal character that represents the number.
     * Upper case letters are used for values above 9.
     * The argument is expected to be in range 0..15, it is not checked.
     */
    private static char hexchar(int i) {
        return (char) (i <= 9 ? '0'+i : 'A'-10 + i);
    }

    /**
     * Writes character to the buffer.
     * Newline, carriage return, tab and null character are written as
     * <code>\n</code>, <code>\r</code>, <code>\t</code> and <code>\0</code>.
     * Characters from space up to <code>~</code> are written as is.
     * Any other character (remaining control characters, DEL and
     * everything non-ASCII) is written as <code>\</code><code>uXXXX</code>
     * with four upper case hexadecimal digits.
     * Quotes and backslash are not escaped.
     */
    private static void writeChar(char ch, StringBuffer buf) {
        switch (ch) {
            case '\n': buf.append("\\n"); break;
            case '\r': buf.append("\\r"); break;
            case '\t': buf.append("\\t"); break;
            case '\0': buf.append("\\0"); break;
            default:
                if (ch >= ' ' && ch < 127) {
                    buf.append(ch);
                } else {
                    buf.append("\\u")
                       .append(hexchar((ch >> 12) & 0xF))
                       .append(hexchar((ch >> 8) & 0xF))
                       .append(hexchar((ch >> 4) & 0xF))
                       .append(hexchar(ch & 0xF));
                }
        }
    }

    /**
     * Converts array to a string and writes it to the buffer.
     * Elements are separated by <code>", "</code> and the whole list is
     * enclosed in square brackets. Elements of object arrays are written
     * by {@link #buildString(Object, ArrayList, StringBuffer)}, chars are
     * written escaped in single quotes.
     * <p>
     * If the array is already present in <code>dejaVu</code> then
     * <code>[...]</code> is written in place of its elements, so
     * arrays that contain themselves do not cause endless recursion.
     *
     * @param a      array to convert, must not be null
     * @param dejaVu arrays and collections that are currently being
     *               converted further up the call chain
     * @param buf    buffer to write the result to
     */
    static void arrayToString(Object a, ArrayList dejaVu, StringBuffer buf) {
        buf.append('[');
        // hack to not invoke 9 instanceof's: the second character of
        // array class name tells the element type.
        // NOTE(review): presumably Arrays.AR_* are the JVM type descriptor
        // characters ('L', 'Z', 'B', ...) - confirm against Arrays.
        // If no case matches, nothing is written between the brackets.
        switch (a.getClass().getName().charAt(1)) {
            case Arrays.AR_OBJECT: {
                if (dejaVu.contains(a)) {
                    buf.append("[...]");
                } else {
                    // mark the array as being processed while visiting elements
                    dejaVu.add(a);
                    Object[] aarray = (Object[])a;
                    int len = aarray.length;
                    for (int i=0; i<len; i++) {
                        if (i != 0) buf.append(", ");
                        buildString(aarray[i], dejaVu, buf);
                    }
                    dejaVu.remove(a);
                }
                break;
            }
            case Arrays.AR_BOOLEAN: {
                boolean[] zarray = (boolean[])a;
                int len = zarray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append(zarray[i] ? "true" : "false");
                }
                break;
            }
            case Arrays.AR_BYTE: {
                byte[] barray = (byte[])a;
                int len = barray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append(barray[i]);
                }
                break;
            }
            case Arrays.AR_CHAR: {
                char[] carray = (char[])a;
                int len = carray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append('\'');
                    writeChar(carray[i], buf);
                    buf.append('\'');
                }
                break;
            }
            case Arrays.AR_SHORT: {
                short[] sarray = (short[])a;
                int len = sarray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append(sarray[i]);
                }
                break;
            }
            case Arrays.AR_INT: {
                int[] iarray = (int[])a;
                int len = iarray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append(iarray[i]);
                }
                break;
            }
            case Arrays.AR_LONG: {
                long[] larray = (long[])a;
                int len = larray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append(larray[i]);
                }
                break;
            }
            case Arrays.AR_FLOAT: {
                float[] farray = (float[])a;
                int len = farray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append(farray[i]);
                }
                break;
            }
            case Arrays.AR_DOUBLE: {
                double[] darray = (double[])a;
                int len = darray.length;
                for (int i=0; i<len; i++) {
                    if (i != 0) buf.append(", ");
                    buf.append(darray[i]);
                }
                break;
            }
        }
        buf.append(']');
    }

    /**
     * Writes string representation of the object to the buffer.
     * <ul>
     * <li><code>null</code> is written as <code>null</code>;</li>
     * <li>instances of exactly <code>ArrayList</code> and
     *     <code>HashMap</code> write themselves through their own
     *     <code>buildString</code> methods;</li>
     * <li>strings are written in double quotes with characters
     *     escaped by <code>writeChar</code>;</li>
     * <li>arrays are written by
     *     {@link #arrayToString(Object, ArrayList, StringBuffer)};</li>
     * <li>any other object is appended to the buffer directly.</li>
     * </ul>
     *
     * @param a      object to convert, may be null
     * @param dejaVu arrays and collections that are currently being
     *               converted further up the call chain
     * @param buf    buffer to write the result to
     */
    static void buildString(Object a, ArrayList dejaVu, StringBuffer buf) {
        if (a == null) {
            buf.append("null");
            return;
        }
        Class clz = a.getClass();
        if (clz == ArrayList.class) {
            ((ArrayList)a).buildString(dejaVu, buf);
        } else if (clz == HashMap.class) {
            ((HashMap)a).buildString(dejaVu, buf);
        } else if (clz == String.class) {
            buf.append('"');
            String str = (String)a;
            int len = str.length();
            for (int i=0; i<len; i++) {
                writeChar(str.charAt(i), buf);
            }
            buf.append('"');
        } else if (clz.isArray()) {
            arrayToString(a, dejaVu, buf);
        } else {
            buf.append(a);
        }
    }

    /**
     * Converts Alchemy object to a string.
     * <code>null</code> is converted to <code>"null"</code>, arrays are
     * converted to the bracketed list of their elements and for any
     * other object its own <code>toString()</code> is used. Unlike
     * elements nested in arrays, a string passed directly to this method
     * is returned as is, without quotes and escaping.
     *
     * @param a object to convert, may be null
     * @return string representation of the object
     */
    public static String toString(Object a) {
        if (a == null) {
            return "null";
        } else if (a.getClass().isArray()) {
            StringBuffer buf = new StringBuffer();
            arrayToString(a, new ArrayList(), buf);
            return buf.toString();
        } else {
            return a.toString();
        }
    }
}