package com.sleepycat.util;
import de.ovgu.cide.jakutil.*;
/**
 * UTF operations with more flexibility than is provided by DataInput and
 * DataOutput.
 *
 * <p>The byte layout handled by this class is: characters 0x0001-0x007F use
 * one byte, character 0x0000 and characters 0x0080-0x07FF use two bytes, and
 * all other characters use three bytes. Because 0x0000 is written as two
 * bytes, an encoded string never contains a zero byte, which is what makes
 * zero-terminated storage possible.</p>
 *
 * @author Mark Hayes
 */
public class UtfOps {

    private static final byte[] EMPTY_BYTES = {};
    private static final String EMPTY_STRING = "";

    /**
     * Returns the byte length of a null terminated UTF string, not including
     * the terminator.
     * @param bytes the data containing the UTF string.
     * @param offset the beginning of the string to measure.
     * @throws IndexOutOfBoundsException if no zero terminator is found.
     * @return the number of bytes.
     */
    public static int getZeroTerminatedByteLength(byte[] bytes, int offset)
            throws IndexOutOfBoundsException {
        int len = 0;
        // Running off the end of the array raises the documented
        // IndexOutOfBoundsException.
        while (bytes[offset++] != 0) {
            len++;
        }
        return len;
    }

    /**
     * Returns the byte length of the UTF string that would be created by
     * converting the given characters to UTF.
     * @param chars the characters that would be converted.
     * @return the byte length of the equivalent UTF data.
     */
    public static int getByteLength(char[] chars) {
        return getByteLength(chars, 0, chars.length);
    }

    /**
     * Returns the byte length of the UTF string that would be created by
     * converting the given characters to UTF.
     * @param chars the characters that would be converted.
     * @param offset the first character to be converted.
     * @param length the number of characters to be converted.
     * @return the byte length of the equivalent UTF data.
     */
    public static int getByteLength(char[] chars, int offset, int length) {
        int len = 0;
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                len++;
            } else if (c > 0x07FF) {
                len += 3;
            } else {
                // 0x0000 and 0x0080-0x07FF take two bytes.
                len += 2;
            }
        }
        return len;
    }

    /**
     * Returns the number of characters represented by the given UTF string.
     * @param bytes the UTF string.
     * @return the number of characters.
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int getCharLength(byte[] bytes)
            throws IllegalArgumentException, IndexOutOfBoundsException {
        return getCharLength(bytes, 0, bytes.length);
    }

    /**
     * Returns the number of characters represented by the given UTF string.
     * Only the first byte of each sequence is inspected; continuation bytes
     * are skipped without being validated.
     * @param bytes the data containing the UTF string.
     * @param offset the first byte to be converted.
     * @param length the number of bytes to be converted.
     * @return the number of characters.
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int getCharLength(byte[] bytes, int offset, int length)
            throws IllegalArgumentException, IndexOutOfBoundsException {
        int charCount = 0;
        final int end = offset + length;
        while (offset < end) {
            final int start = offset;
            switch ((bytes[offset] & 0xff) >> 4) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
                offset++;
                break;
            case 12:
            case 13:
                offset += 2;
                break;
            case 14:
                offset += 3;
                break;
            default:
                throw illegalSequence(start);
            }
            // A multi-byte sequence that extends past the requested range is
            // truncated; report it instead of silently counting it.
            if (offset > end) {
                throw incompleteSequence(start);
            }
            charCount++;
        }
        return charCount;
    }

    /**
     * Converts byte arrays into character arrays.
     * @param bytes the source byte data to convert
     * @param byteOffset the offset into the byte array at which
     * to start the conversion
     * @param chars the destination array
     * @param charOffset the offset into chars at which to begin the copy
     * @param len the amount of information to copy into chars
     * @param isByteLen if true then len is a measure of bytes, otherwise
     * len is a measure of characters
     * @return the offset into bytes of the first byte after the converted
     * data.
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int bytesToChars(byte[] bytes, int byteOffset, char[] chars,
                                   int charOffset, int len, boolean isByteLen)
            throws IllegalArgumentException, IndexOutOfBoundsException {
        // The loop limit is expressed in whichever unit len is measured in.
        final int end = len + (isByteLen ? byteOffset : charOffset);
        while ((isByteLen ? byteOffset : charOffset) < end) {
            final int start = byteOffset;
            final int char1 = bytes[byteOffset++] & 0xff;
            switch (char1 >> 4) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
                chars[charOffset++] = (char) char1;
                break;
            case 12:
            case 13: {
                // When len counts bytes, never read continuation bytes that
                // lie beyond the requested range.
                if (isByteLen && start + 2 > end) {
                    throw incompleteSequence(start);
                }
                // Sign extension is harmless: only masked bits are used.
                int char2 = bytes[byteOffset++];
                if ((char2 & 0xC0) != 0x80) {
                    throw illegalSequence(start);
                }
                chars[charOffset++] =
                    (char) (((char1 & 0x1F) << 6) | (char2 & 0x3F));
                break;
            }
            case 14: {
                if (isByteLen && start + 3 > end) {
                    throw incompleteSequence(start);
                }
                int char2 = bytes[byteOffset++];
                int char3 = bytes[byteOffset++];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                    throw illegalSequence(start);
                }
                chars[charOffset++] = (char) (((char1 & 0x0F) << 12)
                                              | ((char2 & 0x3F) << 6)
                                              | (char3 & 0x3F));
                break;
            }
            default:
                throw illegalSequence(start);
            }
        }
        return byteOffset;
    }

    /**
     * Converts character arrays into byte arrays.
     * @param chars the source character data to convert
     * @param charOffset the offset into the character array at which
     * to start the conversion
     * @param bytes the destination array
     * @param byteOffset the offset into bytes at which to begin the copy
     * @param charLength the length of characters to copy into bytes
     */
    public static void charsToBytes(char[] chars, int charOffset, byte[] bytes,
                                    int byteOffset, int charLength) {
        final int end = charOffset + charLength;
        for (int i = charOffset; i < end; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                bytes[byteOffset++] = (byte) c;
            } else if (c > 0x07FF) {
                bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                bytes[byteOffset++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            } else {
                // 0x0000 lands here too, so it is written as 0xC0 0x80 and
                // the output never contains a zero byte.
                bytes[byteOffset++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            }
        }
    }

    /**
     * Converts byte arrays into strings.
     * @param bytes the source byte data to convert
     * @param offset the offset into the byte array at which
     * to start the conversion
     * @param length the number of bytes to be converted.
     * @return the string.
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static String bytesToString(byte[] bytes, int offset, int length)
            throws IllegalArgumentException, IndexOutOfBoundsException {
        if (length == 0) {
            return EMPTY_STRING;
        }
        // First pass sizes the destination exactly; second pass decodes.
        int charLen = getCharLength(bytes, offset, length);
        char[] chars = new char[charLen];
        bytesToChars(bytes, offset, chars, 0, length, true);
        return new String(chars, 0, charLen);
    }

    /**
     * Converts strings to byte arrays.
     * @param string the string to convert.
     * @return the UTF byte array.
     */
    public static byte[] stringToBytes(String string) {
        if (string.length() == 0) {
            return EMPTY_BYTES;
        }
        char[] chars = string.toCharArray();
        byte[] bytes = new byte[getByteLength(chars)];
        charsToBytes(chars, 0, bytes, 0, chars.length);
        return bytes;
    }

    /** Builds the exception for a malformed sequence starting at the given byte offset. */
    private static IllegalArgumentException illegalSequence(int byteOffset) {
        return new IllegalArgumentException(
            "Illegal UTF sequence at byte offset " + byteOffset);
    }

    /** Builds the exception for a sequence that extends past the end of the requested byte range. */
    private static IndexOutOfBoundsException incompleteSequence(int byteOffset) {
        return new IndexOutOfBoundsException(
            "Incomplete UTF sequence at byte offset " + byteOffset);
    }
}