package LBJ2.util;
import java.util.Arrays;
/**
* Represents a <code>String</code> by directly storing an encoding of that
* <code>String</code> in an array of <code>byte</code>s. This can save a
* lot of memory if all of the application's characters fit in a single byte
* when encoded by, for instance, UTF-8. In fact, the default encoding used
* by this class is UTF-8.
*
* @author Nick Rizzolo
**/
public class ByteString implements Cloneable, Comparable
{
  /** The default character encoding for instances of this class. */
  public static final String defaultEncoding = "UTF-8";
  /** A byte string representing <code>""</code>. */
  public static final ByteString emptyString =
    new ByteString("", defaultEncoding);


  /**
    * Handles exceptions generated by unsupported encodings by reporting the
    * problem on <code>System.err</code> and terminating the JVM with exit
    * status 1.
    *
    * @param e  The exception.
   **/
  protected void handleEncodingException(Exception e) {
    System.err.println(
        "ERROR: Encoding \"" + encoding + "\" is not supported.");
    e.printStackTrace();
    System.exit(1);
  }


  /**
    * Reads and returns a byte string from an input stream.
    *
    * @param in  The input stream.
    * @return The byte string.
   **/
  public static ByteString readByteString(ExceptionlessInputStream in) {
    ByteString result = new ByteString(false);
    result.read(in);
    return result;
  }


  /**
    * Reads and returns a byte string as written by a lexicon.
    *
    * @param in  The input stream.
    * @param i   The assumed identifier.  Whatever part of the byte string
    *            (value, hash code, encoding) is omitted from the input
    *            stream is copied from this identifier.
    * @return The byte string.
   **/
  public static ByteString lexReadByteString(ExceptionlessInputStream in,
                                             ByteString i) {
    ByteString result = new ByteString(false);
    result.lexRead(in, i);
    return result;
  }


  /**
    * The encoding method used by this instance.  Every assignment made by
    * this class stores either {@link #defaultEncoding} or an interned
    * string, which is why encodings are compared by reference throughout.
   **/
  protected String encoding;
  /** The encoded characters. */
  protected byte[] value;
  /**
    * The hash code of the <code>String</code> decoding of this byte string.
   **/
  protected int hashCode;


  /**
    * For internal use only.  Leaves every field unset; the caller is
    * expected to fill them in (e.g. via {@link #read} or {@link #lexRead}).
    *
    * @param b  Dummy variable to make a new signature.
   **/
  protected ByteString(boolean b) { }

  /** Creates an empty byte string. */
  public ByteString() { this(""); }

  /**
    * Creates a byte string by using the default encoding to encode the
    * specified string.
    *
    * @param s  The string to encode.
   **/
  public ByteString(String s) { this(s, null); }

  /**
    * Creates a byte string by using the specified encoding to encode the
    * specified string.
    *
    * @param s  The string to encode.
    * @param e  The encoding method, or <code>null</code> for
    *           {@link #defaultEncoding}.
   **/
  public ByteString(String s, String e) {
    encoding = e == null ? defaultEncoding : e.intern();
    setValue(s);
  }

  /**
    * Creates a byte string with the given encoding, which may involve
    * converting the specified byte string's contents if the encodings differ.
    * When the encodings match, the new object shares the original's byte
    * array rather than copying it.
    *
    * @param b  The original byte string.
    * @param e  The new encoding, or <code>null</code> for
    *           {@link #defaultEncoding}.
   **/
  public ByteString(ByteString b, String e) {
    // A null encoding selects the default, as in ByteString(String, String).
    encoding = e == null ? defaultEncoding : e.intern();

    if (b.encoding == encoding) {
      value = b.value;
      hashCode = b.hashCode;
    }
    else setValue(b.toString());
  }


  /** Returns the name of the encoding method of this byte string. */
  public String getEncoding() { return encoding; }


  /**
    * Sets the value of this byte string to the byte encoding of the specified
    * string, and caches that string's hash code.
    *
    * @param s  The string to encode.
   **/
  public void setValue(String s) {
    try { value = s.getBytes(encoding); }
    catch (Exception e) { handleEncodingException(e); }
    hashCode = s.hashCode();
  }


  /** Returns the length of {@link #value}. */
  public int length() { return value.length; }


  /**
    * Returns the byte at index <code>i</code> of {@link #value}.
    *
    * @param i  The index of the requested byte.
    * @return The value of the requested byte.
   **/
  public byte getByte(int i) { return value[i]; }


  /**
    * Replaces {@link #value} with a freshly allocated array holding the
    * current bytes followed by every array in <code>pieces</code>, in order.
    * The previous array is never written to.
    *
    * @param pieces  The byte arrays to append.
   **/
  private void appendBytes(byte[][] pieces) {
    // The new array must hold the existing content plus everything appended.
    int total = value.length;
    for (int i = 0; i < pieces.length; ++i) total += pieces[i].length;

    byte[] t = new byte[total];
    System.arraycopy(value, 0, t, 0, value.length);

    int offset = value.length;
    for (int i = 0; i < pieces.length; ++i) {
      System.arraycopy(pieces[i], 0, t, offset, pieces[i].length);
      offset += pieces[i].length;
    }

    value = t;
  }


  /**
    * Appends the encoding of the given string onto the existing encoding in
    * this object.  This operation changes the {@link #value} reference in
    * this object.
    *
    * <p> <b>Warning:</b> Depending on the character encoding in use, this may
    * introduce byte order markers into the middle of this object's byte
    * array, which usually is not desired.
    *
    * @param s  The string whose encoding will be appended.
    * @return This object.
   **/
  public ByteString append(String s) {
    // Computed from the decoded content before the byte array is replaced.
    int newHashCode = (toString() + s).hashCode();

    byte[] encoded = null;
    try { encoded = s.getBytes(encoding); }
    catch (Exception e) { handleEncodingException(e); }

    appendBytes(new byte[][]{ encoded });
    hashCode = newHashCode;
    return this;
  }


  /**
    * Appends the encodings of all the given strings onto the existing
    * encoding in this object.  Each string is encoded separately.  This
    * operation changes the {@link #value} reference in this object.
    *
    * <p> <b>Warning:</b> Depending on the character encoding in use, this may
    * introduce byte order markers into the middle of this object's byte
    * array, which usually is not desired.
    *
    * @param s  The strings whose encodings will be appended.
    * @return This object.
   **/
  public ByteString append(String[] s) {
    StringBuffer buffer = new StringBuffer(toString());
    for (int i = 0; i < s.length; ++i) buffer.append(s[i]);
    int newHashCode = buffer.toString().hashCode();

    byte[][] pieces = new byte[s.length][];
    try {
      for (int i = 0; i < pieces.length; ++i)
        pieces[i] = s[i].getBytes(encoding);
    }
    catch (Exception e) { handleEncodingException(e); }

    appendBytes(pieces);
    hashCode = newHashCode;
    return this;
  }


  /**
    * Appends the string represented by the given byte string onto the
    * existing content in this object.  If the argument uses a different
    * encoding, its decoded string is re-encoded with this object's encoding.
    * This operation changes the {@link #value} reference in this object.
    *
    * <p> <b>Warning:</b> Depending on the character encoding in use, this may
    * introduce byte order markers into the middle of this object's byte
    * array, which usually is not desired.
    *
    * @param b  The string whose encoding will be appended.
    * @return This object.
   **/
  public ByteString append(ByteString b) {
    String s = b.toString();
    if (encoding != b.encoding) return append(s);

    int newHashCode = (toString() + s).hashCode();
    appendBytes(new byte[][]{ b.value });
    hashCode = newHashCode;
    return this;
  }


  /**
    * Appends the strings represented by the given byte strings onto the
    * existing content in this object.  Elements that use a different
    * encoding are re-encoded with this object's encoding; the argument array
    * itself is not modified.  This operation changes the {@link #value}
    * reference in this object.
    *
    * <p> <b>Warning:</b> Depending on the character encoding in use, this may
    * introduce byte order markers into the middle of this object's byte
    * array, which usually is not desired.
    *
    * @param b  The strings whose encodings will be appended.
    * @return This object.
   **/
  public ByteString append(ByteString[] b) {
    StringBuffer buffer = new StringBuffer(toString());
    byte[][] pieces = new byte[b.length][];

    for (int i = 0; i < b.length; ++i) {
      String s = b[i].toString();
      buffer.append(s);

      if (encoding == b[i].encoding) pieces[i] = b[i].value;
      else {
        // Re-encode into a local array instead of overwriting b[i].
        try { pieces[i] = s.getBytes(encoding); }
        catch (Exception e) { handleEncodingException(e); }
      }
    }

    int newHashCode = buffer.toString().hashCode();
    appendBytes(pieces);
    hashCode = newHashCode;
    return this;
  }


  /**
    * If the argument object is a byte string, this object's byte array and
    * the argument object's byte array are compared lexicographically, with
    * individual bytes compared as signed values.  Otherwise, -1 is returned.
    * If the two byte strings do not share the same encoding, their decoded
    * <code>String</code>s are compared instead, which is considerably more
    * expensive.
    *
    * @param o  The object to compare against.
    * @return A negative, zero, or positive integer as described above.
   **/
  public int compareTo(Object o) {
    if (!(o instanceof ByteString)) return -1;
    ByteString b = (ByteString) o;
    if (encoding != b.encoding) return toString().compareTo(b.toString());

    int n1 = value.length;
    int n2 = b.value.length;
    int n = Math.min(n1, n2);

    for (int i = 0; i < n; ++i) {
      byte b1 = value[i];
      byte b2 = b.value[i];
      if (b1 != b2) return b1 - b2;
    }

    // One array is a prefix of the other: the shorter one sorts first.
    return n1 - n2;
  }


  /**
    * Returns the cached hash code, which is the hash code of the
    * <code>String</code> decoding of this byte string.
   **/
  public int hashCode() { return hashCode; }


  /**
    * Two byte strings are equivalent if they encode the same string.  This
    * operation is more expensive if the two byte strings use different
    * encodings.  A <code>String</code> argument is compared against this
    * object's decoded string.
    *
    * @param o  The object to compare against.
    * @return <code>true</code> iff the argument represents the same string.
   **/
  public boolean equals(Object o) {
    if (o instanceof String) return toString().equals(o);
    if (!(o instanceof ByteString)) return false;
    ByteString b = (ByteString) o;
    if (encoding != b.encoding) return toString().equals(b.toString());
    return Arrays.equals(value, b.value);
  }


  /**
    * Writes a complete binary representation of this byte string: the
    * encoding name, the hash code, and the bytes, in that order.
    *
    * @param out The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    out.writeString(encoding);
    out.writeInt(hashCode);
    out.writeBytes(value);
  }


  /**
    * Reads in a complete binary representation of a byte string, in the
    * order produced by {@link #write(ExceptionlessOutputStream)}.
    *
    * @param in  The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    encoding = in.readString().intern();
    hashCode = in.readInt();
    value = in.readBytes();
  }


  /**
    * Writes a binary representation of this byte string intended for use by
    * a lexicon, omitting redundant information when possible.
    *
    * @param out The output stream.
    * @param i   The assumed identifier string.  This byte string's value,
    *            encoding, or both may be omitted if they are equivalent to
    *            <code>i</code>.
   **/
  public void lexWrite(ExceptionlessOutputStream out, ByteString i) {
    if (i != null && encoding == i.encoding && Arrays.equals(value, i.value))
      out.writeBytes(null);  // Everything matches i; write only a marker.
    else {
      out.writeBytes(value);
      out.writeInt(hashCode);
      out.writeString(i != null && encoding == i.encoding ? null : encoding);
    }
  }


  /**
    * Reads the representation of a byte string as stored by a lexicon,
    * overwriting the data in this object.
    *
    * <p> This method is appropriate for reading byte strings as written by
    * {@link #lexWrite(ExceptionlessOutputStream,ByteString)}.
    *
    * @param in  The input stream.
    * @param i   The assumed identifier string.  It is dereferenced whenever
    *            the stream omits the value or the encoding.
   **/
  public void lexRead(ExceptionlessInputStream in, ByteString i) {
    value = in.readBytes();

    if (value == null) {
      // The writer omitted everything; take all fields from i.
      value = i.value;
      hashCode = i.hashCode;
      encoding = i.encoding;
    }
    else {
      hashCode = in.readInt();
      encoding = in.readString();
      if (encoding == null) encoding = i.encoding;
      else encoding = encoding.intern();
    }
  }


  /** Returns a decoded string. */
  public String toString() {
    try { return new String(value, encoding); }
    catch (Exception e) { handleEncodingException(e); }
    return null;
  }


  /**
    * Returns a shallow copy of this string.  Note that this class does not
    * provide any operations that modify the contents of the objects
    * referenced by its fields, making a deep clone unnecessary.
    * ({@link #append(String)}, {@link #append(ByteString)}, and
    * {@link #setValue(String)} modify the {@link #value} field itself, but
    * the reference is merely replaced; the contents of the original array do
    * not change.)
    *
    * @return The copy, or <code>null</code> if cloning failed.
   **/
  public Object clone() {
    Object result = null;

    try { result = super.clone(); }
    catch (Exception e) {
      System.err.println("Can't clone byte string '" + this + "':");
      e.printStackTrace();
    }

    return result;
  }
}