/*
* Part of the CCNx Java Library.
*
* Copyright (C) 2008-2011 Palo Alto Research Center, Inc.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 2.1
* as published by the Free Software Foundation.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. You should have received
* a copy of the GNU Lesser General Public License along with this library;
* if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.ccnx.ccn.impl.encoding;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.logging.Level;
import org.ccnx.ccn.impl.support.DataUtils;
import org.ccnx.ccn.impl.support.Log;
/**
* The ccnb compressed binary XML codec. This class contains utility functions used by
* BinaryXMLEncoder and BinaryXMLDecoder as well as setup to use this codec with XMLCodecFactory.
*
* Ccnb encoding uses a dictionary to turn tag and attribute names into short
* binary identifiers, and uses a compressed encoding for those identifiers and the lengths
* of atomic UTF-8 and binary data. For easy encoding & decoding there are no lengths of elements;
* this means encoding can be done as a single pass because the length of an encoded child
* element does not need to be known in order to write the start of the parent element.
*
* The possible type tags:
* - dtag: binary tag encoded using a dictionary
* - dattr: binary attribute encoded using a dictionary
* - tag: UTF-8 encoded tag not found in dictionary
* - attribute: UTF-8 encoded attribute not found in dictionary
* - utf8 string
* - binary data
*
* See the protocol documentation for more details of the ccnb format.
*/
public final class BinaryXMLCodec implements XMLCodec {

    /**
     * Holder for the paired type/value representation used to encode tags
     * and content lengths.
     */
    public final static class TypeAndVal {
        protected int _type;
        protected long _val;

        /**
         * @param type the 3-bit type marker (one of the XML_* type constants)
         * @param val the value associated with the type
         */
        public TypeAndVal(int type, long val) {
            _type = type;
            _val = val;
        }

        /** @return the type marker */
        int type() { return _type; }

        /** @return the value paired with the type */
        long val() { return _val; }
    }

    public static final String CODEC_NAME = "Binary";

    // ---- Type markers encoded in type-and-value composites ----

    /**
     * Starts composite extension - value is subtype.
     */
    public static final byte XML_EXT = 0x00;

    /**
     * Starts composite - value is tagnamelen-1.
     */
    public static final byte XML_TAG = 0x01;

    /**
     * Starts composite - value is tagdict index.
     */
    public static final byte XML_DTAG = 0x02;

    /**
     * Attribute - value is attrnamelen-1, attribute value follows.
     */
    public static final byte XML_ATTR = 0x03;

    /**
     * Attribute - value is attrdict index.
     */
    public static final byte XML_DATTR = 0x04;

    /**
     * Opaque binary data - value is byte count.
     */
    public static final byte XML_BLOB = 0x05;

    /**
     * UTF-8 encoded character data - value is byte count.
     */
    public static final byte XML_UDATA = 0x06;

    /**
     * End element.
     */
    public static final byte XML_CLOSE = 0x0;

    /**
     * Processing instructions subtype: &lt;?name:U value:U?&gt;
     */
    public static final byte XML_SUBTYPE_PROCESSING_INSTRUCTIONS = 16;

    /*
     * Masks for bitwise processing. Java's bitwise operations operate
     * on ints, so save effort of promotion.
     */

    /** Number of bits used for the type in the last byte of a composite. */
    public static final int XML_TT_BITS = 3;
    public static final int XML_TT_MASK = ((1 << XML_TT_BITS) - 1);
    /** Number of value bits carried by the last byte of a composite. */
    public static final int XML_TT_VAL_BITS = XML_TT_BITS + 1;
    public static final int XML_TT_VAL_MASK = ((1 << (XML_TT_VAL_BITS)) - 1);
    /** Number of value bits carried by every byte other than the last one. */
    public static final int XML_REG_VAL_BITS = 7;
    public static final int XML_REG_VAL_MASK = ((1 << XML_REG_VAL_BITS) - 1);
    /** High bit; set only on the last byte of a composite. */
    public static final int XML_TT_NO_MORE = (1 << XML_REG_VAL_BITS); // 0x80
    public static final int BYTE_MASK = 0xFF;
    public static final int LONG_BYTES = 8;
    public static final int LONG_BITS = 64;

    // Thresholds used by the stream encoder to pick the first 7-bit group to examine.
    private static final long bits_11 = 0x0000007FFL;
    private static final long bits_18 = 0x00003FFFFL;
    private static final long bits_32 = 0x0FFFFFFFFL;

    /**
     * The name of this codec. Used to generate XMLEncoder and XMLDecoder instances with XMLCodecFactory.
     * @return the codec name.
     */
    public final static String codecName() { return CODEC_NAME; }

    /**
     * Encode a type identifier (from the set listed above) and an integer value
     * together in a composite encoding, into a caller-supplied buffer.
     * The last byte holds the type in its bottom three bits, the low four bits
     * of the value above that, and has its high bit set to mark the end of the
     * composite. The remaining bits of the value are stored in the preceding
     * bytes, seven bits per byte, most significant group first, each with the
     * high bit clear.
     *
     * @param type the type value to encode; must be between 0 and XML_UDATA
     * @param val the non-negative value to encode
     * @param buf the buffer to encode into
     * @param offset the offset into buf at which to start encoding
     * @return the number of bytes used to encode.
     * @throws IllegalArgumentException if type is out of range, val is negative,
     *         or buf does not have room for the encoding starting at offset
     * @deprecated Use encodeTypeAndVal(final int type, final long value, final OutputStream ostream)
     */
    @Deprecated
    public static int encodeTypeAndVal(int type, long val, byte [] buf, int offset) {
        if ((type > XML_UDATA) || (type < 0) || (val < 0)) {
            throw new IllegalArgumentException("Tag and value must be positive, and tag valid.");
        }

        // Encode backwards. Calculate how many bytes we need:
        int numEncodingBytes = numEncodingBytes(val);

        if ((offset + numEncodingBytes) > buf.length) {
            throw new IllegalArgumentException("Buffer space of " + (buf.length-offset) +
                    " bytes insufficient to hold " +
                    numEncodingBytes + " of encoded type and value.");
        }

        // Bottom 4 bits of val go in last byte with tag; top bit marks the last byte.
        long lastByte = (XML_TT_MASK & type) |
                ((XML_TT_VAL_MASK & val) << XML_TT_BITS) |
                XML_TT_NO_MORE;
        buf[offset + numEncodingBytes - 1] = (byte)(BYTE_MASK & lastByte);
        val = val >>> XML_TT_VAL_BITS;

        // Rest of val goes into preceding bytes, 7 bits per byte, top bit
        // left unset because more bytes follow.
        int i = offset + numEncodingBytes - 2;
        while ((0 != val) && (i >= offset)) {
            buf[i] = (byte)(BYTE_MASK & (val & XML_REG_VAL_MASK));
            val = val >>> XML_REG_VAL_BITS;
            --i;
        }
        if (val != 0) {
            Log.warning(Log.FAC_ENCODING, "This should not happen: miscalculated encoding length, have " + val + " left.");
        }

        return numEncodingBytes;
    }

    /**
     * Convenience method. Encodes type and val into a freshly allocated buffer of exactly
     * the required size using encodeTypeAndVal(int, long, byte [], int) and returns the result.
     * @param type the type value to encode
     * @param val the non-negative value to encode
     * @return the encoded type and value
     * @deprecated Use encodeTypeAndVal(final int type, final long value, final OutputStream ostream)
     */
    @Deprecated
    public static byte [] encodeTypeAndVal(int type, long val) {
        byte [] buf = new byte[numEncodingBytes(val)];
        encodeTypeAndVal(type, val, buf, 0);
        return buf;
    }

    /**
     * Encodes a type and value composite directly to an output stream, most
     * significant byte first, without building an intermediate buffer.
     *
     * The value is split, from the right, into a final group of 4 bits and
     * preceding groups of 7 bits (at shifts 4, 11, 18, ... 60). Leading
     * all-zero groups are skipped; once a non-zero group has been written every
     * following group is written even if it is zero. The final byte carries the
     * low 4 bits of the value above the 3-bit type and has XML_TT_NO_MORE set.
     *
     * Unlike the byte-array variant, this method does not validate its
     * arguments: type is masked with XML_TT_MASK and value is treated as an
     * unsigned 64-bit quantity.
     *
     * @param type the type value to encode
     * @param value the value to encode
     * @param ostream the stream to encode to
     * @return the number of bytes encoded
     * @throws IOException if writing to the stream fails
     */
    public static int encodeTypeAndVal(final int type, final long value, final OutputStream ostream) throws IOException {
        int bits;
        int count = 0;

        // once we start writing bits, we keep writing bits even if they are "0"
        boolean writing = false;

        // Values 0..15 fit entirely in the last byte, so no leading bytes are needed.
        if (value < 0 || value > 15) {
            // Default: examine every group, starting from the top 4 bits of the long.
            int start = 60;

            // Heuristics for small values: skip groups that are known to be zero.
            if (0 <= value) {
                if (value < bits_11) {
                    start = 4;
                } else if (value < bits_18) {
                    start = 11;
                } else if (value < bits_32) {
                    start = 25;
                }
            }

            for (int i = start; i >= 4; i -= 7) {
                bits = (int) (value >>> i) & XML_REG_VAL_MASK;
                if (bits != 0 || writing) {
                    ostream.write(bits);
                    count++;
                    writing = true;
                }
            }
        }

        // Explicit computation of the bottom byte
        bits = type & XML_TT_MASK;
        final int bottom4 = (int) value & XML_TT_VAL_MASK;
        bits |= bottom4 << XML_TT_BITS;
        // the bottom byte always has the NO_MORE flag
        bits |= XML_TT_NO_MORE;
        ostream.write(bits);
        count++;

        return count;
    }

    /**
     * Decodes a type and value pair from an InputStream.
     * Bytes are consumed until one with the XML_TT_NO_MORE bit set is read.
     * @param istream stream to read from
     * @return a decoded type and value, or null if the end of the stream is reached
     *         or the first byte read is 0 (an end marker rather than a start marker;
     *         that byte has been consumed)
     * @throws IOException if there is an error reading or decoding the type and value pair
     */
    public static TypeAndVal decodeTypeAndVal(InputStream istream) throws IOException {
        int next;
        int type = -1;
        long val = 0;
        boolean more = true;

        do {
            next = istream.read();

            if (next < 0) {
                return null; // at EOF
            }

            // If leading byte is 0, we are at an end marker, not a start marker;
            // last byte of TV will have type and high bit set. Previous bytes
            // are packed number representation, so leading 0 not legal. Either
            // error or we're just peeking.
            if ((0 == next) && (0 == val)) {
                return null;
            }

            more = (0 == (next & XML_TT_NO_MORE));

            if (more) {
                val = val << XML_REG_VAL_BITS;
                val |= (next & XML_REG_VAL_MASK);
            } else {
                // last byte
                type = next & XML_TT_MASK;
                val = val << XML_TT_VAL_BITS;
                val |= ((next >>> XML_TT_BITS) & XML_TT_VAL_MASK);
            }
        } while (more);

        return new TypeAndVal(type, val);
    }

    /**
     * Decodes a type and value pair from an InputStream, and then resets that
     * stream at its original position. The stream is marked with a read limit
     * of LONG_BYTES*2 bytes before decoding and reset afterwards, even if
     * decoding throws.
     * @param istream stream to read from
     * @return a decoded type and value, or null if decodeTypeAndVal returns null
     * @throws IOException if there is an error reading or decoding the type and value pair,
     *         or the stream cannot be reset
     */
    public static TypeAndVal peekTypeAndVal(InputStream istream) throws IOException {
        TypeAndVal tv = null;
        istream.mark(LONG_BYTES*2);
        try {
            tv = decodeTypeAndVal(istream);
        } finally {
            istream.reset();
        }
        return tv;
    }

    /**
     * Helper method, return the number of significant bits of x.
     *
     * Deprecated; unused here, but left since it is public.
     *
     * @param x number we want to know bit length of
     * @return bit length of x
     */
    public static int numbits(long x) {
        if (0 == x)
            return 0;
        return (LONG_BITS - Long.numberOfLeadingZeros(x));
    }

    // Largest values that fit in a 1-, 2- and 3-byte encoding respectively.
    final static int ENCODING_LIMIT_1_BYTE = ((1 << (XML_TT_VAL_BITS)) - 1);
    final static int ENCODING_LIMIT_2_BYTES = ((1 << (XML_TT_VAL_BITS + XML_REG_VAL_BITS)) - 1);
    final static int ENCODING_LIMIT_3_BYTES = ((1 << (XML_TT_VAL_BITS + 2 * XML_REG_VAL_BITS)) - 1);

    /**
     * Computes how many bytes the type-and-value encoding of x occupies:
     * the last byte holds XML_TT_VAL_BITS bits of the value and every
     * preceding byte holds XML_REG_VAL_BITS bits.
     *
     * @param x the value to be encoded; expected to be non-negative. A negative
     *          x satisfies the first comparison and therefore yields 1.
     * @return the number of bytes needed to encode x
     */
    public static int numEncodingBytes(long x) {
        // Fast paths for the common small values.
        if (x <= ENCODING_LIMIT_1_BYTE) return (1);
        if (x <= ENCODING_LIMIT_2_BYTES) return (2);
        if (x <= ENCODING_LIMIT_3_BYTES) return (3);

        int numbytes = 1;

        // Last byte gives you XML_TT_VAL_BITS
        // Remainder each give you XML_REG_VAL_BITS
        x = x >>> XML_TT_VAL_BITS;
        while (x != 0) {
            numbytes++;
            x = x >>> XML_REG_VAL_BITS;
        }
        return (numbytes);
    }

    /**
     * Converts a decoded length to an int, rejecting values that cannot be
     * used as a Java array size instead of silently truncating them.
     *
     * @param encodedLength the length read from the stream
     * @return encodedLength as an int
     * @throws IOException if encodedLength is negative or larger than Integer.MAX_VALUE
     */
    private static int checkedLength(long encodedLength) throws IOException {
        if ((encodedLength < 0) || (encodedLength > Integer.MAX_VALUE)) {
            throw new IOException("Encoded length " + encodedLength +
                    " is outside the supported range of 0 to " + Integer.MAX_VALUE + " bytes.");
        }
        return (int) encodedLength;
    }

    /**
     * Decodes a binary blob (encoded binary content) from an InputStream.
     * Expects to read a XML_BLOB type marker, and then the data. Has to peek
     * to cope with 0-length blob. Inline the peek to avoid unneeded resets.
     * @param istream stream to read from
     * @return returns decoded blob (binary content); an empty array if the next
     *         item in the stream is not a BLOB (the stream is reset in that case)
     * @throws IOException if stream cannot be read, decoded or reset, or if the
     *         encoded length does not fit in an int
     */
    public static byte [] decodeBlob(InputStream istream) throws IOException {
        istream.mark(LONG_BYTES*2);

        TypeAndVal tv = decodeTypeAndVal(istream);
        if ((null == tv) || (XML_BLOB != tv.type())) { // if we just have closers left, will get back null
            if (Log.isLoggable(Log.FAC_ENCODING, Level.FINEST))
                Log.finest(Log.FAC_ENCODING, "Expected BLOB, got " + ((null == tv) ? " not a tag " : tv.type()) + ", assuming elided 0-length blob.");
            istream.reset();
            return new byte[0];
        }

        return decodeBlob(istream, checkedLength(tv.val()));
    }

    /**
     * Decodes a binary blob (encoded binary content) from an InputStream
     * when we have already read the BLOB tag and length, and just need to read the content.
     * Keeps reading until exactly blobLength bytes have been obtained.
     * @param istream stream to read from
     * @param blobLength the length of the binary content to read in bytes
     * @return returns decoded blob (binary content)
     * @throws IOException if blobLength is negative, if the stream ends before
     *         blobLength bytes have been read, or if the stream cannot be read
     */
    public static byte [] decodeBlob(InputStream istream, int blobLength) throws IOException {
        if (blobLength < 0) {
            throw new IOException("Cannot read a blob of negative length: " + blobLength);
        }

        byte [] bytes = new byte[blobLength];
        int count = 0;

        // A single read may return fewer bytes than requested; loop until done.
        while (count < blobLength) {
            int read = istream.read(bytes, count, (blobLength - count));
            if (read < 0) {
                // read() reports end of stream with -1; adding that to count
                // would corrupt the offset, so fail explicitly instead.
                throw new IOException("Expected to read " + blobLength +
                        " bytes of data, read: " + count + " before end of stream.");
            }
            count += read;
        }
        return bytes;
    }

    /**
     * Encodes a binary BLOB (binary content) to an output stream.
     * @param ostream the stream to write to
     * @param blob the binary content to write; if null, nothing is written
     * @throws IOException if there is an error encoding or writing the data
     */
    public static void encodeBlob(OutputStream ostream, byte [] blob) throws IOException {
        encodeBlob(ostream, blob, 0, ((null == blob) ? 0 : blob.length));
    }

    /**
     * Encodes a binary BLOB (binary content) to an output stream.
     * A null blob or a length of 0 is elided: nothing is written at all.
     * @param ostream the stream to write to
     * @param blob the binary content to write
     * @param offset the offset into blob at which to start encoding data
     * @param length the number of bytes of blob to encode
     * @throws IOException if there is an error encoding or writing the data
     */
    public static void encodeBlob(OutputStream ostream, byte [] blob, int offset, int length) throws IOException {
        // We elide the encoding of a 0-length blob
        if ((null == blob) || (length == 0)) {
            if (Log.isLoggable(Log.FAC_ENCODING, Level.FINER))
                Log.finer(Log.FAC_ENCODING, "Eliding 0-length blob.");
            return;
        }

        encodeTypeAndVal(XML_BLOB, length, ostream);
        ostream.write(blob, offset, length);
    }

    /**
     * Decodes a UTF-8 string element's content from an InputStream.
     * Expects to read a XML_UDATA type marker, and then the data. Has to peek
     * to cope with 0-length ustring. Inline the peek to avoid unneeded resets.
     * This will not decode a TAG or ATTR ustring.
     * @param istream stream to read from
     * @return returns decoded String; the empty string if the next item in the
     *         stream is not UDATA (the stream is reset in that case)
     * @throws IOException if stream cannot be read, decoded or reset, or if the
     *         encoded length does not fit in an int
     */
    public static String decodeUString(InputStream istream) throws IOException {
        istream.mark(LONG_BYTES*2);

        TypeAndVal tv = decodeTypeAndVal(istream);
        if ((null == tv) || (XML_UDATA != tv.type())) { // if we just have closers left, will get back null
            if (Log.isLoggable(Log.FAC_ENCODING, Level.FINEST))
                Log.finest(Log.FAC_ENCODING, "Expected UDATA, got " + ((null == tv) ? " not a tag " : tv.type()) + ", assuming elided 0-length UString.");
            istream.reset();
            return "";
        }

        return decodeUString(istream, checkedLength(tv.val()));
    }

    /**
     * Decodes a UTF-8 string element's content from an InputStream
     * when we've read the type indicator (which could be UDATA, or TAG, or ATTR)
     * and just need to get the data. Assumes caller will cope with the fact that
     * TAGs and ATTRs have encoded lengths that are one byte shorter than their
     * actual data length, and that the length passed in here is actually the
     * length of data we should read.
     * @param istream stream to read from
     * @param byteLength length of element to read
     * @return returns the decoded String
     * @throws IOException if stream cannot be read or decoded
     */
    public static String decodeUString(InputStream istream, int byteLength) throws IOException {
        byte [] stringBytes = decodeBlob(istream, byteLength);
        return DataUtils.getUTF8StringFromBytes(stringBytes);
    }

    /**
     * Encode a non-TAG, non-ATTR UString (UTF-8 String) as XML_UDATA.
     * @param ostream stream to encode to
     * @param ustring String to encode; if null or empty, nothing is written
     * @throws IOException if there is an error encoding the data or writing to the stream
     */
    public static void encodeUString(OutputStream ostream, String ustring) throws IOException {
        encodeUString(ostream, ustring, XML_UDATA);
    }

    /**
     * Encode a UString with an explicit type, covering the special case of
     * the UStrings that represent TAG and ATTR.
     * The lengths of these strings are represented as length-1, as they
     * can never be 0 length. The decrement is done here, rather than
     * in encodeTypeAndVal.
     * @param ostream the stream to encode to
     * @param ustring the String containing the TAG or ATTR value. If null or a zero length string is
     * 	passed in then nothing is written to the output.
     * @param type the type to encode (XML_TAG or XML_ATTR; any other type is
     *  written with the unmodified byte length)
     * @throws IOException if there is an error encoding or writing the data
     **/
    public static void encodeUString(OutputStream ostream, String ustring, byte type) throws IOException {
        // We elide the encoding of a 0-length UString
        if ((null == ustring) || (ustring.length() == 0)) {
            if (Log.isLoggable(Log.FAC_ENCODING, Level.FINER))
                Log.finer(Log.FAC_ENCODING, "Eliding 0-length UString.");
            return;
        }

        byte [] strBytes = DataUtils.getBytesFromUTF8String(ustring);

        encodeTypeAndVal(type,
                (((type == XML_TAG) || (type == XML_ATTR)) ?
                        (strBytes.length-1) :
                        strBytes.length), ostream);
        ostream.write(strBytes);
    }
}