/*
* BeDecoder.java
*
* Created on May 30, 2003, 2:44 PM
* Copyright (C) 2003, 2004, 2005, 2006 Aelitis, All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* AELITIS, SAS au capital de 46,603.30 euros
* 8 Allee Lenotre, La Grille Royale, 78600 Le Mesnil le Roi, France.
*/
package org.gudy.azureus2.core3.util;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* A set of utility methods to decode a bencoded array of bytes into a Map.
* Integers are represented as Long, strings as byte[], dictionaries as Map (with String keys), and lists as List.
*
* @author TdC_VgA
*
*/
public class BDecoder
{
// Upper bound, in bytes, accepted for a single bencoded byte string; a larger declared length
// makes getByteArrayFromStream(InputStream) throw an IOException.
private static final int MAX_BYTE_ARRAY_SIZE = 8*1024*1024;
// Upper bound, in bytes, accepted for a dictionary key; a larger key makes the decoder throw an IOException.
private static final int MAX_MAP_KEY_SIZE = 64*1024;
// When true, every decoded dictionary entry is printed to System.out as "key->value;".
private static final boolean TRACE = false;
// When true, exceptions raised while decoding a dictionary or list are suppressed and the
// partially decoded container is returned instead. Changed through setRecoveryMode(boolean).
private boolean recovery_mode;
/**
 * Decodes a bencoded byte array using a fresh, single-use decoder.
 *
 * @param data the bencoded bytes
 * @return the decoded top-level dictionary
 * @throws IOException if the data cannot be decoded into a Map
 */
public static Map
decode(
    byte[] data )
    throws IOException
{
    BDecoder decoder = new BDecoder();

    return( decoder.decodeByteArray( data ));
}

/**
 * Decodes a region of a byte array using a fresh, single-use decoder.
 *
 * @param data   the array holding the bencoded bytes
 * @param offset index of the first byte to decode
 * @param length number of bytes available from offset
 * @return the decoded top-level dictionary
 * @throws IOException if the data cannot be decoded into a Map
 */
public static Map
decode(
    byte[] data,
    int offset,
    int length )
    throws IOException
{
    BDecoder decoder = new BDecoder();

    return( decoder.decodeByteArray( data, offset, length ));
}

/**
 * Decodes bencoded data read from a stream using a fresh, single-use decoder.
 *
 * @param is the stream to read from
 * @return the decoded top-level dictionary
 * @throws IOException if reading fails or the data cannot be decoded into a Map
 */
public static Map
decode(
    BufferedInputStream is )
    throws IOException
{
    BDecoder decoder = new BDecoder();

    return( decoder.decodeStream( is ));
}
/**
 * Creates a decoder. No state is configured here; fields keep their declared initial values.
 */
public
BDecoder()
{
    super();
}
/**
 * Decodes a complete bencoded byte array.
 *
 * @param data the bencoded bytes
 * @return the decoded top-level dictionary
 * @throws IOException if the data cannot be decoded into a Map
 */
public Map
decodeByteArray(
    byte[] data)
    throws IOException
{
    InputStream source = new BDecoderInputStreamArray( data );

    return( decode( source ));
}

/**
 * Decodes a region of a bencoded byte array.
 *
 * @param data   the array holding the bencoded bytes
 * @param offset index of the first byte to decode
 * @param length number of bytes available from offset
 * @return the decoded top-level dictionary
 * @throws IOException if the data cannot be decoded into a Map
 */
public Map
decodeByteArray(
    byte[] data,
    int offset,
    int length )
    throws IOException
{
    InputStream source = new BDecoderInputStreamArray( data, offset, length );

    return( decode( source ));
}
/**
 * Decodes bencoded data from a stream, requesting key interning.
 *
 * @param data the stream to read from
 * @return the decoded top-level dictionary
 * @throws IOException if reading fails or the data cannot be decoded into a Map
 */
public Map
decodeStream(
    BufferedInputStream data )
    throws IOException
{
    final boolean internKeys = true;

    return( decodeStream( data, internKeys ));
}

/**
 * Decodes bencoded data from a stream.
 *
 * @param data       the stream to read from
 * @param internKeys forwarded unchanged to the element decoder
 * @return the decoded top-level dictionary
 * @throws IOException if reading fails; a BEncodingException is raised when nothing
 *                     could be decoded or the top-level element is not a dictionary
 */
public Map
decodeStream(
    BufferedInputStream data,
    boolean internKeys)
    throws IOException
{
    final Object root = decodeInputStream( data, 0, internKeys );

    if ( root instanceof Map ){
        return( (Map)root );
    }

    // anything that is not a Map is an error; null means no element was found at all
    if ( root == null ){
        throw( new BEncodingException( "BDecoder: zero length file" ));
    }

    throw( new BEncodingException( "BDecoder: top level isn't a Map" ));
}
/**
 * Decodes the top-level element of the given stream and insists that it is a dictionary.
 *
 * @param data the stream to read from
 * @return the decoded top-level dictionary
 * @throws IOException if reading fails; a BEncodingException is raised when nothing
 *                     could be decoded or the top-level element is not a dictionary
 */
private Map
decode(
    InputStream data )
    throws IOException
{
    final Object root = decodeInputStream( data, 0, true );

    if ( root instanceof Map ){
        return( (Map)root );
    }

    throw( new BEncodingException(
        root == null ? "BDecoder: zero length file" : "BDecoder: top level isn't a Map" ));
}
// Reusable scratch buffer for dictionary keys. Sharing it across recursive calls is safe because
// every key is converted into a String before the value that follows it is decoded.
private ByteBuffer keyBytesBuffer = ByteBuffer.allocate(32);

/**
 * Decodes the next bencoded element from the stream.
 *
 * @param dbis       stream to read from; must support mark/reset (verified for the top-level call only)
 * @param nesting    depth of this element, 0 for the top-level element
 * @param internKeys passed on to nested calls; it has no other effect in this method
 * @return a Map for a dictionary, an ArrayList for a list, a Long for an integer, a byte[] for a
 *         string, or null when the next byte is 'e' or the stream has ended
 * @throws IOException if reading fails or the data is malformed. Failures inside a dictionary or
 *                     list are suppressed while recovery mode is on, in which case the partially
 *                     decoded container is returned.
 */
private Object
decodeInputStream(
    InputStream dbis,
    int nesting,
    boolean internKeys)
    throws IOException
{
    if (nesting == 0 && !dbis.markSupported()) {
        throw new IOException("InputStream must support the mark() method");
    }

    // mark so that the first digit of a string's length prefix can be pushed back below
    dbis.mark(Integer.MAX_VALUE);

    int tempByte = dbis.read();

    switch (tempByte) {
        case 'd' : {
            Map tempMap = new LightHashMap();

            try{
                while (true) {
                    // peek at the next byte: 'e' (or end of stream) ends the dictionary
                    dbis.mark(Integer.MAX_VALUE);
                    tempByte = dbis.read();
                    if (tempByte == 'e' || tempByte == -1) {
                        break;
                    }
                    dbis.reset();

                    String key = readDictionaryKey(dbis);

                    Object value = decodeInputStream(dbis, nesting+1, internKeys);

                    if ( TRACE ){
                        System.out.println( key + "->" + value + ";" );
                    }

                    // recover from some broken encodings whereby the value has not been
                    // encoded, for example
                    //      18:azureus_propertiesd0:e
                    // null only comes back when decoding hit an 'e' or end of stream, so
                    // there is no valid way to get a null value here
                    if ( value == null ){
                        System.err.println( "Invalid encoding - value not serialsied for '" + key + "' - ignoring" );
                        break;
                    }

                    // a repeated key silently replaces the earlier value
                    tempMap.put( key, value );
                }

                // for a nested dictionary, reaching end of stream at this point is reported
                // as a missing terminator
                dbis.mark(Integer.MAX_VALUE);
                tempByte = dbis.read();
                dbis.reset();

                if ( nesting > 0 && tempByte == -1 ){
                    throw( new BEncodingException( "BDecoder: invalid input data, 'e' missing from end of dictionary"));
                }
            }catch( Throwable e ){
                rethrowUnlessRecovering( e );
            }

            // NOTE(review): presumably shrinks the map's backing storage - confirm against LightHashMap
            if (tempMap instanceof LightHashMap) {
                ((LightHashMap) tempMap).compactify(-0.9f);
            }

            return tempMap;
        }

        case 'l' : {
            ArrayList tempList = new ArrayList();

            try{
                // a null element means the list terminator (or end of stream) was reached
                Object tempElement = null;
                while ((tempElement = decodeInputStream(dbis, nesting+1, internKeys)) != null) {
                    tempList.add(tempElement);
                }

                tempList.trimToSize();

                // same missing-terminator check as for dictionaries
                dbis.mark(Integer.MAX_VALUE);
                tempByte = dbis.read();
                dbis.reset();

                if ( nesting > 0 && tempByte == -1 ){
                    throw( new BEncodingException( "BDecoder: invalid input data, 'e' missing from end of list"));
                }
            }catch( Throwable e ){
                rethrowUnlessRecovering( e );
            }

            return tempList;
        }

        case 'e' :
        case -1 :
            return null;

        case 'i' :
            return Long.valueOf(getNumberFromStream(dbis, 'e'));

        case '0' :
        case '1' :
        case '2' :
        case '3' :
        case '4' :
        case '5' :
        case '6' :
        case '7' :
        case '8' :
        case '9' :
            // push the digit back so the whole length prefix can be parsed
            dbis.reset();
            return getByteArrayFromStream(dbis);

        default :{
            // include up to 256 bytes of what follows in the error to help diagnosis
            int rem_len = dbis.available();

            if ( rem_len > 256 ){
                rem_len = 256;
            }

            byte[] rem_data = new byte[rem_len];

            dbis.read( rem_data );

            throw( new BEncodingException(
                "BDecoder: unknown command '" + tempByte + ", remainder = " + new String( rem_data )));
        }
    }
}

/**
 * Reads one dictionary key (length prefix, ':' and key bytes) from the stream.
 * The declared length is validated before any buffer is sized from it, so malformed or
 * hostile input cannot trigger a huge allocation or an invalid buffer limit.
 *
 * @param dbis stream positioned at the first digit of the key's length prefix
 * @return the key, decoded with Constants.BYTE_CHARSET
 * @throws IOException if the length is negative, exceeds MAX_MAP_KEY_SIZE, or the stream is truncated
 */
private String
readDictionaryKey(
    InputStream dbis )
    throws IOException
{
    // kept as a long until range checked so that oversized values cannot wrap when narrowed
    long declaredLength = getNumberFromStream(dbis, ':');

    if ( declaredLength < 0 ){
        throw( new IOException( "BDecoder: invalid dictionary key length (" + declaredLength + ")" ));
    }

    if ( declaredLength > MAX_MAP_KEY_SIZE ){
        // only the first 128 bytes of the key are read, purely for the diagnostic
        byte[] prefix = new byte[128];

        getByteArrayFromStream(dbis, prefix.length, prefix);

        String msg = "dictionary key is too large, max=" + MAX_MAP_KEY_SIZE + ": value=" + new String( prefix, 0, prefix.length );

        System.err.println( msg );

        throw( new IOException( msg ));
    }

    int keyLength = (int)declaredLength;

    ByteBuffer keyBytes;

    if (keyLength < keyBytesBuffer.capacity()) {
        keyBytes = keyBytesBuffer;
        keyBytes.position(0).limit(keyLength);
    } else {
        // grow the shared buffer; later, shorter keys reuse it
        keyBytes = keyBytesBuffer = ByteBuffer.allocate(keyLength);
    }

    getByteArrayFromStream(dbis, keyLength, keyBytes.array());

    CharBuffer cb = Constants.BYTE_CHARSET.decode(keyBytes);

    return new String(cb.array(), 0, cb.limit());
}

/**
 * Applies the recovery policy to a failure raised while decoding a dictionary or list.
 * In recovery mode the failure is dropped so the caller can return what it has decoded so far.
 * Otherwise it is propagated: IOExceptions as they are, anything else wrapped in an IOException
 * so that the failure is never silently lost.
 *
 * @param e the failure that was caught
 * @throws IOException when recovery mode is off
 */
private void
rethrowUnlessRecovering(
    Throwable e )
    throws IOException
{
    if ( recovery_mode ){
        return;
    }

    if ( e instanceof IOException ){
        throw((IOException)e);
    }

    IOException wrapped = new IOException( e.toString());

    wrapped.initCause( e );

    throw( wrapped );
}
/*
private long getNumberFromStream(InputStream dbis, char parseChar) throws IOException {
StringBuffer sb = new StringBuffer(3);
int tempByte = dbis.read();
while ((tempByte != parseChar) && (tempByte >= 0)) {
sb.append((char)tempByte);
tempByte = dbis.read();
}
//are we at the end of the stream?
if (tempByte < 0) {
return -1;
}
String str = sb.toString();
// support some borked impls that sometimes don't bother encoding anything
if ( str.length() == 0 ){
return( 0 );
}
return Long.parseLong(str);
}
*/
/**
 * Scratch space for the digits of the number being read. Allocated once per decoder instance;
 * recursion is not a concern because it is only used by the leaf method below.
 */
private final char[] numberChars = new char[32];

/**
 * Reads characters up to (and consuming) the given terminator and parses them as a number.
 *
 * @param dbis      stream to read from
 * @param parseChar terminator that ends the number
 * @return the parsed value; -1 when the stream ends before the terminator is seen; 0 when the
 *         terminator is the very first character
 * @throws IOException           if reading fails
 * @throws NumberFormatException if the text fills the scratch buffer or cannot be parsed either
 *                               as a long or as a double
 */
private long
getNumberFromStream(
    InputStream dbis,
    char parseChar)
    throws IOException
{
    int used = 0;
    int current;

    for ( current = dbis.read(); current != parseChar && current >= 0; current = dbis.read()){

        numberChars[used++] = (char)current;

        if ( used == numberChars.length ){

            throw( new NumberFormatException( "Number too large: " + new String(numberChars,0,used) + "..." ));
        }
    }

    if ( current < 0 ){
        // ran off the end of the stream before reaching the terminator
        return( -1 );
    }

    if ( used == 0 ){
        // support some broken implementations that sometimes don't bother encoding anything
        return( 0 );
    }

    try{
        return( parseLong( numberChars, 0, used ));

    }catch( NumberFormatException primary ){

        // last resort: accept anything Double can parse and truncate it to a long
        long recovered;

        try{
            recovered = (long)Double.parseDouble( new String( numberChars, 0, used ));

        }catch( Throwable ignored ){

            throw( primary );
        }

        return( recovered );
    }
}
/**
 * Parses a decimal long from a region of a char array, in the manner of Long.parseLong(String).
 * It is public because projects outside azureus2/azureus3 use it too.
 *
 * Octal is not supported: a region starting with '0' yields 0 without looking at the rest.
 * A leading '-' is accepted, a leading '+' is not.
 *
 * @param chars  array holding the digits
 * @param start  index of the first character
 * @param length number of characters to parse
 * @return the parsed value
 * @throws NumberFormatException if the region is empty, contains a non-digit, consists of "-" only,
 *                               or the value does not fit into a long
 */
public static long
parseLong(
    char[] chars,
    int start,
    int length )
{
    if ( length <= 0 ){
        throw new NumberFormatException(new String(chars,start,length));
    }

    final char first = chars[start];

    if ( first == '0' ){
        return 0;
    }

    final boolean isNegative = first == '-';

    if ( !isNegative && length == 1 ){
        // single character that is not a sign: it is either a digit or garbage
        final int only = first - '0';

        if ( only < 0 || only > 9 ){
            throw new NumberFormatException(new String(chars,start,length));
        }

        return only;
    }

    // the value is accumulated as a negative number because the negative range is the larger
    // one; 'floor' is the most negative accumulator value that is still representable
    final long floor = isNegative ? Long.MIN_VALUE : -Long.MAX_VALUE;
    final long floorOverTen = floor / 10;
    final int end = start + length;

    long acc = 0;
    int idx = isNegative ? start + 1 : start;

    while ( idx < end ){

        final int digit = chars[idx++] - '0';

        if ( digit < 0 || digit > 9 ){
            throw new NumberFormatException(new String(chars,start,length));
        }

        if ( acc < floorOverTen ){
            throw new NumberFormatException(new String(chars,start,length));
        }

        acc *= 10;

        if ( acc < floor + digit ){
            throw new NumberFormatException(new String(chars,start,length));
        }

        acc -= digit;
    }

    if ( !isNegative ){
        return -acc;
    }

    if ( idx > start + 1 ){
        return acc;
    }

    // only got "-"
    throw new NumberFormatException(new String(chars,start,length));
}
// This one causes lots of "Query Information" calls to the filesystem
/*
private long getNumberFromStreamOld(InputStream dbis, char parseChar) throws IOException {
int length = 0;
//place a mark
dbis.mark(Integer.MAX_VALUE);
int tempByte = dbis.read();
while ((tempByte != parseChar) && (tempByte >= 0)) {
tempByte = dbis.read();
length++;
}
//are we at the end of the stream?
if (tempByte < 0) {
return -1;
}
//reset the mark
dbis.reset();
//get the length
byte[] tempArray = new byte[length];
int count = 0;
int len = 0;
//get the string
while (count != length && (len = dbis.read(tempArray, count, length - count)) > 0) {
count += len;
}
//jump ahead in the stream to compensate for the :
dbis.skip(1);
//return the value
CharBuffer cb = Constants.DEFAULT_CHARSET.decode(ByteBuffer.wrap(tempArray));
String str_value = new String(cb.array(),0,cb.limit());
return Long.parseLong(str_value);
}
*/
/**
 * Reads a bencoded byte string (length prefix, ':' and payload) from the stream.
 *
 * @param dbis stream positioned at the first digit of the length prefix
 * @return the payload bytes, or null when the length read from the stream is negative
 *         (which includes the -1 reported when the stream ends inside the prefix)
 * @throws IOException if the declared length exceeds MAX_BYTE_ARRAY_SIZE or the stream ends
 *                     before the payload is complete
 */
private byte[]
getByteArrayFromStream(
    InputStream dbis )
    throws IOException
{
    // Keep the declared length as a long until it has been range checked. Narrowing it first
    // would let an oversized value such as 4294967296 wrap to a small int and slip past the limit.
    long declaredLength = getNumberFromStream(dbis, ':');

    if (declaredLength < 0) {
        return null;
    }

    // note that torrent hashes can be big (consider a 55GB file with 2MB pieces
    // this generates a pieces hash of 1/2 meg)
    if ( declaredLength > MAX_BYTE_ARRAY_SIZE ){
        throw( new IOException( "Byte array length too large (" + declaredLength + ")"));
    }

    int length = (int)declaredLength;

    byte[] tempArray = new byte[length];

    getByteArrayFromStream(dbis, length, tempArray);

    return tempArray;
}
/**
 * Fills the first length bytes of targetArray from the stream, looping over short reads.
 *
 * @param dbis        stream to read from
 * @param length      number of bytes required
 * @param targetArray destination, filled from index 0
 * @throws IOException if a read delivers no data before length bytes have been obtained
 */
private void getByteArrayFromStream(InputStream dbis, int length, byte[] targetArray) throws IOException {
    int filled = 0;

    while (filled != length) {
        final int got = dbis.read(targetArray, filled, length - filled);

        if (got <= 0) {
            // no more data although some is still owed
            throw (new IOException("BDecoder::getByteArrayFromStream: truncated"));
        }

        filled += got;
    }
}
/**
 * Switches recovery mode on or off for this decoder instance.
 *
 * @param r the new value of the recovery mode flag
 */
public void
setRecoveryMode(
    boolean r )
{
    this.recovery_mode = r;
}
/**
 * Renders a decoded object into a string and writes that string to System.out.
 *
 * @param obj the decoded object to print
 */
public static void
print(
    Object obj )
{
    StringWriter rendered = new StringWriter();

    PrintWriter out = new PrintWriter( rendered );

    print( out, obj );

    out.flush();

    System.out.println( rendered );
}

/**
 * Writes a decoded object to the given writer, starting with no indentation.
 *
 * @param writer destination
 * @param obj    the decoded object to print
 */
public static void
print(
    PrintWriter writer,
    Object obj )
{
    final String no_indent = "";

    print( writer, obj, no_indent, false );
}
/**
 * Recursively writes a decoded object to the writer.
 *
 * @param writer      destination
 * @param obj         the object to print: Long, String, byte[], List or Map; null and any other
 *                    type are printed through their string form instead of failing
 * @param indent      prefix used for the lines belonging to this object
 * @param skip_indent true when the caller has already written the prefix for the first line
 */
private static void
print(
    PrintWriter writer,
    Object obj,
    String indent,
    boolean skip_indent )
{
    String use_indent = skip_indent?"":indent;

    if ( obj instanceof Long || obj instanceof String ){

        writer.println( use_indent + obj );

    }else if ( obj instanceof byte[]){

        byte[] b = (byte[])obj;

        if ( b.length==20 ){
            // 20 bytes is rendered via ByteFormatter.nicePrint only
            writer.println( use_indent + " { "+ ByteFormatter.nicePrint( b )+ " }" );
        }else if ( b.length < 64 ){
            writer.println( use_indent + new String(b) + " [" + ByteFormatter.encodeString( b ) + "]" );
        }else{
            // too long to show: report the size only (closing bracket was missing before)
            writer.println( use_indent + "[byte array length " + b.length + "]" );
        }

    }else if ( obj instanceof List ){

        List l = (List)obj;

        writer.println( use_indent + "[" );

        for (int i=0;i<l.size();i++){

            writer.print( indent + " (" + i + ") " );

            print( writer, l.get(i), indent + " ", true );
        }

        writer.println( indent + "]" );

    }else if ( obj instanceof Map ){

        Map m = (Map)obj;

        Iterator it = m.keySet().iterator();

        while( it.hasNext()){

            // keys are rendered through String.valueOf so that a non-String key cannot fail the cast
            Object raw_key = it.next();

            String key = String.valueOf( raw_key );

            if ( key.length() > 256 ){
                writer.print( indent + key.substring(0,256) + "... = " );
            }else{
                writer.print( indent + key + " = " );
            }

            print( writer, m.get(raw_key), indent + " ", true );
        }

    }else{
        // null or an unexpected type: print it rather than throwing
        writer.println( use_indent + obj );
    }
}
/**
 * Replaces, in place, every byte[] value of the map with the String obtained by decoding it
 * as UTF-8, descending into nested Maps and Lists. Keys are left untouched. A value whose
 * conversion or replacement fails is reported on System.err and otherwise skipped.
 *
 * @param map the map to convert, may be null
 * @return the same map instance, or null if map was null
 */
public static Map
decodeStrings(
    Map map )
{
    if ( map != null ){

        for ( Iterator entries = map.entrySet().iterator(); entries.hasNext(); ){

            Map.Entry current = (Map.Entry)entries.next();

            Object held = current.getValue();

            if ( held instanceof List ){

                decodeStrings((List)held );

            }else if ( held instanceof Map ){

                decodeStrings((Map)held );

            }else if ( held instanceof byte[]){

                try{
                    current.setValue( new String((byte[])held,"UTF-8" ));

                }catch( Throwable problem ){

                    System.err.println(problem);
                }
            }
        }
    }

    return( map );
}

/**
 * Replaces, in place, every byte[] element of the list with the String obtained by decoding it
 * as UTF-8, descending into nested Maps and Lists. An element whose conversion or replacement
 * fails is reported on System.err and otherwise skipped.
 *
 * @param list the list to convert, may be null
 * @return the same list instance, or null if list was null
 */
public static List
decodeStrings(
    List list )
{
    if ( list != null ){

        int index = 0;

        while ( index < list.size()){

            Object held = list.get(index);

            if ( held instanceof List ){

                decodeStrings((List)held );

            }else if ( held instanceof Map ){

                decodeStrings((Map)held );

            }else if ( held instanceof byte[]){

                try{
                    String text = new String((byte[])held, "UTF-8" );

                    list.set( index, text );

                }catch( Throwable problem ){

                    System.err.println(problem);
                }
            }

            index++;
        }
    }

    return( list );
}
/**
 * Decodes the bencoded file f and writes a readable dump of it to output. Any failure is
 * reported via printStackTrace rather than propagated. Both files are always closed, which
 * the previous version did not do.
 *
 * @param f      the bencoded file to read
 * @param output the text file to (over)write
 */
private static void
print(
    File f,
    File output )
{
    PrintWriter pw = null;
    BufferedInputStream in = null;

    try{
        BDecoder decoder = new BDecoder();

        decoder.setRecoveryMode( false );

        pw = new PrintWriter( new FileWriter( output ));

        in = new BufferedInputStream( new FileInputStream( f ));

        print( pw, decoder.decodeStream( in ));

        pw.flush();

    }catch( Throwable e ){

        e.printStackTrace();

    }finally{

        if ( in != null ){
            try{
                in.close();
            }catch( IOException e ){
                e.printStackTrace();
            }
        }

        if ( pw != null ){
            // PrintWriter.close() flushes and does not throw
            pw.close();
        }
    }
}
/*
private interface
BDecoderInputStream
{
public int
read()
throws IOException;
public int
read(
byte[] buffer )
throws IOException;
public int
read(
byte[] buffer,
int offset,
int length )
throws IOException;
public int
available()
throws IOException;
public boolean
markSupported();
public void
mark(
int limit );
public void
reset()
throws IOException;
}
private class
BDecoderInputStreamStream
implements BDecoderInputStream
{
final private BufferedInputStream is;
private
BDecoderInputStreamStream(
BufferedInputStream _is )
{
is = _is;
}
public int
read()
throws IOException
{
return( is.read());
}
public int
read(
byte[] buffer )
throws IOException
{
return( is.read( buffer ));
}
public int
read(
byte[] buffer,
int offset,
int length )
throws IOException
{
return( is.read( buffer, offset, length ));
}
public int
available()
throws IOException
{
return( is.available());
}
public boolean
markSupported()
{
return( is.markSupported());
}
public void
mark(
int limit )
{
is.mark( limit );
}
public void
reset()
throws IOException
{
is.reset();
}
}
*/
/**
 * Minimal InputStream over a byte array with mark/reset support. The array is used directly
 * and is not copied.
 */
private class
BDecoderInputStreamArray
    extends InputStream
{
    // backing data, shared with the caller
    final private byte[] buffer;
    // index one past the last readable byte
    final private int count;
    // index of the next byte to read
    private int pos;
    // position restored by reset()
    private int mark;

    /** Streams the whole array, starting at index 0. */
    private
    BDecoderInputStreamArray(
        byte[] _buffer )
    {
        this.buffer = _buffer;
        this.count = this.buffer.length;
    }

    /** Streams at most _length bytes starting at _offset, never reading past the end of the array. */
    private
    BDecoderInputStreamArray(
        byte[] _buffer,
        int _offset,
        int _length )
    {
        this.buffer = _buffer;
        this.pos = _offset;
        this.count = Math.min( _offset + _length, _buffer.length );
        this.mark = _offset;
    }

    /** Returns the next byte as an unsigned value, or -1 once the readable range is exhausted. */
    public int
    read()
        throws IOException
    {
        if ( pos >= count ){
            return( -1 );
        }

        int value = buffer[pos] & 0xff;

        pos++;

        return( value );
    }

    /** Equivalent to read(buffer, 0, buffer.length). */
    public int
    read(
        byte[] buffer )
        throws IOException
    {
        return( read( buffer, 0, buffer.length ));
    }

    /**
     * Copies up to length bytes into b at offset.
     *
     * @return the number of bytes copied, -1 if the readable range is exhausted, or 0 if the
     *         (possibly clamped) length is not positive
     */
    public int
    read(
        byte[] b,
        int offset,
        int length )
        throws IOException
    {
        if ( pos >= count ){
            return( -1 );
        }

        // clamp the request to what is left
        int wanted = length;

        if ( pos + wanted > count ){
            wanted = count - pos;
        }

        if ( wanted <= 0 ){
            return( 0 );
        }

        System.arraycopy( buffer, pos, b, offset, wanted );

        pos += wanted;

        return( wanted );
    }

    /** Returns count - pos, the number of bytes between the read position and the end of the range. */
    public int
    available()
        throws IOException
    {
        int remaining = count - pos;

        return( remaining );
    }

    /** Always true. */
    public boolean
    markSupported()
    {
        return( true );
    }

    /** Remembers the current position; the limit argument is ignored. */
    public void
    mark(
        int limit )
    {
        this.mark = this.pos;
    }

    /** Moves the read position back to the last marked position. */
    public void
    reset()
        throws IOException
    {
        this.pos = this.mark;
    }
}
/**
 * Developer entry point: dumps a fixed bencoded file to a fixed text file.
 *
 * @param args ignored
 */
public static void
main(
    String[] args )
{
    File source = new File( "C:\\Temp\\tables.config" );

    File target = new File( "C:\\Temp\\tables.txt" );

    print( source, target );
}
}