/* BDecoder - Converts an InputStream to BEValues.
Copyright (C) 2003 Mark J. Wielaard
This file is part of Snark.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package org.klomp.snark.bencode;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Decodes a bencoded stream to <code>BEValue</code>s.
*
* A bencoded byte stream can represent byte arrays, numbers, lists and
* maps (dictionaries).
*
* It currently contains a hack to indicate a name of a dictionary of
* which a SHA-1 digest hash should be calculated (the hash over the
* original bencoded bytes).
*
* @author Mark Wielaard (mark@klomp.org).
*/
public class BDecoder
{
// The InputStream to BDecode.
private final InputStream in;
// The last indicator read.
// Zero if unknown.
// '0'..'9' indicates a byte[].
// 'i' indicates an Number.
// 'l' indicates a List.
// 'd' indicates a Map.
// 'e' indicates end of Number, List or Map (only used internally).
// -1 indicates end of stream.
// Call getNextIndicator to get the current value (will never return zero).
private int indicator = 0;
// Used for ugly hack to get SHA hash over the metainfo info map
private static final String special_map = "info";
private boolean in_special_map = false;
/** creation deferred until we encounter the special map, to make processing of announce replies more efficient */
private MessageDigest sha_digest;
/**
* Ugly hack. Return the SHA has over bytes that make up the special map.
* @return null if there was no special map
*/
public byte[] get_special_map_digest()
{
if (sha_digest == null)
return null;
byte[] result = sha_digest.digest();
return result;
}
/****
// Ugly hack. Name defaults to "info".
public void set_special_map_name(String name)
{
special_map = name;
}
****/
/**
* Initalizes a new BDecoder. Nothing is read from the given
* <code>InputStream</code> yet.
*/
public BDecoder(InputStream in)
{
this.in = in;
}
/**
* Creates a new BDecoder and immediatly decodes the first value it
* sees.
*
* @return The first BEValue on the stream or null when the stream
* has ended.
*
* @throws InvalidBEncodingException when the stream doesn't start with a
* bencoded value or the stream isn't a bencoded stream at all.
* @throws IOException when somthing bad happens with the stream
* to read from.
*/
public static BEValue bdecode(InputStream in) throws IOException
{
return new BDecoder(in).bdecode();
}
/**
* Used for SHA1 hack
* @since 0.8.5
*/
private void createDigest() {
if (sha_digest == null) {
try {
sha_digest = MessageDigest.getInstance("SHA");
} catch(NoSuchAlgorithmException nsa) {
throw new InternalError(nsa.toString());
}
} else {
// there are two info maps, but not one inside the other,
// the resulting hash will be incorrect
// throw something? - no, the check in the MetaInfo constructor will catch it.
}
}
/**
* Returns what the next bencoded object will be on the stream or -1
* when the end of stream has been reached. Can return something
* unexpected (not '0' .. '9', 'i', 'l' or 'd') when the stream
* isn't bencoded.
*
* This might or might not read one extra byte from the stream.
*/
public int getNextIndicator() throws IOException
{
if (indicator == 0)
{
indicator = in.read();
// XXX - Used for ugly hack
if (in_special_map) sha_digest.update((byte)indicator);
}
return indicator;
}
/**
* Gets the next indicator and returns either null when the stream
* has ended or bdecodes the rest of the stream and returns the
* appropriate BEValue encoded object.
*/
public BEValue bdecode() throws IOException
{
indicator = getNextIndicator();
if (indicator == -1)
return null;
if (indicator >= '0' && indicator <= '9')
return bdecodeBytes();
else if (indicator == 'i')
return bdecodeNumber();
else if (indicator == 'l')
return bdecodeList();
else if (indicator == 'd')
return bdecodeMap();
else
throw new InvalidBEncodingException
("Unknown indicator '" + indicator + "'");
}
/**
* Returns the next bencoded value on the stream and makes sure it
* is a byte array. If it is not a bencoded byte array it will throw
* InvalidBEncodingException.
*/
public BEValue bdecodeBytes() throws IOException
{
int c = getNextIndicator();
int num = c - '0';
if (num < 0 || num > 9)
throw new InvalidBEncodingException("Number expected, not '"
+ (char)c + "'");
indicator = 0;
c = read();
int i = c - '0';
while (i >= 0 && i <= 9)
{
// XXX - This can overflow!
num = num*10 + i;
c = read();
i = c - '0';
}
if (c != ':')
throw new InvalidBEncodingException("Colon expected, not '"
+ (char)c + "'");
return new BEValue(read(num));
}
/**
* Returns the next bencoded value on the stream and makes sure it
* is a number. If it is not a number it will throw
* InvalidBEncodingException.
*/
public BEValue bdecodeNumber() throws IOException
{
int c = getNextIndicator();
if (c != 'i')
throw new InvalidBEncodingException("Expected 'i', not '"
+ (char)c + "'");
indicator = 0;
c = read();
if (c == '0')
{
c = read();
if (c == 'e')
return new BEValue(Integer.valueOf(0));
else
throw new InvalidBEncodingException("'e' expected after zero,"
+ " not '" + (char)c + "'");
}
// XXX - We don't support more the 255 char big integers
StringBuilder chars = new StringBuilder(16);
if (c == '-')
{
chars.append((char)c);
c = read();
}
if (c < '1' || c > '9')
throw new InvalidBEncodingException("Invalid Integer start '"
+ (char)c + "'");
chars.append((char)c);
c = read();
while(c >= '0' && c <= '9')
{
chars.append((char)c);
c = read();
}
if (c != 'e')
throw new InvalidBEncodingException("Integer should end with 'e'");
String s = chars.toString();
int len = s.length();
// save a little space if we're sure it will fit
Number num;
if (len < 10)
num = Integer.valueOf(s);
else if (len < 19)
num = Long.valueOf(s);
else if (len > 256)
throw new InvalidBEncodingException("Too many digits: " + len);
else
num = new BigInteger(s);
return new BEValue(num);
}
/**
* Returns the next bencoded value on the stream and makes sure it
* is a list. If it is not a list it will throw
* InvalidBEncodingException.
*/
public BEValue bdecodeList() throws IOException
{
int c = getNextIndicator();
if (c != 'l')
throw new InvalidBEncodingException("Expected 'l', not '"
+ (char)c + "'");
indicator = 0;
List<BEValue> result = new ArrayList<BEValue>();
c = getNextIndicator();
while (c != 'e')
{
result.add(bdecode());
c = getNextIndicator();
}
indicator = 0;
return new BEValue(result);
}
/**
* Returns the next bencoded value on the stream and makes sure it
* is a map (dictonary). If it is not a map it will throw
* InvalidBEncodingException.
*/
public BEValue bdecodeMap() throws IOException
{
int c = getNextIndicator();
if (c == '<')
throw new InvalidBEncodingException("Expected a .torrent metainfo file but found HTML? Check URL or file!");
else if (c != 'd')
throw new InvalidBEncodingException("Expected 'd', not '"
+ (char)c + "'");
indicator = 0;
Map<String, BEValue> result = new HashMap<String, BEValue>();
c = getNextIndicator();
while (c != 'e')
{
// Dictonary keys are always strings.
String key = bdecode().getString();
// XXX ugly hack
// This will not screw up if an info map contains an info map,
// but it will if there are two info maps (not one inside the other)
boolean special = (!in_special_map) && special_map.equals(key);
if (special) {
createDigest();
in_special_map = true;
}
BEValue value = bdecode();
result.put(key, value);
// XXX ugly hack continued
if (special)
in_special_map = false;
c = getNextIndicator();
}
indicator = 0;
return new BEValue(result);
}
/**
* Returns the next byte read from the InputStream (as int).
* Throws EOFException if InputStream.read() returned -1.
*/
private int read() throws IOException
{
int c = in.read();
if (c == -1)
throw new EOFException();
if (in_special_map) sha_digest.update((byte)c);
return c;
}
/**
* Returns a byte[] containing length valid bytes starting at offset
* zero. Throws EOFException if InputStream.read() returned -1
* before all requested bytes could be read. Note that the byte[]
* returned might be bigger then requested but will only contain
* length valid bytes. The returned byte[] will be reused when this
* method is called again.
*/
private byte[] read(int length) throws IOException
{
byte[] result = new byte[length];
int read = 0;
while (read < length)
{
int i = in.read(result, read, length - read);
if (i == -1)
throw new EOFException();
read += i;
}
if (in_special_map) sha_digest.update(result, 0, length);
return result;
}
/**
* prints out the decoded data
* @since 0.9.14
*/
public static void main(String[] args) {
if (args.length != 1) {
System.err.println("Usage: BDecoder file.torrent");
System.exit(1);
}
try {
BEValue bev = bdecode(new FileInputStream(args[0]));
System.out.println(bev.toString());
} catch (IOException ioe) {
ioe.printStackTrace();
System.exit(1);
}
}
}