package org.limewire.bittorrent.bencoding; import java.io.EOFException; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import org.limewire.util.BEncoder; /** * Provides common functionality for objects that represent pieces of bencoded data. * <p> * Reads bencoded data and parses it into objects that extend <code>Token</code>, * like <code>BEString</code> and <code>BEList</code>. * Use the factory method <code>Token.getNextToken(ReadableByteChannel)</code> * to get a parsed <code>Token</code> object. * <p> * BitTorrent uses a simple and extensible data format called bencoding. * More information about bencoding is on the Web at: * http://en.wikipedia.org/wiki/Bencoding and * http://www.bittorrent.org/beps/bep_0003.html#the-connectivity-is-as-follows. */ //TODO: Write the steps to parse bencoded data. public abstract class Token<T> { /** An undefined Token. */ protected static final int INTERNAL = -1; /** A number Token. */ public static final int LONG = 0; /** A string Token. */ public static final int STRING = 1; /** A list Token. */ public static final int LIST = 2; /** A dictionary Token. */ public static final int DICTIONARY = 3; /** A boolean Token */ public static final int BOOLEAN = 4; /** A rational Token */ public static final int RATIONAL = 5; /** The normal ACSII text encoding to use in bencoding for BitTorrent. */ protected static final String ASCII = "ISO-8859-1"; // When parsing ASCII characters, ZERO and NINE are used to see if a character //like '5' is between them, and thus a number protected static final byte ZERO, NINE; static { byte zero = 0; byte nine = 0; try { zero = "0".getBytes(ASCII)[0]; nine = "9".getBytes(ASCII)[0]; } catch (UnsupportedEncodingException impossible) { throw new RuntimeException(impossible); } ZERO = zero; NINE = nine; } /** The channel this Token reads bencoded data from. */ protected final ReadableByteChannel chan; /** The parsed Java object this Token made from the bencoded data it read. */ protected T result; /** * Makes a new object to represent a bencoded token to be read and parsed. * * @param chan the <code>ReadableByteChannel</code> this can read bencoded data from */ public Token(ReadableByteChannel chan) { this.chan = chan; } /** * Notification that this can read bencoded data from its channel. */ public abstract void handleRead() throws IOException; /** * Determines if this has read a complete bencoded sentence. * * @return true if we've read enough bencoded data to parse it into a complete object. * false if we're still waiting to read more bencoded data to finish our object. */ protected abstract boolean isDone(); /** * Finds out what kind of bencoded element this is. */ abstract public int getType(); /** * Gets the object we made from the bencoded data we read and parsed. * * @return the Object we parsed. * null if we haven't read enough bencoded data from our channel to make it yet. */ public T getResult() { if (!isDone()) return null; return result; } /** A Token that marks the end of a list of Token objects. */ static final EndElement TERMINATOR = new EndElement(); private static class EndElement extends Token<EndElement> { EndElement() { super(null); // No channel to read from result = this; // The object we parsed is this one } @Override public void handleRead() throws IOException {} @Override protected boolean isDone() { return true; // There is no data to parse } @Override public int getType() { return INTERNAL; } } /** * Reads the next bencoded object from the channel, returning a Token object that matches its type. * The Token this returns may be incomplete. * * Call handleRead() to finish parsing an incomplete Token object. * Use isDone() to determine if it's complete, and getResult() to get the parsed Token object. * * @param chan the ReadableByteChannel to read bencoded data from * @param charsetName the charset used for decoding dictionary keys * @return a possibly incomplete Token object, or null * @throws IOException if a read from the channel throws */ public static Token<?> getNextToken(ReadableByteChannel chan, String charsetName) throws IOException { // There's some bencoded data in the given chanel for us to read and parse. // It might be a string like "5:hello", or a list that starts "l", has other elements, and ends "e". // // First, it reads a single byte from the channel. // This is going to be a number like "5", or a letter that identifies a type like "l". // // Based on what letter it reads, it hands off control to a type specific constructor. // If it's a "d" for dictionary for instance, it gives the channel to the BEDictionary constructor. byte []b = new byte[1]; ByteBuffer one_byte = ByteBuffer.wrap(b); int read = chan.read(one_byte); if (read == 0) return null; // The channel gave us no data, so we have no parsed object to return if (read == -1) throw new EOFException("Could not read next Token"); if (b[0] == BEncoder.I) return new BELong(chan); if (b[0] == BEncoder.R) return new BERational(chan); else if (b[0] == BEncoder.D) return new BEDictionary(chan, charsetName); else if (b[0] == BEncoder.L) return new BEList(chan); else if (b[0] == BEncoder.E) return Token.TERMINATOR; else if (b[0] >= ZERO && b[0] <= NINE) return new BEString(b[0], chan); else if (b[0] == BEncoder.TRUE || b[0] == BEncoder.FALSE) return b[0] == BEncoder.TRUE ? BEBoolean.TRUE : BEBoolean.FALSE; else throw new IOException("unrecognized token type " + (char)b[0]); } /** * Reads the next bencoded object from the channel, returning a Token object that matches its type. * The Token this returns may be incomplete. * * The charset that is used for decoding dictionary keys is {@link #ASCII}. * * Call handleRead() to finish parsing an incomplete Token object. * Use isDone() to determine if it's complete, and getResult() to get the parsed Token object. * * @param chan the ReadableByteChannel to read bencoded data from * @return a possibly incomplete Token object, or null * @throws IOException if a read from the channel throws */ public static Token<?> getNextToken(ReadableByteChannel chan) throws IOException { return getNextToken(chan, ASCII); } /** * Parses bencoded data in a channel into an object that extends Token. * * @param data a channel with a complete bencoded object. * @param charsetName name of the charset used for decoding dictionary keys * @return an object that extends Token like BEList. * null if the byte array didn't contain a complete bencoded object. */ public static Object parse(ReadableByteChannel byteChannel, String charsetName) throws IOException { Token<?> t = getNextToken(byteChannel, charsetName); // Reads the first letter like "l" to see what's next if (t == null) return null; // The channel couldn't even give 1 byte t.handleRead(); // Tell t to read from its channel and parse the data it reads return t.getResult(); } /** * Parses bencoded data in a channel into an object that extends Token. * * @param data a channel with a complete bencoded object. * @return an object that extends Token like BEList. * null if the byte array didn't contain a complete bencoded object. */ public static Object parse(ReadableByteChannel byteChannel) throws IOException { return parse(byteChannel, ASCII); } }