package com.limegroup.gnutella.tigertree;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.bitzi.util.Base32;
import com.limegroup.gnutella.dime.DIMEGenerator;
import com.limegroup.gnutella.dime.DIMEParser;
import com.limegroup.gnutella.dime.DIMERecord;
import com.limegroup.gnutella.util.UUID;
/**
* @author Gregorio Roper
*
* Class handling all the reading and writing of HashTrees to the network
*/
class HashTreeHandler {
private static final Log LOG = LogFactory.getLog(HashTreeHandler.class);
private static final String OUTPUT_TYPE = "application/dime";
private static final String SERIALIZED_TREE_TYPE =
"http://open-content.net/spec/thex/breadthfirst";
private static final String XML_TYPE = "text/xml";
private static final byte[] TREE_TYPE_BYTES =
getBytes(SERIALIZED_TREE_TYPE);
private static final byte[] XML_TYPE_BYTES =
getBytes(XML_TYPE);
private static final String DIGEST =
"http://open-content.net/spec/digest/tiger";
private static final String DTD_PUBLIC_ID =
"-//NET//OPEN-CONTENT//THEX 02//EN";
private static final String DTD_SYSTEM_ID =
"http://open-content.net/spec/thex/thex.dtd";
private static final String DTD_ENTITY =
"<!ELEMENT hashtree (file,digest,serializedtree)>" +
"<!ELEMENT file EMPTY>" +
"<!ATTLIST file size CDATA #REQUIRED>" +
"<!ATTLIST file segmentsize CDATA #REQUIRED>" +
"<!ELEMENT digest EMPTY>" +
"<!ATTLIST digest algorithm CDATA #REQUIRED>" +
"<!ATTLIST digest outputsize CDATA #REQUIRED>" +
"<!ELEMENT serializedtree EMPTY>" +
"<!ATTLIST serializedtree depth CDATA #REQUIRED>"+
"<!ATTLIST serializedtree type CDATA #REQUIRED>" +
"<!ATTLIST serializedtree uri CDATA #REQUIRED>";
private static final String SYSTEM_STRING = "SYSTEM";
private static final String XML_TREE_DESC_START =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ "<!DOCTYPE hashtree " + SYSTEM_STRING + " \"" + DTD_SYSTEM_ID + "\">"
+ "<hashtree>";
private static final String XML_TREE_DESC_END = "</hashtree>";
private static int HASH_SIZE = 24;
/**
 * Converts a string to its UTF-8 byte representation, falling back
 * to the platform-default encoding if UTF-8 is unavailable.
 */
private static byte[] getBytes(String string) {
    byte[] encoded;
    try {
        encoded = string.getBytes("UTF-8");
    } catch (UnsupportedEncodingException uee) {
        // UTF-8 support is mandated by the Java spec, so this path
        // should never be taken; log it and fall back regardless.
        LOG.debug(string, uee);
        encoded = string.getBytes();
    }
    return encoded;
}
///////////////////////// WRITING ///////////////////////
/**
 * The generator containing the DIME message to send.
 */
private final DIMEGenerator GENERATOR;
/**
 * Constructs a new handler for sending.
 *
 * Builds the two-record DIME message mandated by THEX: an XML record
 * describing the tree (file size, digest, serialization type)
 * followed by the serialized tree hashes themselves.  The two
 * records are linked by a freshly generated UUID URI.
 *
 * @param tree
 *      the <tt>HashTree</tt> to construct this message from
 */
public HashTreeHandler(HashTree tree) {
    LOG.trace("creating HashTreeHandler for sending");
    UUID uri = UUID.nextUUID();
    GENERATOR = new DIMEGenerator();
    GENERATOR.add(new XMLRecord(tree, uri));
    GENERATOR.add(new TreeRecord(tree, uri));
}
/**
 * method for writing a HashTree to an OutputStream
 *
 * Writes the complete DIME message (XML description record plus
 * serialized tree record) built by the constructor.
 *
 * @param os
 *      the <tt>OutputStream</tt> to write to.
 * @throws IOException
 *      if there was a problem writing to os.
 */
public void write(OutputStream os) throws IOException {
    GENERATOR.write(os);
}
/**
 * Determines the length of the written data.
 *
 * This is the total byte length of the DIME message that
 * {@link #write(OutputStream)} will produce.
 */
public int getLength() {
    return GENERATOR.getLength();
}
/**
 * Determines the mime type of the output.
 *
 * Always "application/dime" for the serialized message.
 */
public String getType() {
    return OUTPUT_TYPE;
}
/**
 * A simple XML DIMERecord.
 */
private static class XMLRecord extends DIMERecord {
    XMLRecord(HashTree tree, UUID uri) {
        super(DIMERecord.TYPE_MEDIA_TYPE, null, null,
              XML_TYPE_BYTES, getXML(tree, uri));
    }

    /**
     * Builds the UTF-8 bytes of the THEX XML description for the
     * given tree: the file element (size / segment size), the digest
     * element (algorithm / output size), and the serializedtree
     * element referencing the companion tree record by UUID URI.
     */
    private static byte[] getXML(HashTree tree, UUID uri) {
        StringBuffer xml = new StringBuffer();
        xml.append(XML_TREE_DESC_START);
        xml.append("<file size='");
        xml.append(tree.getFileSize());
        xml.append("' segmentsize='");
        xml.append(HashTree.BLOCK_SIZE);
        xml.append("'/>");
        xml.append("<digest algorithm='");
        xml.append(DIGEST);
        xml.append("' outputsize='");
        xml.append(HASH_SIZE);
        xml.append("'/>");
        xml.append("<serializedtree depth='");
        xml.append(tree.getDepth());
        xml.append("' type='");
        xml.append(SERIALIZED_TREE_TYPE);
        xml.append("' uri='uuid:");
        xml.append(uri);
        xml.append("'/>");
        xml.append(XML_TREE_DESC_END);
        return getBytes(xml.toString());
    }
}
/**
 * Private DIMERecord for a Tree.
 */
private static class TreeRecord extends DIMERecord {
    /**
     * The tree whose hashes this record serializes.
     */
    private final HashTree TREE;
    /**
     * The byte length of the serialized tree: one HASH_SIZE entry
     * per node.
     */
    private final int LENGTH;

    TreeRecord(HashTree tree, UUID uri) {
        super(DIMERecord.TYPE_ABSOLUTE_URI, null,
              getBytes("uuid:" + uri),
              TREE_TYPE_BYTES, null);
        TREE = tree;
        LENGTH = TREE.getNodeCount() * HASH_SIZE;
    }

    /**
     * Writes every generation's hashes, breadth-first, to the given
     * stream, followed by whatever padding DIME requires.
     */
    public void writeData(OutputStream out) throws IOException {
        Iterator generations = TREE.getAllNodes().iterator();
        while (generations.hasNext()) {
            List generation = (List) generations.next();
            for (Iterator hashes = generation.iterator(); hashes.hasNext(); )
                out.write((byte[]) hashes.next());
        }
        writePadding(getDataLength(), out);
    }

    /**
     * Determines the length of the data, excluding padding.
     */
    public int getDataLength() {
        return LENGTH;
    }
}
////////////////////// READING /////////////////////
/**
 * Reads a HashTree in DIME format from an input stream.
 * Returns the list of all nodes of the tree.
 *
 * The message must contain at least two DIME records: the XML tree
 * description followed by the serialized tree hashes.  Any further
 * records are drained and ignored with a warning.
 *
 * @param is
 *      the <tt>InputStream</tt> to read from
 * @param fileSize
 *      the size of the file we expect the hash tree for
 * @param root32
 *      Base32 encoded root hash
 * @return The list of all nodes in this tree.
 * @throws IOException
 *      in case of a problem reading from the InputStream, or if the
 *      XML description is invalid, the advertised file size does not
 *      match fileSize, or the tree's root hash does not match root32
 */
static List read(InputStream is, long fileSize, String root32)
  throws IOException {
    LOG.trace("creating HashTreeHandler from network");
    DIMEParser parser = new DIMEParser(is);
    DIMERecord xmlRecord = parser.nextRecord();
    DIMERecord treeRecord = parser.nextRecord();
    if(LOG.isDebugEnabled()) {
        LOG.debug("xml id: [" + xmlRecord.getIdentifier() + "]");
        LOG.debug("xml type: [" + xmlRecord.getTypeString() + "]");
        LOG.debug("tree id: [" + treeRecord.getIdentifier() + "]");
        LOG.debug("tree type: [" + treeRecord.getTypeString() + "]");
        LOG.debug("xml type num: [" + xmlRecord.getTypeId() + "]");
        LOG.debug("tree type num: [" + treeRecord.getTypeId() + "]");
    }
    // drain any unexpected trailing records so the stream is left
    // positioned after the DIME message.
    while(parser.hasNext()) {
        if(LOG.isWarnEnabled())
            LOG.warn("more elements in the dime record.");
        parser.nextRecord(); // ignore them.
    }
    String xml = new String(xmlRecord.getData(), "UTF-8");
    byte[] hashTree = treeRecord.getData();
    // validate the XML description against our expectations
    // (block size, digest algorithm, hash size, serialization type).
    XMLTreeDescription xtd = new XMLTreeDescription(xml);
    if (!xtd.isValid())
        throw new IOException(
            "invalid XMLTreeDescription " + xtd.toString());
    if (xtd.getFileSize() != fileSize)
        throw new IOException(
            "file size attribute was "
            + xtd.getFileSize()
            + " expected "
            + fileSize);
    // verify the serialized tree bottom-up against the expected root.
    HashTreeDescription htr = new HashTreeDescription(hashTree);
    if (!Base32.encode(htr.getRoot()).equals(root32))
        throw new IOException("Root hashes do not match");
    return htr.getAllNodes(fileSize);
}
/**
 * @author Gregorio Roper
 *
 * private class holding the XML Tree description
 */
private static class XMLTreeDescription {
    // parse-state values: parsing is attempted at most once,
    // the result is cached in _parsed.
    private static final int UNKNOWN = 0;
    private static final int VALID = 1;
    private static final int INVALID = 2;
    private int _parsed = UNKNOWN;
    private long _fileSize = 0;
    private int _blockSize = 0;
    private String _algorithm = null;
    private int _hashSize = 0;
    private String _serializationType = null;
    private String _uri;
    // the raw XML text; may be patched in parse() (Shareaza hack).
    private String data;

    protected XMLTreeDescription(String xml) {
        data = xml;
    }

    /*
     * Accessor for the _fileSize;
     */
    long getFileSize() {
        return _fileSize;
    }

    /**
     * Accessor for the _uri;
     */
    String getURI() {
        return _uri;
    }

    /**
     * Check if the xml tree description if the tree is what we expected:
     * parses the XML (once, caching the result) and then verifies that
     * the advertised segment size, digest algorithm, hash output size
     * and serialization type all match this implementation.
     */
    boolean isValid() {
        if (_parsed == UNKNOWN) {
            _parsed = parse() ? VALID : INVALID;
        }
        if(_parsed == INVALID) {
            return false;
        } else if (_blockSize != HashTree.BLOCK_SIZE) {
            if(LOG.isDebugEnabled())
                LOG.debug("unexpected block size: " + _blockSize);
            return false;
        } else if (!DIGEST.equals(_algorithm)) {
            if(LOG.isDebugEnabled())
                LOG.debug("unsupported digest algorithm: " + _algorithm);
            return false;
        } else if (_hashSize != HASH_SIZE) {
            // BUGFIX: this branch previously logged the block size;
            // report the actual mismatching value.
            if(LOG.isDebugEnabled())
                LOG.debug("unexpected hash size: " + _hashSize);
            return false;
        } else if (!SERIALIZED_TREE_TYPE.equals(_serializationType)) {
            if(LOG.isDebugEnabled())
                LOG.debug("unexpected serialization type: " +
                          _serializationType);
            return false;
        }
        return true;
    }

    /*
     * A simple parsing method for reading the xml tree description.
     */
    private boolean parse() {
        // hack!
        // Shareaza sends invalid XML: it uses a lowercase "system"
        // keyword in the DOCTYPE declaration.  Patch it to the
        // uppercase keyword before handing it to the parser.
        int offset = data.indexOf("system");
        if (offset > 0 && offset < data.indexOf(DTD_SYSTEM_ID)) {
            data = data.substring(0, offset) +
                   SYSTEM_STRING +
                   data.substring(offset + "system".length());
        }
        if (LOG.isDebugEnabled())
            LOG.debug("XMLTreeDescription read: " + data);
        DOMParser parser = new DOMParser();
        InputSource is = new InputSource(new StringReader(data));
        // resolve the THEX DTD from memory -- never hit the network.
        parser.setEntityResolver(new Resolver());
        try {
            parser.parse(is);
        } catch (IOException ioe) {
            LOG.debug(ioe);
            return false;
        } catch (SAXException saxe) {
            LOG.debug(saxe);
            return false;
        }
        Document doc = parser.getDocument();
        Node treeDesc = doc.getElementsByTagName("hashtree").item(0);
        if (treeDesc == null) {
            if(LOG.isDebugEnabled())
                LOG.debug("couldn't find hashtree element: " + data);
            return false;
        }
        // pull the attributes out of the three child elements.
        NodeList nodes = treeDesc.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                Element el = (Element) node;
                if (el.getTagName().equals("file"))
                    parseFileElement(el);
                else if (el.getTagName().equals("digest"))
                    parseDigestElement(el);
                else if (el.getTagName().equals("serializedtree"))
                    parseSerializedtreeElement(el);
            }
        }
        return true;
    }

    /**
     * Reads the size and segmentsize attributes of the file element.
     * Unparsable values are logged and left at their defaults.
     */
    private void parseFileElement(Element e) {
        // BUGFIX: the failure logs below previously passed
        // e.getNodeValue(), which is always null for an Element node;
        // log the offending attribute value instead.
        try {
            _fileSize = Long.parseLong(e.getAttribute("size"));
        } catch (NumberFormatException nfe) {
            if(LOG.isDebugEnabled())
                LOG.debug("couldn't parse file size: " +
                          e.getAttribute("size"), nfe);
        }
        try {
            _blockSize = Integer.parseInt(e.getAttribute("segmentsize"));
        } catch (NumberFormatException nfe) {
            if(LOG.isDebugEnabled())
                LOG.debug("couldn't parse block size: " +
                          e.getAttribute("segmentsize"), nfe);
        }
    }

    /**
     * Reads the algorithm and outputsize attributes of the digest
     * element.  An unparsable outputsize is logged and left at 0.
     */
    private void parseDigestElement(Element e) {
        _algorithm = e.getAttribute("algorithm");
        try {
            _hashSize = Integer.parseInt(e.getAttribute("outputsize"));
        } catch (NumberFormatException nfe) {
            if(LOG.isDebugEnabled())
                LOG.debug("couldn't parse hash size: " +
                          e.getAttribute("outputsize"), nfe);
        }
    }

    /**
     * Reads the type, uri and depth attributes of the serializedtree
     * element.
     */
    private void parseSerializedtreeElement(Element e) {
        _serializationType = e.getAttribute("type");
        _uri = e.getAttribute("uri");
        try {
            // value is ignored, but if it can't be parsed we should add
            // a notice to the Log
            Integer.parseInt(e.getAttribute("depth"));
        } catch (NumberFormatException nfe) {
            if(LOG.isDebugEnabled())
                LOG.debug("couldn't parse depth: " +
                          e.getAttribute("depth"), nfe);
        }
    }
}
/**
 * A custom EntityResolver so we don't hit a website for resolving.
 */
private static final class Resolver implements EntityResolver {
    public Resolver() {}

    /**
     * Serves the THEX DTD from the in-memory copy when its system id
     * is requested; refuses to resolve anything else so the parser
     * can never open a network connection.
     */
    public InputSource resolveEntity(String publicId, String systemId)
      throws SAXException, IOException {
        if (systemId.equals(DTD_SYSTEM_ID)) {
            InputSource dtd = new InputSource(new StringReader(DTD_ENTITY));
            dtd.setPublicId(DTD_PUBLIC_ID);//optional
            dtd.setSystemId(DTD_SYSTEM_ID);//required
            return dtd;
        }
        //the parser will open a regular URI connection to the systemId
        //if we return null. Here we don't want this to occur...
        if (publicId != null)
            throw new SAXException("Can't resolve PUBLIC entity '" +
                                   publicId + "' at '" +
                                   systemId + "'");
        throw new SAXException("Can't resolve SYSTEM entity at '" +
                               systemId + "'");
    }
}
/**
 * @author Gregorio Roper
 *
 * private class holding serialized HashTree
 */
private static class HashTreeDescription {
    // the raw breadth-first concatenation of all tree hashes,
    // HASH_SIZE bytes per node, root first.
    private final byte[] DATA;

    protected HashTreeDescription(byte[] data) {
        DATA = data;
    }

    /*
     * Accessor for root hash: the first HASH_SIZE bytes of DATA.
     *
     * @throws IOException if DATA holds less than one full hash.
     */
    byte[] getRoot() throws IOException {
        if (DATA.length < HASH_SIZE)
            throw new IOException("invalid data");
        byte[] ret = new byte[HASH_SIZE];
        System.arraycopy(DATA, 0, ret, 0, HASH_SIZE);
        return ret;
    }

    /*
     * Returns a List containing a generation for nodes from the hash tree.
     *
     * Each element of the returned list is one generation (row) of the
     * tree, root generation first.  Every generation after the root is
     * verified by recomputing its parent generation and comparing it to
     * the previously accepted one.
     *
     * @throws IOException if the hashes did not match.
     */
    List getAllNodes(long fileSize) throws IOException {
        int depth = HashTree.calculateDepth(fileSize);
        List hashes = new ArrayList();
        byte[] data = DATA;
        if (data.length % HASH_SIZE != 0) {
            if (LOG.isDebugEnabled())
                LOG.debug("illegal size of data field for HashTree");
            throw new IOException("corrupted hash tree detected");
        }
        // read the hashes from the data field
        for (int i = 0; i + HASH_SIZE <= data.length; i += HASH_SIZE) {
            byte[] hash = new byte[HASH_SIZE];
            System.arraycopy(data, i, hash, 0, HASH_SIZE);
            hashes.add(hash);
        }
        // NOTE(review): root32 is never read afterwards; the call is
        // kept for its side effect -- getRoot() throws if DATA is
        // shorter than one hash.
        String root32 = Base32.encode(getRoot());
        // iterator of all hashes we read
        Iterator hashIterator = hashes.iterator();
        // the current generation we are working on
        List generation = new ArrayList(1);
        // stores the last verified generation
        List parent = null;
        // index of the generation we are working on.
        int genIndex = 0;
        // whether or not the current row is verified.
        boolean verified = false;
        List allNodes = new ArrayList(depth+1);
        // Iterate through the read elements and see if they match
        // what we calculate.
        // Only calculate when we've read enough of the current
        // generation that it may be a full generation.
        // Imagine the trees:
        //           A
        //          / \
        //         B   C
        //        / \   \
        //       D   E   C
        //      /\   /\   \
        //     F  G H  I   C
        // or
        //           A
        //          / \
        //         B   C
        //        / \  / \
        //       D   E F  G
        //      /\  /\ /\  /\
        //     I H J K L M N O
        //
        // In both cases, we only have read the full child gen.
        // when we've read parent.size()*2 or parent.size()*2-1
        // child nodes.
        // If it didn't match on parent.size()*2, and
        // the child has greater than that, then the tree is
        // corrupt.
        while (genIndex <= depth && hashIterator.hasNext()) {
            verified = false;
            byte[] hash = (byte[]) hashIterator.next();
            generation.add(hash);
            if (parent == null) {
                verified = true;
                // add generation 0 containing the root hash
                genIndex++;
                parent = generation;
                allNodes.add(generation);
                generation = new ArrayList(2);
            } else if (generation.size() > parent.size() * 2) {
                // the current generation is already too big => the hash
                // tree is corrupted, abort at once!
                // (the debug block below dumps all three rows in Base32
                // to help diagnose which generation failed)
                if (LOG.isDebugEnabled()) {
                    LOG.debug("parent");
                    String str = "";
                    for (Iterator iter = parent.iterator(); iter.hasNext(); ) {
                        str = str + Base32.encode((byte[])iter.next()) + "; ";
                    }
                    LOG.debug(str);
                    str = "";
                    LOG.debug("newparent");
                    List newparent = HashTree.createParentGeneration(generation);
                    for (Iterator iter = newparent.iterator(); iter.hasNext(); ) {
                        str = str + Base32.encode((byte[])iter.next()) + "; ";
                    }
                    LOG.debug(str);
                    str = "";
                    LOG.debug("generation");
                    for (Iterator iter = generation.iterator(); iter.hasNext(); ) {
                        str = str + Base32.encode((byte[])iter.next()) + "; ";
                    }
                    LOG.debug(str);
                    str = "";
                }
                throw new IOException("corrupted hash tree detected");
            } else if (generation.size() == parent.size() * 2 - 1 ||
                       generation.size() == parent.size() * 2) {
                List calculatedParent =
                    HashTree.createParentGeneration(generation);
                if(isMatching(parent, calculatedParent)) {
                    // the current generation is complete and verified!
                    genIndex++;
                    parent = generation;
                    // NOTE(review): generations after the root are stored
                    // unmodifiable; the root generation above is not.
                    allNodes.add(Collections.unmodifiableList(generation));
                    // only create room for a new generation if one exists
                    if(genIndex <= depth && hashIterator.hasNext())
                        generation = new ArrayList(parent.size() * 2);
                    verified = true;
                }
            }
        } // end of while
        // If the current row was unable to verify, fail.
        // In mostly all cases, this will occur with the inner if
        // statement in the above loop. However, if the last row
        // is the one that had the problem, the loop will not catch it.
        if(!verified)
            throw new IOException("corrupted hash tree detected");
        LOG.debug("Valid hash tree received.");
        return allNodes;
    }

    /**
     * Determines if two lists of byte arrays completely match.
     */
    private boolean isMatching(List a, List b) {
        if (a.size() == b.size()) {
            for (int i = 0; i < a.size(); i++) {
                byte[] one = (byte[]) a.get(i);
                byte[] two = (byte[]) b.get(i);
                if(!Arrays.equals(one, two))
                    return false;
            }
            return true;
        }
        return false;
    }
}
}