package com.limegroup.gnutella.tigertree.dime;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.limewire.util.Base32;
import org.limewire.util.XMLUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.limegroup.gnutella.dime.DIMEParser;
import com.limegroup.gnutella.dime.DIMERecord;
import com.limegroup.gnutella.security.Tiger;
import com.limegroup.gnutella.tigertree.HashTreeUtils;
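/**
 * Reads Tiger hash trees from their DIME encoding: the first DIME
 * record holds an XML description of the tree, the second holds the
 * serialized tree hashes themselves.
 */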
public class TigerDimeReadUtils {
private static final Log LOG = LogFactory.getLog(TigerDimeReadUtils.class);
/**
* Private class holding the XML tree description.
*
* @author Gregorio Roper
*/
private static class XMLTreeDescription {
private static final int UNKNOWN = 0;
private static final int VALID = 1;
private static final int INVALID = 2;
private int _parsed = UNKNOWN;
private long _fileSize = 0;
private int _blockSize = 0;
private String _algorithm = null;
private int _hashSize = 0;
private String _serializationType = null;
// private String _uri;
private String data;
protected XMLTreeDescription(String xml) {
data = xml;
}
/*
* Accessor for the _fileSize.
*/
long getFileSize() {
return _fileSize;
}
/**
* Checks whether the XML tree description matches what we expect.
*/
boolean isValid() {
if (_parsed == UNKNOWN) {
_parsed = parse() ? VALID : INVALID;
}
if(_parsed == INVALID) {
return false;
} else if (_blockSize != HashTreeUtils.BLOCK_SIZE) {
if(LOG.isDebugEnabled())
LOG.debug("unexpected block size: " + _blockSize);
return false;
} else if (!TigerDimeUtils.DIGEST.equals(_algorithm)) {
if(LOG.isDebugEnabled())
LOG.debug("unsupported digest algorithm: " + _algorithm);
return false;
} else if (_hashSize != TigerDimeUtils.HASH_SIZE) {
if(LOG.isDebugEnabled())
LOG.debug("unexpected hash size: " + _hashSize);
return false;
} else if (!TigerDimeUtils.SERIALIZED_TREE_TYPE.equals(_serializationType)) {
if(LOG.isDebugEnabled())
LOG.debug("unexpected serialization type: " +
_serializationType);
return false;
}
return true;
}
/*
* A simple parsing method for reading the xml tree description.
*/
private boolean parse() {
// Hack: Shareaza sends invalid XML; if a bare "system" keyword
// appears before the DTD system id, replace it with the proper
// SYSTEM declaration so the parser accepts the document.
int offset = data.indexOf("system");
if (offset > 0 && offset < data.indexOf(TigerDimeUtils.DTD_SYSTEM_ID)) {
data = data.substring(0, offset) +
TigerDimeUtils.SYSTEM_STRING +
data.substring(offset + "system".length());
}
if (LOG.isDebugEnabled())
LOG.debug("XMLTreeDescription read: " + data);
Document doc = null;
try {
doc = XMLUtils.getDocument(data, new Resolver(), new XMLUtils.LogErrorHandler(LOG));
} catch (IOException ioe) {
LOG.debug(ioe);
return false;
}
Node treeDesc = doc.getElementsByTagName("hashtree").item(0);
if (treeDesc == null) {
if(LOG.isDebugEnabled())
LOG.debug("couldn't find hashtree element: " + data);
return false;
}
NodeList nodes = treeDesc.getChildNodes();
for (int i = 0; i < nodes.getLength(); i++) {
Node node = nodes.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
Element el = (Element) node;
if (el.getTagName().equals("file"))
parseFileElement(el);
else if (el.getTagName().equals("digest"))
parseDigestElement(el);
else if (el.getTagName().equals("serializedtree"))
parseSerializedtreeElement(el);
}
}
return true;
}
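/*
 * For reference, parse() expects a description of roughly this
 * shape (the values below are illustrative, not normative; the
 * element and attribute names match what the parse*Element
 * methods read):
 *
 *   <hashtree>
 *     <file size="1048576" segmentsize="1024"/>
 *     <digest algorithm="..." outputsize="24"/>
 *     <serializedtree depth="10" type="..." uri="..."/>
 *   </hashtree>
 */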
private void parseFileElement(Element e) {
try {
_fileSize = Long.parseLong(e.getAttribute("size"));
} catch (NumberFormatException nfe) {
if(LOG.isDebugEnabled())
LOG.debug("couldn't parse file size: " + e.getNodeValue(),
nfe);
}
try {
_blockSize = Integer.parseInt(e.getAttribute("segmentsize"));
} catch (NumberFormatException nfe) {
if(LOG.isDebugEnabled())
LOG.debug("couldn't parse block size: " + e.getNodeValue(),
nfe);
}
}
private void parseDigestElement(Element e) {
_algorithm = e.getAttribute("algorithm");
try {
_hashSize = Integer.parseInt(e.getAttribute("outputsize"));
} catch (NumberFormatException nfe) {
if(LOG.isDebugEnabled())
LOG.debug("couldn't parse hash size: " + e.getNodeValue(),
nfe);
}
}
private void parseSerializedtreeElement(Element e) {
_serializationType = e.getAttribute("type");
// _uri = e.getAttribute("uri");
try {
// the value is ignored, but log a debug notice if it
// can't be parsed
Integer.parseInt(e.getAttribute("depth"));
} catch (NumberFormatException nfe) {
if(LOG.isDebugEnabled())
LOG.debug("couldn't parse depth: " + e.getNodeValue(),
nfe);
}
}
}
/**
* A custom EntityResolver so we don't hit a website to resolve the DTD.
*/
private static final class Resolver implements EntityResolver {
public Resolver() {}
public InputSource resolveEntity(String publicId, String systemId)
throws SAXException, IOException {
if (systemId.equals(TigerDimeUtils.DTD_SYSTEM_ID)) {
InputSource is = new InputSource(new StringReader(TigerDimeUtils.DTD_ENTITY));
is.setPublicId(TigerDimeUtils.DTD_PUBLIC_ID);//optional
is.setSystemId(TigerDimeUtils.DTD_SYSTEM_ID);//required
return is;
}
//the parser will open a regular URI connection to the systemId
//if we return null. Here we don't want this to occur...
if (publicId == null)
throw new SAXException("Can't resolve SYSTEM entity at '" +
systemId + "'");
else
throw new SAXException("Can't resolve PUBLIC entity '" +
publicId + "' at '" +
systemId + "'");
}
}
/**
* Private class holding the serialized hash tree.
*
* @author Gregorio Roper
*/
private static class HashTreeDescription {
private final byte[] DATA;
protected HashTreeDescription(byte[] data) {
DATA = data;
}
/*
* Accessor for root hash.
*/
byte[] getRoot() throws IOException {
if (DATA.length < TigerDimeUtils.HASH_SIZE)
throw new IOException("invalid data");
byte[] ret = new byte[TigerDimeUtils.HASH_SIZE];
System.arraycopy(DATA, 0, ret, 0, TigerDimeUtils.HASH_SIZE);
return ret;
}
/*
* Returns a List of generations from the hash tree: one List of
* hashes per depth, from the root generation down to the leaves.
*
* @throws IOException if the hashes did not match.
*/
List<List<byte[]>> getAllNodes(long fileSize) throws IOException {
int depth = HashTreeUtils.calculateDepth(fileSize);
List<byte[]> hashes = new ArrayList<byte[]>();
byte[] data = DATA;
if (data.length % TigerDimeUtils.HASH_SIZE != 0) {
if (LOG.isDebugEnabled())
LOG.debug("illegal size of data field for HashTree");
throw new IOException("corrupted hash tree detected");
}
// read the hashes from the data field
for (int i = 0; i + TigerDimeUtils.HASH_SIZE <= data.length; i += TigerDimeUtils.HASH_SIZE) {
byte[] hash = new byte[TigerDimeUtils.HASH_SIZE];
System.arraycopy(data, i, hash, 0, TigerDimeUtils.HASH_SIZE);
hashes.add(hash);
}
// iterator of all hashes we read
Iterator<byte[]> hashIterator = hashes.iterator();
// the current generation we are working on
List<byte[]> generation = new ArrayList<byte[]>(1);
// stores the last verified generation
List<byte[]> parent = null;
// index of the generation we are working on.
int genIndex = 0;
// whether or not the current row is verified.
boolean verified = false;
List<List<byte[]>> allNodes = new ArrayList<List<byte[]>>(depth+1);
// Iterate through the read elements and see if they match
// what we calculate.
// Only calculate when we've read enough of the current
// generation that it may be a full generation.
// Imagine the trees:
//              A
//            /   \
//           B     C
//          / \     \
//         D   E     C
//        / \ / \     \
//       F  G H  I     C
// or
//              A
//            /   \
//           B     C
//          / \   / \
//         D   E F   G
//        /\  /\ /\  /\
//       H I  J K L M N O
//
// In both cases, we have only read the full child generation
// once we've read parent.size()*2 or parent.size()*2-1 child
// nodes. If verification didn't succeed at parent.size()*2 and
// the child generation grows beyond that, the tree is corrupt.
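// As a concrete example (using the trees above): with
// parent = [B, C] (size 2), we attempt verification once the
// child generation reaches 2*2-1 = 3 hashes (D, E, C in the
// first tree) and again at 2*2 = 4 hashes (D, E, F, G in the
// second); a fifth hash without a match means corruption.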
while (genIndex <= depth && hashIterator.hasNext()) {
verified = false;
byte[] hash = hashIterator.next();
generation.add(hash);
if (parent == null) {
verified = true;
// add generation 0 containing the root hash
genIndex++;
parent = generation;
allNodes.add(generation);
generation = new ArrayList<byte[]>(2);
} else if (generation.size() > parent.size() * 2) {
// the current generation is already too big => the hash
// tree is corrupted, abort at once!
if (LOG.isDebugEnabled()) {
// dump the expected parent, the parent we calculate from
// the current generation, and the current generation
LOG.debug("parent");
StringBuilder str = new StringBuilder();
for(byte[] b : parent)
str.append(Base32.encode(b)).append("; ");
LOG.debug(str.toString());
str.setLength(0);
LOG.debug("newparent");
List<byte[]> newparent = HashTreeUtils.createParentGeneration(generation, new Tiger());
for(byte[] b : newparent)
str.append(Base32.encode(b)).append("; ");
LOG.debug(str.toString());
str.setLength(0);
LOG.debug("generation");
for(byte[] b : generation)
str.append(Base32.encode(b)).append("; ");
LOG.debug(str.toString());
}
throw new IOException("corrupted hash tree detected");
} else if (generation.size() == parent.size() * 2 - 1 ||
generation.size() == parent.size() * 2) {
List<byte[]> calculatedParent =
HashTreeUtils.createParentGeneration(generation, new Tiger());
if(isMatching(parent, calculatedParent)) {
// the current generation is complete and verified!
genIndex++;
parent = generation;
allNodes.add(Collections.unmodifiableList(generation));
// only create room for a new generation if one exists
if(genIndex <= depth && hashIterator.hasNext())
generation = new ArrayList<byte[]>(parent.size() * 2);
verified = true;
}
}
} // end of while
// If the current row could not be verified, fail.
// In almost all cases this is caught by the inner if
// statement in the loop above; but if the last row is the
// one with the problem, the loop will not catch it.
if(!verified)
throw new IOException("corrupted hash tree detected");
LOG.debug("Valid hash tree received.");
return allNodes;
}
/**
* Determines if two lists of byte arrays completely match.
*/
private boolean isMatching(List<byte[]> a, List<byte[]> b) {
if (a.size() == b.size()) {
for (int i = 0; i < a.size(); i++) {
byte[] one = a.get(i);
byte[] two = b.get(i);
if(!Arrays.equals(one, two))
return false;
}
return true;
}
return false;
}
}
/**
* Reads a HashTree in DIME format from an input stream.
* Returns the list of all nodes of the tree.
*
* @param is
* the <tt>InputStream</tt> to read from
* @param fileSize
* the size of the file we expect the hash tree for
* @param root32
* Base32 encoded root hash
* @return The list of all nodes in this tree.
* @throws IOException
* in case of a problem reading from the InputStream
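*
* <p>A typical call looks like this (sketch; the input stream and
* the expected values are assumed to come from elsewhere):
* <pre>
* List&lt;List&lt;byte[]&gt;&gt; nodes =
*     TigerDimeReadUtils.read(in, fileSize, root32);
* byte[] root = nodes.get(0).get(0); // generation 0 holds the root hash
* </pre>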
*/
public static List<List<byte[]>> read(InputStream is, long fileSize, String root32)
throws IOException {
LOG.trace("creating HashTreeHandler from network");
DIMEParser parser = new DIMEParser(is);
return nodesFromRecords(parser, fileSize, root32);
}
/**
* Returns the list of tree-node generations parsed from a sequence
* of DIME records.
*
* @param iterator iterator over the DIME records; the first record
*        must contain the XML tree description, the second the
*        serialized hash tree
* @param fileSize the size of the file we expect the hash tree for
* @param root32 Base32 encoded root hash
* @return the list of all nodes in this tree, one list per generation
* @throws IOException if a record is missing or invalid
*/
static List<List<byte[]>> nodesFromRecords(Iterator<DIMERecord> iterator, long fileSize, String root32) throws IOException {
if(!iterator.hasNext())
throw new IOException("no xml record");
DIMERecord xmlRecord = iterator.next();
if(!iterator.hasNext())
throw new IOException("no tree record");
DIMERecord treeRecord = iterator.next();
if(LOG.isDebugEnabled()) {
LOG.debug("xml id: [" + xmlRecord.getIdentifier() + "]");
LOG.debug("xml type: [" + xmlRecord.getTypeString() + "]");
LOG.debug("tree id: [" + treeRecord.getIdentifier() + "]");
LOG.debug("tree type: [" + treeRecord.getTypeString() + "]");
LOG.debug("xml type num: [" + xmlRecord.getTypeId() + "]");
LOG.debug("tree type num: [" + treeRecord.getTypeId() + "]");
}
while(iterator.hasNext()) {
if(LOG.isWarnEnabled())
LOG.warn("more elements in the dime record.");
iterator.next(); // ignore them.
}
String xml = new String(xmlRecord.getData(), "UTF-8");
byte[] hashTree = treeRecord.getData();
XMLTreeDescription xtd = new XMLTreeDescription(xml);
if (!xtd.isValid())
throw new IOException(
"invalid XMLTreeDescription " + xml);
if (xtd.getFileSize() != fileSize)
throw new IOException(
"file size attribute was "
+ xtd.getFileSize()
+ " expected "
+ fileSize);
HashTreeDescription htr = new HashTreeDescription(hashTree);
if (!Base32.encode(htr.getRoot()).equals(root32))
throw new IOException("Root hashes do not match");
return htr.getAllNodes(fileSize);
}
}