package com.limegroup.gnutella.tigertree.dime;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.limewire.util.Base32;
import org.limewire.util.XMLUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.limegroup.gnutella.dime.DIMEParser;
import com.limegroup.gnutella.dime.DIMERecord;
import com.limegroup.gnutella.security.Tiger;
import com.limegroup.gnutella.tigertree.HashTreeUtils;

public class TigerDimeReadUtils {

    private static final Log LOG = LogFactory.getLog(TigerDimeReadUtils.class);

    /**
     * Private class holding the XML tree description.
     *
     * @author Gregorio Roper
     */
    private static class XMLTreeDescription {
        private static final int UNKNOWN = 0;
        private static final int VALID = 1;
        private static final int INVALID = 2;

        private int _parsed = UNKNOWN;
        private long _fileSize = 0;
        private int _blockSize = 0;
        private String _algorithm = null;
        private int _hashSize = 0;
        private String _serializationType = null;
        // private String _uri;
        private String data;

        protected XMLTreeDescription(String xml) {
            data = xml;
        }

        /*
         * Accessor for _fileSize.
         */
        long getFileSize() {
            return _fileSize;
        }

        /**
         * Checks whether the XML tree description matches the tree we expected.
         */
        boolean isValid() {
            if (_parsed == UNKNOWN) {
                _parsed = parse() ? VALID : INVALID;
            }
            if (_parsed == INVALID) {
                return false;
            } else if (_blockSize != HashTreeUtils.BLOCK_SIZE) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unexpected block size: " + _blockSize);
                return false;
            } else if (!TigerDimeUtils.DIGEST.equals(_algorithm)) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unsupported digest algorithm: " + _algorithm);
                return false;
            } else if (_hashSize != TigerDimeUtils.HASH_SIZE) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unexpected hash size: " + _hashSize);
                return false;
            } else if (!TigerDimeUtils.SERIALIZED_TREE_TYPE.equals(_serializationType)) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unexpected serialization type: " + _serializationType);
                return false;
            }
            return true;
        }

        /*
         * A simple parsing method for reading the xml tree description.
         */
        private boolean parse() {
            // hack!
            // Shareaza sends invalid XML, so patch the DOCTYPE before parsing.
            int offset = data.indexOf("system");
            if (offset > 0 && offset < data.indexOf(TigerDimeUtils.DTD_SYSTEM_ID)) {
                data = data.substring(0, offset)
                     + TigerDimeUtils.SYSTEM_STRING
                     + data.substring(offset + "system".length());
            }

            if (LOG.isDebugEnabled())
                LOG.debug("XMLTreeDescription read: " + data);

            Document doc = null;
            try {
                doc = XMLUtils.getDocument(data, new Resolver(), new XMLUtils.LogErrorHandler(LOG));
            } catch (IOException ioe) {
                LOG.debug(ioe);
                return false;
            }

            Node treeDesc = doc.getElementsByTagName("hashtree").item(0);
            if (treeDesc == null) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't find hashtree element: " + data);
                return false;
            }

            NodeList nodes = treeDesc.getChildNodes();
            for (int i = 0; i < nodes.getLength(); i++) {
                Node node = nodes.item(i);
                if (node.getNodeType() == Node.ELEMENT_NODE) {
                    Element el = (Element) node;
                    if (el.getTagName().equals("file"))
                        parseFileElement(el);
                    else if (el.getTagName().equals("digest"))
                        parseDigestElement(el);
                    else if (el.getTagName().equals("serializedtree"))
                        parseSerializedtreeElement(el);
                }
            }
            return true;
        }

        private void parseFileElement(Element e) {
            try {
                _fileSize = Long.parseLong(e.getAttribute("size"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse file size: " + e.getNodeValue(), nfe);
            }
            try {
                _blockSize = Integer.parseInt(e.getAttribute("segmentsize"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse block size: " + e.getNodeValue(), nfe);
            }
        }

        private void parseDigestElement(Element e) {
            _algorithm = e.getAttribute("algorithm");
            try {
                _hashSize = Integer.parseInt(e.getAttribute("outputsize"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse hash size: " + e.getNodeValue(), nfe);
            }
        }

        private void parseSerializedtreeElement(Element e) {
            _serializationType = e.getAttribute("type");
            // _uri = e.getAttribute("uri");
            try {
                // value is ignored, but if it can't be parsed we should add
                // a notice to the Log
                Integer.parseInt(e.getAttribute("depth"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse depth: " + e.getNodeValue(), nfe);
            }
        }
    }

    /**
     * A custom EntityResolver so we don't hit a website for resolving.
     */
    private static final class Resolver implements EntityResolver {
        public Resolver() {}

        public InputSource resolveEntity(String publicId, String systemId)
                throws SAXException, IOException {
            if (systemId.equals(TigerDimeUtils.DTD_SYSTEM_ID)) {
                InputSource is = new InputSource(new StringReader(TigerDimeUtils.DTD_ENTITY));
                is.setPublicId(TigerDimeUtils.DTD_PUBLIC_ID); // optional
                is.setSystemId(TigerDimeUtils.DTD_SYSTEM_ID); // required
                return is;
            }
            // The parser will open a regular URI connection to the systemId
            // if we return null. Here we don't want this to occur...
            if (publicId == null)
                throw new SAXException("Can't resolve SYSTEM entity at '" + systemId + "'");
            else
                throw new SAXException("Can't resolve PUBLIC entity '" + publicId
                    + "' at '" + systemId + "'");
        }
    }

    /**
     * Private class holding the serialized HashTree.
     *
     * @author Gregorio Roper
     */
    private static class HashTreeDescription {
        private final byte[] DATA;

        protected HashTreeDescription(byte[] data) {
            DATA = data;
        }

        /*
         * Accessor for the root hash.
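         * The root is simply the first HASH_SIZE bytes of the serialized
         * tree data.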
         */
        byte[] getRoot() throws IOException {
            if (DATA.length < TigerDimeUtils.HASH_SIZE)
                throw new IOException("invalid data");
            byte[] ret = new byte[TigerDimeUtils.HASH_SIZE];
            System.arraycopy(DATA, 0, ret, 0, TigerDimeUtils.HASH_SIZE);
            return ret;
        }

        /*
         * Returns a List containing one List of hashes per generation of the
         * hash tree, from the root generation down to the leaves.
         *
         * @throws IOException if the hashes did not match.
         */
        List<List<byte[]>> getAllNodes(long fileSize) throws IOException {
            int depth = HashTreeUtils.calculateDepth(fileSize);
            List<byte[]> hashes = new ArrayList<byte[]>();
            byte[] data = DATA;

            if (data.length % TigerDimeUtils.HASH_SIZE != 0) {
                if (LOG.isDebugEnabled())
                    LOG.debug("illegal size of data field for HashTree");
                throw new IOException("corrupted hash tree detected");
            }

            // read the hashes from the data field
            for (int i = 0; i + TigerDimeUtils.HASH_SIZE <= data.length; i += TigerDimeUtils.HASH_SIZE) {
                byte[] hash = new byte[TigerDimeUtils.HASH_SIZE];
                System.arraycopy(data, i, hash, 0, TigerDimeUtils.HASH_SIZE);
                hashes.add(hash);
            }

            // iterator of all hashes we read
            Iterator<byte[]> hashIterator = hashes.iterator();
            // the current generation we are working on
            List<byte[]> generation = new ArrayList<byte[]>(1);
            // stores the last verified generation
            List<byte[]> parent = null;
            // index of the generation we are working on.
            int genIndex = 0;
            // whether or not the current row is verified.
            boolean verified = false;
            List<List<byte[]>> allNodes = new ArrayList<List<byte[]>>(depth + 1);

            // Iterate through the read elements and see if they match
            // what we calculate.
            // Only calculate when we've read enough of the current
            // generation that it may be a full generation.
            // Imagine the trees:
            //           A
            //          / \
            //         B   C
            //        / \   \
            //       D   E   C
            //      / \ / \   \
            //     F  G H  I   C
            // or
            //           A
            //          / \
            //         B   C
            //        / \ / \
            //       D  E F  G
            //      /\ /\ /\ /\
            //     I H J K L M N O
            //
            // In both cases, we have only read the full child generation
            // when we've read parent.size()*2 or parent.size()*2-1
            // child nodes.
            // If it didn't match at parent.size()*2, and the child
            // generation grows beyond that, then the tree is corrupt.
            while (genIndex <= depth && hashIterator.hasNext()) {
                verified = false;
                byte[] hash = hashIterator.next();
                generation.add(hash);
                if (parent == null) {
                    verified = true;
                    // add generation 0 containing the root hash
                    genIndex++;
                    parent = generation;
                    allNodes.add(generation);
                    generation = new ArrayList<byte[]>(2);
                } else if (generation.size() > parent.size() * 2) {
                    // the current generation is already too big => the hash
                    // tree is corrupted, abort at once!
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("parent");
                        String str = "";
                        for (byte[] b : parent)
                            str = str + Base32.encode(b) + "; ";
                        LOG.debug(str);
                        str = "";
                        LOG.debug("newparent");
                        List<byte[]> newparent = HashTreeUtils.createParentGeneration(generation, new Tiger());
                        for (byte[] b : newparent)
                            str = str + Base32.encode(b) + "; ";
                        LOG.debug(str);
                        str = "";
                        LOG.debug("generation");
                        for (byte[] b : generation)
                            str = str + Base32.encode(b) + "; ";
                        LOG.debug(str);
                        str = "";
                    }
                    throw new IOException("corrupted hash tree detected");
                } else if (generation.size() == parent.size() * 2 - 1
                        || generation.size() == parent.size() * 2) {
                    List<byte[]> calculatedParent =
                        HashTreeUtils.createParentGeneration(generation, new Tiger());
                    if (isMatching(parent, calculatedParent)) {
                        // the current generation is complete and verified!
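                        // The verified generation becomes the parent against
                        // which the next, larger generation is checked.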
                        genIndex++;
                        parent = generation;
                        allNodes.add(Collections.unmodifiableList(generation));
                        // only create room for a new generation if one exists
                        if (genIndex <= depth && hashIterator.hasNext())
                            generation = new ArrayList<byte[]>(parent.size() * 2);
                        verified = true;
                    }
                }
            } // end of while

            // If the current row could not be verified, fail.
            // In almost all cases this is caught by the checks inside the
            // loop above; however, if the last row is the one with the
            // problem, the loop will not catch it.
            if (!verified)
                throw new IOException("corrupted hash tree detected");

            LOG.debug("Valid hash tree received.");
            return allNodes;
        }

        /**
         * Determines if two lists of byte arrays completely match.
         */
        private boolean isMatching(List<byte[]> a, List<byte[]> b) {
            if (a.size() == b.size()) {
                for (int i = 0; i < a.size(); i++) {
                    byte[] one = a.get(i);
                    byte[] two = b.get(i);
                    if (!Arrays.equals(one, two))
                        return false;
                }
                return true;
            }
            return false;
        }
    }

    /**
     * Reads a HashTree in DIME format from an input stream and returns the
     * list of all nodes of the tree.
     *
     * @param is
     *            the <tt>InputStream</tt> to read from
     * @param fileSize
     *            the size of the file we expect the hash tree for
     * @param root32
     *            Base32 encoded root hash
     * @return the list of all nodes in this tree
     * @throws IOException
     *             in case of a problem reading from the InputStream
     */
    public static List<List<byte[]>> read(InputStream is, long fileSize, String root32)
            throws IOException {
        LOG.trace("creating HashTreeHandler from network");
        DIMEParser parser = new DIMEParser(is);
        return nodesFromRecords(parser, fileSize, root32);
    }

    /**
     * Returns the list of all tree nodes parsed from an iterator of DIME
     * records.  The first record must be the XML tree description, the
     * second the serialized hash tree; any further records are ignored.
     *
     * @param iterator
     *            the DIMERecords to read from
     * @param fileSize
     *            the size of the file we expect the hash tree for
     * @param root32
     *            Base32 encoded root hash
     * @return the list of all nodes in this tree
     * @throws IOException
     *             if the records are missing or malformed, or do not match
     *             the expected file size or root hash
     */
    static List<List<byte[]>> nodesFromRecords(Iterator<DIMERecord> iterator, long fileSize, String root32)
            throws IOException {
        if (!iterator.hasNext())
            throw new IOException("no xml record");
        DIMERecord xmlRecord = iterator.next();
        if (!iterator.hasNext())
            throw new IOException("no tree record");
        DIMERecord treeRecord = iterator.next();

        if (LOG.isDebugEnabled()) {
            LOG.debug("xml id: [" + xmlRecord.getIdentifier() + "]");
            LOG.debug("xml type: [" + xmlRecord.getTypeString() + "]");
            LOG.debug("tree id: [" + treeRecord.getIdentifier() + "]");
            LOG.debug("tree type: [" + treeRecord.getTypeString() + "]");
            LOG.debug("xml type num: [" + xmlRecord.getTypeId() + "]");
            LOG.debug("tree type num: [" + treeRecord.getTypeId() + "]");
        }

        while (iterator.hasNext()) {
            if (LOG.isWarnEnabled())
                LOG.warn("more elements in the dime record.");
            iterator.next(); // ignore them.
        }

        String xml = new String(xmlRecord.getData(), "UTF-8");
        byte[] hashTree = treeRecord.getData();

        XMLTreeDescription xtd = new XMLTreeDescription(xml);
        if (!xtd.isValid())
            throw new IOException("invalid XMLTreeDescription " + xtd.toString());
        if (xtd.getFileSize() != fileSize)
            throw new IOException("file size attribute was " + xtd.getFileSize()
                + " expected " + fileSize);

        HashTreeDescription htr = new HashTreeDescription(hashTree);
        if (!Base32.encode(htr.getRoot()).equals(root32))
            throw new IOException("Root hashes do not match");

        return htr.getAllNodes(fileSize);
    }
}
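/*
 * Minimal usage sketch, not part of the original class.  It assumes the
 * caller already knows the file size and the Base32-encoded Tiger root hash
 * (for example from a urn:tree:tiger URN) and has an InputStream positioned
 * at the DIME-encoded hash tree.  The class and method names below are
 * hypothetical and exist only for illustration.
 */
class TigerDimeReadUsageSketch {
    static void printTreeSummary(InputStream in, long fileSize, String root32)
            throws IOException {
        // read() verifies every generation against its parent and checks the
        // root against root32, throwing IOException on any mismatch.
        List<List<byte[]>> nodes = TigerDimeReadUtils.read(in, fileSize, root32);
        // The last generation holds the leaf hashes covering the file's blocks.
        List<byte[]> leaves = nodes.get(nodes.size() - 1);
        System.out.println("generations: " + nodes.size()
            + ", leaf hashes: " + leaves.size());
    }
}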