package com.limegroup.gnutella.tigertree;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.bitzi.util.Base32;
import com.limegroup.gnutella.dime.DIMEGenerator;
import com.limegroup.gnutella.dime.DIMEParser;
import com.limegroup.gnutella.dime.DIMERecord;
import com.limegroup.gnutella.util.UUID;

/**
 * Class handling all the reading and writing of HashTrees to the network.
 *
 * @author Gregorio Roper
 */
class HashTreeHandler {
    private static final Log LOG = LogFactory.getLog(HashTreeHandler.class);

    private static final String OUTPUT_TYPE = "application/dime";

    private static final String SERIALIZED_TREE_TYPE =
        "http://open-content.net/spec/thex/breadthfirst";
    private static final String XML_TYPE = "text/xml";

    private static final byte[] TREE_TYPE_BYTES =
        getBytes(SERIALIZED_TREE_TYPE);
    private static final byte[] XML_TYPE_BYTES =
        getBytes(XML_TYPE);

    private static final String DIGEST =
        "http://open-content.net/spec/digest/tiger";

    private static final String DTD_PUBLIC_ID =
        "-//NET//OPEN-CONTENT//THEX 02//EN";
    private static final String DTD_SYSTEM_ID =
        "http://open-content.net/spec/thex/thex.dtd";
    private static final String DTD_ENTITY =
        "<!ELEMENT hashtree (file,digest,serializedtree)>" +
        "<!ELEMENT file EMPTY>" +
        "<!ATTLIST file size CDATA #REQUIRED>" +
        "<!ATTLIST file segmentsize CDATA #REQUIRED>" +
        "<!ELEMENT digest EMPTY>" +
        "<!ATTLIST digest algorithm CDATA #REQUIRED>" +
        "<!ATTLIST digest outputsize CDATA #REQUIRED>" +
        "<!ELEMENT serializedtree EMPTY>" +
        "<!ATTLIST serializedtree depth CDATA #REQUIRED>" +
        "<!ATTLIST serializedtree type CDATA #REQUIRED>" +
        "<!ATTLIST serializedtree uri CDATA #REQUIRED>";

    private static final String SYSTEM_STRING = "SYSTEM";

    private static final String XML_TREE_DESC_START =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
        "<!DOCTYPE hashtree " + SYSTEM_STRING +
        " \"" + DTD_SYSTEM_ID + "\">" +
        "<hashtree>";
    private static final String XML_TREE_DESC_END = "</hashtree>";

    private static final int HASH_SIZE = 24;

    /**
     * Returns the bytes of a string in UTF-8 format, or in the default
     * format if UTF-8 is unsupported for whatever reason.
     */
    private static byte[] getBytes(String string) {
        try {
            return string.getBytes("UTF-8");
        } catch (UnsupportedEncodingException uee) {
            LOG.debug(string, uee);
            return string.getBytes();
        }
    }

    ///////////////////////// WRITING ///////////////////////

    /**
     * The generator containing the DIME message to send.
     */
    private final DIMEGenerator GENERATOR;

    /**
     * Constructs a new handler for sending.
     *
     * @param tree
     *            the <tt>HashTree</tt> to construct this message from
     */
    public HashTreeHandler(HashTree tree) {
        LOG.trace("creating HashTreeHandler for sending");
        UUID uri = UUID.nextUUID();
        GENERATOR = new DIMEGenerator();
        GENERATOR.add(new XMLRecord(tree, uri));
        GENERATOR.add(new TreeRecord(tree, uri));
    }

    /**
     * Writes this HashTree to an OutputStream.
     *
     * @param os
     *            the <tt>OutputStream</tt> to write to.
     * @throws IOException
     *             if there was a problem writing to os.
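     *
     * <p>A minimal usage sketch (hypothetical caller code, not part of
     * this class), e.g. streaming the tree as an HTTP THEX response,
     * where <tt>sendHeaders</tt> and <tt>socketOut</tt> are assumed
     * names from the caller's side:
     * <pre>
     *   HashTreeHandler handler = new HashTreeHandler(tree);
     *   // Content-Type from getType(), Content-Length from getLength()
     *   sendHeaders(handler.getType(), handler.getLength());
     *   handler.write(socketOut);
     * </pre>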
     */
    public void write(OutputStream os) throws IOException {
        GENERATOR.write(os);
    }

    /**
     * Determines the length of the written data.
     */
    public int getLength() {
        return GENERATOR.getLength();
    }

    /**
     * Determines the mime type of the output.
     */
    public String getType() {
        return OUTPUT_TYPE;
    }

    /**
     * A simple XML DIMERecord.
     */
    private static class XMLRecord extends DIMERecord {
        XMLRecord(HashTree tree, UUID uri) {
            super(DIMERecord.TYPE_MEDIA_TYPE, null, null,
                  XML_TYPE_BYTES, getXML(tree, uri));
        }

        /**
         * Constructs the XML bytes.
         */
        private static byte[] getXML(HashTree tree, UUID uri) {
            String xml =
                XML_TREE_DESC_START +
                "<file size='" + tree.getFileSize() +
                "' segmentsize='" + HashTree.BLOCK_SIZE + "'/>" +
                "<digest algorithm='" + DIGEST +
                "' outputsize='" + HASH_SIZE + "'/>" +
                "<serializedtree depth='" + tree.getDepth() +
                "' type='" + SERIALIZED_TREE_TYPE +
                "' uri='uuid:" + uri + "'/>" +
                XML_TREE_DESC_END;
            return getBytes(xml);
        }
    }

    /**
     * Private DIMERecord for a Tree.
     */
    private static class TreeRecord extends DIMERecord {
        /**
         * The tree of this record.
         */
        private final HashTree TREE;

        /**
         * The length of the tree.
         */
        private final int LENGTH;

        TreeRecord(HashTree tree, UUID uri) {
            super(DIMERecord.TYPE_ABSOLUTE_URI, null,
                  getBytes("uuid:" + uri), TREE_TYPE_BYTES, null);
            TREE = tree;
            LENGTH = TREE.getNodeCount() * HASH_SIZE;
        }

        /**
         * Writes the tree's data to the specified output stream.
         */
        public void writeData(OutputStream out) throws IOException {
            for (Iterator i = TREE.getAllNodes().iterator(); i.hasNext(); ) {
                Iterator iter = ((List) i.next()).iterator();
                while (iter.hasNext())
                    out.write((byte[]) iter.next());
            }
            writePadding(getDataLength(), out);
        }

        /**
         * Determines the length of the data.
         */
        public int getDataLength() {
            return LENGTH;
        }
    }

    ////////////////////// READING /////////////////////

    /**
     * Reads a HashTree in DIME format from an input stream.
     * Returns the list of all nodes of the tree.
     *
     * @param is
     *            the <tt>InputStream</tt> to read from
     * @param fileSize
     *            the size of the file we expect the hash tree for
     * @param root32
     *            Base32 encoded root hash
     * @return the list of all nodes in this tree.
     * @throws IOException
     *             in case of a problem reading from the InputStream
     */
    static List read(InputStream is, long fileSize, String root32)
      throws IOException {
        LOG.trace("creating HashTreeHandler from network");
        DIMEParser parser = new DIMEParser(is);
        DIMERecord xmlRecord = parser.nextRecord();
        DIMERecord treeRecord = parser.nextRecord();
        if (LOG.isDebugEnabled()) {
            LOG.debug("xml id: [" + xmlRecord.getIdentifier() + "]");
            LOG.debug("xml type: [" + xmlRecord.getTypeString() + "]");
            LOG.debug("tree id: [" + treeRecord.getIdentifier() + "]");
            LOG.debug("tree type: [" + treeRecord.getTypeString() + "]");
            LOG.debug("xml type num: [" + xmlRecord.getTypeId() + "]");
            LOG.debug("tree type num: [" + treeRecord.getTypeId() + "]");
        }
        while (parser.hasNext()) {
            if (LOG.isWarnEnabled())
                LOG.warn("more elements in the dime record.");
            parser.nextRecord(); // ignore them.
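            // A THEX response consists of exactly one XML record followed
            // by one serialized-tree record, so any trailing records are
            // not part of the spec and can safely be skipped.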
        }
        String xml = new String(xmlRecord.getData(), "UTF-8");
        byte[] hashTree = treeRecord.getData();

        XMLTreeDescription xtd = new XMLTreeDescription(xml);
        if (!xtd.isValid())
            throw new IOException(
                "invalid XMLTreeDescription " + xtd.toString());
        if (xtd.getFileSize() != fileSize)
            throw new IOException(
                "file size attribute was " + xtd.getFileSize() +
                " expected " + fileSize);

        HashTreeDescription htr = new HashTreeDescription(hashTree);
        if (!Base32.encode(htr.getRoot()).equals(root32))
            throw new IOException("Root hashes do not match");

        return htr.getAllNodes(fileSize);
    }

    /**
     * Private class holding the XML tree description.
     *
     * @author Gregorio Roper
     */
    private static class XMLTreeDescription {
        private static final int UNKNOWN = 0;
        private static final int VALID = 1;
        private static final int INVALID = 2;
        private int _parsed = UNKNOWN;
        private long _fileSize = 0;
        private int _blockSize = 0;
        private String _algorithm = null;
        private int _hashSize = 0;
        private String _serializationType = null;
        private String _uri;
        private String data;

        protected XMLTreeDescription(String xml) {
            data = xml;
        }

        /**
         * Accessor for _fileSize.
         */
        long getFileSize() {
            return _fileSize;
        }

        /**
         * Accessor for _uri.
         */
        String getURI() {
            return _uri;
        }

        /**
         * Checks that the xml tree description matches what we expected.
         */
        boolean isValid() {
            if (_parsed == UNKNOWN) {
                _parsed = parse() ? VALID : INVALID;
            }
            if (_parsed == INVALID) {
                return false;
            } else if (_blockSize != HashTree.BLOCK_SIZE) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unexpected block size: " + _blockSize);
                return false;
            } else if (!DIGEST.equals(_algorithm)) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unsupported digest algorithm: " + _algorithm);
                return false;
            } else if (_hashSize != HASH_SIZE) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unexpected hash size: " + _hashSize);
                return false;
            } else if (!SERIALIZED_TREE_TYPE.equals(_serializationType)) {
                if (LOG.isDebugEnabled())
                    LOG.debug("unexpected serialization type: " +
                              _serializationType);
                return false;
            }
            return true;
        }

        /**
         * A simple parsing method for reading the xml tree description.
         */
        private boolean parse() {
            // hack!
            // Shareaza sends invalid XML: its DOCTYPE uses a lowercase
            // "system" keyword (e.g. <!DOCTYPE hashtree system "...">)
            // where XML requires the upper-case SYSTEM keyword, so we
            // upper-case it before handing the data to a strict parser.
            int offset = data.indexOf("system");
            if (offset > 0 && offset < data.indexOf(DTD_SYSTEM_ID)) {
                data = data.substring(0, offset) +
                       SYSTEM_STRING +
                       data.substring(offset + "system".length());
            }

            if (LOG.isDebugEnabled())
                LOG.debug("XMLTreeDescription read: " + data);

            DOMParser parser = new DOMParser();
            InputSource is = new InputSource(new StringReader(data));
            parser.setEntityResolver(new Resolver());

            try {
                parser.parse(is);
            } catch (IOException ioe) {
                LOG.debug(ioe);
                return false;
            } catch (SAXException saxe) {
                LOG.debug(saxe);
                return false;
            }

            Document doc = parser.getDocument();
            Node treeDesc = doc.getElementsByTagName("hashtree").item(0);
            if (treeDesc == null) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't find hashtree element: " + data);
                return false;
            }

            NodeList nodes = treeDesc.getChildNodes();
            for (int i = 0; i < nodes.getLength(); i++) {
                Node node = nodes.item(i);
                if (node.getNodeType() == Node.ELEMENT_NODE) {
                    Element el = (Element) node;
                    if (el.getTagName().equals("file"))
                        parseFileElement(el);
                    else if (el.getTagName().equals("digest"))
                        parseDigestElement(el);
                    else if (el.getTagName().equals("serializedtree"))
                        parseSerializedtreeElement(el);
                }
            }
            return true;
        }

        private void parseFileElement(Element e) {
            try {
                _fileSize = Long.parseLong(e.getAttribute("size"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse file size: " +
                              e.getAttribute("size"), nfe);
            }

            try {
                _blockSize = Integer.parseInt(e.getAttribute("segmentsize"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse block size: " +
                              e.getAttribute("segmentsize"), nfe);
            }
        }

        private void parseDigestElement(Element e) {
            _algorithm = e.getAttribute("algorithm");
            try {
                _hashSize = Integer.parseInt(e.getAttribute("outputsize"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse hash size: " +
                              e.getAttribute("outputsize"), nfe);
            }
        }

        private void parseSerializedtreeElement(Element e) {
            _serializationType = e.getAttribute("type");
            _uri = e.getAttribute("uri");
            try {
                // the value is ignored, but if it can't be parsed we
                // should add a notice to the log
                Integer.parseInt(e.getAttribute("depth"));
            } catch (NumberFormatException nfe) {
                if (LOG.isDebugEnabled())
                    LOG.debug("couldn't parse depth: " +
                              e.getAttribute("depth"), nfe);
            }
        }
    }

    /**
     * A custom EntityResolver so we don't hit a website for resolving.
     */
    private static final class Resolver implements EntityResolver {
        public Resolver() {}

        public InputSource resolveEntity(String publicId, String systemId)
          throws SAXException, IOException {
            if (systemId.equals(DTD_SYSTEM_ID)) {
                InputSource is = new InputSource(new StringReader(DTD_ENTITY));
                is.setPublicId(DTD_PUBLIC_ID); // optional
                is.setSystemId(DTD_SYSTEM_ID); // required
                return is;
            }
            // The parser will open a regular URI connection to the systemId
            // if we return null.  Here we don't want this to occur, so we
            // fail instead.
            if (publicId == null)
                throw new SAXException("Can't resolve SYSTEM entity at '" +
                                       systemId + "'");
            else
                throw new SAXException("Can't resolve PUBLIC entity '" +
                                       publicId + "' at '" +
                                       systemId + "'");
        }
    }

    /**
     * Private class holding the serialized HashTree.
     *
     * @author Gregorio Roper
     */
    private static class HashTreeDescription {
        private final byte[] DATA;

        protected HashTreeDescription(byte[] data) {
            DATA = data;
        }

        /**
         * Accessor for the root hash.
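         * Because THEX serializes the tree breadth-first starting at the
         * root generation, the root hash is simply the first HASH_SIZE
         * bytes of the data.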
         */
        byte[] getRoot() throws IOException {
            if (DATA.length < HASH_SIZE)
                throw new IOException("invalid data");
            byte[] ret = new byte[HASH_SIZE];
            System.arraycopy(DATA, 0, ret, 0, HASH_SIZE);
            return ret;
        }

        /**
         * Returns a List containing all generations of nodes from the
         * hash tree, verifying each generation against its parent.
         *
         * @throws IOException if the hashes did not match.
         */
        List getAllNodes(long fileSize) throws IOException {
            int depth = HashTree.calculateDepth(fileSize);
            List hashes = new ArrayList();
            byte[] data = DATA;

            if (data.length % HASH_SIZE != 0) {
                if (LOG.isDebugEnabled())
                    LOG.debug("illegal size of data field for HashTree");
                throw new IOException("corrupted hash tree detected");
            }

            // read the hashes from the data field
            for (int i = 0; i + HASH_SIZE <= data.length; i += HASH_SIZE) {
                byte[] hash = new byte[HASH_SIZE];
                System.arraycopy(data, i, hash, 0, HASH_SIZE);
                hashes.add(hash);
            }

            String root32 = Base32.encode(getRoot());
            // iterator over all hashes we read
            Iterator hashIterator = hashes.iterator();
            // the current generation we are working on
            List generation = new ArrayList(1);
            // stores the last verified generation
            List parent = null;
            // index of the generation we are working on.
            int genIndex = 0;
            // whether or not the current row is verified.
            boolean verified = false;

            List allNodes = new ArrayList(depth + 1);

            // Iterate through the read elements and see if they match
            // what we calculate.
            // Only calculate when we've read enough of the current
            // generation that it may be a full generation.
            // Imagine the trees:
            //
            //            A
            //           / \
            //          B   C
            //         / \   \
            //        D   E   C
            //       / \ / \   \
            //      F  G H  I   C
            // or
            //            A
            //           / \
            //          B   C
            //         / \ / \
            //        D  E F  G
            //       /\ /\ /\ /\
            //      I H J K L M N O
            //
            // In both cases, we have only read the full child generation
            // when we've read parent.size()*2 or parent.size()*2-1
            // child nodes.
            // If it didn't match at parent.size()*2, and the child
            // generation grows beyond that, then the tree is corrupt.
            while (genIndex <= depth && hashIterator.hasNext()) {
                verified = false;
                byte[] hash = (byte[]) hashIterator.next();
                generation.add(hash);
                if (parent == null) {
                    verified = true;
                    // add generation 0 containing the root hash
                    genIndex++;
                    parent = generation;
                    allNodes.add(generation);
                    generation = new ArrayList(2);
                } else if (generation.size() > parent.size() * 2) {
                    // the current generation is already too big => the
                    // hash tree is corrupted, abort at once!
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("parent");
                        String str = "";
                        for (Iterator iter = parent.iterator();
                             iter.hasNext(); ) {
                            str = str + Base32.encode((byte[]) iter.next()) + "; ";
                        }
                        LOG.debug(str);
                        str = "";
                        LOG.debug("newparent");
                        List newparent =
                            HashTree.createParentGeneration(generation);
                        for (Iterator iter = newparent.iterator();
                             iter.hasNext(); ) {
                            str = str + Base32.encode((byte[]) iter.next()) + "; ";
                        }
                        LOG.debug(str);
                        str = "";
                        LOG.debug("generation");
                        for (Iterator iter = generation.iterator();
                             iter.hasNext(); ) {
                            str = str + Base32.encode((byte[]) iter.next()) + "; ";
                        }
                        LOG.debug(str);
                        str = "";
                    }
                    throw new IOException("corrupted hash tree detected");
                } else if (generation.size() == parent.size() * 2 - 1 ||
                           generation.size() == parent.size() * 2) {
                    List calculatedParent =
                        HashTree.createParentGeneration(generation);
                    if (isMatching(parent, calculatedParent)) {
                        // the current generation is complete and verified!
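                        // (hashing each pair of children reproduced the
                        // parent row exactly, so this row can be trusted
                        // and becomes the parent for the next row)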
                        genIndex++;
                        parent = generation;
                        allNodes.add(Collections.unmodifiableList(generation));
                        // only create room for a new generation if one exists
                        if (genIndex <= depth && hashIterator.hasNext())
                            generation = new ArrayList(parent.size() * 2);
                        verified = true;
                    }
                }
            } // end of while

            // If the current row could not be verified, fail.
            // In almost all cases, this will occur via the inner if
            // statement in the above loop.  However, if the last row
            // is the one that had the problem, the loop will not catch it.
            if (!verified)
                throw new IOException("corrupted hash tree detected");

            LOG.debug("Valid hash tree received.");
            return allNodes;
        }

        /**
         * Determines whether two lists of byte arrays match completely.
         */
        private boolean isMatching(List a, List b) {
            if (a.size() == b.size()) {
                for (int i = 0; i < a.size(); i++) {
                    byte[] one = (byte[]) a.get(i);
                    byte[] two = (byte[]) b.get(i);
                    if (!Arrays.equals(one, two))
                        return false;
                }
                return true;
            }
            return false;
        }
    }
}
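// A minimal read-side sketch (hypothetical caller code; "connection",
// "fileSize" and "root32" are assumed to come from the download logic,
// e.g. the Base32 tiger-tree root of the file being downloaded):
//
//   InputStream in = connection.getInputStream();
//   List allNodes = HashTreeHandler.read(in, fileSize, root32);
//   // the last generation holds the leaf hashes, one per BLOCK_SIZE chunk
//   List leaves = (List) allNodes.get(allNodes.size() - 1);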