package com.limegroup.gnutella.tigertree;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.bitzi.util.Base32;
import com.limegroup.gnutella.Assert;
import com.limegroup.gnutella.FileDesc;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.downloader.Interval;
import com.limegroup.gnutella.http.HTTPConstants;
import com.limegroup.gnutella.http.HTTPHeaderValue;
import com.limegroup.gnutella.security.Tiger;
import com.limegroup.gnutella.security.TigerTree;

/**
 * This class stores a HashTree and can verify a file against it; it is
 * also used for persisting trees to disk.
 *
 * Be careful when modifying any non-transient fields, as this class is
 * serialized to disk.
 *
 * @author Gregorio Roper
 */
public class HashTree implements HTTPHeaderValue, Serializable {

    private static final long serialVersionUID = -5752974896215224469L;

    private static transient final Log LOG = LogFactory.getLog(HashTree.class);

    // some static constants
    private static transient final int KB = 1024;
    private static transient final int MB = 1024 * KB;
    static transient final int BLOCK_SIZE = 1024;
    private static transient final byte INTERNAL_HASH_PREFIX = 0x01;

    // non-transient fields below are written to the output stream when
    // this class is serialized.

    /**
     * The lowest depth list of nodes.
     */
    private final List /* of byte[] */ NODES;

    /**
     * The tigertree root hash.
     */
    private final byte[] ROOT_HASH;

    /**
     * The size of the file this hash identifies.
     */
    private final long FILE_SIZE;

    /**
     * The depth of this tree.
     */
    private final int DEPTH;

    /**
     * The URI for this hash tree.
     */
    private final String THEX_URI;

    /**
     * The tree writer.
     */
    private transient HashTreeHandler _treeWriter;

    /**
     * The size of each node.
     */
    private transient int _nodeSize;

    /**
     * Constructs a new HashTree out of the given nodes, root, sha1
     * and filesize.
     */
    private HashTree(List allNodes, String sha1, long fileSize) {
        this(allNodes, sha1, fileSize,
             calculateNodeSize(fileSize, allNodes.size() - 1));
    }

    /**
     * Constructs a new HashTree out of the given nodes, root, sha1,
     * filesize and chunk size.
     */
    private HashTree(List allNodes, String sha1, long fileSize, int nodeSize) {
        THEX_URI = HTTPConstants.URI_RES_N2X + sha1;
        NODES = (List)allNodes.get(allNodes.size() - 1);
        FILE_SIZE = fileSize;
        ROOT_HASH = (byte[])((List)allNodes.get(0)).get(0);
        DEPTH = allNodes.size() - 1;
        Assert.that(TigerTree.log2Ceil(NODES.size()) == DEPTH);
        // use long math so the check does not overflow for very large files
        Assert.that((long)NODES.size() * nodeSize >= fileSize);
        HashTreeNodeManager.instance().register(this, allNodes);
        _nodeSize = nodeSize;
    }

    /**
     * Creates a new HashTree for the given FileDesc.
     */
    static HashTree createHashTree(FileDesc fd) throws IOException {
        if (LOG.isDebugEnabled())
            LOG.debug("creating hashtree for file " + fd);
        InputStream in = null;
        try {
            in = fd.createInputStream();
            return createHashTree(fd.getFileSize(), in, fd.getSHA1Urn());
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {}
            }
        }
    }
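    /*
     * Worked example for the node size calculation below (illustrative
     * figures, not taken from the original source): a 5MB file gets
     * depth 5 from calculateDepth, so maxNodes = 1 << 5 = 32 and the
     * ideal node size is 5242880 / 32 = 163840 bytes. The closest power
     * of two >= 163840 is 2^18 = 262144, so the file is hashed as
     * 5242880 / 262144 = 20 leaf chunks of 256KB, which lies between
     * 2^4 = 16 and 2^5 = 32 leaves, as the javadoc below promises.
     */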
    /**
     * Calculates the node size based on the file size and the target depth.
     *
     * A tree of depth n has at most 2^n leaf nodes, so ideally the file
     * would be split into that many chunks. However, since chunks have to
     * be powers of 2, we make the size of each chunk the closest power of 2
     * that is bigger than the ideal size.
     *
     * This ensures the resulting tree will have between 2^(n-1) and 2^n
     * leaf nodes.
     */
    public static int calculateNodeSize(long fileSize, int depth) {
        // don't create more than this many nodes
        int maxNodes = 1 << depth;

        // calculate the ideal node size ...
        // (divide before casting, so large files don't overflow the int)
        int idealNodeSize = (int)(fileSize / maxNodes);
        // ... rounding up!
        if (fileSize % maxNodes != 0)
            idealNodeSize++;

        // calculate the node size; it must be a power of two
        // (2^n, typically n >= 10)
        int n = TigerTree.log2Ceil(idealNodeSize);
        // 2^n
        int nodeSize = 1 << n;
        if (LOG.isDebugEnabled()) {
            LOG.debug("fileSize " + fileSize);
            LOG.debug("depth " + depth);
            LOG.debug("nodeSize " + nodeSize);
        }

        // this is just to make sure we have the right nodeSize for our
        // depth of choice
        Assert.that(nodeSize * (long)maxNodes >= fileSize,
                    "nodeSize: " + nodeSize +
                    ", fileSize: " + fileSize +
                    ", maxNode: " + maxNodes);
        Assert.that(nodeSize * (long)maxNodes <= fileSize * 2,
                    "nodeSize: " + nodeSize +
                    ", fileSize: " + fileSize +
                    ", maxNode: " + maxNodes);

        return nodeSize;
    }

    /**
     * Creates a new HashTree for the given file size, input stream and SHA1.
     *
     * Exists as a hook for tests, to create a HashTree from a File
     * when no FileDesc exists.
     */
    private static HashTree createHashTree(long fileSize, InputStream is,
                                           URN sha1) throws IOException {
        // do the actual hashing
        int nodeSize = calculateNodeSize(fileSize, calculateDepth(fileSize));
        List nodes = createTTNodes(nodeSize, fileSize, is);
        // calculate the intermediary nodes to get the root hash & others.
        List allNodes = createAllParentNodes(nodes);
        return new HashTree(allNodes, sha1.toString(), fileSize, nodeSize);
    }

    /**
     * Reads a new HashTree from the network. It is expected that the
     * data is in DIME format, the first record being an XML description
     * of the tree's structure, and the second record being the
     * breadth-first tree.
     *
     * @param is
     *            the <tt>InputStream</tt> to read from
     * @param sha1
     *            a <tt>String</tt> containing the sha1 URN for the same file
     * @param root32
     *            a <tt>String</tt> containing the Base32 encoded expected
     *            root hash
     * @param fileSize
     *            the long specifying the size of the file
     * @return HashTree if we successfully read from the network
     * @throws IOException if there was an error reading from the network
     *         or if the data was corrupted or invalid in any way
     */
    public static HashTree createHashTree(InputStream is, String sha1,
                                          String root32, long fileSize)
      throws IOException {
        if (LOG.isTraceEnabled())
            LOG.trace("reading " + sha1 + "." + root32 + " dime data.");
        return new HashTree(HashTreeHandler.read(is, fileSize, root32),
                            sha1, fileSize);
    }

    /**
     * Checks whether the specified area of the file matches the hash tree.
     */
    public boolean isCorrupt(Interval in, byte[] data) {
        return isCorrupt(in, data, data.length);
    }

    /**
     * Checks whether the specified area of the file matches the hash tree.
     */
    public boolean isCorrupt(Interval in, byte[] data, int length) {
        Assert.that(in.high <= FILE_SIZE);

        // if the interval is not a fixed chunk, we cannot verify it
        // (actually we could, but it's more complicated)
        if (in.low % _nodeSize == 0 &&
            in.high - in.low + 1 <= _nodeSize &&
            (in.high == in.low + _nodeSize - 1 || in.high == FILE_SIZE - 1)) {
            TigerTree digest = new TigerTree();
            digest.update(data, 0, length);
            byte[] hash = digest.digest();
            byte[] treeHash = (byte[])NODES.get(in.low / _nodeSize);
            boolean ok = Arrays.equals(treeHash, hash);
            if (LOG.isDebugEnabled())
                LOG.debug("interval " + in + " verified " + ok);
            return !ok;
        }
        return true;
    }
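    /*
     * Illustrative sketch of the check above (assumed values, not from
     * the original source): with FILE_SIZE = 1048576 (1MB) and
     * _nodeSize = 131072 (128KB), the verifiable intervals are exactly
     * the eight aligned chunks [0,131071], [131072,262143], ...,
     * [917504,1048575]. A misaligned interval such as [1000,132071] is
     * reported as corrupt, because it cannot be compared against a
     * single leaf hash.
     */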
    /**
     * @return Thex URI for this HashTree
     * @see com.limegroup.gnutella.http.HTTPHeaderValue#httpStringValue()
     */
    public String httpStringValue() {
        return THEX_URI + ";" + Base32.encode(ROOT_HASH);
    }

    /**
     * @return true if the DEPTH is ideal according to our own standards,
     *         else we know that we have to rebuild the HashTree
     */
    public boolean isGoodDepth() {
        return (DEPTH == calculateDepth(FILE_SIZE));
    }

    /**
     * @return true if the DEPTH is close enough to ideal according to our
     *         own standards
     */
    public boolean isDepthGoodEnough() {
        // for some ranges newDepth actually returns smaller values than
        // oldDepth
        return DEPTH >= calculateDepth(FILE_SIZE) - 1;
    }

    /**
     * Determines if this tree is better than another.
     *
     * A tree is considered better if the other's depth is not 'good' and
     * this depth is good; if neither is good, the tree whose depth is
     * closer to 'good' wins.
     */
    public boolean isBetterTree(HashTree other) {
        if (other == null)
            return true;
        else if (other.isGoodDepth())
            return false;
        else if (this.isGoodDepth())
            return true;
        else {
            int ideal = calculateDepth(FILE_SIZE);
            int diff1 = Math.abs(this.DEPTH - ideal);
            int diff2 = Math.abs(other.DEPTH - ideal);
            return diff1 < diff2;
        }
    }

    /**
     * @return long Returns the FILE_SIZE.
     */
    public long getFileSize() {
        return FILE_SIZE;
    }

    /**
     * @return String Returns the Base32 encoded root hash.
     */
    public String getRootHash() {
        return Base32.encode(ROOT_HASH);
    }

    /**
     * @return String the THEX_URI.
     */
    public String getThexURI() {
        return THEX_URI;
    }

    /**
     * @return int the DEPTH.
     */
    public int getDepth() {
        return DEPTH;
    }

    /**
     * @return List the NODES.
     */
    public List getNodes() {
        return NODES;
    }

    public synchronized int getNodeSize() {
        if (_nodeSize == 0) {
            // we were deserialized
            _nodeSize = calculateNodeSize(FILE_SIZE, DEPTH);
        }
        return _nodeSize;
    }

    /**
     * @return The number of nodes in the full tree.
     */
    public int getNodeCount() {
        // This works by calculating how many nodes
        // will be in the tree based on the number of nodes
        // at the last depth. The previous depth is always
        // going to have ceil(current/2) nodes.
        double last = NODES.size();
        int count = (int)last;
        for (int i = DEPTH - 1; i >= 0; i--) {
            last = Math.ceil(last / 2);
            count += (int)last;
        }
        return count;
    }
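    /*
     * Worked example for getNodeCount (illustrative, not from the
     * original source): a tree with 20 leaves at DEPTH 5 contains
     * 20 + 10 + 5 + 3 + 2 + 1 = 41 nodes, since every generation above
     * the leaves holds ceil(previous / 2) entries, up to the single
     * root node.
     */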
    /**
     * @return all nodes.
     */
    public List getAllNodes() {
        return HashTreeNodeManager.instance().getAllNodes(this);
    }

    /**
     * Writes this HashTree to the specified OutputStream using DIME.
     */
    public void write(OutputStream out) throws IOException {
        getTreeWriter().write(out);
    }

    /**
     * Determines the length of the tree's output.
     */
    public int getOutputLength() {
        return getTreeWriter().getLength();
    }

    /**
     * Determines the type of the output.
     */
    public String getOutputType() {
        return getTreeWriter().getType();
    }

    /**
     * Calculates which depth we want to use for the HashTree. For small
     * files we can save a lot of memory by not creating such a large
     * HashTree.
     *
     * @param size
     *            the fileSize
     * @return int the ideal generation depth for the fileSize
     */
    public static int calculateDepth(long size) {
        if (size < 256 * KB)            // 256KB chunk, 0B tree
            return 0;
        else if (size < 512 * KB)       // 256KB chunk, 24B tree
            return 1;
        else if (size < MB)             // 256KB chunk, 72B tree
            return 2;
        else if (size < 2 * MB)         // 256KB chunk, 168B tree
            return 3;
        else if (size < 4 * MB)         // 256KB chunk, 360B tree
            return 4;
        else if (size < 8 * MB)         // 256KB chunk, 744B tree
            return 5;
        else if (size < 16 * MB)        // 256KB chunk, 1512B tree
            return 6;
        else if (size < 32 * MB)        // 256KB chunk, 3048B tree
            return 7;
        else if (size < 64 * MB)        // 256KB chunk, 6120B tree
            return 8;
        else if (size < 256 * MB)       // 512KB chunk, 12264B tree
            return 9;
        else if (size < 1024 * MB)      // 1MB chunk, 24552B tree
            return 10;
        else                            // 2MB chunk, 49128B tree
            return 11;
    }

    /**
     * Returns the TreeWriter, initializing it if necessary.
     * No volatile or locking is necessary, because it's not a huge
     * deal if we create two of these.
     */
    private HashTreeHandler getTreeWriter() {
        if (_treeWriter == null)
            _treeWriter = new HashTreeHandler(this);
        return _treeWriter;
    }

    /*
     * Static helper methods.
     */

    /*
     * Iterative method to generate the parent nodes of an arbitrary
     * depth.
     *
     * The 0th element of the returned List will always be a List of size
     * 1, containing a byte[] of the root hash.
     */
    static List createAllParentNodes(List nodes) {
        List allNodes = new ArrayList();
        allNodes.add(Collections.unmodifiableList(nodes));
        while (nodes.size() > 1) {
            nodes = createParentGeneration(nodes);
            allNodes.add(0, nodes);
        }
        return allNodes;
    }

    /*
     * Create the parent generation of the Merkle HashTree for a given
     * child generation.
     */
    static List createParentGeneration(List nodes) {
        MessageDigest md = new Tiger();
        // a parent generation holds ceil(childCount / 2) nodes
        int size = (nodes.size() + 1) / 2;
        List ret = new ArrayList(size);
        Iterator iter = nodes.iterator();
        while (iter.hasNext()) {
            byte[] left = (byte[])iter.next();
            if (iter.hasNext()) {
                byte[] right = (byte[])iter.next();
                md.reset();
                md.update(INTERNAL_HASH_PREFIX);
                md.update(left, 0, left.length);
                md.update(right, 0, right.length);
                byte[] result = md.digest();
                ret.add(result);
            } else {
                // odd node out: promote it to the parent generation as-is
                ret.add(left);
            }
        }
        return ret;
    }
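    /*
     * Illustrative example for createParentGeneration (hypothetical
     * hashes A..E, not from the original source): the child generation
     * [A, B, C, D, E] collapses to
     * [tiger(0x01, A, B), tiger(0x01, C, D), E]; adjacent pairs are
     * combined under the 0x01 internal-node prefix and the odd node out
     * is promoted unchanged, as in the THEX tree specification.
     */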
    /*
     * Create a generation of nodes. It is very important that nodeSize
     * equals 2^n (n >= 10) or we will not get the expected generation of
     * nodes of a Merkle HashTree.
     */
    private static List createTTNodes(int nodeSize, long fileSize,
                                      InputStream is) throws IOException {
        List ret = new ArrayList((int)Math.ceil((double)fileSize / nodeSize));
        MessageDigest tt = new TigerTree();
        byte[] block = new byte[BLOCK_SIZE * 128];
        long offset = 0;
        int read = 0;
        while (offset < fileSize) {
            int nodeOffset = 0;
            long time = System.currentTimeMillis();
            // reset our TigerTree instance
            tt.reset();
            // hash each node independently, never reading past the end of
            // the current node even if the stream returns short reads
            while (nodeOffset < nodeSize &&
                   (read = is.read(block, 0,
                        Math.min(block.length, nodeSize - nodeOffset))) != -1) {
                tt.update(block, 0, read);
                // update offsets
                nodeOffset += read;
                offset += read;
                // throttle the hashing: sleep for twice as long as the
                // last read-and-update took, so hashing consumes at most
                // roughly a third of the available disk/CPU time
                try {
                    long sleep = (System.currentTimeMillis() - time) * 2;
                    if (sleep > 0)
                        Thread.sleep(sleep);
                } catch (InterruptedException ie) {
                    throw new IOException("interrupted during hashing operation");
                }
                time = System.currentTimeMillis();
            }
            // node hashed, add the hash to our internal List.
            ret.add(tt.digest());

            // verify the sanity of the hashing.
            if (offset == fileSize) {
                // if read isn't already -1, the next read MUST be -1.
                // it wouldn't already be -1 if the file ended exactly on
                // a node boundary
                if (read != -1 && is.read() != -1) {
                    LOG.warn("More data than fileSize!");
                    throw new IOException("unknown file size.");
                }
            } else if (read == -1 && offset != fileSize) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("couldn't hash whole file. " +
                             "read: " + read +
                             ", offset: " + offset +
                             ", fileSize: " + fileSize);
                }
                throw new IOException("couldn't hash whole file.");
            }
        }
        return ret;
    }
}
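/*
 * Usage sketch (illustrative only, not part of the original source;
 * assumes an existing FileDesc "fd" and an open OutputStream "out"):
 *
 *   HashTree tree = HashTree.createHashTree(fd);  // hash the whole file
 *   String value = tree.httpStringValue();        // THEX URI plus root hash,
 *                                                 // of the form
 *                                                 // "<uri-res N2X prefix><sha1>;<root32>",
 *                                                 // e.g. for an X-Thex-URI header
 *   tree.write(out);                              // DIME-encoded tree
 */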