package com.limegroup.gnutella.tigertree;

import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.limegroup.gnutella.security.MerkleTree;

public class HashTreeUtils {

    private static final Log LOG = LogFactory.getLog(HashTreeUtils.class);

    public static final long KB = 1024;
    public static final long MB = 1024 * KB;

    public static final int BLOCK_SIZE = 1024;
    public static final byte INTERNAL_HASH_PREFIX = 0x01;

    /*
     * Iterative method to generate the parent nodes of an arbitrary depth.
     *
     * The 0th element of the returned List will always be a List of size 1,
     * containing a byte[] of the root hash.
     */
    public static List<List<byte[]>> createAllParentNodes(List<byte[]> nodes,
            MessageDigest messageDigest) {
        List<List<byte[]>> allNodes = new ArrayList<List<byte[]>>();
        allNodes.add(Collections.unmodifiableList(nodes));
        while (nodes.size() > 1) {
            nodes = HashTreeUtils.createParentGeneration(nodes, messageDigest);
            allNodes.add(0, nodes);
        }
        return allNodes;
    }

    /*
     * Creates the parent generation of the Merkle HashTree for a given child
     * generation.
     */
    public static List<byte[]> createParentGeneration(List<byte[]> nodes, MessageDigest md) {
        md.reset();
        // each pair of children collapses into one parent; an odd node out is promoted
        int size = (nodes.size() + 1) / 2;
        List<byte[]> ret = new ArrayList<byte[]>(size);
        Iterator<byte[]> iter = nodes.iterator();
        while (iter.hasNext()) {
            byte[] left = iter.next();
            if (iter.hasNext()) {
                byte[] right = iter.next();
                md.reset();
                // internal nodes are hashed with a 0x01 prefix, which
                // distinguishes them from leaf hashes
                md.update(HashTreeUtils.INTERNAL_HASH_PREFIX);
                md.update(left, 0, left.length);
                md.update(right, 0, right.length);
                ret.add(md.digest());
            } else {
                // odd node out: promote it unchanged to the parent generation
                ret.add(left);
            }
        }
        return ret;
    }

    /*
     * Creates a generation of leaf nodes. It is very important that nodeSize
     * equals 2^n (n >= 10), or we will not get the expected generation of
     * nodes of a Merkle HashTree.
     */
    public static List<byte[]> createTreeNodes(int nodeSize, long fileSize,
            InputStream is, MessageDigest messageDigest) throws IOException {
        List<byte[]> ret = new ArrayList<byte[]>((int) Math.ceil((double) fileSize / nodeSize));
        MessageDigest tt = new MerkleTree(messageDigest);
        byte[] block = new byte[HashTreeUtils.BLOCK_SIZE * 128];
        long offset = 0;
        int read = 0;
        while (offset < fileSize) {
            int nodeOffset = 0;
            long time = System.currentTimeMillis();
            // reset our TigerTree instance
            tt.reset();
            // hash nodes independently, never reading past the node boundary;
            // otherwise a short read followed by a full-block read could pull
            // bytes belonging to the next node into this one
            while (nodeOffset < nodeSize
                    && (read = is.read(block, 0, Math.min(block.length, nodeSize - nodeOffset))) != -1) {
                tt.update(block, 0, read);
                // update offsets
                nodeOffset += read;
                offset += read;
                // throttle hashing: sleep for twice as long as the last
                // read-and-update took, so hashing doesn't monopolize the disk
                try {
                    long sleep = (System.currentTimeMillis() - time) * 2;
                    if (sleep > 0)
                        Thread.sleep(sleep);
                } catch (InterruptedException ie) {
                    throw new IOException("interrupted during hashing operation");
                }
                time = System.currentTimeMillis();
            }
            // node hashed, add the hash to our internal List.
            ret.add(tt.digest());
            // verify sanity of the hashing.
            if (offset == fileSize) {
                // if read isn't already -1, the next read MUST be -1.
                // it wouldn't already be -1 if the fileSize was a multiple
                // of BLOCK_SIZE * 128
                if (read != -1 && is.read() != -1) {
                    LOG.warn("More data than fileSize!");
                    throw new IOException("unknown file size.");
                }
            } else if (read == -1 && offset != fileSize) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("couldn't hash whole file. read: " + read
                            + ", offset: " + offset + ", fileSize: " + fileSize);
                }
                throw new IOException("couldn't hash whole file.");
            }
        }
        return ret;
    }

    /**
     * Calculates which depth we want to use for the HashTree. For small files
     * we can save a lot of memory by not creating such a large HashTree.
     *
     * @param size the fileSize
     * @return the ideal generation depth for the fileSize
     */
    public static int calculateDepth(long size) {
        if (size < 256 * HashTreeUtils.KB)            // 256KB chunk, 0B tree
            return 0;
        else if (size < 512 * HashTreeUtils.KB)       // 256KB chunk, 24B tree
            return 1;
        else if (size < HashTreeUtils.MB)             // 256KB chunk, 72B tree
            return 2;
        else if (size < 2 * HashTreeUtils.MB)         // 256KB chunk, 168B tree
            return 3;
        else if (size < 4 * HashTreeUtils.MB)         // 256KB chunk, 360B tree
            return 4;
        else if (size < 8 * HashTreeUtils.MB)         // 256KB chunk, 744B tree
            return 5;
        else if (size < 16 * HashTreeUtils.MB)        // 256KB chunk, 1512B tree
            return 6;
        else if (size < 32 * HashTreeUtils.MB)        // 256KB chunk, 3048B tree
            return 7;
        else if (size < 64 * HashTreeUtils.MB)        // 256KB chunk, 6120B tree
            return 8;
        else if (size < 256 * HashTreeUtils.MB)       // 512KB chunk, 12264B tree
            return 9;
        else if (size < 1024 * HashTreeUtils.MB)      // 1MB chunk, 24552B tree
            return 10;
        else if (size < 4096 * HashTreeUtils.MB)      // 2MB chunks, 49128B tree
            return 11;
        else if (size < 64 * 1024 * HashTreeUtils.MB) // 80KB tree
            return 12;
        else                                          // 160KB tree, 8k * 128MB chunks for 1TB file
            return 13;
    }

    /**
     * Calculates the node size based on the file size and the target depth.
     * <p>
     * A tree of depth n has at most 2^n leaf nodes, so ideally the file would
     * be split into that many chunks. However, since chunk sizes have to be
     * powers of 2, we make each chunk the smallest power of 2 that is at
     * least as large as the ideal size.
     * <p>
     * This ensures the resulting tree will have between 2^(n-1) and 2^n leaf
     * nodes.
     */
    public static int calculateNodeSize(long fileSize, int depth) {
        // don't create more than this many nodes
        long maxNodes = 1L << depth;

        // calculate the ideal node size, rounding up
        long idealNodeSize = fileSize / maxNodes;
        if (fileSize % maxNodes != 0)
            idealNodeSize++;

        // calculate the node size; it must equal 2^n, n in {10, 11, ...}
        int n = MerkleTree.log2Ceil(idealNodeSize);
        // 2^n
        int nodeSize = 1 << n;
        if (LOG.isDebugEnabled()) {
            LOG.debug("fileSize " + fileSize);
            LOG.debug("depth " + depth);
            LOG.debug("nodeSize " + nodeSize);
        }

        // this is just to make sure we have the right nodeSize for our depth
        // of choice
        assert nodeSize * maxNodes >= fileSize :
            "nodeSize: " + nodeSize + ", fileSize: " + fileSize + ", maxNode: " + maxNodes;
        assert nodeSize * maxNodes <= fileSize * 2 :
            "nodeSize: " + nodeSize + ", fileSize: " + fileSize + ", maxNode: " + maxNodes;

        return nodeSize;
    }
}