package com.limegroup.gnutella.tigertree;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.limegroup.gnutella.security.MerkleTree;
public class HashTreeUtils {

    private static final Log LOG = LogFactory.getLog(HashTreeUtils.class);

    public static final long KB = 1024;
    public static final long MB = 1024 * KB;

    /** Base block size; data is read in chunks of {@code BLOCK_SIZE * 128} (128KB). */
    public static final int BLOCK_SIZE = 1024;

    /**
     * Prefix byte hashed before each pair of child hashes so that internal-node
     * hashes can never collide with leaf hashes (THEX tree construction).
     */
    public static final byte INTERNAL_HASH_PREFIX = 0x01;

    /**
     * Iteratively generates every parent generation of a Merkle HashTree,
     * from the given leaf generation up to the root.
     *
     * @param nodes the leaf (lowest) generation of hashes, left to right
     * @param messageDigest digest used to combine sibling hashes
     * @return all generations ordered root-first: the 0th element is always a
     *         List of size 1 containing the root hash, and the last element is
     *         an unmodifiable view of the leaf generation passed in
     */
    public static List<List<byte[]>> createAllParentNodes(List<byte[]> nodes, MessageDigest messageDigest) {
        List<List<byte[]>> allNodes = new ArrayList<List<byte[]>>();
        allNodes.add(Collections.unmodifiableList(nodes));
        while (nodes.size() > 1) {
            nodes = createParentGeneration(nodes, messageDigest);
            // prepend so the root ends up at index 0
            allNodes.add(0, nodes);
        }
        return allNodes;
    }

    /**
     * Creates the parent generation of a Merkle HashTree for a given child
     * generation. Siblings are combined pairwise with the internal-hash
     * prefix; an odd trailing node is promoted to the parent generation
     * unchanged, per the THEX rules.
     *
     * @param nodes the child generation, left to right
     * @param md digest used to hash each sibling pair
     * @return the parent generation, of size {@code ceil(nodes.size() / 2)}
     */
    public static List<byte[]> createParentGeneration(List<byte[]> nodes, MessageDigest md) {
        md.reset();
        // ceil(size / 2): every pair collapses to one parent, a leftover is promoted
        List<byte[]> ret = new ArrayList<byte[]>((nodes.size() + 1) / 2);
        Iterator<byte[]> iter = nodes.iterator();
        while (iter.hasNext()) {
            byte[] left = iter.next();
            if (iter.hasNext()) {
                byte[] right = iter.next();
                md.reset();
                md.update(INTERNAL_HASH_PREFIX);
                md.update(left, 0, left.length);
                md.update(right, 0, right.length);
                ret.add(md.digest());
            } else {
                // odd node out: promoted unchanged
                ret.add(left);
            }
        }
        return ret;
    }

    /**
     * Creates the leaf generation of nodes by hashing the stream, one hash per
     * {@code nodeSize} bytes. It is very important that nodeSize equals
     * 2^n (n&gt;=10) or we will not get the expected generation of nodes of a
     * Merkle HashTree.
     * <p>
     * Hashing is deliberately throttled: after each chunk the thread sleeps
     * for twice the time the chunk took to read and hash, so hashing consumes
     * at most roughly a third of the available CPU/disk bandwidth.
     *
     * @param nodeSize number of bytes each leaf node covers; must be a power of two
     * @param fileSize expected total number of bytes in the stream
     * @param is the stream to hash; consumed but not closed
     * @param messageDigest base digest, wrapped in a {@link MerkleTree} per node
     * @return one hash per node, in stream order
     * @throws IOException if the stream ends before {@code fileSize} bytes,
     *         contains more than {@code fileSize} bytes, or the calling thread
     *         is interrupted (the interrupt status is restored first)
     */
    public static List<byte[]> createTreeNodes(int nodeSize, long fileSize, InputStream is, MessageDigest messageDigest)
            throws IOException {
        List<byte[]> ret = new ArrayList<byte[]>((int) Math.ceil((double) fileSize / nodeSize));
        MessageDigest tt = new MerkleTree(messageDigest);
        byte[] block = new byte[BLOCK_SIZE * 128];
        long offset = 0;
        int read = 0;
        while (offset < fileSize) {
            int nodeOffset = 0;
            long time = System.currentTimeMillis();
            // reset our TigerTree instance for the next node
            tt.reset();
            // hash one node's worth of data, never reading past the node boundary
            while (nodeOffset < nodeSize
                    && (read = is.read(block, 0, (int) Math.min(block.length, (long) nodeSize - nodeOffset))) != -1) {
                tt.update(block, 0, read);
                // update offsets
                nodeOffset += read;
                offset += read;
                try {
                    // throttle: sleep twice as long as the chunk took
                    long sleep = (System.currentTimeMillis() - time) * 2;
                    if (sleep > 0)
                        Thread.sleep(sleep);
                } catch (InterruptedException ie) {
                    // restore the interrupt status for callers before converting,
                    // and preserve the original exception as the cause
                    Thread.currentThread().interrupt();
                    IOException iox = new IOException("interrupted during hashing operation");
                    iox.initCause(ie);
                    throw iox;
                }
                time = System.currentTimeMillis();
            }
            // node hashed, add the hash to our internal List.
            ret.add(tt.digest());
            // verify sanity of the hashing.
            if (offset == fileSize) {
                // if read isn't already -1, the next read MUST be -1.
                // it wouldn't already be -1 if the fileSize was a multiple
                // of BLOCK_SIZE * 128
                if (read != -1 && is.read() != -1) {
                    LOG.warn("More data than fileSize!");
                    throw new IOException("unknown file size.");
                }
            } else if (read == -1) {
                // stream ended early: offset < fileSize here by the check above
                if (LOG.isWarnEnabled()) {
                    LOG.warn("couldn't hash whole file. " +
                            "read: " + read +
                            ", offset: " + offset +
                            ", fileSize: " + fileSize);
                }
                throw new IOException("couldn't hash whole file.");
            }
        }
        return ret;
    }

    /**
     * Calculates which depth we want to use for the HashTree. For small files
     * we can save a lot of memory by not creating such a large HashTree.
     *
     * @param size the fileSize
     * @return int the ideal generation depth for the fileSize
     */
    public static int calculateDepth(long size) {
        if (size < 256 * KB) // 256KB chunk, 0b tree
            return 0;
        else if (size < 512 * KB) // 256KB chunk, 24B tree
            return 1;
        else if (size < MB) // 256KB chunk, 72B tree
            return 2;
        else if (size < 2 * MB) // 256KB chunk, 168B tree
            return 3;
        else if (size < 4 * MB) // 256KB chunk, 360B tree
            return 4;
        else if (size < 8 * MB) // 256KB chunk, 744B tree
            return 5;
        else if (size < 16 * MB) // 256KB chunk, 1512B tree
            return 6;
        else if (size < 32 * MB) // 256KB chunk, 3048B tree
            return 7;
        else if (size < 64 * MB) // 256KB chunk, 6120B tree
            return 8;
        else if (size < 256 * MB) // 512KB chunk, 12264B tree
            return 9;
        else if (size < 1024 * MB) // 1MB chunk, 24552B tree
            return 10;
        else if (size < 4096 * MB) // 2MB chunks, 49128B tree
            return 11;
        else if (size < 64 * 1024 * MB) // 80KB tree
            return 12;
        else
            return 13; // 160KB tree, 8k * 128MB chunks for 1TB file
    }

    /**
     * Calculates the node size based on the file size and the target depth.
     * <p>
     * A tree of depth n has 2^(n-1) leaf nodes, so ideally the file will be
     * split in that many chunks. However, since chunks have to be powers of 2,
     * we make the size of each chunk the closest power of 2 that is bigger than
     * the ideal size.
     * <p>
     * This ensures the resulting tree will have between 2^(n-2) and 2^(n-1) nodes.
     *
     * @param fileSize the size of the file to hash
     * @param depth the target tree depth, as from {@link #calculateDepth(long)}
     * @return the node size: a power of two such that {@code nodeSize * 2^depth}
     *         covers the file without more than doubling it
     */
    public static int calculateNodeSize(long fileSize, int depth) {
        // don't create more than this many nodes
        long maxNodes = 1 << depth;
        // calculate ideal node size, rounding up
        long idealNodeSize = fileSize / maxNodes;
        if (fileSize % maxNodes != 0)
            idealNodeSize++;
        // node size must equal 2^n, n in {10,11,...}
        int n = MerkleTree.log2Ceil(idealNodeSize);
        int nodeSize = 1 << n;
        if (LOG.isDebugEnabled()) {
            LOG.debug("fileSize " + fileSize);
            LOG.debug("depth " + depth);
            LOG.debug("nodeSize " + nodeSize);
        }
        // this is just to make sure we have the right nodeSize for our depth
        // of choice
        assert nodeSize * maxNodes >= fileSize :
            "nodeSize: " + nodeSize +
            ", fileSize: " + fileSize +
            ", maxNode: " + maxNodes;
        assert nodeSize * maxNodes <= fileSize * 2 :
            "nodeSize: " + nodeSize +
            ", fileSize: " + fileSize +
            ", maxNode: " + maxNodes;
        return nodeSize;
    }
}