package com.limegroup.gnutella.tigertree;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.bitzi.util.Base32;
import com.limegroup.gnutella.Assert;
import com.limegroup.gnutella.FileDesc;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.downloader.Interval;
import com.limegroup.gnutella.http.HTTPConstants;
import com.limegroup.gnutella.http.HTTPHeaderValue;
import com.limegroup.gnutella.security.Tiger;
import com.limegroup.gnutella.security.TigerTree;

/**
 * This class stores a HashTree and can verify a file against it; it is
 * also used for persisting trees to disk.
 *
 * Be careful when modifying any non-transient fields, as this class is
 * serialized to disk.
 *
 * @author Gregorio Roper
 */
public class HashTree implements HTTPHeaderValue, Serializable {

    private static final long serialVersionUID = -5752974896215224469L;

    private static transient final Log LOG = LogFactory.getLog(HashTree.class);

    // some static constants
    private static transient final int KB = 1024;
    private static transient final int MB = 1024 * KB;
    static transient final int BLOCK_SIZE = 1024;
    private static transient final byte INTERNAL_HASH_PREFIX = 0x01;

    // non-transient fields below are written to the output stream when
    // this class is serialized.

    /**
     * The lowest depth list of nodes.
     */
    private final List /* of byte[] */ NODES;

    /**
     * The tigertree root hash.
     */
    private final byte[] ROOT_HASH;

    /**
     * The size of the file this hash identifies.
     */
    private final long FILE_SIZE;

    /**
     * The depth of this tree.
     */
    private final int DEPTH;

    /**
     * The URI for this hash tree.
     */
    private final String THEX_URI;

    /**
     * The tree writer.
     */
    private transient HashTreeHandler _treeWriter;

    /**
     * The size of each node.
     */
    private transient int _nodeSize;

    /**
     * Constructs a new HashTree out of the given nodes, root, sha1
     * and filesize.
     */
    private HashTree(List allNodes, String sha1, long fileSize) {
        this(allNodes, sha1, fileSize,
             calculateNodeSize(fileSize, allNodes.size() - 1));
    }

    /**
     * Constructs a new HashTree out of the given nodes, root, sha1,
     * filesize and chunk size.
     */
    private HashTree(List allNodes, String sha1, long fileSize, int nodeSize) {
        THEX_URI = HTTPConstants.URI_RES_N2X + sha1;
        NODES = (List)allNodes.get(allNodes.size() - 1);
        FILE_SIZE = fileSize;
        ROOT_HASH = (byte[])((List)allNodes.get(0)).get(0);
        DEPTH = allNodes.size() - 1;
        Assert.that(TigerTree.log2Ceil(NODES.size()) == DEPTH);
        // use long math so the check does not overflow for very large files
        Assert.that((long)NODES.size() * nodeSize >= fileSize);
        HashTreeNodeManager.instance().register(this, allNodes);
        _nodeSize = nodeSize;
    }

    /**
     * Creates a new HashTree for the given FileDesc.
     */
    static HashTree createHashTree(FileDesc fd) throws IOException {
        if (LOG.isDebugEnabled())
            LOG.debug("creating hashtree for file " + fd);
        InputStream in = null;
        try {
            in = fd.createInputStream();
            return createHashTree(fd.getFileSize(), in, fd.getSHA1Urn());
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {}
            }
        }
    }
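    /*
     * Worked example for the node size calculation below (illustrative
     * figures, not taken from the original source): a 5MB file gets
     * depth 5 from calculateDepth, so maxNodes = 1 << 5 = 32 and the
     * ideal node size is 5242880 / 32 = 163840 bytes. The closest power
     * of two >= 163840 is 2^18 = 262144, so the file is hashed as
     * 5242880 / 262144 = 20 leaf chunks of 256KB, which lies between
     * 2^4 = 16 and 2^5 = 32 leaves, as the javadoc below promises.
     */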
    /**
     * Calculates the node size based on the file size and the target depth.
     *
     * A tree of depth n has at most 2^n leaf nodes, so ideally the file
     * would be split into that many chunks. However, since chunks have to
     * be powers of 2, we make the size of each chunk the closest power of 2
     * that is bigger than the ideal size.
     *
     * This ensures the resulting tree will have between 2^(n-1) and 2^n
     * leaf nodes.
     */
    public static int calculateNodeSize(long fileSize, int depth) {
        // don't create more than this many nodes
        int maxNodes = 1 << depth;

        // calculate the ideal node size ...
        // (divide before casting, so large files don't overflow the int)
        int idealNodeSize = (int)(fileSize / maxNodes);
        // ... rounding up!
        if (fileSize % maxNodes != 0)
            idealNodeSize++;

        // calculate the node size; it must be a power of two
        // (2^n, typically n >= 10)
        int n = TigerTree.log2Ceil(idealNodeSize);
        // 2^n
        int nodeSize = 1 << n;
        if (LOG.isDebugEnabled()) {
            LOG.debug("fileSize " + fileSize);
            LOG.debug("depth " + depth);
            LOG.debug("nodeSize " + nodeSize);
        }

        // this is just to make sure we have the right nodeSize for our
        // depth of choice
        Assert.that(nodeSize * (long)maxNodes >= fileSize,
                    "nodeSize: " + nodeSize +
                    ", fileSize: " + fileSize +
                    ", maxNode: " + maxNodes);
        Assert.that(nodeSize * (long)maxNodes <= fileSize * 2,
                    "nodeSize: " + nodeSize +
                    ", fileSize: " + fileSize +
                    ", maxNode: " + maxNodes);

        return nodeSize;
    }

    /**
     * Creates a new HashTree for the given file size, input stream and SHA1.
     *
     * Exists as a hook for tests, to create a HashTree from a File
     * when no FileDesc exists.
     */
    private static HashTree createHashTree(long fileSize, InputStream is,
                                           URN sha1) throws IOException {
        // do the actual hashing
        int nodeSize = calculateNodeSize(fileSize, calculateDepth(fileSize));
        List nodes = createTTNodes(nodeSize, fileSize, is);
        // calculate the intermediary nodes to get the root hash & others.
        List allNodes = createAllParentNodes(nodes);
        return new HashTree(allNodes, sha1.toString(), fileSize, nodeSize);
    }

    /**
     * Reads a new HashTree from the network. It is expected that the
     * data is in DIME format, the first record being an XML description
     * of the tree's structure, and the second record being the
     * breadth-first tree.
     *
     * @param is
     *            the <tt>InputStream</tt> to read from
     * @param sha1
     *            a <tt>String</tt> containing the sha1 URN for the same file
     * @param root32
     *            a <tt>String</tt> containing the Base32 encoded expected
     *            root hash
     * @param fileSize
     *            the long specifying the size of the file
     * @return HashTree if we successfully read from the network
     * @throws IOException if there was an error reading from the network
     *         or if the data was corrupted or invalid in any way
     */
    public static HashTree createHashTree(InputStream is, String sha1,
                                          String root32, long fileSize)
      throws IOException {
        if (LOG.isTraceEnabled())
            LOG.trace("reading " + sha1 + "." + root32 + " dime data.");
        return new HashTree(HashTreeHandler.read(is, fileSize, root32),
                            sha1, fileSize);
    }

    /**
     * Checks whether the specified area of the file matches the hash tree.
     */
    public boolean isCorrupt(Interval in, byte[] data) {
        return isCorrupt(in, data, data.length);
    }

    /**
     * Checks whether the specified area of the file matches the hash tree.
     */
    public boolean isCorrupt(Interval in, byte[] data, int length) {
        Assert.that(in.high <= FILE_SIZE);

        // if the interval is not a fixed chunk, we cannot verify it
        // (actually we could, but it's more complicated)
        if (in.low % _nodeSize == 0 &&
            in.high - in.low + 1 <= _nodeSize &&
            (in.high == in.low + _nodeSize - 1 || in.high == FILE_SIZE - 1)) {
            TigerTree digest = new TigerTree();
            digest.update(data, 0, length);
            byte[] hash = digest.digest();
            byte[] treeHash = (byte[])NODES.get(in.low / _nodeSize);
            boolean ok = Arrays.equals(treeHash, hash);
            if (LOG.isDebugEnabled())
                LOG.debug("interval " + in + " verified " + ok);
            return !ok;
        }
        return true;
    }
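    /*
     * Illustrative sketch of the check above (assumed values, not from
     * the original source): with FILE_SIZE = 1048576 (1MB) and
     * _nodeSize = 131072 (128KB), the verifiable intervals are exactly
     * the eight aligned chunks [0,131071], [131072,262143], ...,
     * [917504,1048575]. A misaligned interval such as [1000,132071] is
     * reported as corrupt, because it cannot be compared against a
     * single leaf hash.
     */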
    /**
     * @return Thex URI for this HashTree
     * @see com.limegroup.gnutella.http.HTTPHeaderValue#httpStringValue()
     */
    public String httpStringValue() {
        return THEX_URI + ";" + Base32.encode(ROOT_HASH);
    }

    /**
     * @return true if the DEPTH is ideal according to our own standards,
     *         else we know that we have to rebuild the HashTree
     */
    public boolean isGoodDepth() {
        return (DEPTH == calculateDepth(FILE_SIZE));
    }

    /**
     * @return true if the DEPTH is close enough to ideal according to our
     *         own standards
     */
    public boolean isDepthGoodEnough() {
        // for some ranges newDepth actually returns smaller values than
        // oldDepth
        return DEPTH >= calculateDepth(FILE_SIZE) - 1;
    }

    /**
     * Determines if this tree is better than another.
     *
     * A tree is considered better if the other's depth is not 'good' and
     * this depth is good; if neither is good, the tree whose depth is
     * closer to 'good' wins.
     */
    public boolean isBetterTree(HashTree other) {
        if (other == null)
            return true;
        else if (other.isGoodDepth())
            return false;
        else if (this.isGoodDepth())
            return true;
        else {
            int ideal = calculateDepth(FILE_SIZE);
            int diff1 = Math.abs(this.DEPTH - ideal);
            int diff2 = Math.abs(other.DEPTH - ideal);
            return diff1 < diff2;
        }
    }

    /**
     * @return long Returns the FILE_SIZE.
     */
    public long getFileSize() {
        return FILE_SIZE;
    }

    /**
     * @return String Returns the Base32 encoded root hash.
     */
    public String getRootHash() {
        return Base32.encode(ROOT_HASH);
    }

    /**
     * @return String the THEX_URI.
     */
    public String getThexURI() {
        return THEX_URI;
    }

    /**
     * @return int the DEPTH.
     */
    public int getDepth() {
        return DEPTH;
    }

    /**
     * @return List the NODES.
     */
    public List getNodes() {
        return NODES;
    }

    public synchronized int getNodeSize() {
        if (_nodeSize == 0) {
            // we were deserialized
            _nodeSize = calculateNodeSize(FILE_SIZE, DEPTH);
        }
        return _nodeSize;
    }

    /**
     * @return The number of nodes in the full tree.
     */
    public int getNodeCount() {
        // This works by calculating how many nodes
        // will be in the tree based on the number of nodes
        // at the last depth. The previous depth is always
        // going to have ceil(current/2) nodes.
        double last = NODES.size();
        int count = (int)last;
        for (int i = DEPTH - 1; i >= 0; i--) {
            last = Math.ceil(last / 2);
            count += (int)last;
        }
        return count;
    }
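    /*
     * Worked example for getNodeCount (illustrative, not from the
     * original source): a tree with 20 leaves at DEPTH 5 contains
     * 20 + 10 + 5 + 3 + 2 + 1 = 41 nodes, since every generation above
     * the leaves holds ceil(previous / 2) entries, up to the single
     * root node.
     */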
    /**
     * @return all nodes.
     */
    public List getAllNodes() {
        return HashTreeNodeManager.instance().getAllNodes(this);
    }

    /**
     * Writes this HashTree to the specified OutputStream using DIME.
     */
    public void write(OutputStream out) throws IOException {
        getTreeWriter().write(out);
    }

    /**
     * Determines the length of the tree's output.
     */
    public int getOutputLength() {
        return getTreeWriter().getLength();
    }

    /**
     * Determines the type of the output.
     */
    public String getOutputType() {
        return getTreeWriter().getType();
    }

    /**
     * Calculates which depth we want to use for the HashTree. For small
     * files we can save a lot of memory by not creating such a large
     * HashTree.
     *
     * @param size
     *            the fileSize
     * @return int the ideal generation depth for the fileSize
     */
    public static int calculateDepth(long size) {
        if (size < 256 * KB)            // 256KB chunk, 0B tree
            return 0;
        else if (size < 512 * KB)       // 256KB chunk, 24B tree
            return 1;
        else if (size < MB)             // 256KB chunk, 72B tree
            return 2;
        else if (size < 2 * MB)         // 256KB chunk, 168B tree
            return 3;
        else if (size < 4 * MB)         // 256KB chunk, 360B tree
            return 4;
        else if (size < 8 * MB)         // 256KB chunk, 744B tree
            return 5;
        else if (size < 16 * MB)        // 256KB chunk, 1512B tree
            return 6;
        else if (size < 32 * MB)        // 256KB chunk, 3048B tree
            return 7;
        else if (size < 64 * MB)        // 256KB chunk, 6120B tree
            return 8;
        else if (size < 256 * MB)       // 512KB chunk, 12264B tree
            return 9;
        else if (size < 1024 * MB)      // 1MB chunk, 24552B tree
            return 10;
        else                            // 2MB chunk, 49128B tree
            return 11;
    }

    /**
     * Returns the TreeWriter, initializing it if necessary.
     * No volatile or locking is necessary, because it's not a huge
     * deal if we create two of these.
     */
    private HashTreeHandler getTreeWriter() {
        if (_treeWriter == null)
            _treeWriter = new HashTreeHandler(this);
        return _treeWriter;
    }

    /*
     * Static helper methods.
     */

    /*
     * Iterative method to generate the parent nodes of an arbitrary
     * depth.
     *
     * The 0th element of the returned List will always be a List of size
     * 1, containing a byte[] of the root hash.
     */
    static List createAllParentNodes(List nodes) {
        List allNodes = new ArrayList();
        allNodes.add(Collections.unmodifiableList(nodes));
        while (nodes.size() > 1) {
            nodes = createParentGeneration(nodes);
            allNodes.add(0, nodes);
        }
        return allNodes;
    }

    /*
     * Create the parent generation of the Merkle HashTree for a given
     * child generation.
     */
    static List createParentGeneration(List nodes) {
        MessageDigest md = new Tiger();
        // a parent generation holds ceil(childCount / 2) nodes
        int size = (nodes.size() + 1) / 2;
        List ret = new ArrayList(size);
        Iterator iter = nodes.iterator();
        while (iter.hasNext()) {
            byte[] left = (byte[])iter.next();
            if (iter.hasNext()) {
                byte[] right = (byte[])iter.next();
                md.reset();
                md.update(INTERNAL_HASH_PREFIX);
                md.update(left, 0, left.length);
                md.update(right, 0, right.length);
                byte[] result = md.digest();
                ret.add(result);
            } else {
                // odd node out: promote it to the parent generation as-is
                ret.add(left);
            }
        }
        return ret;
    }
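    /*
     * Illustrative example for createParentGeneration (hypothetical
     * hashes A..E, not from the original source): the child generation
     * [A, B, C, D, E] collapses to
     * [tiger(0x01, A, B), tiger(0x01, C, D), E]; adjacent pairs are
     * combined under the 0x01 internal-node prefix and the odd node out
     * is promoted unchanged, as in the THEX tree specification.
     */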
    /*
     * Create a generation of nodes. It is very important that nodeSize
     * equals 2^n (n >= 10) or we will not get the expected generation of
     * nodes of a Merkle HashTree.
     */
    private static List createTTNodes(int nodeSize, long fileSize,
                                      InputStream is) throws IOException {
        List ret = new ArrayList((int)Math.ceil((double)fileSize / nodeSize));
        MessageDigest tt = new TigerTree();
        byte[] block = new byte[BLOCK_SIZE * 128];
        long offset = 0;
        int read = 0;
        while (offset < fileSize) {
            int nodeOffset = 0;
            long time = System.currentTimeMillis();
            // reset our TigerTree instance
            tt.reset();
            // hash each node independently, never reading past the end of
            // the current node even if the stream returns short reads
            while (nodeOffset < nodeSize &&
                   (read = is.read(block, 0,
                        Math.min(block.length, nodeSize - nodeOffset))) != -1) {
                tt.update(block, 0, read);
                // update offsets
                nodeOffset += read;
                offset += read;
                // throttle the hashing: sleep for twice as long as the
                // last read-and-update took, so hashing consumes at most
                // roughly a third of the available disk/CPU time
                try {
                    long sleep = (System.currentTimeMillis() - time) * 2;
                    if (sleep > 0)
                        Thread.sleep(sleep);
                } catch (InterruptedException ie) {
                    throw new IOException("interrupted during hashing operation");
                }
                time = System.currentTimeMillis();
            }
            // node hashed, add the hash to our internal List.
            ret.add(tt.digest());

            // verify the sanity of the hashing.
            if (offset == fileSize) {
                // if read isn't already -1, the next read MUST be -1.
                // it wouldn't already be -1 if the file ended exactly on
                // a node boundary
                if (read != -1 && is.read() != -1) {
                    LOG.warn("More data than fileSize!");
                    throw new IOException("unknown file size.");
                }
            } else if (read == -1 && offset != fileSize) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("couldn't hash whole file. " +
                             "read: " + read +
                             ", offset: " + offset +
                             ", fileSize: " + fileSize);
                }
                throw new IOException("couldn't hash whole file.");
            }
        }
        return ret;
    }
}
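/*
 * Usage sketch (illustrative only, not part of the original source;
 * assumes an existing FileDesc "fd" and an open OutputStream "out"):
 *
 *   HashTree tree = HashTree.createHashTree(fd);  // hash the whole file
 *   String value = tree.httpStringValue();        // THEX URI plus root hash,
 *                                                 // of the form
 *                                                 // "<uri-res N2X prefix><sha1>;<root32>",
 *                                                 // e.g. for an X-Thex-URI header
 *   tree.write(out);                              // DIME-encoded tree
 */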