package com.limegroup.gnutella.downloader;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.limegroup.gnutella.Assert;
import com.limegroup.gnutella.RouterService;
import com.limegroup.gnutella.tigertree.HashTree;
import com.limegroup.gnutella.util.ByteArrayCache;
import com.limegroup.gnutella.util.FileUtils;
import com.limegroup.gnutella.util.IntervalSet;
import com.limegroup.gnutella.util.PowerOf2ByteArrayCache;
import com.limegroup.gnutella.util.ProcessingQueue;

/**
 * A control point for all access to the file being downloaded to; also does
 * on-the-fly verification.
 *
 * Every region of the file can be in one of five states, and can move from one
 * state to another only in the following order:
 *
 *   1. available for download
 *   2. currently being downloaded
 *   3. waiting to be written
 *   4. written (and, if possible, immediately moved on to)
 *   5. verified; if it doesn't verify, back to
 *   1. available for download
 *
 * In order to maintain these constraints, the only possible operations are:
 *   Lease a block   - find an area which is available for download and claim it.
 *   Write a block   - report that the specified block has been read from the network.
 *   Release a block - report that the specified block will not be downloaded.
 */
public class VerifyingFile {
    
    private static final Log LOG = LogFactory.getLog(VerifyingFile.class);
    
    /**
     * The queue whose thread does the actual verification & writing.
     */
    private static final ProcessingQueue QUEUE = new ProcessingQueue("BlockingVF",
            true, // managed
            Thread.NORM_PRIORITY + 1); // a little higher priority than normal
    
    /**
     * If the fraction of corrupted data exceeds this, assume the file will not
     * be recovered.
     */
    static final float MAX_CORRUPTION = 0.9f;
    
    /**
     * The default chunk size - if we don't have a tree we request chunks this big.
     *
     * This is a power of two in order to minimize the number of small partial
     * chunk downloads that will be required after we learn the chunk size from
     * the TigerTree, since the chunk size will always be a power of two.
     */
    static final int DEFAULT_CHUNK_SIZE = 131072; // 128 KB = 128 * 1024 B = 131072 bytes
    
    /**
     * A cache for byte[]s.
     */
    private static final ByteArrayCache CACHE =
        new ByteArrayCache(512, HTTPDownloader.BUF_LENGTH);
    
    static {
        RouterService.schedule(new CacheCleaner(), 10 * 60 * 1000, 10 * 60 * 1000);
    }
    
    /** Cached byte[]s for verifiable chunks. */
    private static final PowerOf2ByteArrayCache CHUNK_CACHE =
        new PowerOf2ByteArrayCache();
    
    /**
     * The file we're writing to / reading from.
     */
    private volatile RandomAccessFile fos;
    
    /**
     * Whether this file is open for writing.
     */
    private volatile boolean isOpen;
    
    /**
     * The eventual completed size of the file we're writing.
     */
    private final int completedSize;
    
    /**
     * How much data we lost due to corruption.
     */
    private int lostSize;
    
    /**
     * The VerifyingFile uses an IntervalSet to keep track of the blocks written
     * to disk and to find out which blocks to check before writing to disk.
     */
    private final IntervalSet verifiedBlocks;
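    /*
     * Illustrative lifecycle sketch (not part of the original class; the file
     * name, size, and readFromNetwork helper are hypothetical). It walks one
     * region through the states described in the class javadoc:
     *
     *   VerifyingFile vf = new VerifyingFile(1024 * 1024);  // eventual size
     *   vf.open(new File("incomplete/example.dat"));
     *   Interval lease = vf.leaseWhite(vf.getChunkSize());  // 1 -> 2
     *   byte[] data = readFromNetwork(lease);
     *   vf.writeBlock(lease.low, data);                     // 2 -> 3 -> 4 (-> 5)
     *   // or, if the source dies before sending the range:
     *   // vf.releaseBlock(lease);                          // 2 -> 1
     */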
    /**
     * Ranges that are currently being written by the ManagedDownloader.
     *
     * Replaces the IntervalSet of needed ranges previously stored in the
     * ManagedDownloader, which could get out of sync with the verifiedBlocks
     * IntervalSet; this is the more failsafe replacement.
     */
    private IntervalSet leasedBlocks;
    
    /**
     * Ranges that are currently written to disk, but do not form complete
     * chunks, so they cannot be verified by the HashTree.
     */
    private IntervalSet partialBlocks;
    
    /**
     * Ranges that are discarded (but verification was attempted).
     */
    private IntervalSet savedCorruptBlocks;
    
    /**
     * Ranges which are pending writing & verification.
     */
    private IntervalSet pendingBlocks;
    
    /**
     * Decides which blocks to start downloading next.
     */
    private SelectionStrategy blockChooser = null;
    
    /**
     * The hashtree we use to verify chunks, if any.
     */
    private HashTree hashTree;
    
    /**
     * The expected TigerTree root (null if we'll accept any).
     */
    private String expectedHashRoot;
    
    /**
     * Whether someone is currently requesting the hash tree.
     */
    private boolean hashTreeRequested;
    
    /**
     * Whether we are actually verifying chunks.
     */
    private boolean discardBad = true;
    
    /**
     * The IOException, if any, we got while writing.
     */
    private IOException storedException;
    
    /**
     * The size of the file on disk if we're going to scan for completed
     * blocks. Otherwise -1.
     */
    private long existingFileSize = -1;
    
    /**
     * Constructs a new VerifyingFile without a given completion size.
     *
     * Useful for tests.
     */
    public VerifyingFile() {
        this(-1);
    }
    
    /**
     * Constructs a new VerifyingFile for the specified size.
     */
    public VerifyingFile(int completedSize) {
        this.completedSize = completedSize;
        verifiedBlocks = new IntervalSet();
        leasedBlocks = new IntervalSet();
        pendingBlocks = new IntervalSet();
        partialBlocks = new IntervalSet();
        savedCorruptBlocks = new IntervalSet();
    }
    
    /**
     * Opens this VerifyingFile for writing.
     * MUST be called before anything else.
     *
     * If there is no completion size, this fails.
     */
    public void open(File file) throws IOException {
        if (completedSize == -1)
            throw new IllegalStateException("cannot open for unknown size.");
        
        // Ensure that the directory this file is in exists & is writeable.
        File parentFile = file.getParentFile();
        if (parentFile != null) {
            parentFile.mkdirs();
            FileUtils.setWriteable(parentFile);
        }
        FileUtils.setWriteable(file);
        this.fos = new RandomAccessFile(file, "rw");
        SelectionStrategy myStrategy = SelectionStrategyFactory.getStrategyFor(
                FileUtils.getFileExtension(file), completedSize);
        
        synchronized(this) {
            storedException = null;
            // Figure out which SelectionStrategy to use
            blockChooser = myStrategy;
            isOpen = true;
        }
    }
    
    /**
     * Used to add blocks directly. Blocks added this way are marked partial.
     */
    public synchronized void addInterval(Interval interval) {
        // delegates to the underlying IntervalSet
        partialBlocks.add(interval);
    }
    
    /**
     * Writes bytes to the underlying file.
     * @throws InterruptedException if the downloader gets killed during the process
     */
    public void writeBlock(long pos, byte[] data) throws InterruptedException {
        writeBlock(pos, 0, data.length, data);
    }
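    /*
     * Write-path note (descriptive commentary, not in the original source):
     * writeBlock copies the caller's buffer into a byte[] taken from CACHE and
     * merely queues a ChunkHandler; the actual disk write and any verification
     * happen later on the "BlockingVF" queue thread. The caller may therefore
     * reuse its own buffer as soon as the call returns.
     */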
    /**
     * Writes bytes to the underlying file.
     * @param currPos the position in the file to write to
     * @param start the start position in the buffer to read from
     * @param length the length of data in the buffer to use
     * @param buf the buffer of data
     * @throws InterruptedException if the downloader gets killed during the process
     */
    public void writeBlock(long currPos, int start, int length, byte[] buf)
      throws InterruptedException {
        if (LOG.isTraceEnabled())
            LOG.trace("trying to write block at offset " + currPos + " with size " + length);
        
        if (length == 0) // nothing to write? return
            return;
        
        if (fos == null)
            throw new IllegalStateException("no fos!");
        
        if (!isOpen())
            return;
        
        Interval intvl = new Interval(currPos, currPos + length - 1);
        
        synchronized(this) {
            /// some stuff to help debugging ///
            if (!leasedBlocks.contains(intvl)) {
                Assert.that(false, "trying to write an interval " + intvl +
                        " that wasn't leased.\n" + dumpState());
            }
            
            if (partialBlocks.contains(intvl) ||
                    savedCorruptBlocks.contains(intvl) ||
                    pendingBlocks.contains(intvl)) {
                Assert.that(false, "trying to write an interval " + intvl +
                        " that was already written" + dumpState());
            }
            
            leasedBlocks.delete(intvl);
            // Add only the ranges that aren't already verified into pending.
            // This is necessary because full-scanning may have added unforeseen
            // blocks into verified.
            // Technically the code in the if block would work for all cases,
            // but it's kind of inefficient to do lots of work all the time,
            // when the if is only necessary after a full scan.
            if (verifiedBlocks.containsAny(intvl)) {
                IntervalSet remaining = new IntervalSet();
                remaining.add(intvl);
                remaining.delete(verifiedBlocks);
                pendingBlocks.add(remaining);
            } else {
                pendingBlocks.add(intvl);
            }
        }
        
        byte[] temp = CACHE.get();
        Assert.that(temp.length >= length);
        System.arraycopy(buf, start, temp, 0, length);
        QUEUE.add(new ChunkHandler(temp, intvl));
    }
    
    /**
     * Sets whether or not we're going to do a one-time full scan on this file
     * for verified blocks once we find a hash tree.
     *
     * @param scan whether to scan
     * @param length the current size of the file on disk
     */
    public void setScanForExistingBlocks(boolean scan, long length) {
        if (scan && length != 0) {
            existingFileSize = length;
        } else {
            existingFileSize = -1;
        }
    }
    
    public String dumpState() {
        return "verified:" + verifiedBlocks + "\npartial:" + partialBlocks +
            "\ndiscarded:" + savedCorruptBlocks +
            "\npending:" + pendingBlocks + "\nleased:" + leasedBlocks;
    }
    
    /**
     * Returns a block of data that needs to be written.
     *
     * This method will not break up contiguous chunks into smaller chunks.
     */
    public Interval leaseWhite() throws NoSuchElementException {
        return leaseWhiteHelper(null, completedSize);
    }
    
    /**
     * Returns a block of data that needs to be written.
     * The returned block will NEVER be larger than chunkSize.
     */
    public Interval leaseWhite(int chunkSize) throws NoSuchElementException {
        return leaseWhiteHelper(null, chunkSize);
    }
    
    /**
     * Returns a block of data that needs to be written and is within the
     * specified set of ranges. The parameter IntervalSet is modified.
     */
    public Interval leaseWhite(IntervalSet ranges) throws NoSuchElementException {
        return leaseWhiteHelper(ranges, DEFAULT_CHUNK_SIZE);
    }
    
    /**
     * Returns a block of data that needs to be written and is within the
     * specified set of ranges.
     * The returned block will NEVER be larger than chunkSize.
     */
    public Interval leaseWhite(IntervalSet ranges, int chunkSize)
      throws NoSuchElementException {
        return leaseWhiteHelper(ranges, chunkSize);
    }
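    /*
     * Leasing sketch (illustrative; the ranges are hypothetical): leasing from
     * a source that only has the first 256 KB of the file.
     *
     *   IntervalSet avail = new IntervalSet();
     *   avail.add(new Interval(0, 262143));
     *   Interval lease = vf.leaseWhite(avail, vf.getChunkSize());
     *   // lease lies within [0, 262143], is at most one chunk long, and is
     *   // now tracked in leasedBlocks until it is written or released.
     */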
    /**
     * Removes the specified interval from the set of leased intervals.
     */
    public synchronized void releaseBlock(Interval in) {
        if (!leasedBlocks.contains(in)) {
            Assert.that(false, "trying to release an interval " + in +
                    " that wasn't leased " + dumpState());
        }
        if (LOG.isInfoEnabled())
            LOG.info("Releasing interval: " + in + " state " + dumpState());
        leasedBlocks.delete(in);
    }
    
    /**
     * Returns an Iterator over all downloaded blocks.
     */
    public synchronized Iterator getBlocks() {
        return getBlocksAsList().iterator();
    }
    
    /**
     * Returns an Iterator over all verified blocks.
     */
    public synchronized Iterator getVerifiedBlocks() {
        return verifiedBlocks.getAllIntervals();
    }
    
    /**
     * @return a byte-packed representation of the verified blocks.
     */
    public synchronized byte[] toBytes() {
        return verifiedBlocks.toBytes();
    }
    
    public String toString() {
        return dumpState();
    }
    
    /**
     * @return a List of Intervals that should be serialized. Excludes pending
     * intervals.
     */
    public synchronized List getSerializableBlocks() {
        IntervalSet ret = new IntervalSet();
        for (Iterator iter = verifiedBlocks.getAllIntervals(); iter.hasNext(); )
            ret.add((Interval) iter.next());
        for (Iterator iter = partialBlocks.getAllIntervals(); iter.hasNext(); )
            ret.add((Interval) iter.next());
        for (Iterator iter = savedCorruptBlocks.getAllIntervals(); iter.hasNext(); )
            ret.add((Interval) iter.next());
        return ret.getAllIntervalsAsList();
    }
    
    /**
     * @return all downloaded blocks as a List.
     */
    public synchronized List getBlocksAsList() {
        List l = new ArrayList();
        l.addAll(verifiedBlocks.getAllIntervalsAsList());
        l.addAll(partialBlocks.getAllIntervalsAsList());
        l.addAll(savedCorruptBlocks.getAllIntervalsAsList());
        l.addAll(pendingBlocks.getAllIntervalsAsList());
        IntervalSet ret = new IntervalSet();
        for (Iterator iter = l.iterator(); iter.hasNext(); )
            ret.add((Interval) iter.next());
        return ret.getAllIntervalsAsList();
    }
    
    /**
     * Returns all verified blocks as a List.
     */
    public synchronized List getVerifiedBlocksAsList() {
        return verifiedBlocks.getAllIntervalsAsList();
    }
    
    /**
     * Returns the total number of bytes written to disk or queued for writing.
     */
    public synchronized int getBlockSize() {
        return verifiedBlocks.getSize() +
            partialBlocks.getSize() +
            savedCorruptBlocks.getSize() +
            pendingBlocks.getSize();
    }
    
    public synchronized int getPendingSize() {
        return pendingBlocks.getSize();
    }
    
    public static int getNumPendingItems() {
        return QUEUE.size();
    }
    
    /**
     * Returns the total number of verified bytes written to disk.
     */
    public synchronized int getVerifiedBlockSize() {
        return verifiedBlocks.getSize();
    }
    
    /**
     * @return how much data was lost due to corruption.
     */
    public synchronized int getAmountLost() {
        return lostSize;
    }
    
    /**
     * Determines if all blocks have been written to disk and verified.
     */
    public synchronized boolean isComplete() {
        if (hashTree != null)
            return verifiedBlocks.getSize() + savedCorruptBlocks.getSize() == completedSize;
        else {
            return verifiedBlocks.getSize() + savedCorruptBlocks.getSize() +
                partialBlocks.getSize() == completedSize;
        }
    }
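    /*
     * Accounting sketch for isComplete() above (illustrative numbers): for a
     * 1048576-byte file with a hash tree, completion requires
     * verified + savedCorrupt == 1048576; partial bytes don't count, since the
     * tree will eventually verify them into one of those two sets. Without a
     * tree nothing can ever promote a partial range, so partial bytes count
     * toward completion as well.
     */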
    /**
     * If the last remaining chunks of the file are currently pending writing &
     * verification, waits until they finish.
     */
    public synchronized void waitForPendingIfNeeded()
      throws InterruptedException, DiskException {
        if (storedException != null)
            throw new DiskException(storedException);
        
        while (!isComplete() && getBlockSize() == completedSize) {
            if (storedException != null)
                throw new DiskException(storedException);
            
            if (LOG.isInfoEnabled())
                LOG.info("waiting for a pending chunk to verify or write..");
            wait();
        }
    }
    
    /**
     * @return whether we think we will not be able to complete this file.
     */
    public synchronized boolean isHopeless() {
        return lostSize >= MAX_CORRUPTION * completedSize;
    }
    
    public boolean isOpen() {
        return isOpen;
    }
    
    /**
     * @return the number of bytes that are not yet assigned or written.
     */
    public synchronized int hasFreeBlocksToAssign() {
        return completedSize - (verifiedBlocks.getSize() +
                leasedBlocks.getSize() +
                partialBlocks.getSize() +
                savedCorruptBlocks.getSize() +
                pendingBlocks.getSize());
    }
    
    /**
     * Closes the file output stream.
     */
    public void close() {
        isOpen = false;
        if (fos == null)
            return;
        try {
            fos.close();
        } catch (IOException ioe) {}
    }
    
    ///////////////////////// private helpers //////////////////////////////
    
    /**
     * Determines which interval should be assigned next, leases that interval,
     * and returns it.
     *
     * @param availableBytes if non-null, the return value will be chosen from
     * within availableBytes
     * @param chunkSize if greater than zero, the return value will end one byte
     * before a chunkSize boundary and will be at most chunkSize bytes large
     * @return the leased interval
     */
    private synchronized Interval leaseWhiteHelper(IntervalSet availableBytes,
            long chunkSize) throws NoSuchElementException {
        if (LOG.isDebugEnabled())
            LOG.debug("leasing white, state:\n" + dumpState());
        
        // If availableBytes is null, make it represent the entire file
        if (availableBytes == null)
            availableBytes = IntervalSet.createSingletonSet(0, completedSize - 1);
        
        // Figure out which blocks we still need to assign
        IntervalSet neededBytes = IntervalSet.createSingletonSet(0, completedSize - 1);
        neededBytes.delete(verifiedBlocks);
        neededBytes.delete(leasedBlocks);
        neededBytes.delete(partialBlocks);
        neededBytes.delete(savedCorruptBlocks);
        neededBytes.delete(pendingBlocks);
        
        if (LOG.isDebugEnabled())
            LOG.debug("needed bytes: " + neededBytes);
        
        // Calculate the intersection of neededBytes and availableBytes
        availableBytes.delete(neededBytes.invert(completedSize));
        
        Interval ret = blockChooser.pickAssignment(availableBytes, neededBytes, chunkSize);
        leaseBlock(ret);
        
        if (LOG.isDebugEnabled())
            LOG.debug("leasing white interval " + ret + "\nof needed intervals " + neededBytes);
        
        return ret;
    }
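    /*
     * Worked example for the intersection step above (illustrative sizes): for
     * a 10-byte file with neededBytes = [0, 4] and availableBytes = [3, 9],
     * neededBytes.invert(10) is [5, 9]; deleting that from availableBytes
     * leaves [3, 4] -- exactly the bytes that are both needed and available.
     */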
    /**
     * Leases the specified interval.
     */
    private synchronized void leaseBlock(Interval in) {
        //if (LOG.isDebugEnabled())
        //    LOG.debug("Obtaining interval: " + in);
        leasedBlocks.add(in);
    }
    
    /**
     * Sets the expected hash tree root. If non-null, we'll only accept hash
     * trees whose root hash matches this.
     */
    public synchronized void setExpectedHashTreeRoot(String root) {
        expectedHashRoot = root;
    }
    
    public synchronized HashTree getHashTree() {
        return hashTree;
    }
    
    /**
     * Sets the HashTree the current download will use.
     */
    public synchronized void setHashTree(HashTree tree) {
        // if the tree doesn't match our expected root, bail
        if (expectedHashRoot != null && tree != null &&
                !tree.getRootHash().equalsIgnoreCase(expectedHashRoot))
            return;
        
        // if the tree is of incorrect size, ignore it
        if (tree != null && tree.getFileSize() != completedSize)
            return;
        
        // If we did not have a tree previously and we do have one now,
        // and either we want to scan the whole file once or we have partial
        // but no pending blocks, trigger verification.
        HashTree previous = hashTree;
        hashTree = tree;
        if (previous == null && tree != null &&
                (existingFileSize != -1 ||
                 (pendingBlocks.getSize() == 0 && partialBlocks.getSize() > 0))) {
            QUEUE.add(new EmptyVerifier(existingFileSize));
            existingFileSize = -1;
        }
    }
    
    /**
     * Flags that someone is currently requesting the tree.
     */
    public synchronized void setHashTreeRequested(boolean yes) {
        hashTreeRequested = yes;
    }
    
    public synchronized boolean isHashTreeRequested() {
        return hashTreeRequested;
    }
    
    public synchronized void setDiscardUnverified(boolean yes) {
        discardBad = yes;
    }
    
    public synchronized int getChunkSize() {
        return hashTree == null ? DEFAULT_CHUNK_SIZE : hashTree.getNodeSize();
    }
    
    /**
     * Stub for calling verifyChunks(-1).
     */
    private void verifyChunks() {
        verifyChunks(-1);
    }
    
    /**
     * Schedules those chunks that can be verified against the hash tree for
     * verification.
     */
    private void verifyChunks(long existingFileSize) {
        boolean fullScan = existingFileSize != -1;
        HashTree tree = getHashTree(); // capture the tree
        if (tree != null) {
            // if we have a tree, see if there is a completed chunk in the partial list
            for (Iterator iter = findVerifyableBlocks(existingFileSize).iterator();
                    iter.hasNext(); ) {
                Interval i = (Interval) iter.next();
                boolean good = verifyChunk(i, tree);
                
                synchronized(this) {
                    partialBlocks.delete(i);
                    if (good)
                        verifiedBlocks.add(i);
                    else {
                        if (!fullScan) {
                            if (!discardBad)
                                savedCorruptBlocks.add(i);
                            lostSize += (i.high - i.low + 1);
                        }
                    }
                }
            }
        }
    }
    
    /**
     * @return true if the given chunk verified against the given hash tree,
     * false if it was corrupt or could not be read back from disk.
     */
    private boolean verifyChunk(Interval i, HashTree tree) {
        if (LOG.isDebugEnabled())
            LOG.debug("verifying interval " + i);
        
        int length = i.high - i.low + 1;
        byte[] b = CHUNK_CACHE.get(length);
        // read the interval from the file
        try {
            synchronized(fos) {
                fos.seek(i.low);
                fos.readFully(b, 0, length);
            }
        } catch (IOException bad) {
            // we failed reading back from the file - assume the block is
            // corrupt and will have to be re-downloaded
            return false;
        }
        
        boolean corrupt = tree.isCorrupt(i, b, length);
        
        if (LOG.isDebugEnabled() && corrupt)
            LOG.debug("block corrupt!");
        
        return !corrupt;
    }
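    /*
     * Alignment sketch for findVerifyableBlocks below (illustrative numbers,
     * assuming the default 131072-byte chunk): a partial range
     * [100000, 400000] first rounds its low bound up to the next chunk
     * boundary (131072), then yields the complete chunks [131072, 262143] and
     * [262144, 393215]; the ragged edges stay in partialBlocks.
     */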
    /**
     * Iterates through the partial blocks and checks whether the recent write
     * has created some (verifiable) full chunks. It's not possible to verify
     * more than two chunks per method call unless the downloader is being
     * deserialized from disk.
     */
    private synchronized List findVerifyableBlocks(long existingFileSize) {
        if (LOG.isTraceEnabled())
            LOG.trace("trying to find verifyable blocks out of " + partialBlocks);
        
        boolean fullScan = existingFileSize != -1;
        List verifyable = new ArrayList(2);
        List partial;
        int chunkSize = getChunkSize();
        
        if (fullScan) {
            IntervalSet temp = (IntervalSet) partialBlocks.clone();
            temp.add(new Interval(0, existingFileSize));
            partial = temp.getAllIntervalsAsList();
        } else {
            partial = partialBlocks.getAllIntervalsAsList();
        }
        
        for (int i = 0; i < partial.size(); i++) {
            Interval current = (Interval) partial.get(i);
            
            // find the beginning of the first chunk offset
            int lowChunkOffset = current.low - current.low % chunkSize;
            if (current.low % chunkSize != 0)
                lowChunkOffset += chunkSize;
            while (current.high >= lowChunkOffset + chunkSize - 1) {
                Interval complete = new Interval(lowChunkOffset,
                        lowChunkOffset + chunkSize - 1);
                verifyable.add(complete);
                lowChunkOffset += chunkSize;
            }
        }
        
        // special case for the last chunk
        if (!partial.isEmpty()) {
            int lastChunkOffset = completedSize - (completedSize % chunkSize);
            if (lastChunkOffset == completedSize)
                lastChunkOffset -= chunkSize;
            Interval last = (Interval) partial.get(partial.size() - 1);
            if (last.high == completedSize - 1 && last.low <= lastChunkOffset) {
                if (LOG.isDebugEnabled())
                    LOG.debug("adding the last chunk for verification");
                verifyable.add(new Interval(lastChunkOffset, last.high));
            }
        }
        
        return verifyable;
    }
    
    /**
     * Runnable that writes chunks to disk & verifies partial blocks.
     */
    private class ChunkHandler implements Runnable {
        /** The buffer we are about to write to the file. */
        private final byte[] buf;
        
        /** The interval that we are about to write. */
        private final Interval intvl;
        
        public ChunkHandler(byte[] buf, Interval intvl) {
            this.buf = buf;
            this.intvl = intvl;
        }
        
        public void run() {
            boolean freedPending = false;
            try {
                if (LOG.isTraceEnabled())
                    LOG.trace("Writing intvl: " + intvl);
                
                synchronized(fos) {
                    fos.seek(intvl.low);
                    fos.write(buf, 0, intvl.high - intvl.low + 1);
                }
                
                synchronized(VerifyingFile.this) {
                    pendingBlocks.delete(intvl);
                    partialBlocks.add(intvl);
                    freedPending = true;
                }
                
                verifyChunks();
            } catch (IOException diskIO) {
                synchronized(VerifyingFile.this) {
                    pendingBlocks.delete(intvl);
                    storedException = diskIO;
                }
            } finally {
                // return the buffer to the cache
                CACHE.release(buf);
                synchronized(VerifyingFile.this) {
                    if (!freedPending)
                        pendingBlocks.delete(intvl);
                    VerifyingFile.this.notify();
                }
            }
        }
    }
    
    /** A simple Runnable that schedules a verification of the file. */
    private class EmptyVerifier implements Runnable {
        private final long existingFileSize;
        
        EmptyVerifier(long existingFileSize) {
            this.existingFileSize = existingFileSize;
        }
        
        public void run() {
            verifyChunks(existingFileSize);
            synchronized(VerifyingFile.this) {
                VerifyingFile.this.notify();
            }
        }
    }
    
    /**
     * A Runnable that clears the cache used for storing byte[]s used for
     * writing data read from the network to disk, and schedules a
     * ChunkCacheCleaner.
     */
    private static class CacheCleaner implements Runnable {
        public void run() {
            LOG.info("clearing cache");
            CACHE.clear();
            QUEUE.add(new ChunkCacheCleaner());
        }
    }
    
    /** A Runnable that clears the cache storing byte[]s used for verifying. */
    private static class ChunkCacheCleaner implements Runnable {
        public void run() {
            CHUNK_CACHE.clear();
        }
    }
}