package com.limegroup.gnutella.downloader;

import java.io.File;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.RouterService;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.settings.SharingSettings;
import com.limegroup.gnutella.util.CommonUtils;
import com.limegroup.gnutella.util.Comparators;
import com.limegroup.gnutella.util.FileUtils;

/**
 * A repository of temporary filenames.  Gives out file names for temporary
 * files, ensuring that two duplicate files always get the same name.  This
 * enables smart resumes across hosts.  Also keeps track of the blocks
 * downloaded, for smart downloading purposes.  <b>Thread safe.</b><p>
 */
public class IncompleteFileManager implements Serializable {
    /** Ensures backwards compatibility. */
    static final long serialVersionUID = -7658285233614679878L;

    /** The delimiter to use between the size and the real name of a
     *  temporary file.  To make it easier to break the temporary name into
     *  its constituent parts, this should not contain a number. */
    static final String SEPARATOR = "-";

    /** The prefix added to preview copies of incomplete files. */
    public static final String PREVIEW_PREFIX = "Preview-";

    private static final Log LOG =
        LogFactory.getLog(IncompleteFileManager.class);

    /*
     * IMPORTANT SERIALIZATION NOTE
     *
     * The original version of IncompleteFileManager consisted solely of a
     * mapping from File to List<Interval> and used default serialization.
     * Starting with version 1.10 of this file, we started using
     * VerifyingFile instead of List<Interval> internally.  But because we
     * wanted forward- and backward-compatibility, we replaced each
     * VerifyingFile with an equivalent List<Interval> when writing to
     * downloads.dat.  We reversed this transformation when reading from
     * downloads.dat.  All this was implemented with custom writeObject and
     * readObject methods.
     *
     * Starting with CVS version 1.15, IncompleteFileManager keeps track of
     * hashes as well.  This makes it difficult to write a custom readObject
     * method that maintains backwards compatibility--how do you know whether
     * HASHES can be read from the input stream?  To get around this, we
     * reverted back to default Java serialization with one twist: before
     * delegating to defaultWriteObject, we temporarily transform BLOCKS to
     * use List<Interval>.  Similarly, we do the inverse transformation after
     * calling defaultReadObject.  This is backward-compatible and will make
     * versioning less difficult in the future.
     *
     * The moral of the story is this: be very careful when modifying this
     * class in any way!  IncompleteFileManagerTest has some test cases to
     * check backwards compatibility, but you will want to do additional
     * testing.
     */

    /**
     * A mapping from incomplete files (File) to the blocks of the file
     * stored on disk (VerifyingFile).  Needed for resuming smart downloads.
     * INVARIANT: all blocks disjoint, no two intervals can be coalesced into
     * one interval.  Note that blocks are not sorted; there are typically
     * few blocks, so performance isn't an issue.
     */
    private Map /* File -> VerifyingFile */ blocks =
        new TreeMap(Comparators.fileComparator());
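    // Illustrative contents (hypothetical file name and ranges): after two
    // non-adjacent ranges of a 1000-byte file have been written, blocks
    // might map
    //     T-1000-song.mp3 -> VerifyingFile{[0, 499], [600, 899]}
    // Had the second range started at byte 500, the invariant above would
    // require the two intervals to be coalesced into [0, 899].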
    /**
     * Bijection between SHA1 hashes (URN) and incomplete files (File).  This
     * is used to ensure that any two RemoteFileDesc with the same hash get
     * the same incomplete file, regardless of name.  The inverse of this map
     * is used to get the hash of an incomplete file for query-by-hash and
     * resuming.  Note that the hash is that of the desired completed file,
     * not that of the incomplete file.<p>
     *
     * Entries are added to hashes before the temp file is actually created
     * on disk.  For this reason, there can be files in the value set of
     * hashes that are not in the key set of blocks.  These entries are not
     * serialized to disk in the downloads.dat file.  Similarly there may be
     * files in the key set of blocks that are not in the value set of
     * hashes.  This happens if we received RemoteFileDesc's without hashes,
     * or when loading old downloads.dat files without hash info.
     *
     * INVARIANT: the range (value set) of hashes contains no duplicates.
     * INVARIANT: for all keys k in hashes, k.isSHA1()
     */
    private Map /* URN -> File */ hashes = new HashMap();

    ///////////////////////////////////////////////////////////////////////

    /**
     * Removes entries in this for which there is no file on disk.
     *
     * @return true iff any entries were purged
     */
    public synchronized boolean purge() {
        boolean ret = false;
        //Remove any blocks for which the file doesn't exist.
        for (Iterator iter = blocks.keySet().iterator(); iter.hasNext(); ) {
            File file = (File)iter.next();
            if (!file.exists()) {
                ret = true;
                RouterService.getFileManager().removeFileIfShared(file);
                file.delete();  //always safe to call; return value ignored
                iter.remove();
            }
        }
        return ret;
    }

    /**
     * Deletes incomplete files more than INCOMPLETE_PURGE_TIME days old from
     * disk, then removes entries in this for which there is no file on disk.
     *
     * @param activeFiles which files are currently being downloaded
     * @return true iff any entries were purged
     */
    public synchronized boolean initialPurge(Collection activeFiles) {
        //Remove any files that are old.
        boolean ret = false;
        for (Iterator iter = blocks.keySet().iterator(); iter.hasNext(); ) {
            File file = (File)iter.next();
            try {
                file = FileUtils.getCanonicalFile(file);
            } catch (IOException iox) {
                file = file.getAbsoluteFile();
            }
            if (!file.exists() ||
                (isOld(file) && !activeFiles.contains(file))) {
                ret = true;
                RouterService.getFileManager().removeFileIfShared(file);
                file.delete();
                iter.remove();
            }
        }
        for (Iterator iter = hashes.values().iterator(); iter.hasNext(); ) {
            File file = (File)iter.next();
            if (!file.exists()) {
                iter.remove();
                ret = true;
            }
        }
        return ret;
    }

    /** Returns true iff file is "too old". */
    private static final boolean isOld(File file) {
        //Inlining this method allows some optimizations--not that they
        //matter.
        long days = SharingSettings.INCOMPLETE_PURGE_TIME.getValue();
        //24 hour/day * 60 min/hour * 60 sec/min * 1000 msec/sec
        long purgeTime = System.currentTimeMillis() - days*24L*60L*60L*1000L;
        return file.lastModified() < purgeTime;
    }
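    // Worked example for isOld (hypothetical setting): with
    // INCOMPLETE_PURGE_TIME set to 7 days, purgeTime is
    //     now - 7 * 24 * 60 * 60 * 1000 = now - 604,800,000 ms,
    // so a file last modified more than a week ago is "too old" and is
    // eligible for deletion by initialPurge unless it is actively
    // downloading.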
    /**
     * Returns true if both rfds "have the same content".  Currently
     * rfd1~=rfd2 iff either of the following conditions hold:
     *
     * <ul>
     * <li>Both files have the same hash, i.e.,
     *     rfd1.getSHA1Urn().equals(rfd2.getSHA1Urn()).  Note that this
     *     (almost) always means that rfd1.getSize()==rfd2.getSize(), though
     *     rfd1 and rfd2 may have different names.
     * <li>Both files have the same name and size and don't have conflicting
     *     hashes, i.e., rfd1.getName().equals(rfd2.getName()) &&
     *     rfd1.getSize()==rfd2.getSize() && (rfd1.getSHA1Urn()==null ||
     *     rfd2.getSHA1Urn()==null ||
     *     rfd1.getSHA1Urn().equals(rfd2.getSHA1Urn())).
     * </ul>
     * Note that the second condition allows risky resumes, i.e., resumes
     * when one (or both) of the files doesn't have a hash.
     *
     * @see #getFile(RemoteFileDesc)
     */
    static boolean same(RemoteFileDesc rfd1, RemoteFileDesc rfd2) {
        return same(rfd1.getFileName(), rfd1.getSize(), rfd1.getSHA1Urn(),
                    rfd2.getFileName(), rfd2.getSize(), rfd2.getSHA1Urn());
    }

    /** @see #same(RemoteFileDesc, RemoteFileDesc) */
    static boolean same(String name1, int size1, URN hash1,
                        String name2, int size2, URN hash2) {
        //Either they have the same hashes...
        if (hash1 != null && hash2 != null)
            return hash1.equals(hash2);
        //...or same name and size and no conflicting hashes.
        else
            return size1 == size2 && name1.equals(name2);
    }
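    // Illustrative truth table for same(..) (hypothetical names and URNs):
    //     same("a.txt", 10, urnX, "b.txt", 10, urnX) -> true   (hashes match)
    //     same("a.txt", 10, null, "a.txt", 10, urnX) -> true   (risky resume)
    //     same("a.txt", 10, urnX, "a.txt", 10, urnY) -> false  (conflicting hashes)
    //     same("a.txt", 10, null, "a.txt", 11, null) -> false  (sizes differ)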
    /**
     * Canonicalization is less important on Windows and has caused problems
     * there, so this method only canonicalizes on other platforms.
     */
    private static File canonicalize(File f) throws IOException {
        f = f.getAbsoluteFile();
        if (CommonUtils.isWindows())
            return f;
        else
            return f.getCanonicalFile();
    }

    /**
     * Same as getFile(String, URN, int), except taking the values from the
     * RFD.  getFile(rfd) ==
     *     getFile(rfd.getFileName(), rfd.getSHA1Urn(), rfd.getSize());
     */
    public synchronized File getFile(RemoteFileDesc rfd) throws IOException {
        return getFile(rfd.getFileName(), rfd.getSHA1Urn(), rfd.getSize());
    }

    /**
     * Convenience method that calls getFile(String, URN, int,
     * SharingSettings.INCOMPLETE_DIRECTORY.getValue()).
     */
    public synchronized File getFile(String name, URN sha1, int size)
            throws IOException {
        return getFile(name, sha1, size,
                       SharingSettings.INCOMPLETE_DIRECTORY.getValue());
    }

    /**
     * Returns the fully-qualified temporary download file for the given
     * file/location pair.  If an incomplete file already exists for this
     * URN, that file is returned.  Otherwise, the location of the file is
     * determined by the incDir argument.  For example,
     * getFile("test.txt", null, 1999, incDir) may return
     * "C:\Program Files\LimeWire\Incomplete\T-1999-test.txt" if incDir is
     * "C:\Program Files\LimeWire\Incomplete".  The disk is not modified,
     * except for the file possibly being created.<p>
     *
     * This method gives duplicate files the same temporary file, which is
     * critical for resume and swarmed downloads.  That is, for all rfd_i
     * and rfd_j,
     * <pre>
     *     same(rfd_i, rfd_j) <==> getFile(rfd_i).equals(getFile(rfd_j))
     * </pre>
     *
     * It is imperative that files are compared in their canonical form to
     * preserve the integrity of the filesystem.  Otherwise, multiple
     * downloads could be downloading to "FILE A" and "file a", although
     * only "file a" exists on disk and is being written to by both.
     *
     * @throws IOException if there was an IOError while determining the
     *  file's name
     */
    public synchronized File getFile(String name, URN sha1, int size,
                                     File incDir) throws IOException {
        boolean dirsMade = false;
        File baseFile = null;
        File canonFile = null;

        //Make sure the directory exists (the user might have deleted it).
        dirsMade = incDir.mkdirs();

        String convertedName = CommonUtils.convertFileName(name);

        try {
            if (sha1 != null) {
                File file = (File)hashes.get(sha1);
                if (file != null) {
                    //File already allocated for hash.
                    return file;
                } else {
                    //Allocate a unique file for the hash.  By "unique" we
                    //mean not in the value set of HASHES.  Because we allow
                    //risky resumes, there's no need to look at BLOCKS as
                    //well...
                    for (int i = 1; ; i++) {
                        file = new File(incDir,
                                        tempName(convertedName, size, i));
                        baseFile = file;
                        file = canonicalize(file);
                        canonFile = file;
                        if (!hashes.values().contains(file))
                            break;
                    }
                    //...and record the hash for later.
                    hashes.put(sha1, file);
                    //...and make sure the file exists on disk, so that
                    //future File.getCanonicalFile calls will match this
                    //file.  This was a problem on OS X, where
                    //File("myfile") and File("MYFILE") aren't equal, but
                    //File("myfile").getCanonicalFile() will only return a
                    //File("MYFILE") if that already existed on disk.  This
                    //means that in order for the canonical-checking within
                    //this class to work, the file must exist on disk.
                    FileUtils.touch(file);
                    return file;
                }
            } else {
                //No hash.
                File f = new File(incDir, tempName(convertedName, size, 0));
                baseFile = f;
                f = canonicalize(f);
                canonFile = f;
                return f;
            }
        } catch (IOException ioe) {
            IOException ioe2 = new IOException(
                "dirsMade: " + dirsMade +
                "\ndirExist: " + incDir.exists() +
                "\nbaseFile: " + baseFile +
                "\ncanonFile: " + canonFile);
            ioe2.initCause(ioe);
            throw ioe2;
        }
    }

    /**
     * Returns the file associated with the specified URN.  If no file
     * matches, returns null.
     *
     * @return the file associated with the URN, or null if none
     */
    public synchronized File getFileForUrn(URN urn) {
        if (urn == null)
            throw new NullPointerException("null urn");

        return (File)hashes.get(urn);
    }

    /**
     * Returns the unqualified file name for a file with the given name and
     * size, with an optional suffix to make it unique.
     *
     * @param suffix a number to attach in parentheses before the file
     *  extension to make the name unique; values of 1 or less mean no
     *  suffix is added
     */
    private static String tempName(String filename, int size, int suffix) {
        if (suffix <= 1) {
            //a) No suffix
            return "T-" + size + "-" + filename;
        }
        int i = filename.lastIndexOf('.');
        if (i < 0) {
            //b) Suffix, no extension
            return "T-" + size + "-" + filename + " (" + suffix + ")";
        } else {
            //c) Suffix, file extension
            String noExtension = filename.substring(0, i);
            String extension = filename.substring(i);  //e.g., ".txt"
            return "T-" + size + "-" + noExtension +
                   " (" + suffix + ")" + extension;
        }
    }
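    // Illustrative outputs of the naming scheme (hypothetical inputs):
    //     tempName("file.txt", 1999, 0) -> "T-1999-file.txt"
    //     tempName("file.txt", 1999, 2) -> "T-1999-file (2).txt"
    //     tempName("file",     1999, 3) -> "T-1999-file (3)"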
    ///////////////////////////////////////////////////////////////////////

    private synchronized void readObject(ObjectInputStream stream)
            throws IOException, ClassNotFoundException {
        //Ensure hashes is non-null if not present in the stream.
        hashes = new HashMap();

        //Read hashes and blocks.
        stream.defaultReadObject();

        //Convert blocks from interval lists to VerifyingFile.
        //See serialization note above.
        if (LOG.isDebugEnabled())
            LOG.debug("blocks before transform " + blocks);
        blocks = transform(blocks);
        if (LOG.isDebugEnabled())
            LOG.debug("blocks after transform " + blocks);

        //Ensure that all information in hashes is canonicalized.  This must
        //be done because older LimeWires did not canonicalize the files
        //before adding them.
        hashes = verifyHashes();

        //Notify FileManager about the new incomplete files.
        registerAllIncompleteFiles();
    }

    private synchronized void writeObject(ObjectOutputStream stream)
            throws IOException {
        //Temporarily change blocks from VerifyingFile to interval lists...
        Map blocksSave = blocks;
        try {
            if (LOG.isDebugEnabled())
                LOG.debug("blocks before invtransform: " + blocks);
            blocks = invTransform();
            if (LOG.isDebugEnabled())
                LOG.debug("blocks after invtransform: " + blocks);
            stream.defaultWriteObject();
        } finally {
            //...and restore when done.  See serialization note above.
            blocks = blocksSave;
        }
    }

    /**
     * Ensures the integrity of the hashes map.  This must be done so that
     * downloads.dat files written by older versions of LimeWire load with a
     * valid hashes map.  Previously, entries added to the map were not
     * canonicalized, resulting in multiple downloads thinking they're going
     * to separate files, but actually going to the same file.
     */
    private Map verifyHashes() {
        Map retMap = new HashMap();

        for (Iterator i = hashes.entrySet().iterator(); i.hasNext(); ) {
            Map.Entry entry = (Map.Entry)i.next();
            if (entry.getKey() instanceof URN &&
                entry.getValue() instanceof File) {
                URN urn = (URN)entry.getKey();
                File f = (File)entry.getValue();
                try {
                    f = canonicalize(f);
                    //We must purge old entries that had mapped multiple
                    //URNs to uncanonicalized files.  This is done by
                    //ensuring that we only add this entry to the map if no
                    //other URN points to it.
                    if (!retMap.values().contains(f))
                        retMap.put(urn, f);
                } catch (IOException ioe) {
                    //Skip entries whose files cannot be canonicalized.
                }
            }
        }
        return retMap;
    }

    /** Takes a map of File->List<Interval> and returns a new equivalent
     *  Map of File->VerifyingFile. */
    private Map transform(Object object) {
        Map map = (Map)object;
        Map retMap = new TreeMap(Comparators.fileComparator());
        for (Iterator i = map.keySet().iterator(); i.hasNext(); ) {
            Object incompleteFile = i.next();
            Object o = map.get(incompleteFile);
            if (o == null) //no entry?!
                continue;
            else if (incompleteFile instanceof File) {
                // (o instanceof List), i.e., an old downloads.dat.
                //Canonicalize the file to fix older LimeWires that allowed
                //non-canonicalized files to be inserted into the table.
                File f = (File)incompleteFile;
                try {
                    f = canonicalize(f);
                } catch (IOException ioe) {
                    //Ignore entries whose files cannot be canonicalized.
                    continue;
                }
                VerifyingFile vf;
                try {
                    vf = new VerifyingFile((int)getCompletedSize(f));
                } catch (IllegalArgumentException iae) {
                    vf = new VerifyingFile();
                }
                List list = (List)o;
                for (Iterator iter = list.iterator(); iter.hasNext(); ) {
                    Interval interval = (Interval)iter.next();
                    //Older intervals excluded the high'th byte, so we
                    //decrease the value of interval.high.  An effect of
                    //this is that an older client with a newer
                    //downloads.dat downloads one byte extra for each
                    //interval.
                    interval = new Interval(interval.low, interval.high - 1);
                    vf.addInterval(interval);
                }
                if (list.isEmpty())
                    vf.setScanForExistingBlocks(true, f.length());
                retMap.put(f, vf);
            }
        }
        return retMap;
    }

    /** Takes a map of File->VerifyingFile and returns a new equivalent
     *  Map of File->List<Interval>. */
    private Map invTransform() {
        Map retMap = new HashMap();
        for (Iterator iter = blocks.keySet().iterator(); iter.hasNext(); ) {
            List writeList = new ArrayList();  //the list we will write out
            Object incompleteFile = iter.next();
            VerifyingFile vf = (VerifyingFile)blocks.get(incompleteFile);
            synchronized (vf) {
                List l = vf.getSerializableBlocks();
                for (int i = 0; i < l.size(); i++) {
                    //Copy the intervals because we can't mutate
                    //VerifyingFile's list.
                    Interval inter = (Interval)l.get(i);
                    //Increment interval.high by 1 to maintain the
                    //semantics of Interval.
                    Interval interval =
                        new Interval(inter.low, inter.high + 1);
                    writeList.add(interval);
                }
            }
            retMap.put(incompleteFile, writeList);
        }
        return retMap;
    }
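    // Illustrative round trip (hypothetical range): in memory, a block
    // covering bytes 0 through 99 is held as the inclusive Interval
    // [0, 99].  invTransform writes it to downloads.dat as [0, 100]
    // (exclusive high, the historical on-disk format), and transform
    // subtracts 1 from high when reading it back, restoring [0, 99].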
    ///////////////////////////////////////////////////////////////////////

    /**
     * Removes the block and hash information for the given incomplete file.
     * Typically this is called after incompleteFile has been deleted.
     *
     * @param incompleteFile a temporary file returned by getFile
     */
    public synchronized void removeEntry(File incompleteFile) {
        //Remove downloaded blocks.
        blocks.remove(incompleteFile);

        //Remove any key k from hashes for which hashes[k]=incompleteFile.
        //There should be at most one such k.
        for (Iterator iter = hashes.entrySet().iterator(); iter.hasNext(); ) {
            Map.Entry entry = (Map.Entry)iter.next();
            if (incompleteFile.equals(entry.getValue()))
                //Could also break here as a small optimization.
                iter.remove();
        }

        //Remove the entry from FileManager.
        RouterService.getFileManager().removeFileIfShared(incompleteFile);
    }

    /**
     * Associates the incompleteFile with the VerifyingFile vf and notifies
     * FileManager about the new incomplete file.
     */
    public synchronized void addEntry(File incompleteFile, VerifyingFile vf)
            throws IOException {
        //We must canonicalize the file.
        try {
            incompleteFile = canonicalize(incompleteFile);
        } catch (IOException ignored) {}

        blocks.put(incompleteFile, vf);
        registerIncompleteFile(incompleteFile);
    }

    public synchronized VerifyingFile getEntry(File incompleteFile) {
        Object o = blocks.get(incompleteFile);
        return (VerifyingFile)o;
    }

    public synchronized int getBlockSize(File incompleteFile) {
        Object o = blocks.get(incompleteFile);
        if (o == null)
            return 0;
        else {
            VerifyingFile vf = (VerifyingFile)o;
            return vf.getBlockSize();
        }
    }

    /**
     * Notifies file manager about all incomplete files.
     */
    public synchronized void registerAllIncompleteFiles() {
        for (Iterator iter = blocks.keySet().iterator(); iter.hasNext(); ) {
            File file = (File)iter.next();
            if (file.exists() && !isOld(file)) {
                registerIncompleteFile(file);
            }
        }
    }

    /**
     * Notifies file manager about a single incomplete file.
     */
    private synchronized void registerIncompleteFile(File incompleteFile) {
        //Only register if it has a SHA1 -- otherwise we can't share.
        Set completeHashes = getAllCompletedHashes(incompleteFile);
        if (completeHashes.size() == 0)
            return;

        RouterService.getFileManager().addIncompleteFile(
            incompleteFile,
            completeHashes,
            getCompletedName(incompleteFile),
            (int)getCompletedSize(incompleteFile),
            getEntry(incompleteFile)
        );
    }

    /**
     * Returns the name of the complete file associated with the given
     * incomplete file, i.e., what incompleteFile will be renamed to when
     * the download completes (without path information).
     *
     * @param incompleteFile a file returned by getFile
     * @return the complete file name, without path
     * @exception IllegalArgumentException incompleteFile was not the return
     *  value from getFile
     */
    public static String getCompletedName(File incompleteFile)
            throws IllegalArgumentException {
        //Given T-<size>-<name>, return <name>.
        //       i      j
        //This is not as strict as it could be.  TODO: what about the (x)
        //suffix?
        String name = incompleteFile.getName();
        int i = name.indexOf(SEPARATOR);
        if (i < 0)
            throw new IllegalArgumentException("Missing separator: " + name);
        int j = name.indexOf(SEPARATOR, i + 1);
        if (j < 0)
            throw new IllegalArgumentException("Missing separator: " + name);
        if (j == name.length() - 1)
            throw new IllegalArgumentException(
                "No name after last separator");
        return name.substring(j + 1);
    }
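    // Illustrative parse (hypothetical temp file name): for an incomplete
    // file named "T-1999-song.mp3", getCompletedName returns "song.mp3"
    // and getCompletedSize returns 1999.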
    /**
     * Returns the size of the complete file associated with the given
     * incomplete file, i.e., the number of bytes in the file when the
     * download completes.
     *
     * @param incompleteFile a file returned by getFile
     * @return the complete file size
     * @exception IllegalArgumentException incompleteFile was not returned
     *  by getFile
     */
    public static long getCompletedSize(File incompleteFile)
            throws IllegalArgumentException {
        //Given T-<size>-<name>, return <size>.
        //       i      j
        String name = incompleteFile.getName();
        int i = name.indexOf(SEPARATOR);
        if (i < 0)
            throw new IllegalArgumentException("Missing separator: " + name);
        int j = name.indexOf(SEPARATOR, i + 1);
        if (j < 0)
            throw new IllegalArgumentException("Missing separator: " + name);
        try {
            return Long.parseLong(name.substring(i + 1, j));
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Bad number format: " + name);
        }
    }

    /**
     * Returns the hash of the complete file associated with the given
     * incomplete file, i.e., the hash of incompleteFile when the download
     * is complete.  Slow; runs in linear time with respect to the number
     * of hashes in this.
     *
     * @param incompleteFile a file returned by getFile
     * @return a SHA1 hash, or null if unknown
     */
    public synchronized URN getCompletedHash(File incompleteFile) {
        //Return a key k s.t. hashes.get(k)==incompleteFile...
        for (Iterator iter = hashes.entrySet().iterator(); iter.hasNext(); ) {
            Map.Entry entry = (Map.Entry)iter.next();
            if (incompleteFile.equals(entry.getValue()))
                return (URN)entry.getKey();
        }
        return null;  //...or null if no such k.
    }

    /**
     * Returns any known hashes of the complete file associated with the
     * given incomplete file, i.e., the hashes of incompleteFile when the
     * download is complete.
     *
     * @param incompleteFile a file returned by getFile
     * @return a set of known hashes
     */
    public synchronized Set getAllCompletedHashes(File incompleteFile) {
        Set urns = new HashSet(1);
        //Return a set S s.t. for each k in S,
        //hashes.get(k)==incompleteFile.
        for (Iterator iter = hashes.entrySet().iterator(); iter.hasNext(); ) {
            Map.Entry entry = (Map.Entry)iter.next();
            if (incompleteFile.equals(entry.getValue()))
                urns.add(entry.getKey());
        }
        return urns;
    }

    public synchronized String toString() {
        StringBuffer buf = new StringBuffer();
        buf.append("{");
        boolean first = true;
        for (Iterator iter = blocks.keySet().iterator(); iter.hasNext(); ) {
            if (!first)
                buf.append(", ");

            File key = (File)iter.next();
            List intervals =
                ((VerifyingFile)blocks.get(key)).getVerifiedBlocksAsList();
            buf.append(key);
            buf.append(":");
            buf.append(intervals.toString());
            first = false;
        }
        buf.append("}");
        return buf.toString();
    }

    public synchronized String dumpHashes() {
        return hashes.toString();
    }
}
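/*
 * Minimal usage sketch (hypothetical caller; assumes an rfd obtained from a
 * query reply and the default incomplete directory):
 *
 *     IncompleteFileManager ifm = new IncompleteFileManager();
 *     File temp = ifm.getFile(rfd);       //equal content -> same temp file
 *     ifm.addEntry(temp, new VerifyingFile());
 *     int written = ifm.getBlockSize(temp);
 *     URN sha1 = ifm.getCompletedHash(temp);  //hash of finished file, or null
 */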