package com.limegroup.gnutella.downloader; import java.io.IOException; import java.io.ObjectInputStream; import java.io.Serializable; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.StringTokenizer; import java.util.Vector; import org.xml.sax.SAXException; import com.limegroup.gnutella.FileManager; import com.limegroup.gnutella.MediaType; import com.limegroup.gnutella.RemoteFileDesc; import com.limegroup.gnutella.ResponseVerifier; import com.limegroup.gnutella.util.ApproximateMatcher; import com.limegroup.gnutella.xml.LimeXMLDocument; import com.limegroup.gnutella.xml.SchemaNotFoundException; /** * Encapsulates important details about an auto download. Serializable for * downloads.dat file; be careful when modifying! */ public class AutoDownloadDetails implements Serializable { static final long serialVersionUID = 3400666689236195243L; // the query associated with this search private String query = null; // the rich query associated with this search private String richQuery = null; // the LimeXMLDocument of this rich query ... // initialized when needed. private transient LimeXMLDocument xmlDoc = null; // flag of whether or not we've tried to create the doc. private transient boolean xmlCreated = false; // the 'filter' associated with this search private transient MediaType type = null; // the GUID associated with this search private byte[] guid = null; // the list of downloads made so far - should not exceed size // MAX_DOWNLOADS private List /* of RemoteFileDesc */ dlList = null; /** * The description of the media type. */ private String mediaDesc; /** the size of the approx matcher 2d buffer... */ private static final int MATCHER_BUF_SIZE = 120; /** this is used for matching of filenames. kind of big so we only want * one. */ private static ApproximateMatcher matcher = new ApproximateMatcher(MATCHER_BUF_SIZE); /** the precision that the matcher uses for comparing candidates to RFDs * that have already been accepted for download.... */ private float MATCH_PRECISION_DL = .30f; /** the percentage of matching that invalidates a new file from being * downloaded. in other words, if a file matches on more than ~51% of * words, then don't download it. */ private float WORD_INCIDENCE_RATE = .509999f; /** what is considered to be a low score, compared to the return value of * the score method... */ private int LOW_SCORE = 95; /** the set of words that are already being downloaded. this can be used * as a heuristic when determining what to download..... */ private Set wordSet = null; static { matcher.setIgnoreCase(true); matcher.setIgnoreWhitespace(true); matcher.setCompareBackwards(true); } // don't auto dl any more than this number of files.... public static final int MAX_DOWNLOADS = 1; // keeps track of committed downloads.... private int committedDLs = 0; /** * @param inQuery the standard query string associated with this query. * @param inRichQuery the rich query associated with this string. * @param inType the mediatype associated with this string..... */ public AutoDownloadDetails(String inQuery, String inRichQuery, byte[] inGuid, MediaType inType) { query = inQuery; richQuery = inRichQuery; type = inType; if(type != null) mediaDesc = type.getMimeType(); else mediaDesc = null; guid = inGuid; dlList = new Vector(); wordSet = new HashSet(); } /** * Extended to set the media type. */ private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { stream.defaultReadObject(); if(mediaDesc == null) type = MediaType.getAnyTypeMediaType(); else type = MediaType.getMediaTypeForSchema(mediaDesc); if(type == null) type = MediaType.getAnyTypeMediaType(); } public String getQuery() { return query; } public String getRichQuery() { return richQuery; } public MediaType getMediaType() { return type; } /** * @param toAdd The RFD you are TRYING to add. * @return Whether or not the add was successful. */ public synchronized boolean addDownload(RemoteFileDesc toAdd) { debug("ADD.addDownload(): *-----------"); debug("ADD.addDownload(): entered."); // this is used not only as a return value but to control processing. // if it every turns false we just stop processing.... boolean retVal = true; // if this hasn't become expired.... if (!expired()) { final String inputFileName = toAdd.getFileName(); // make sure the file ext is legit.... if ((type != null) && !(type.matches(inputFileName))) { retVal = false; debug("ADD.addDownload(): file " + inputFileName + " isn't the right type."); } // create our xml doc if we need to... if( !xmlCreated ) { xmlCreated = true; if( richQuery != null && !richQuery.equals("") ) { try { xmlDoc = new LimeXMLDocument(richQuery); } catch(SchemaNotFoundException ignored) { } catch(SAXException ignored) { } catch(IOException ignored) { } } } // make sure the score for this file isn't too low.... int score = ResponseVerifier.score(query, xmlDoc, toAdd); if (score < LOW_SCORE) { retVal = false; debug("ADD.addDownload(): file " + inputFileName + " has low score of " + score); } // check to see there is a high incidence of words here in stuff we // are already downloading.... if (retVal && (wordSet.size() > 0)) { StringTokenizer st = new StringTokenizer(ripExtension(inputFileName), FileManager.DELIMITERS); int additions = 0; final int numTokens = st.countTokens(); while (st.hasMoreTokens()) { String currToken = st.nextToken().toLowerCase(); debug("ADD.addDownload(): currToken = " + currToken); if (!wordSet.contains(currToken)) additions++; } float matchRate = ((float)(numTokens - additions)/ (float)wordSet.size()); if ((additions == 0) || (matchRate > WORD_INCIDENCE_RATE)) { retVal = false; debug("ADD.addDownload(): file " + inputFileName + " has many elements similar to" + " other files. matchRate = " + matchRate + ", additions = " + additions); } } // see if it compares to any other file already being DLed.... if (retVal && (dlList.size() > 0)) { String processedFileName; synchronized (matcher) { processedFileName = matcher.process(inputFileName); } for (int i = 0; i < dlList.size(); i++) { RemoteFileDesc currRFD = (RemoteFileDesc) dlList.get(i); String currFileName = currRFD.getFileName(); String currProcessedFileName; int diffs = 0; synchronized (matcher) { currProcessedFileName = matcher.process(currFileName); diffs = matcher.match(processedFileName, currProcessedFileName); } int smaller = Math.min(processedFileName.length(), currProcessedFileName.length()); if (((float)diffs)/((float)smaller) < MATCH_PRECISION_DL) { retVal = false; debug("ADD.addDownload(): conflict for file " + inputFileName + " and " + currFileName); } // oops, we have already accepted that file for DL, don't // add it and break out of this costly loop.... if (!retVal) break; } } // ok, all processing passed, add this... if (retVal) { // used by the approx. matcher... dlList.add(toAdd); // used by my hashset comparator.... StringTokenizer st = new StringTokenizer(ripExtension(inputFileName), FileManager.DELIMITERS); while (st.hasMoreTokens()) wordSet.add(st.nextToken().toLowerCase()); debug("ADD.addDownload(): wordSet = " + wordSet); } } else retVal = false; debug("ADD.addDownload(): returning " + retVal); debug("ADD.addDownload(): -----------*"); return retVal; } /** Removes the input RFD from the list. Use this if the DL failed and * you want to back it out.... */ public synchronized void removeDownload(RemoteFileDesc toRemove) { // used by the approx. matcher... dlList.remove(toRemove); // used by the hashset comparator.... // technically, this is bad. i'm doing it because in practice this will // decrease the amount of downloads, which isn't horrible. also, i // don't see a download being removed very frequently. if i want i can // move to a new set which keeps a count for each element of the set and // only discards after the appropriate amt. of removes.... StringTokenizer st = new StringTokenizer(ripExtension(toRemove.getFileName()), FileManager.DELIMITERS); while (st.hasMoreTokens()) wordSet.remove(st.nextToken().toLowerCase()); } /** Call this when the DL was 'successful'. */ public synchronized void commitDownload(RemoteFileDesc toCommit) { if (dlList.contains(toCommit)) committedDLs++; } /** @return true when the AutoDownload process is complete. */ public synchronized boolean expired() { boolean retVal = false; if (committedDLs >= MAX_DOWNLOADS) retVal = true; return retVal; } // take the extension off the filename... private String ripExtension(String fileName) { String retString = null; int extStart = fileName.lastIndexOf('.'); if (extStart == -1) retString = fileName; else retString = fileName.substring(0, extStart); return retString; } private static final boolean debugOn = false; private static void debug(String out) { if (debugOn) System.out.println(out); } private static void debug(Exception e) { if (debugOn) e.printStackTrace(); } }