package com.limegroup.gnutella; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import com.limegroup.gnutella.messages.QueryRequest; import com.limegroup.gnutella.settings.FilterSettings; import com.limegroup.gnutella.util.DualIterator; import com.limegroup.gnutella.util.ForgetfulHashMap; import com.limegroup.gnutella.util.StringUtils; import com.limegroup.gnutella.xml.LimeXMLDocument; /** * Records information about queries so that responses can be validated later. * Typical use is to call record(..) on an outgoing query request, and * score/matchesType/isMandragoreWorm on each incoming response. */ public class ResponseVerifier { private static class RequestData { /** The original query. */ final String query; /** The rich query. */ final LimeXMLDocument richQuery; /** The keywords of the original query, lowercased. */ final List queryWords; /** The type of the original query. */ final MediaType type; /** Whether this is a what is new query */ final boolean whatIsNew; RequestData(String query, MediaType type) { this(query, null, type, false); } RequestData(String query, LimeXMLDocument richQuery, MediaType type, boolean whatIsNew) { this.query=query; this.richQuery=richQuery; this.queryWords=getSearchTerms(query, richQuery); this.type=type; this.whatIsNew = whatIsNew; } public boolean xmlQuery() { return richQuery != null; } } /** * A mapping from GUIDs to the words of the search made with that GUID. */ private ForgetfulHashMap /* GUID -> RequestData */ mapper = new ForgetfulHashMap(15); /** The characters to use in stripping apart queries. */ private static final String DELIMITERS="+ "; /** The size of a Mandragore worm response, i.e., 8KB. */ private static final long Mandragore_SIZE=8*1024l; /** Same as record(qr, null). */ public synchronized void record(QueryRequest qr) { record(qr, null); } /** * @modifies this * @effects memorizes the query string for qr; this will be used to score * responses later. If type!=null, also memorizes that qr was for the given * media type; otherwise, this is assumed to be for any type. */ public synchronized void record(QueryRequest qr, MediaType type){ byte[] guid = qr.getGUID(); mapper.put(new GUID(guid),new RequestData(qr.getQuery(), qr.getRichQuery(), type, qr.isWhatIsNewRequest())); } public synchronized boolean matchesQuery(byte [] guid, Response response) { RequestData data = (RequestData) mapper.get(new GUID(guid)); if (data == null || data.queryWords == null) return false; if (data.whatIsNew) return true; int minGood = FilterSettings.MIN_MATCHING_WORDS.getValue(); if (score(data.queryWords, response.getName()) > minGood) return true; LimeXMLDocument doc = response.getDocument(); if (doc != null) { for (Iterator iter = new DualIterator( doc.getKeyWords().iterator(),doc.getKeyWordsIndivisible().iterator()); iter.hasNext();) { String xmlWord = (String) iter.next(); if (score(data.queryWords,xmlWord) > minGood ) return true; } } return false; } /** * Returns the score of the given response compared to the given query. * * @param query the query keyword string sent * @param richQuery the XML metadata string sent, or null if none * @param response the response to score, converted to RemoteFileDesc * @return the percentage of query keywords (0-100) matching */ public static int score(String query, LimeXMLDocument richQuery, RemoteFileDesc response) { return score(getSearchTerms(query, richQuery), response.getFileName()); } /** Actual implementation of scoring; called from both public versions. * @param queryWords the tokenized query keywords * @param filename the name of the response*/ private static int score(List queryWords, String filename) { int numMatchingWords=0; int numQueryWords=queryWords.size(); if (numQueryWords==0) return 100; // avoid divide-by-zero errors below //Count the number of regular expressions from the query that //match the result's name. Ignore case in comparison. for (int i=0; i<numQueryWords; i++) { String pattern = (String) queryWords.get(i); if (StringUtils.contains(filename, pattern, true)) { numMatchingWords++; continue; } } return (int)(100.0f * ((float)numMatchingWords/numQueryWords)); } /** * Returns true if response has the same media type as the * corresponding query request the given GUID. In the rare case * that guid is not known (because this' buffers overflowed), * conservatively returns true. */ public boolean matchesType(byte[] guid, Response response) { RequestData request=(RequestData)mapper.get(new GUID(guid)); if (request == null || request.type==null) return true; String reply = response.getName(); return request.type.matches(reply); } /** * Returns true if the given response is an instance of the Mandragore * Worm. This worm responds to the query "x" with a 8KB file named * "x.exe". In the rare case that the query for guid can't be found * returns false. */ public boolean isMandragoreWorm(byte[] guid, Response response) { RequestData request=(RequestData)mapper.get(new GUID(guid)); if (request == null) return false; return response.getSize()==Mandragore_SIZE && response.getName().equals(request.query+".exe"); } public String toString() { return mapper.toString(); } private static List getSearchTerms(String query, LimeXMLDocument richQuery) { String[] terms = null; // combine xml and standard keywords // --------------------------------------- HashSet qWords=new HashSet(); terms = StringUtils.split(query.toLowerCase(), DELIMITERS); // add the standard query words.. for (int i = 0; i < terms.length; i++) qWords.add(terms[i]); List xmlWords=null; if (richQuery != null) { xmlWords = richQuery.getKeyWords(); final int size = xmlWords.size(); // add a lowercase version of the xml words... for (int i = 0; i < size; i++) { String currWord = (String) xmlWords.remove(0); qWords.add(currWord.toLowerCase()); } } return Collections.unmodifiableList(new ArrayList(qWords)); } }