package com.limegroup.gnutella;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.settings.FilterSettings;
import com.limegroup.gnutella.util.DualIterator;
import com.limegroup.gnutella.util.ForgetfulHashMap;
import com.limegroup.gnutella.util.StringUtils;
import com.limegroup.gnutella.xml.LimeXMLDocument;
/**
* Records information about queries so that responses can be validated later.
* Typical use is to call record(..) on an outgoing query request, and
* score/matchesType/isMandragoreWorm on each incoming response.
*/
public class ResponseVerifier {
private static class RequestData {
/** The original query. */
final String query;
/** The rich query. */
final LimeXMLDocument richQuery;
/** The keywords of the original query, lowercased. */
final List queryWords;
/** The type of the original query. */
final MediaType type;
/** Whether this is a what is new query */
final boolean whatIsNew;
RequestData(String query, MediaType type) {
this(query, null, type, false);
}
RequestData(String query, LimeXMLDocument richQuery, MediaType type, boolean whatIsNew) {
this.query=query;
this.richQuery=richQuery;
this.queryWords=getSearchTerms(query, richQuery);
this.type=type;
this.whatIsNew = whatIsNew;
}
public boolean xmlQuery() {
return richQuery != null;
}
}
/**
* A mapping from GUIDs to the words of the search made with that GUID.
*/
private ForgetfulHashMap /* GUID -> RequestData */ mapper =
new ForgetfulHashMap(15);
/** The characters to use in stripping apart queries. */
private static final String DELIMITERS="+ ";
/** The size of a Mandragore worm response, i.e., 8KB. */
private static final long Mandragore_SIZE=8*1024l;
/** Same as record(qr, null). */
public synchronized void record(QueryRequest qr) {
record(qr, null);
}
/**
* @modifies this
* @effects memorizes the query string for qr; this will be used to score
* responses later. If type!=null, also memorizes that qr was for the given
* media type; otherwise, this is assumed to be for any type.
*/
public synchronized void record(QueryRequest qr, MediaType type){
byte[] guid = qr.getGUID();
mapper.put(new GUID(guid),new RequestData(qr.getQuery(),
qr.getRichQuery(),
type,
qr.isWhatIsNewRequest()));
}
public synchronized boolean matchesQuery(byte [] guid, Response response) {
RequestData data = (RequestData) mapper.get(new GUID(guid));
if (data == null || data.queryWords == null)
return false;
if (data.whatIsNew)
return true;
int minGood = FilterSettings.MIN_MATCHING_WORDS.getValue();
if (score(data.queryWords, response.getName()) > minGood)
return true;
LimeXMLDocument doc = response.getDocument();
if (doc != null) {
for (Iterator iter = new DualIterator(
doc.getKeyWords().iterator(),doc.getKeyWordsIndivisible().iterator());
iter.hasNext();) {
String xmlWord = (String) iter.next();
if (score(data.queryWords,xmlWord) > minGood ) return true;
}
}
return false;
}
/**
* Returns the score of the given response compared to the given query.
*
* @param query the query keyword string sent
* @param richQuery the XML metadata string sent, or null if none
* @param response the response to score, converted to RemoteFileDesc
* @return the percentage of query keywords (0-100) matching
*/
public static int score(String query,
LimeXMLDocument richQuery,
RemoteFileDesc response) {
return score(getSearchTerms(query, richQuery), response.getFileName());
}
/** Actual implementation of scoring; called from both public versions.
* @param queryWords the tokenized query keywords
* @param filename the name of the response*/
private static int score(List queryWords, String filename) {
int numMatchingWords=0;
int numQueryWords=queryWords.size();
if (numQueryWords==0)
return 100; // avoid divide-by-zero errors below
//Count the number of regular expressions from the query that
//match the result's name. Ignore case in comparison.
for (int i=0; i<numQueryWords; i++) {
String pattern = (String) queryWords.get(i);
if (StringUtils.contains(filename, pattern, true)) {
numMatchingWords++;
continue;
}
}
return (int)(100.0f * ((float)numMatchingWords/numQueryWords));
}
/**
* Returns true if response has the same media type as the
* corresponding query request the given GUID. In the rare case
* that guid is not known (because this' buffers overflowed),
* conservatively returns true.
*/
public boolean matchesType(byte[] guid, Response response) {
RequestData request=(RequestData)mapper.get(new GUID(guid));
if (request == null || request.type==null)
return true;
String reply = response.getName();
return request.type.matches(reply);
}
/**
* Returns true if the given response is an instance of the Mandragore
* Worm. This worm responds to the query "x" with a 8KB file named
* "x.exe". In the rare case that the query for guid can't be found
* returns false.
*/
public boolean isMandragoreWorm(byte[] guid, Response response) {
RequestData request=(RequestData)mapper.get(new GUID(guid));
if (request == null)
return false;
return response.getSize()==Mandragore_SIZE
&& response.getName().equals(request.query+".exe");
}
public String toString() {
return mapper.toString();
}
private static List getSearchTerms(String query,
LimeXMLDocument richQuery) {
String[] terms = null;
// combine xml and standard keywords
// ---------------------------------------
HashSet qWords=new HashSet();
terms = StringUtils.split(query.toLowerCase(), DELIMITERS);
// add the standard query words..
for (int i = 0; i < terms.length; i++)
qWords.add(terms[i]);
List xmlWords=null;
if (richQuery != null) {
xmlWords = richQuery.getKeyWords();
final int size = xmlWords.size();
// add a lowercase version of the xml words...
for (int i = 0; i < size; i++) {
String currWord = (String) xmlWords.remove(0);
qWords.add(currWord.toLowerCase());
}
}
return Collections.unmodifiableList(new ArrayList(qWords));
}
}