package com.limegroup.gnutella.spam;
import java.util.Locale;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.settings.SearchSettings;
public class SpamManager {
private static final Log LOG = LogFactory.getLog(SpamManager.class);
/**
* If an RFDs spam rating is > MAX_THRESHOLD we will not remember the rating
* for the Tokens of the RFD because it e.g. a spammer very frequently
* sending a bad UrnToken with varying keywords, sizes and addresses may be
* able to pollute the filter data
*/
public static final float MAX_THRESHOLD = 0.995f;
/**
* inverse rating (1 - probability) for an RFD without SHA1 urn.
*/
private static final float NO_SHA1_URN_RATING = 0.5f;
/**
* incomplete file... save the user the trouble of downloading it, if he has
* his spam filter enabled
*/
private static final float INCOMPLETE_FILE_RATING = 0.2f;
private static final SpamManager INSTANCE = new SpamManager();
public static SpamManager instance() {
return INSTANCE;
}
private SpamManager() {
}
/**
* informs the SpamManager of any query that was started and clears bad
* ratings for the keywords in the query
*
* @param qr
* the QueryRequest for the query.
*/
public void startedQuery(QueryRequest qr) {
if (SearchSettings.ENABLE_SPAM_FILTER.getValue())
RatingTable.instance().mark(qr, Token.RATING_CLEARED);
}
/**
* This method will rate a given rfd and return whether or not the
* SpamManager believes this is spam
*
* @param rfd
* the RemoteFileDesc to rate
* @modifies rfd
* @return true if the SpamManager internally rated it as spam and false if
* the SpamManager did not rate it as spam
*/
public boolean isSpam(RemoteFileDesc rfd) {
if (!SearchSettings.ENABLE_SPAM_FILTER.getValue())
return false;
// rate simple spam...
float rating = 0.f;
if (rfd.getSHA1Urn() == null &&
rfd.getXMLDocument() != null &&
rfd.getXMLDocument().getAction().length() == 0)
rating = 1 - (1 - rating) * NO_SHA1_URN_RATING;
if (isIncompleteFile(rfd.getFileName().toLowerCase(Locale.US))) {
rating = 1 - (1 - rating) * INCOMPLETE_FILE_RATING;
}
// apply bayesian filter
rating = 1 - (1 - rating) * (1 - RatingTable.instance().getRating(rfd));
rfd.setSpamRating(rating);
return rating >= Math.max(SearchSettings.FILTER_SPAM_RESULTS.getValue(),
SearchSettings.QUERY_SPAM_CUTOFF.getValue());
}
/**
* this method is called if the user marked some RFDs as being spam
*
* @param rfds
* an array of RemoteFileDesc that should be marked as good
*/
public void handleUserMarkedSpam(RemoteFileDesc[] rfds) {
for (int i = 0; i < rfds.length; i++)
rfds[i].setSpamRating(1.f);
RatingTable.instance().mark(rfds, Token.RATING_USER_MARKED_SPAM);
}
/**
* this method is called if the user marked some RFDs as not being spam
*
* @param rfds
* an array of RemoteFileDesc that should be marked as good
*/
public void handleUserMarkedGood(RemoteFileDesc[] rfds) {
for (int i = 0; i < rfds.length; i++)
rfds[i].setSpamRating(0.f);
RatingTable.instance().mark(rfds, Token.RATING_USER_MARKED_GOOD);
}
/**
* clears all collected filter data
*/
public void clearFilterData() {
RatingTable.instance().clear();
}
/**
* look for
* <ul>
* <li>__INCOMPLETE</li>
* <li>___ARESTRA</li>
* <li>___INCOMPLETED</li>
* <li>PREVIEW-T-</li>
* <li>CORRUPT-(number)-</li>
* <li>T-(number)-</li>
*
* @param name
* the name of the file from a search result
* @return true if we think that this is an incomplete file
*/
private boolean isIncompleteFile(String name) {
if (name.startsWith("__incomplete"))
return true;
if (name.startsWith("___incompleted"))
return true;
if (name.startsWith("___arestra"))
return true;
if (name.startsWith("preview-t-"))
return true;
if (name.startsWith("t-")) {
for (int i = 2; i < name.length(); i++) {
if (Character.isDigit(name.charAt(i)))
continue;
else
return name.charAt(i) == '-';
}
}
if (name.startsWith("corrupt-")) {
for (int i = 8; i < name.length(); i++) {
if (Character.isDigit(name.charAt(i)))
continue;
else
return name.charAt(i) == '-';
}
}
return false;
}
}