package com.limegroup.gnutella.spam;
import java.util.Locale;
import org.limewire.core.settings.SearchSettings;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.messages.QueryReply;
import com.limegroup.gnutella.messages.QueryRequest;
/**
 * Rates search results as spam or not spam by comparing them with earlier
 * results that were marked, manually or automatically, as spam or not spam.
 */
@Singleton
class SpamManagerImpl implements SpamManager {

    /**
     * Initial rating for a file that appears from its name to be incomplete.
     */
    private static final float INCOMPLETE_FILE_RATING = 0.8f;

    /**
     * Filename prefixes (lower case) that on their own mark a result as an
     * incomplete file.
     */
    private static final String[] INCOMPLETE_PREFIXES = {
        "incomplete_",
        "incomplete~",
        "inacheve_",
        "in_",
        "__incomplete",
        "___incompleted",
        "___arestra",
        "preview-t-"
    };

    private final RatingTable ratingTable;

    @Inject
    SpamManagerImpl(RatingTable ratingTable) {
        this.ratingTable = ratingTable;
    }

    /**
     * Exposes the rating table used by this manager. For testing.
     */
    @Override
    public RatingTable getRatingTable() {
        return ratingTable;
    }

    /**
     * Clears the ratings for the keywords of a query started by the user.
     * Does nothing when the spam filter setting is disabled.
     *
     * @param qr the QueryRequest started by the user
     */
    @Override
    public void startedQuery(QueryRequest qr) {
        if (!SearchSettings.ENABLE_SPAM_FILTER.getValue()) {
            return;
        }
        ratingTable.clear(qr);
    }

    /**
     * Computes the spam rating of a RemoteFileDesc, stores it on the
     * RemoteFileDesc and returns it. When the spam filter setting is disabled
     * the result is 0 and the RemoteFileDesc is left untouched.
     *
     * @param rfd the RemoteFileDesc to rate
     * @return the spam rating of the RemoteFileDesc, between 0 (not spam) and 1
     *         (spam)
     */
    @Override
    public float calculateSpamRating(RemoteFileDesc rfd) {
        if (!SearchSettings.ENABLE_SPAM_FILTER.getValue()) {
            return 0;
        }

        float spamRating = 0;

        // TODO: these results should probably be ignored (possibly using the
        // filters package) rather than treated as spam
        String lowerCaseName = rfd.getFileName().toLowerCase(Locale.US);
        if (isIncompleteFile(lowerCaseName)) {
            spamRating = combine(spamRating, INCOMPLETE_FILE_RATING);
        }

        // Apply the 'Bayesian' filter
        spamRating = combine(spamRating, ratingTable.getRating(rfd));

        rfd.setSpamRating(spamRating);
        return spamRating;
    }

    /**
     * Raises the spam ratings of the tokens associated with a spam query
     * reply. Does nothing when the spam filter setting is disabled.
     */
    @Override
    public void handleSpamQueryReply(QueryReply qr) {
        if (!SearchSettings.ENABLE_SPAM_FILTER.getValue()) {
            return;
        }
        ratingTable.rate(qr, 1);
    }

    /**
     * Marks RFDs selected by the user as spam: each RFD gets a rating of 1 and
     * the rating table is then updated with the same rating.
     *
     * @param rfds an array of RemoteFileDescs that should be marked as spam
     */
    @Override
    public void handleUserMarkedSpam(RemoteFileDesc[] rfds) {
        for (RemoteFileDesc marked : rfds) {
            marked.setSpamRating(1);
        }
        // Update the ratings of the tokens associated with the RFDs
        ratingTable.rate(rfds, 1);
    }

    /**
     * Marks RFDs selected by the user as good: each RFD gets a rating of 0 and
     * the rating table is then updated with the same rating.
     *
     * @param rfds an array of RemoteFileDescs that should be marked as good
     */
    @Override
    public void handleUserMarkedGood(RemoteFileDesc[] rfds) {
        for (RemoteFileDesc marked : rfds) {
            marked.setSpamRating(0);
        }
        // Update the ratings of the tokens associated with the RFDs
        ratingTable.rate(rfds, 0);
    }

    /**
     * Discards all collected filter data.
     */
    @Override
    public void clearFilterData() {
        ratingTable.clear();
    }

    /**
     * Merges two ratings into one using {@code 1 - (1 - a) * (1 - b)}.
     *
     * @param current the rating accumulated so far
     * @param extra the rating to merge in
     * @return the merged rating
     */
    private static float combine(float current, float extra) {
        return 1 - (1 - current) * (1 - extra);
    }

    /**
     * Decides whether a filename looks like that of an incomplete file.
     *
     * @param name the name of the file (from a search result); the caller
     *        passes it in lower case
     * @return true if we think that this is an incomplete file
     */
    private boolean isIncompleteFile(String name) {
        for (String prefix : INCOMPLETE_PREFIXES) {
            if (name.startsWith(prefix)) {
                return true;
            }
        }
        // Names such as "t-1234-..." or "corrupt-1234-..."
        return hasNumberedPrefix(name, "t-")
                || hasNumberedPrefix(name, "corrupt-");
    }

    /**
     * Tests whether a name consists of the given prefix, then any run of
     * digits (possibly empty), then a '-' character.
     *
     * @param name the filename to inspect
     * @param prefix the literal prefix the name must start with
     * @return true if the first non-digit character after the prefix is '-'
     */
    private static boolean hasNumberedPrefix(String name, String prefix) {
        if (!name.startsWith(prefix)) {
            return false;
        }
        int pos = prefix.length();
        while (pos < name.length() && Character.isDigit(name.charAt(pos))) {
            pos++;
        }
        // Running off the end without meeting a non-digit is not a match
        return pos < name.length() && name.charAt(pos) == '-';
    }
}