package com.limegroup.gnutella.spam;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.settings.SearchSettings;
import com.limegroup.gnutella.util.CommonUtils;
import com.limegroup.gnutella.util.IOUtils;
/**
 * Process-wide table of the <tt>Token</tt>s seen so far, used to rate
 * <tt>RemoteFileDesc</tt>s and <tt>QueryRequest</tt>s.
 *
 * The table is loaded from "spam.dat" in the user settings directory when
 * the singleton is created and is written back by {@link #save()} or
 * {@link #ageAndSave()}. Structural access to the internal map is guarded by
 * the monitor of the single instance.
 */
public class RatingTable {

    // NOTE: LOG must be declared before INSTANCE, because the constructor
    // (run while INSTANCE is initialized) already logs.
    private static final Log LOG = LogFactory.getLog(RatingTable.class);

    /**
     * Size limit of the table: the in-memory map is pruned back to this many
     * entries once it reaches twice this size (see checkSize()), and no more
     * than this many entries are written to disk (see save()).
     */
    private static final int MAX_SIZE = 50000;

    private static final RatingTable INSTANCE = new RatingTable();

    /**
     * @return single instance of this
     */
    public static RatingTable instance() {
        return INSTANCE;
    }

    /**
     * A Map containing all tokens.
     *
     * The data is stored as Token -> Token by design (and purposely not as a
     * Set), so that the stored value can be retrieved using an equal Token as
     * the identifier. This way a third party can create a blank Token object
     * without rating data and ask the RatingTable for the actual Token that
     * should be used in place of it (the one that carries rating data).
     */
    private final Map _tokenMap;

    /**
     * Constructor; tries to deserialize the filter data from disk. If that
     * fails the table simply starts out empty.
     */
    private RatingTable() {
        // deserialize
        _tokenMap = readData();
        if (LOG.isDebugEnabled()) {
            LOG.debug("size of tokenSet " + _tokenMap.size());
        }
    }

    /**
     * Clears the filter data.
     */
    synchronized void clear() {
        _tokenMap.clear();
    }

    /**
     * Returns the rating for a RemoteFileDesc.
     *
     * The tokens of the RemoteFileDesc are looked up in (and, if unknown,
     * added to) this table before they are rated.
     *
     * @param desc
     *            the RemoteFileDesc to rate
     * @return the rating for the RemoteFileDesc
     */
    float getRating(RemoteFileDesc desc) {
        float ret = getRating(lookup(Tokenizer.getTokens(desc)));
        if (LOG.isDebugEnabled()) {
            LOG.debug(desc.toString() + " rated " + ret);
        }
        return ret;
    }

    /**
     * Returns the cumulative rating for an array of Tokens, computed as
     * <tt>1 - product(1 - rating(token))</tt>.
     *
     * Side effect: depending on the result the tokens are re-marked. If the
     * rating is at least the FILTER_SPAM_RESULTS setting (and not above
     * SpamManager.MAX_THRESHOLD) all tokens are rated Token.RATING_SPAM; if
     * it is at most <tt>1 - FILTER_SPAM_RESULTS</tt> all tokens are rated
     * Token.RATING_GOOD.
     *
     * @param tokens
     *            an array of Token
     * @return the cumulative rating
     */
    float getRating(Token[] tokens) {
        // Accumulates product(1 - rating); once it drops to 0 (or below) the
        // remaining tokens cannot change the outcome, so the loop stops.
        float rating = 1;
        for (int i = 0; i < tokens.length && rating > 0; i++) {
            rating *= (1 - tokens[i].getRating());
        }
        rating = 1 - rating;

        float bad = SearchSettings.FILTER_SPAM_RESULTS.getValue();
        if (rating >= bad && rating <= SpamManager.MAX_THRESHOLD) {
            markInternal(tokens, Token.RATING_SPAM);
        } else if (rating <= 1f - bad) {
            markInternal(tokens, Token.RATING_GOOD);
        }
        return rating;
    }

    /**
     * Marks the Tokens of an array of RemoteFileDesc.
     *
     * @param descs
     *            an array of RemoteFileDesc
     * @param rating
     *            must be a rating as defined by the Token interface
     */
    void mark(RemoteFileDesc[] descs, int rating) {
        markInternal(lookup(Tokenizer.getTokens(descs)), rating);
    }

    /**
     * Marks the Tokens of a single RemoteFileDesc.
     *
     * @param desc
     *            the RemoteFileDesc to mark
     * @param rating
     *            must be a rating as defined by the Token interface
     */
    void mark(RemoteFileDesc desc, int rating) {
        markInternal(lookup(Tokenizer.getTokens(desc)), rating);
    }

    /**
     * Marks a single QueryRequest, or rather the Tokens associated with it.
     *
     * @param qr
     *            the QueryRequest to mark
     * @param rating
     *            must be a rating as defined by the Token interface
     */
    void mark(QueryRequest qr, int rating) {
        markInternal(lookup(Tokenizer.getTokens(qr)), rating);
    }

    /**
     * Applies a rating to every Token in an array.
     *
     * @param tokens
     *            the Tokens to mark
     * @param rating
     *            must be a rating as defined by the Token interface
     */
    private void markInternal(Token[] tokens, int rating) {
        for (int i = 0; i < tokens.length; i++) {
            tokens[i].rate(rating);
        }
    }

    /**
     * Replaces all tokens with equal tokens from the _tokenMap.
     *
     * The passed array is modified in place and returned.
     *
     * @param tokens
     *            an array of Token
     * @return the same array, where every Token that is equal to a Token
     *         already stored in this table has been replaced with the stored
     *         instance
     */
    private Token[] lookup(Token[] tokens) {
        for (int i = 0; i < tokens.length; i++) {
            // lookup stored token
            tokens[i] = lookup(tokens[i]);
        }
        return tokens;
    }

    /**
     * Replaces a Token with the copy stored in our internal _tokenMap if
     * possible, stores the Token in the _tokenMap otherwise.
     *
     * @param token
     *            the Token to look up in _tokenMap
     * @return token or the matching copy of it from _tokenMap
     */
    private synchronized Token lookup(Token token) {
        Token stored = (Token) _tokenMap.get(token);
        if (stored == null) {
            _tokenMap.put(token, token);
            // Adding an entry may push the table over its hard limit.
            checkSize();
            stored = token;
        }
        return stored;
    }

    /**
     * Reads the table from disk.
     *
     * This is best effort: if the file is missing, unreadable or does not
     * contain a Map, an empty HashMap is returned instead.
     *
     * @return Map of <tt>Token</tt> to <tt>Token</tt> as read from disk, or
     *         an empty Map
     */
    private Map readData() {
        ObjectInputStream is = null;
        try {
            is = new ObjectInputStream(
                    new BufferedInputStream(
                            new FileInputStream(getSpamDat())));
            Object read = is.readObject();
            if (read instanceof Map) {
                return (Map) read;
            }
            return new HashMap();
        } catch (Exception someKindOfError) {
            // Deliberately broad: any failure (no file yet, corrupt data,
            // incompatible classes) just means we start with an empty table.
            // Leave a trace so the cause is not completely invisible.
            if (LOG.isDebugEnabled()) {
                LOG.debug("reading rating table failed", someKindOfError);
            }
            return new HashMap();
        } finally {
            IOUtils.close(is);
        }
    }

    /**
     * Saves the data from this table to disk.
     *
     * If the table holds more than MAX_SIZE entries it is pruned first. The
     * map is copied while holding the monitor; the (shallow) copy is then
     * serialized outside of it. A failure to write is only logged.
     */
    public void save() {
        Map copy;
        synchronized (this) {
            if (_tokenMap.size() > MAX_SIZE) {
                pruneEntries();
            }
            copy = new HashMap(_tokenMap);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("size of tokenMap " + copy.size());
        }

        ObjectOutputStream oos = null;
        try {
            oos = new ObjectOutputStream(
                    new BufferedOutputStream(
                            new FileOutputStream(getSpamDat())));
            oos.writeObject(copy);
            oos.flush();
        } catch (IOException iox) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("saving rating table failed", iox);
            }
        } finally {
            IOUtils.close(oos);
        }
    }

    /**
     * Marks that the table will be serialized to disk and not accessed for a
     * long time (i.e. LimeWire is about to get shut down): increments the age
     * of every stored Token and then saves the table.
     *
     * The monitor is held for the whole operation, including the write.
     */
    public synchronized void ageAndSave() {
        for (Iterator iter = _tokenMap.values().iterator(); iter.hasNext();) {
            ((Token) iter.next()).incrementAge();
        }
        save();
    }

    /**
     * Checks the size of _tokenMap and prunes entries once it has reached
     * twice MAX_SIZE.
     */
    private synchronized void checkSize() {
        if (_tokenMap.size() < MAX_SIZE * 2) {
            return;
        }
        pruneEntries();
    }

    /**
     * Removes the lowest ordered elements (per the Tokens' natural ordering)
     * from _tokenMap until there are at most MAX_SIZE entries.
     *
     * LOCKING: MUST hold monitor (synchronize) of "this" when calling
     * this method.
     */
    private void pruneEntries() {
        if (LOG.isDebugEnabled()) {
            LOG.debug("pruning unimportant entries from RatingTable");
        }

        int tokensToRemove = _tokenMap.size() - MAX_SIZE;
        if (tokensToRemove <= 0) {
            return;
        }

        // Make a set of sorted tokens, low importance first
        Set sortedTokens = new TreeSet(_tokenMap.values());
        Iterator it = sortedTokens.iterator();

        // A TreeSet keeps only one of several elements that compare as equal,
        // so it may hold fewer elements than the map. Guard with hasNext() so
        // that such ties cannot make next() throw NoSuchElementException.
        while (tokensToRemove > 0 && it.hasNext()) {
            // Note: Although we are iterating over the sorted values of the
            // map, and then removing from it using those items (as opposed to
            // the keys), this works fine because the Map stores the same
            // element in key/value.
            _tokenMap.remove(it.next());
            --tokensToRemove;
        }

        if (tokensToRemove > 0 && LOG.isDebugEnabled()) {
            LOG.debug("could not prune " + tokensToRemove
                    + " entries, tokens compared as equal");
        }
    }

    /**
     * @return the file the table is persisted in: "spam.dat" inside the user
     *         settings directory
     */
    private static File getSpamDat() {
        return new File(CommonUtils.getUserSettingsDir(), "spam.dat");
    }
}