package com.limegroup.gnutella.spam;
import java.util.Locale;
import java.util.regex.Pattern;
/**
 * A token representing a template that may have been used to create a filename.
 * <p>
 * A template is the lower-cased filename with every occurrence of the
 * lower-cased query replaced by a fixed placeholder string, and with any
 * leading digits and whitespace removed.
 */
public class TemplateToken extends KeywordToken {

    /**
     * Unlike keywords or file extensions, templates should be quite unlikely to
     * occur in both spam and non-spam responses, so we can give them a fairly
     * high weight.
     */
    private static final float TEMPLATE_WEIGHT = 0.9f;

    /**
     * Short templates should be given less weight because they're more likely
     * to occur in both spam and non-spam responses.
     */
    private static final int SHORT_TEMPLATE_LENGTH = 8;

    /**
     * The string that is used to replace the query when creating a template
     * from a filename.
     */
    private static final String REPLACEMENT_STRING = "****";

    /**
     * Matches the (possibly empty) run of digits and whitespace at the start
     * of a template. Compiled once rather than on every call to create().
     */
    private static final Pattern LEADING_DIGITS_AND_WHITESPACE =
            Pattern.compile("^[0-9\\s]*");

    /** The weight of this token, fixed at construction time. */
    private final float weight;

    /**
     * Private constructor - create tokens by calling create().
     * <p>
     * The weight is TEMPLATE_WEIGHT when the template, not counting one
     * placeholder, is at least SHORT_TEMPLATE_LENGTH characters long;
     * shorter templates get a proportionally smaller weight.
     *
     * @param template the template text, passed on to the superclass
     */
    private TemplateToken(String template) {
        super(template);
        // Only one placeholder is discounted, even if the query occurred
        // more than once in the filename.
        int length = template.length() - REPLACEMENT_STRING.length();
        if (length >= SHORT_TEMPLATE_LENGTH) {
            weight = TEMPLATE_WEIGHT;
        } else {
            weight = TEMPLATE_WEIGHT * length / SHORT_TEMPLATE_LENGTH;
        }
    }

    /**
     * If the filename contains the query (ignoring case), returns a new token
     * representing the filename with the query replaced with a fixed string.
     * Digits and whitespace are stripped from the start of the template.
     * Otherwise returns null.
     * <p>
     * Both arguments are trimmed and lower-cased before they are compared.
     *
     * @param query the search query; may be null
     * @param filename the filename of the response; may be null
     * @return a token for the template, or null if either argument is null,
     *         the trimmed query is empty, the filename does not contain the
     *         query, or the filename is equal to the query
     */
    public static TemplateToken create(String query, String filename) {
        if (query == null || filename == null) {
            return null;
        }
        query = query.trim().toLowerCase(Locale.US);
        filename = filename.trim().toLowerCase(Locale.US);
        // Every string contains the empty string, and replacing the empty
        // string would insert the placeholder between every character of the
        // filename, so an empty query cannot yield a meaningful template.
        if (query.length() == 0) {
            return null;
        }
        if (!filename.contains(query) || filename.equals(query)) {
            return null;
        }
        String template = filename.replace(query, REPLACEMENT_STRING);
        template = LEADING_DIGITS_AND_WHITESPACE.matcher(template).replaceFirst("");
        return new TemplateToken(template);
    }

    /**
     * Returns the weight that was computed from the template's length when
     * this token was constructed.
     */
    @Override
    protected float getWeight() {
        return weight;
    }

    /**
     * Two template tokens are equal if their keywords are equal; an object
     * that is not a TemplateToken is never equal to one.
     */
    // NOTE(review): hashCode() is not overridden here; presumably KeywordToken
    // provides one based on keyword - confirm against the superclass.
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof TemplateToken)) {
            return false;
        }
        return keyword.equals(((TemplateToken) o).keyword);
    }

    /**
     * Returns the keyword prefixed with "template ".
     */
    @Override
    public String toString() {
        return "template " + keyword;
    }
}