package qa.qcri.aidr.predict.classification;
import java.util.LinkedList;
import qa.qcri.aidr.predict.data.Document;
import qa.qcri.aidr.predict.featureextraction.WordSet;
/**
 * Remembers the word sets of recently accepted documents in a bounded,
 * first-in-first-out window and uses that window to reject documents that
 * are too similar to one already seen.
 *
 * <p>The window holds at most {@code bufferSize} entries; once it grows past
 * that size the oldest entry is discarded.
 */
public class DocumentHistory {

    /** Word sets of the most recently accepted documents, oldest first. */
    LinkedList<WordSet> recentWordVectors = new LinkedList<WordSet>();

    /** Maximum number of word sets kept in {@link #recentWordVectors}. */
    int bufferSize;

    /**
     * Threshold compared against {@code WordSet.getSimilarity}: a score
     * strictly greater than this value marks a document as a duplicate.
     */
    double similarity;

    /**
     * Creates an empty history.
     *
     * @param bufferSize maximum number of word sets to remember
     * @param similarity score above which a document is treated as a duplicate
     */
    public DocumentHistory(int bufferSize, double similarity) {
        this.similarity = similarity;
        this.bufferSize = bufferSize;
    }

    /**
     * Records the document's word set unless it is too similar to one that
     * is already remembered.
     *
     * <p>Only the first {@link WordSet} feature of the document is used.
     * NOTE(review): this assumes the document carries at least one
     * {@code WordSet} feature; what happens otherwise depends on
     * {@code Document.getFeatures}, which is not visible here.
     *
     * @param doc the document to test and, if novel, remember
     * @return {@code true} if the document was novel and has been recorded,
     *         {@code false} if it was rejected as a near-duplicate
     */
    public boolean addItemIfNovel(Document doc) {
        WordSet candidate = doc.getFeatures(WordSet.class).get(0);

        if (resemblesRecentDocument(candidate)) {
            return false;
        }

        recentWordVectors.addLast(candidate);
        // Evict the oldest entry once the window exceeds its capacity.
        if (recentWordVectors.size() > bufferSize) {
            recentWordVectors.removeFirst();
        }
        return true;
    }

    /**
     * Scans the remembered word sets, oldest first, and stops at the first
     * one whose similarity to the candidate exceeds the threshold.
     */
    private boolean resemblesRecentDocument(WordSet candidate) {
        for (WordSet remembered : recentWordVectors) {
            // TODO: This threshold probably needs some tuning.
            if (remembered.getSimilarity(candidate) > similarity) {
                return true;
            }
        }
        return false;
    }
}