package info.ephyra.trec;
import info.ephyra.nlp.NETagger;
import info.ephyra.nlp.SnowballStemmer;
import info.ephyra.nlp.indices.FunctionWords;
import java.util.HashSet;
/**
* A <code>TRECNugget</code> is a simple data structure for a nugget to be
* covered by the results returned for an 'other' question.
*
* @author Guido Sautter
* @version 2008-02-10
*/
public class TRECNugget {
	/** ID of the TREC target the OTHER question belongs to. */
	public final String targetID;
	/** ID of the OTHER question. */
	public final String questionID;
	/** ID of the nugget; together with {@link #targetID} it defines equality. */
	public final String nuggetID;
	/** Type of the nugget (okay or vital). */
	public final String nuggetType;
	/** The nugget's text. */
	public final String nugget;
	/**
	 * Number of distinct content terms in the nugget text: tokens that are
	 * longer than one character and not rejected by
	 * {@link FunctionWords#lookup}, counted after stemming and lower-casing.
	 */
	public final int size;

	/**
	 * Creates a nugget and computes its {@link #size} from the nugget text.
	 *
	 * @param targetID the targetID of the TREC target the OTHER question belongs to
	 * @param questionID the ID of the OTHER question
	 * @param nuggetID the ID of the nugget
	 * @param nuggetType the type of the nugget (okay or vital)
	 * @param nugget the nugget's text
	 */
	public TRECNugget(String targetID, String questionID, String nuggetID, String nuggetType, String nugget) {
		this.targetID = targetID;
		this.questionID = questionID;
		this.nuggetID = nuggetID;
		this.nuggetType = nuggetType;
		this.nugget = nugget;

		// Collect the normalized terms in a set so that repeated terms are
		// counted only once.
		String[] nTokens = NETagger.tokenize(nugget);
		HashSet<String> nSet = new HashSet<String>();
		for (String n : nTokens) {
			if (!FunctionWords.lookup(n) && (n.length() > 1)) {
				nSet.add(SnowballStemmer.stem(n).toLowerCase());
			}
		}
		this.size = nSet.size();
	}

	/**
	 * Two nuggets are equal if they have the same target ID and the same
	 * nugget ID; all other fields are ignored. Null IDs are only equal to
	 * null IDs.
	 *
	 * @param o the object to compare this nugget to
	 * @return <code>true</code> if <code>o</code> is a <code>TRECNugget</code>
	 *         with the same target ID and nugget ID
	 * @see java.lang.Object#equals(java.lang.Object)
	 */
	@Override
	public boolean equals(Object o) {
		if (this == o) return true;
		// instanceof is false for null, so no separate null check is needed
		if (!(o instanceof TRECNugget)) return false;
		TRECNugget nug = ((TRECNugget) o);
		return (sameID(this.targetID, nug.targetID) && sameID(this.nuggetID, nug.nuggetID));
	}

	/**
	 * Hash code derived from exactly the fields compared in
	 * {@link #equals(Object)}, so that equal nuggets always share a hash code
	 * and behave correctly in hash-based collections.
	 *
	 * @return the hash code of this nugget
	 * @see java.lang.Object#hashCode()
	 */
	@Override
	public int hashCode() {
		int hash = ((this.targetID == null) ? 0 : this.targetID.hashCode());
		return ((31 * hash) + ((this.nuggetID == null) ? 0 : this.nuggetID.hashCode()));
	}

	/** Null-safe string comparison used for the ID fields. */
	private static boolean sameID(String a, String b) {
		return ((a == null) ? (b == null) : a.equals(b));
	}
}