package uk.ac.shef.dcs.jate.algorithm; import org.apache.log4j.Logger; import uk.ac.shef.dcs.jate.JATEException; import uk.ac.shef.dcs.jate.feature.AbstractFeature; import uk.ac.shef.dcs.jate.feature.FrequencyTermBased; import uk.ac.shef.dcs.jate.model.JATETerm; import java.util.*; /** * Average Total Term Frequency (ATTF). Compute the total frequency of a term in a corpus (ttf), and total document frequency (tdf). * Then divide ttf by tdf */ public class ATTF extends Algorithm{ private static Logger LOG = Logger.getLogger(ATTF.class.getName()); @Override public List<JATETerm> execute(Collection<String> candidates) throws JATEException { LOG.info("Calculating ATTF for "+candidates.size()+" candidate terms."); AbstractFeature feature = features.get(FrequencyTermBased.class.getName()); validateFeature(feature, FrequencyTermBased.class); FrequencyTermBased fFeature = (FrequencyTermBased) feature; List<JATETerm> result = new ArrayList<>(); for(String tString: candidates){ Integer ttf = fFeature.getTTF(tString); Integer docFrequency = fFeature.getTermFrequencyInDocument(tString).size(); double score; if(ttf==0) score=0; else score = (double)ttf/docFrequency; JATETerm term = new JATETerm(tString, score); result.add(term); } Collections.sort(result); LOG.info("Complete calculating ATTF"); return result; } }