package ivory.ffg.stats; import tl.lin.data.map.HMapIF; /** * Global statistics used in computing features (e.g., idf and cf) * * @author Nima Asadi */ public class GlobalStats { private static HMapIF idf; private static HMapIF cf; private static int documentCount; private static long collectionLength; private static float avgDocumentLength; private static float defaultDf; private static float defaultCf; private static float defaultIdf; /** * Creates an instance of this class with the provided information * * @param idf Map of term ids to IDF values * @param cf Map of term ids to Collection Frequency (CF) values * @param documentCount Number of documents in the collection * @param collectionLength Length of the collection (i.e., number of terms) * @param avgDocumentLength Average length of documents (i.e., average number of term per document * @param defaultDf Default value for document frequency (used for phrase queries) * @param defaultCf Default value for collection frequency (used for phrase queries) */ public GlobalStats(HMapIF idf, HMapIF cf, int documentCount, long collectionLength, float avgDocumentLength, float defaultDf, float defaultCf) { this.idf = idf; this.cf = cf; this.documentCount = documentCount; this.collectionLength = collectionLength; this.avgDocumentLength = avgDocumentLength; this.defaultDf = defaultDf; this.defaultCf = defaultCf; this.defaultIdf = (float) Math.log((documentCount - defaultDf + 0.5f) / (defaultDf + 0.5f)); } /** * @param term Term id * @return idf value of a term */ public float getIdf(int term) { return idf.get(term); } /** * @return idf values */ public HMapIF getIdfs() { return idf; } /** * @param term Term id * @return cf value of a term */ public float getCf(int term) { return cf.get(term); } /** * @return collection frequencies */ public HMapIF getCfs() { return cf; } /** * @return Total number of documents in the collection */ public int getDocumentCount() { return documentCount; } /** * @return Length of the collection (i.e., number of terms) */ public long getCollectionLength() { return collectionLength; } /** * @return Average document length in the collection */ public float getAvgDocumentLength() { return avgDocumentLength; } /** * @return Default document frequency */ public float getDefaultDf() { return defaultDf; } /** * @return Default collection frequency */ public float getDefaultCf() { return defaultCf; } /** * @return Default inverse-document frequency */ public float getDefaultIdf() { return defaultIdf; } }