package ch.akuhn.hapax.index; import ch.akuhn.hapax.linalg.Vector; import ch.akuhn.hapax.linalg.Vector.Entry; public enum GlobalWeighting { ENTROPY1 { @Override public double weight(Vector term) { return 1 - (ENTROPY2.weight(term) / Math.log(term.size())); } }, ENTROPY2 { @Override public double weight(Vector term) { double gf = globalFrequency(term); double prop = 0; for (Entry each: term.entries()) { if (each.value == 0) continue; prop += (each.value / gf) * Math.log(each.value * gf); } return -prop; } }, GFIDF { @Override public double weight(Vector term) { return globalFrequency(term) / documentFrequency(term); } }, IDF { @Override public double weight(Vector term) { return Math.log((term.size() / documentFrequency(term))); } }, NULL; public int documentFrequency(Vector term) { return term.used(); } public int globalFrequency(Vector term) { return (int) term.sum(); } public double weight(Vector term) { return 1.0d; } }