package ivory.pwsim.score; public class Bm25 extends ScoringModel { private float k_1 = 1.2f; private float k_3 = 1000f; private float b = 0.75f; public float computeScore(int q_tf, int doc_tf, int q_len, int doc_len) { // This definition of K is different from standard BM25: it has an // additional d_tf at the end. Empirically, the extra factor increases // effectiveness. float K = k_1 * ((1 - b) + b * (doc_len / mAvgDocLength)) + doc_tf; float rsj = (float) Math.log((mDocCount - mDF + 0.5f) / (mDF + 0.5f)); float val = ((k_1 + 1.0f) * doc_tf / (K + doc_tf)) * ((k_3 + 1.0f) * q_tf) / (k_3 + q_tf); return rsj * val; } public float computeDocumentWeight(int doc_tf, int doc_len) { float K = k_1 * ((1 - b) + b * (doc_len / mAvgDocLength)) + doc_tf; float rsj = (float) Math.log((mDocCount - mDF + 0.5f) / (mDF + 0.5f)); float val = ((k_1 + 1.0f) * doc_tf / (K + doc_tf)); return rsj * val; } public float computeDocumentWeight(float doc_tf, float df, int doc_len) { float K = k_1 * ((1 - b) + b * (doc_len / mAvgDocLength)) + doc_tf; float rsj = (float) Math.log((mDocCount - df + 0.5f) / (df + 0.5f)); float val = ((k_1 + 1.0f) * doc_tf / (K + doc_tf)); return rsj * val; } public float computeQueryWeight(int q_tf, int q_len) { return ((k_3 + 1.0f) * q_tf) / (k_3 + q_tf); } }