/*
* Copyright (c) 2016, Peter Anderson <peter.anderson@anu.edu.au>
*
* This file is part of Semantic Propositional Image Caption Evaluation
* (SPICE).
*
* SPICE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* SPICE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* You should have received a copy of the GNU Affero General Public
* License along with SPICE. If not, see <http://www.gnu.org/licenses/>.
*
*/
package edu.anu.spice;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
/**
* Calculates SemanticTuple (inverse) document frequency based on the reference set.
*
*/
public class DocumentFrequency {
protected HashMap<String, Integer> df;
protected int N;
public DocumentFrequency() {
this.df = new HashMap<String, Integer>();
this.N = 0;
}
public void addTuples(Map<String, ArrayList<ArrayList<String>>> tuples){
for (Entry<String, ArrayList<ArrayList<String>>> entry : tuples.entrySet()) {
for (ArrayList<String> t : entry.getValue()){
String tString = t.toString();
if (this.df.containsKey(tString)){
this.df.put(tString, this.df.get(tString)+1);
} else {
this.df.put(tString, 1);
}
this.N += 1;
}
}
}
public int df(SemanticTuple tuple) {
ArrayList<ArrayList<String>> enumeratedTuples = tuple.enumerateTuples();
int df = 0;
// Examine all the enumerated tuples of a SemanticConcept
for (ArrayList<String> t: enumeratedTuples){
String tString = t.toString();
if (this.df.containsKey(tString)){
df += this.df.get(tString);
}
}
return df;
}
public double idf(SemanticTuple tuple) {
return Math.log( (double)N / Math.max(this.df(tuple),1) );
}
}