package hu.u_szeged.kpe.features;
import hu.u_szeged.kpe.candidates.NGram;
import hu.u_szeged.kpe.candidates.NGramStats;
import hu.u_szeged.kpe.readers.DocumentData;
import hu.u_szeged.utils.NLPUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import edu.stanford.nlp.util.CoreMap;
/**
 * Determines, per document, the ratio between the number of reference-containing sentences and the total number of sentences in which
 * the orthographic forms of an NGram occur. The per-document ratios are then combined with {@code NLPUtils.mean}.
 */
public class HasReferenceFeature extends Feature {

  private static final long serialVersionUID = -2156406089444467312L;

  /**
   * Running total, per document index, of the sentence positions reported to {@code value(...)}. Created lazily on the first
   * {@code value(...)} call and reset to {@code null} at the end of {@code aggregation(...)}.
   */
  private List<Integer> sentencesNum;

  /** Creates the feature and marks its scale as numeric. */
  public HasReferenceFeature() {
    scale = Scale.NUMERIC;
  }

  /**
   * Records the statistics of one orthographic form of the phrase for the document at index {@code docToCheck}: the number of sentence
   * positions of the form is added to that document's running total, and the form's reference count is handed to
   * {@code updateFeatureVals}.
   *
   * @param phrase the phrase being evaluated (not read here)
   * @param length not read here
   * @param ngramForm the orthographic form and its statistics
   * @param train not read here
   * @param docToCheck index of the document the statistics belong to; must be a valid index into {@code docs}
   * @param listOfHashs not read here
   * @param sentences not read here
   * @param docs the documents under consideration; only their count is used, to size the per-document totals
   */
  public void value(String phrase, int[] length, Entry<NGram, NGramStats> ngramForm, boolean train, int docToCheck,
      List<Map<String, Map<NGram, NGramStats>>> listOfHashs, List<CoreMap> sentences, DocumentData... docs) {
    if (sentencesNum == null) {
      // First call since the last aggregation: start every document at zero sentences.
      sentencesNum = new ArrayList<Integer>(docs.length);
      for (int i = 0; i < docs.length; ++i) {
        sentencesNum.add(0);
      }
    }
    NGramStats stats = ngramForm.getValue();
    sentencesNum.set(docToCheck, sentencesNum.get(docToCheck) + stats.getSentencePositions().size());
    updateFeatureVals(stats.numWithReferences(), docToCheck);
  }

  /**
   * Turns the collected values into the final feature value. For each document the last value in its collection is divided by the
   * document's accumulated sentence total; a document with no sentences, no collected value, or no accumulated total keeps a ratio of
   * 0. The accumulated totals are discarded afterwards so the next phrase starts from scratch.
   *
   * @param docVals the values collected per document
   * @param phrase the phrase being evaluated (not read here)
   * @param train not read here
   * @param length not read here
   * @return the result of {@code NLPUtils.mean} over the per-document ratios (presumably their arithmetic mean)
   */
  protected double aggregation(List<Collection<Number>> docVals, String phrase, boolean train, List<int[]> length) {
    double[] perDocFeatureVals = new double[docVals.size()];
    // value(...) may not have been called since the last aggregation, and the two lists need not have the same size;
    // documents without an accumulated total simply keep their default ratio of 0.
    int comparableDocs = sentencesNum == null ? 0 : Math.min(sentencesNum.size(), docVals.size());
    for (int doc = 0; doc < comparableDocs; ++doc) {
      int sentenceTotal = sentencesNum.get(doc);
      Number referenceCount = lastValue(docVals.get(doc));
      if (sentenceTotal > 0 && referenceCount != null) {
        perDocFeatureVals[doc] = referenceCount.doubleValue() / sentenceTotal;
      }
    }
    sentencesNum = null;
    return NLPUtils.mean(perDocFeatureVals);
  }

  /**
   * Returns the last element of the collection in iteration order, or {@code null} when the collection is {@code null} or empty.
   * Works for any collection type rather than requiring an {@code ArrayList}.
   */
  private static Number lastValue(Collection<Number> values) {
    if (values == null || values.isEmpty()) {
      return null;
    }
    if (values instanceof List) {
      List<Number> asList = (List<Number>) values;
      return asList.get(asList.size() - 1);
    }
    Number last = null;
    for (Number current : values) {
      last = current;
    }
    return last;
  }
}