package hu.u_szeged.kpe.features;
import hu.u_szeged.kpe.candidates.NGram;
import hu.u_szeged.kpe.candidates.NGramStats;
import hu.u_szeged.kpe.readers.DocumentData;
import hu.u_szeged.utils.NLPUtils;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import edu.stanford.nlp.util.CoreMap;
/**
* Computes the standard deviation based on the positions of the candidate phrase within the document.
*/
public class STDevFeature extends Feature {
  private static final long serialVersionUID = 8928290175581042743L;

  public STDevFeature() {
    // Numeric feature; per-document values accumulate in a LinkedList
    // (order of insertion is preserved, duplicates allowed).
    scale = Scale.NUMERIC;
    collectionToStoreDocVals = LinkedList.class;
  }

  /**
   * Records every occurrence position of the candidate n-gram for the given document.
   * The positions are later aggregated into a standard deviation in {@link #aggregation}.
   */
  public void value(String phrase, int[] length, Entry<NGram, NGramStats> ngramForm, boolean train, int docToCheck,
      List<Map<String, Map<NGram, NGramStats>>> listOfHashs, List<CoreMap> sentences, DocumentData... docs) {
    for (Integer position : ngramForm.getValue().getPositions()) {
      updateFeatureVals(position, docToCheck);
    }
  }

  /**
   * Computes, for each document, the population standard deviation of the phrase's
   * relative positions (position divided by document length), then returns the mean
   * of the per-document values.
   * <p>
   * A document with no recorded positions (or a single position) contributes 0.0
   * instead of producing NaN via a division by zero.
   */
  protected double aggregation(List<Collection<Number>> docVals, String phrase, boolean train, List<int[]> length) {
    double[] perDocFeatureVals = new double[docVals.size()];
    for (int doc = 0; doc < docVals.size(); ++doc) {
      Collection<Number> positions = docVals.get(doc);
      int n = positions.size();
      if (n < 2) {
        // No spread can be measured from fewer than two positions; avoid NaN (0/0).
        perDocFeatureVals[doc] = 0.0;
        continue;
      }
      // length.get(doc)[0] is assumed to be the document length in tokens — TODO confirm against Feature base class.
      double docLength = length.get(doc)[0];
      double[] relativePositions = new double[n];
      int i = 0;
      for (Number position : positions) {
        relativePositions[i++] = position.doubleValue() / docLength;
      }
      double mean = NLPUtils.mean(relativePositions);
      double summ = 0.0;
      for (int j = 0; j < relativePositions.length; j++) {
        double diff = relativePositions[j] - mean;
        summ += diff * diff;
      }
      // Population standard deviation (divides by n, not n - 1).
      perDocFeatureVals[doc] = Math.sqrt(summ / relativePositions.length);
    }
    return NLPUtils.mean(perDocFeatureVals);
  }
}