package edu.isi.karma.semantictypes.numeric;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest;
import edu.isi.karma.modeling.semantictypes.SemanticTypeLabel;
import edu.isi.karma.modeling.semantictypes.SemanticTypeLabelComparator;
/**
 * Ranks candidate semantic type labels for a numeric column using the
 * two-sample Kolmogorov-Smirnov test from Apache Commons Math.
 */
public class KSTest {

    /**
     * Scores every training label against the test column and returns the labels
     * ordered by {@link SemanticTypeLabelComparator}.
     *
     * <p>For each label, the p-value of the two-sample Kolmogorov-Smirnov test between
     * the test examples and that label's training examples is used as the score
     * (narrowed to {@code float} for {@link SemanticTypeLabel}). Labels whose sample,
     * or a test column whose sample, has fewer than two usable values are not scored.
     * {@code null} elements inside the example lists are ignored.
     *
     * @param numPredictions maximum number of labels to return; a value {@code <= 0}
     *        disables the limit and returns every scored label
     * @param trainingLabelToExamplesMap label name mapped to its numeric training
     *        examples; {@code null} is treated as "no training data"
     * @param testExamples numeric values of the column to classify; {@code null} is
     *        treated as "no data"
     * @return a mutable list of scored labels, never {@code null}; empty when nothing
     *         could be scored
     */
    public List<SemanticTypeLabel> predictLabelsForColumn(int numPredictions, Map<String, List<Double>> trainingLabelToExamplesMap,
            List<Double> testExamples) {
        List<SemanticTypeLabel> sortedPredictions = new ArrayList<>();
        if (trainingLabelToExamplesMap == null || testExamples == null) {
            return sortedPredictions;
        }

        // The test sample is the same for every label, so convert and validate it once.
        double[] testSample = toPrimitiveArray(testExamples);
        if (testSample.length <= 1) {
            return sortedPredictions;
        }

        KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
        for (Entry<String, List<Double>> entry : trainingLabelToExamplesMap.entrySet()) {
            double[] trainSample = toPrimitiveArray(entry.getValue());
            if (trainSample.length <= 1) {
                // Too few values to compare distributions; leave this label unscored.
                continue;
            }
            double pValue = test.kolmogorovSmirnovTest(testSample, trainSample);
            sortedPredictions.add(new SemanticTypeLabel(entry.getKey(), (float) pValue));
        }

        // Order by score; the comparator is expected to put the highest p-value first.
        // NOTE(review): confirm SemanticTypeLabelComparator sorts descending, since the
        // truncation below keeps the head of the list.
        Collections.sort(sortedPredictions, new SemanticTypeLabelComparator());

        if (numPredictions > 0 && sortedPredictions.size() > numPredictions) {
            // Copy so the caller gets an independent, mutable list rather than a view.
            return new ArrayList<>(sortedPredictions.subList(0, numPredictions));
        }
        return sortedPredictions;
    }

    /** Copies the non-null values of {@code values} into a primitive array; {@code null} yields an empty array. */
    private static double[] toPrimitiveArray(List<Double> values) {
        if (values == null) {
            return new double[0];
        }
        double[] buffer = new double[values.size()];
        int count = 0;
        for (Double value : values) {
            if (value != null) {
                buffer[count++] = value.doubleValue();
            }
        }
        if (count == buffer.length) {
            return buffer;
        }
        double[] trimmed = new double[count];
        System.arraycopy(buffer, 0, trimmed, 0, count);
        return trimmed;
    }
}