package edu.isi.karma.research.modeling; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.isi.karma.modeling.alignment.ModelEvaluation; import edu.isi.karma.modeling.alignment.SemanticModel; import edu.isi.karma.modeling.alignment.learner.ModelReader; import edu.isi.karma.modeling.research.Params; public class ModelOverlap { private static Logger logger = LoggerFactory.getLogger(ModelOverlap.class); private static double roundTwoDecimals(double d) { DecimalFormat twoDForm = new DecimalFormat("#.##"); return Double.valueOf(twoDForm.format(d)); } public static double getMaxOverlap(List<SemanticModel> trainingModels, SemanticModel testModel) { if (trainingModels == null || testModel == null) return 0.0; double maxOverlap = 0.0; ModelEvaluation me; for (SemanticModel trainingModel : trainingModels) { if (trainingModel == null) continue; me = trainingModel.evaluate(testModel, false, true); if (me.getJaccard() > maxOverlap) maxOverlap = me.getJaccard(); } return maxOverlap; } public static double getAvgOverlap(List<SemanticModel> trainingModels, SemanticModel testModel) { if (trainingModels == null || testModel == null) return 0.0; double sum = 0.0; int count = 0; ModelEvaluation me; for (SemanticModel trainingModel : trainingModels) { if (trainingModel == null) continue; me = trainingModel.evaluate(testModel, false, true); sum += me.getJaccard(); count ++; } return roundTwoDecimals(count == 0 ? 0.0 : sum/(double)count); } public static void main(String[] args) throws Exception { List<SemanticModel> semanticModels = ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT); double sum = 0.0; double min = 0.0; double max = 0.0; double avg = 0.0; double median = 0.0; List<Double> jaccard = new ArrayList<Double>(); ModelEvaluation me; for (int i = 0; i < semanticModels.size() - 1; i++) { SemanticModel s1 = semanticModels.get(i); for (int j = i + 1; j < semanticModels.size(); j++) { SemanticModel s2 = semanticModels.get(j); me = s1.evaluate(s2, false, true); jaccard.add(me.getJaccard()); System.out.println("jaccard similarity of (" + i + "," + j + "): " + me.getJaccard()); } } int n = jaccard.size(); if (n == 0) return; for (Double d : jaccard) { sum += d; } Collections.sort(jaccard); min = jaccard.get(0); max = jaccard.get(jaccard.size() - 1); avg = roundTwoDecimals(sum/(double)n); if (jaccard.size() % 2 == 0) median = (jaccard.get(n/2) + jaccard.get(n/2 - 1))/2; else median = jaccard.get(n/2); System.out.println("dataset: " + Params.DATASET_NAME); System.out.println("min overlap: " + min); System.out.println("max overlap: " + max); System.out.println("average overlap: " + avg); System.out.println("median overlap: " + median); logger.info("done."); } // public static void main(String[] args) throws Exception { // // boolean useMaxOverlap = false; // boolean useAvgOverlap = !useMaxOverlap; // // List<SemanticModel> semanticModels = // ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT); // // double[] sumOverlap = new double[semanticModels.size()]; // for (int i = 0; i < sumOverlap.length; i++) sumOverlap[i] = 0.0; // // List<SemanticModel> trainingData = new ArrayList<SemanticModel>(); // for (int i = 0; i < semanticModels.size(); i++) { // // int newSourceIndex = i; // int numberOfKnownModels = 0; // // while (numberOfKnownModels < semanticModels.size()) // { // // trainingData.clear(); // // int j = 0, count = 0; // while (count < numberOfKnownModels) { // if (j != newSourceIndex) { // trainingData.add(semanticModels.get(j)); // count++; // } // j++; // } // // double overlap = 0.0; // if (useMaxOverlap) // overlap = ModelOverlap.getMaxOverlap(trainingData, semanticModels.get(newSourceIndex)); // else if (useAvgOverlap) // overlap = ModelOverlap.getAvgOverlap(trainingData, semanticModels.get(newSourceIndex)); // //// logger.info(overlap); //// System.out.println(overlap); // // sumOverlap[numberOfKnownModels] += overlap; // numberOfKnownModels ++; // } // } // // double avgOverlap; // for (int i = 0; i < semanticModels.size(); i++) { // avgOverlap = semanticModels.size() == 0 ? 0.0 : roundTwoDecimals(sumOverlap[i] / (double)semanticModels.size()); // System.out.println("avg overlap , num of known models " + i + ": " + avgOverlap); // } // } }