package edu.isi.karma.semantictypes.evaluation;
/**
*
*
*
* @author aditi and pranav
* @Date 6th June 2015
* @AIM:Evaluation code for models published in karma using Mean Reciprocal Rank Metric and Accuracy
*/
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
public class EvaluateMRR {
// private final static String INPUT_DIR_NAME="/home/v/karma-dev-home/models-json/"; //path to models-json in karma home
// private final static String OUTPUT_DIR_NAME="evaluation-results";
public static MRRItem calculateMRRValue(String inputFile, Integer numberOfCandidates){
JSONParser parser = new JSONParser();
int noOfAttributes=0; // No of columns in data source
double sumRR=0; // Sum of Reciprocal ranks of all columns in data source
int correctHits=0;
try {
//String tokens[]=inputFile.split("/");
//String fileName= tokens[tokens.length -1];
//FileWriter file = new FileWriter(outputDir+"//"+fileName.split(".json",2)[0]+".MRR"+".json");
JSONObject jsonObject = (JSONObject) parser.parse(new FileReader(inputFile));
JSONObject graph = (JSONObject) jsonObject.get("graph");
JSONArray jsonArray = (JSONArray) graph.get("nodes");
for (Object o: jsonArray){
JSONObject obj= (JSONObject) o;
// Reading column attribute nodes from the JSON Object
if(obj.get("type").equals("ColumnNode")){
noOfAttributes++;
// Reading correct Semantic labels and storing in an array list
JSONArray userArray = (JSONArray) obj.get("userSemanticTypes");
List<String> correctTypes= new ArrayList<>();
for(Object o1: userArray){
JSONObject userObj= (JSONObject) o1;
correctTypes.add(userObj.get("domain").toString() + userObj.get("type").toString());
}
// Reading learned Semantic labels and storing in an array list
JSONArray learnedArray = (JSONArray) obj.get("learnedSemanticTypes");
List<String> learnedTypes= new ArrayList<>();
if (learnedArray != null)
for(Object o2: learnedArray){
JSONObject learnedObj= (JSONObject) o2;
learnedTypes.add(learnedObj.get("domain").toString() + learnedObj.get("type").toString());
}
int rank=1; // rank of correct semantic type in the learned semantic labels ordered list
boolean attributeFound=false;
int count = 0;
// Calculating reciprocal rank for each column attribute
for(String correctLabel :correctTypes ){
attributeFound=false;
for(String suggestedLabel: learnedTypes){
if (numberOfCandidates != null && count == numberOfCandidates.intValue())
break;
count ++;
if(correctLabel.equals(suggestedLabel)){
attributeFound=true;
sumRR+=1/(double)rank;
if(rank==1){
correctHits++;
}
break;
}
rank++;
}
if(attributeFound==false){
sumRR+=0;
}
}
}
}
// Calculating MRR
double accuracy = (double)correctHits/noOfAttributes;
double mrr = (double)sumRR/noOfAttributes;
MRRItem mrrItem = new MRRItem(accuracy, mrr);
return mrrItem;
} catch (Exception e1) {
e1.printStackTrace();
return null;
}
}
@SuppressWarnings("unchecked")
public static JSONObject calculateMRR(String inputFile){
JSONObject printObj=new JSONObject();
JSONParser parser = new JSONParser();
int noOfAttributes=0; // No of columns in data source
double sumRR=0; // Sum of Reciprocal ranks of all columns in data source
int correctHits=0;
try {
//String tokens[]=inputFile.split("/");
//String fileName= tokens[tokens.length -1];
//FileWriter file = new FileWriter(outputDir+"//"+fileName.split(".json",2)[0]+".MRR"+".json");
JSONObject jsonObject = (JSONObject) parser.parse(new FileReader(inputFile));
JSONObject graph = (JSONObject) jsonObject.get("graph");
JSONArray jsonArray = (JSONArray) graph.get("nodes");
JSONArray columns = new JSONArray();
for (Object o: jsonArray){
JSONObject obj= (JSONObject) o;
JSONObject columnNode= new JSONObject();
JSONObject columnNameNode= new JSONObject();
// Reading column attribute nodes from the JSON Object
if(obj.get("type").equals("ColumnNode")){
noOfAttributes++;
// Reading correct Semantic labels and storing in an array list
JSONArray userArray = (JSONArray) obj.get("userSemanticTypes");
List<String> correctTypes= new ArrayList<>();
for(Object o1: userArray){
JSONObject userObj= (JSONObject) o1;
correctTypes.add(userObj.get("domain").toString() + userObj.get("type").toString());
}
// Reading learned Semantic labels and storing in an array list
JSONArray learnedArray = (JSONArray) obj.get("learnedSemanticTypes");
List<String> learnedTypes= new ArrayList<>();
for(Object o2: learnedArray){
JSONObject learnedObj= (JSONObject) o2;
learnedTypes.add(learnedObj.get("domain").toString() + learnedObj.get("type").toString());
}
int rank=1; // rank of correct semantic type in the learned semantic labels ordered list
boolean attributeFound=false;
// Calculating reciprocal rank for each column attribute
for(String correctLabel :correctTypes ){
attributeFound=false;
for(String suggestedLabel: learnedTypes){
if(correctLabel.equals(suggestedLabel)){
columnNameNode.put(EvaluatedJSONLabels.FOUND_NAME,"true");
columnNameNode.put(EvaluatedJSONLabels.RECIPROCAL_RANK_NAME,new Double((double)1/rank));
columnNode.put(obj.get("columnName"),columnNameNode);
columns.add(columnNode);
attributeFound=true;
sumRR+=1/(double)rank;
if(rank==1){
correctHits++;
}
break;
}
rank++;
}
if(attributeFound==false){
columnNameNode.put(EvaluatedJSONLabels.FOUND_NAME,"false");
columnNameNode.put(EvaluatedJSONLabels.RECIPROCAL_RANK_NAME,new Double(0));
columnNode.put(obj.get("columnName"),columnNameNode);
columns.add(columnNode);
sumRR+=0;
}
}
}
}
// Calculating MRR
printObj.put(EvaluatedJSONLabels.COLUMNS_NAME, columns);
printObj.put(EvaluatedJSONLabels.MRR_NAME, (double)sumRR/noOfAttributes);
printObj.put(EvaluatedJSONLabels.ACCURACY_NAME, (double)correctHits/noOfAttributes);
} catch (Exception e1) {
e1.printStackTrace();
}
return printObj;
}
public static void printEvaluatedJSON(String inputFileName,String outputFileName){
JSONObject obtainedObject = calculateMRR(inputFileName);
Gson gson = new GsonBuilder().setPrettyPrinting().create();
String json = gson.toJson(obtainedObject);
try {
PrintWriter fw = new PrintWriter(outputFileName);
fw.write(json);
fw.flush();
fw.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static void printAllEvaluatedJSON(File inputDir,File outputDir){
String[] inputFileNameList = inputDir.list();
String extension = ".mrr.json";
for(String inputFileName : inputFileNameList){
printEvaluatedJSON(inputDir.getAbsolutePath() + inputFileName, outputDir.getAbsolutePath() + inputFileName + extension);
}
}
public static void main(String[] args) {
/*
* For evaluating the json file after model is published
* Call the function calculateMRR after the 'PUBLISH MODEL' command
* passing the inputFile path and Output Directory path as parameters to the function
Example:
'evaluation-results' must be an existing folder in the workspace
*/
/*
* Call for Single File
*
*
String inputFileName = "cbev2.WebArtistBio.csv.model.json";
String file = EvaluateMRR.INPUT_DIR_NAME + inputFileName;
printEvaluatedJSON(file, EvaluateMRR.OUTPUT_DIR_NAME);
*/
/*
* Call for Entire Directory
*/
// printAllEvaluatedJSON(EvaluateMRR.INPUT_DIR_NAME, EvaluateMRR.OUTPUT_DIR_NAME);
}
}