package it.unito.geosummly; import it.unito.geosummly.io.CSVDataIO; import it.unito.geosummly.io.LogDataIO; import it.unito.geosummly.tools.CoordinatesNormalizationType; import it.unito.geosummly.tools.EvaluationTools; import it.unito.geosummly.tools.ImportTools; import it.unito.geosummly.utils.Pair; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Vector; import org.apache.commons.csv.CSVRecord; public class EvaluationOperator { private EvaluationTools eTools; public EvaluationOperator() { eTools=new EvaluationTools(); } public void executeCorrectness( String inLog, String inDens, String inNorm, String out, int mnum) throws IOException{ //Read input files CSVDataIO dataIO=new CSVDataIO(); List<CSVRecord> list=dataIO.readCSVFile(inNorm); LogDataIO logIO=new LogDataIO(); ArrayList<ArrayList<String>> infos = logIO.readClusteringLog(inLog); //Get features labels, minpts and eps ArrayList<String> labels=infos.get(0); ArrayList<String> minpts=infos.get(1); double eps=Double.parseDouble(infos.get(2).get(0)); //sse of clustering on entire dataset double cl_sse=Double.parseDouble(infos.get(2).get(1)); //Fill in the matrix of aggregate (frequency) values //without consider timestamp and coordinates /*ArrayList<ArrayList<Double>> matrix = eTools.buildAggregatesFromList(list);*/ //Normalize coordinates //Fill in the list of features ArrayList<String> features=eTools.getFeaturesFromListC(list); //Matrix of normalized values with coordinates ArrayList<ArrayList<Double>> matrix=eTools.build(list); /****NEW CORRECTNESS VARIABLE*****/ //Get the areas //ImportTools tools=new ImportTools(); //ArrayList<BoundingBox> data=tools.getFocalPoints(matrix); //ArrayList<Double> bboxArea=tools.getAreasFromFocalPoints(data, matrix.size()); //Create the random matrices and print them to file //ArrayList<ArrayList<Double>> frequencyRandomMatrix; //ArrayList<ArrayList<Double>> densityRandomMatrix; ArrayList<ArrayList<Double>> normalizedRandomMatrix; //get min and max values of features occurrences //ArrayList<Double> minArray=tools.getMinArray(matrix); //ArrayList<Double> maxArray=tools.getMaxArray(matrix); ArrayList<Double> SSEs=new ArrayList<Double>(); ClusteringOperator co=new ClusteringOperator(); //mnum matrices for(int i=0;i<mnum;i++) { /*frequencyRandomMatrix = eTools.buildFrequencyRandomMatrix(matrix.size(), minArray, maxArray); densityRandomMatrix = tools.buildDensityMatrix(CoordinatesNormalizationType.MISSING, frequencyRandomMatrix, bboxArea); normalizedRandomMatrix = tools.buildNormalizedMatrix(CoordinatesNormalizationType.MISSING, densityRandomMatrix);*/ ArrayList<String>feat=eTools.changeFeaturesLabel("f", "", features); /*dataIO.printResultHorizontal(null, densityRandomMatrix, tools.getFeaturesLabel(CoordinatesNormalizationType.MISSING, "density_rnd", feat), out, "/random-density-transformation-matrix-"+i+".csv"); dataIO.printResultHorizontal(null, normalizedRandomMatrix, tools.getFeaturesLabel(CoordinatesNormalizationType.MISSING, "normalized_density_rnd", feat), out, "/random-normalized-transformation-matrix-"+i+".csv");*/ normalizedRandomMatrix = eTools.buildNormalizedUniformly(matrix); /****NEW CORRECTNESS VARIABLE*****/ dataIO.printResultHorizontal(null, normalizedRandomMatrix, eTools.getFeaturesLabel("normalized_density_rnd", feat), out, "/random-normalized-transformation-matrix-"+i+".csv"); /****NEW CORRECTNESS VARIABLE*****/ SSEs.add(co.executeForCorrectness(inDens, normalizedRandomMatrix, labels, minpts, eps)); } //Get the sse ratio //double ratio = eTools.getSSERatio(SSEs, cl_sse); double pvalue = eTools.getPvalue(SSEs, cl_sse); //Write down the log file with SSE values //logIO.writeSSELog(SSEs, cl_sse, pvalue, out); logIO.writeSSELog(SSEs, cl_sse, pvalue, out); logIO.writeSSEforR(SSEs, out); } public void executeValidation( String logFile, String inDens, String out, int fnum) throws IOException { //Read input files CSVDataIO dataIO=new CSVDataIO(); List<CSVRecord> list=dataIO.readCSVFile(inDens); LogDataIO logIO=new LogDataIO(); ArrayList<ArrayList<String>> infos=logIO.readClusteringLog(logFile); //Get feature labels, minpts and eps ArrayList<String> labels=infos.get(0); ArrayList<String> minpts=infos.get(1); double eps=Double.parseDouble(infos.get(2).get(0)); //minpts/2 /*ArrayList<String> minpts = new ArrayList<String>(); for(String s: minptsS) { Double d = Double.parseDouble(s)/2; minpts.add(d.toString()); }*/ // Build feature list (timestamp and coordinates are not considered) ArrayList<String> features = new ArrayList<String>(); for(String s: list.get(0)) features.add(s); features.remove(0); //remove timestamp features.remove(0); //remove lat features.remove(0); //remove lng //Density matrix (convert values from string to double) ArrayList<ArrayList<Double>> matrix = new ArrayList<ArrayList<Double>>(); //i=1 --> no header for(int i=1; i<list.size(); i++) { ArrayList<Double> record = new ArrayList<Double>(); //j=1 --> no timestamp for(int j=1; j<list.get(i).size(); j++) { record.add(Double.parseDouble(list.get(i).get(j))); } matrix.add(record); } //Get labels and minpts DiscoveryOperator dO = new DiscoveryOperator(); ArrayList<ArrayList<String>> deltad = dO.executeForValidation(inDens, 3); labels = new ArrayList<String>(deltad.get(0)); minpts = new ArrayList<String>(deltad.get(1)); //This variable contains all the pairs of sets of the folds ArrayList<Pair<?,?>> pairs = new ArrayList<>(); ImportTools tools = new ImportTools(); ClusteringOperator co=new ClusteringOperator(); int index = 1; int length = 0; //Create the folds for(int i=0; i<fnum; i++) { //Do the holdout ArrayList<ArrayList<ArrayList<Double>>> sets = eTools.doHoldoutDensity(matrix); //This variable will contain the cells of the resulting clustering //for the pair of sets Pair<HashMap<String, Vector<Integer>>, HashMap<String, Vector<Integer>>> pair = new Pair<>(null, null); char name = 'A'; //For each set for(ArrayList<ArrayList<Double>> set: sets) { ArrayList<ArrayList<Double>> normalized = tools.buildNormalizedMatrix(CoordinatesNormalizationType.NORM, set); dataIO.printResultHorizontal(null, set, eTools.getFeaturesLabelNoTimestamp( CoordinatesNormalizationType.NORM, "density", features), out+"/fold_"+index, "/"+name+"-density-transformation-matrix.csv"); dataIO.printResultHorizontal(null, normalized, eTools.getFeaturesLabelNoTimestamp( CoordinatesNormalizationType.NORM, "normalized_density", features), out+"/fold_"+index, "/"+name+"-normalized-transformation-matrix.csv"); //Clustering of the sets HashMap<String, Vector<Integer>> setClustering = co.executeForValidation(inDens, normalized, length, labels, minpts, eps); if(name == 'A') pair.setFirst(setClustering); else pair.setSecond(setClustering); //write down the clustering of the resulting holdout to file logIO.writeHoldoutLog2(setClustering, out, name, index); //switch the set name from 'A' to 'B' name++; length+=normalized.size(); //update last_cellId value } index++; //just for file name //Add the pair to the list pairs.add(pair); } //Compute jaccard and write the result to file StringBuilder builder=eTools.computeJaccard2(pairs); logIO.writeJaccardLog(builder, out); } /*public void executeValidation2(String logFile, String inSingles, String out, int fnum) throws IOException { //Read input files CSVDataIO dataIO=new CSVDataIO(); List<CSVRecord> list=dataIO.readCSVFile(inSingles); LogDataIO logIO=new LogDataIO(); ArrayList<ArrayList<String>> infos=logIO.readClusteringLog(logFile); //Get feature labels, minpts and eps ArrayList<String> labels=infos.get(0); ArrayList<String> minpts=infos.get(1); double eps=Double.parseDouble(infos.get(2).get(0)); Double north = new Double(infos.get(3).get(0)); //north of bbox Double east = new Double(infos.get(3).get(1)); Double south = new Double(infos.get(3).get(2)); Double west = new Double(infos.get(3).get(3)); //number of cell of a side of the bbox int gnum = (int) Math.sqrt(Integer.parseInt(infos.get(3).get(4))); //Get the grid BoundingBox bbox = new BoundingBox(north, east, south, west); ArrayList<BoundingBox> data = new ArrayList<BoundingBox>(); Grid grid=new Grid(); grid.setCellsNumber(gnum); grid.setBbox(bbox); grid.setStructure(data); grid.createCells(); //Fill in the matrix of single venues //All the columns will be considered. ArrayList<ArrayList<String>> matrix = eTools.buildSinglesFromList(list); //Get the header ArrayList<String> header = eTools.getHeaderFromList(list); //Fill in the list of features for transformation //Only the categories will be considered ArrayList<String> features = eTools.getFeaturesFromListV(list); //Group the venues and get the value of each cell ArrayList<Double> bboxArea = eTools.getAreas(data); //This variable will contain all the pairs of sets of the folds ArrayList<Pair<?,?>> pairs = new ArrayList<>(); ImportTools tools = new ImportTools(); ClusteringOperator co=new ClusteringOperator(); int index = 1; int length = 0; for(int i=0; i<fnum; i++) { //Do the holdout ArrayList<ArrayList<String>> temp = new ArrayList<ArrayList<String>>(matrix); ArrayList<ArrayList<ArrayList<String>>> holdoutPrint = eTools.doHoldOut(temp, 2); //Write to file the singles dataIO.printSinglesForValidation(holdoutPrint.get(0), header, out+"/fold_"+index, "/A-singles-matrix.csv"); dataIO.printSinglesForValidation(holdoutPrint.get(1), header, out+"/fold_"+index, "/B-singles-matrix.csv"); //I don't need timestamp, been_here, venue_id, venue_lat, venue_lng anymore //Cast also strings to double values ArrayList<ArrayList<ArrayList<Double>>> holdout = eTools.removeVenueInformations(holdoutPrint); //Group the sets to cell ArrayList<ArrayList<ArrayList<Double>>> initialSets = eTools.groupFolds(data, holdout); //Check if all the cells are included in each set //ArrayList<ArrayList<ArrayList<Double>>> sets = eTools.checkCells(data, initialSets); ArrayList<ArrayList<ArrayList<Double>>> sets = new ArrayList<ArrayList<ArrayList<Double>>>(initialSets); //This variable will contain the cells of the resulting clustering //for the pair of sets Pair<HashMap<String, Vector<Integer>>, HashMap<String, Vector<Integer>>> pair = new Pair<>(null, null); char name = 'A'; //For each set for(ArrayList<ArrayList<Double>> set: sets) { //create the density and normalized matrix ArrayList<ArrayList<Double>> density = tools.buildDensityMatrix(CoordinatesNormalizationType.NORM, set, bboxArea); ArrayList<ArrayList<Double>> normalized = tools.buildNormalizedMatrix(CoordinatesNormalizationType.NORM, density); //write down the matrices to file dataIO.printResultHorizontal(null, set, eTools.getFeaturesLabelNoTimestamp( CoordinatesNormalizationType.NORM, "f", features), out+"/fold_"+index, "/"+name+"-frequency-transformation-matrix.csv"); dataIO.printResultHorizontal(null, density, eTools.getFeaturesLabelNoTimestamp( CoordinatesNormalizationType.NORM, "density", features), out+"/fold_"+index, "/"+name+"-density-transformation-matrix.csv"); dataIO.printResultHorizontal(null, normalized, eTools.getFeaturesLabelNoTimestamp( CoordinatesNormalizationType.NORM, "normalized_density", features), out+"/fold_"+index, "/"+name+"-normalized-transformation-matrix.csv"); //Clustering of the sets HashMap<String, Vector<Integer>> setClustering = co.executeForValidation(normalized, length, labels, minpts, eps); if(name == 'A') pair.setFirst(setClustering); else pair.setSecond(setClustering); //write down the clustering of the resulting holdout to file logIO.writeHoldoutLog2(setClustering, out, name, index); //switch the set name from 'A' to 'B' name++; length+=normalized.size(); //update last_cellId value } index++; //just for file name //Add the pair to the list pairs.add(pair); } //Compute jaccard and write the result to file StringBuilder builder=eTools.computeJaccard2(pairs); logIO.writeJaccardLog(builder, out); }*/ }