package it.unito.geosummly.io;

import it.unito.geosummly.BoundingBox;
import it.unito.geosummly.Venue;
import it.unito.geosummly.pareto.ParetoPoint;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads and writes the plain-text log files of the sampling, clustering,
 * evaluation and optimization steps.
 *
 * <p>All {@code write*} methods create the output directory when it is missing and
 * report I/O failures on standard error (via {@code printStackTrace}) instead of
 * propagating them to the caller.
 */
public class LogDataIO {

    /** Captures the text between the first and the last dot of a line (feature label). */
    private static final Pattern LABEL_PATTERN = Pattern.compile("\\.(.*)\\.");

    /** Captures everything after the first '=' of a line (minpts value). */
    private static final Pattern MINPTS_PATTERN = Pattern.compile("=(.*)");

    /** Captures everything after the first ':' of a line (all the other values). */
    private static final Pattern VALUE_PATTERN = Pattern.compile(":(.*)");

    /**
     * Read the sampling log file.
     *
     * @param logFile path of the sampling log to read
     * @return a two-element list: the value of the line containing "Number of cells"
     *         followed by the value of the line containing "1st level"; a value is 0
     *         when no such line is found
     * @throws IOException if the file cannot be opened or read
     */
    public ArrayList<Integer> readSamplingLog(String logFile) throws IOException {
        int cellsNum = 0;
        int catNum = 0;

        // try-with-resources: the reader is released even when a value fails to parse
        try (BufferedReader br = new BufferedReader(new FileReader(logFile))) {
            String current;
            while ((current = br.readLine()) != null) {
                if (current.contains("Number of cells")) {
                    // number of cells of the bbox
                    cellsNum = Integer.parseInt(current.split(":")[1].trim());
                } else if (current.contains("1st level")) {
                    // total number of categories found
                    catNum = Integer.parseInt(current.split(":")[1].trim());
                }
            }
        }

        ArrayList<Integer> logInfos = new ArrayList<Integer>();
        logInfos.add(cellsNum);
        logInfos.add(catNum);
        return logInfos;
    }

    /**
     * Read the clustering log file.
     *
     * @param logFile path of the clustering log to read
     * @return four lists, in this order: the distinct feature labels, the minpts value
     *         found on the line that introduced each label, the eps and SSE values, and
     *         the bounding box coordinates followed by the number of cells (values are
     *         stored in the order in which their lines appear in the file)
     * @throws IOException if the file cannot be opened or read
     */
    public ArrayList<ArrayList<String>> readClusteringLog(String logFile) throws IOException {
        ArrayList<String> labels = new ArrayList<String>();
        ArrayList<String> minpts = new ArrayList<String>();
        ArrayList<String> eps_sse = new ArrayList<String>(); // eps value and sse value
        ArrayList<String> coord_cells = new ArrayList<String>(); // bbox coordinates and number of cells

        try (BufferedReader br = new BufferedReader(new FileReader(logFile))) {
            String current;
            while ((current = br.readLine()) != null) {
                if (current.startsWith("{")) {
                    // feature line: keep the label (once) together with its minpts
                    Matcher labelMatcher = LABEL_PATTERN.matcher(current);
                    if (labelMatcher.find()) {
                        String label = labelMatcher.group(1);
                        if (!labels.contains(label)) {
                            labels.add(label);
                            Matcher minptsMatcher = MINPTS_PATTERN.matcher(current);
                            if (minptsMatcher.find()) {
                                minpts.add(minptsMatcher.group(1));
                            }
                        }
                    }
                } else if (current.startsWith("eps value") || current.startsWith("SSE value")) {
                    addValue(eps_sse, current);
                } else if (current.startsWith("north") || current.startsWith("east")
                        || current.startsWith("south") || current.startsWith("west")
                        || current.startsWith("Cells of the grid")) {
                    // bbox coordinates and the number of cells share the same list
                    addValue(coord_cells, current);
                }
            }
        }

        ArrayList<ArrayList<String>> logInfo = new ArrayList<ArrayList<String>>();
        logInfo.add(labels);
        logInfo.add(minpts);
        logInfo.add(eps_sse);
        logInfo.add(coord_cells);
        return logInfo;
    }

    /**
     * Write the log file of sampling process ("sampling.log").
     *
     * @param bbox           bounding box whose coordinates and area are logged
     * @param data           cells of the grid; must not be empty, because the area of
     *                       the first cell is logged as the cell area
     * @param categories_1st number of 1st level categories
     * @param categories_2nd number of 2nd level categories
     * @param output         output directory
     * @param secondLevel    whether the 2nd level line has to be written
     */
    public void writeSamplingLog(BoundingBox bbox, ArrayList<BoundingBox> data,
                                 int categories_1st, int categories_2nd,
                                 String output, boolean secondLevel) {
        int cellNumber = data.size();
        double cellArea = data.get(0).getArea().doubleValue();

        try {
            File file = prepareLogFile(output, "sampling.log");
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
                bw.write("Bounding box: " + bbox.getNorth() + ", " + bbox.getEast() + ", "
                        + bbox.getSouth() + "," + bbox.getWest() + "\n");
                bw.write("Area of the bounding box (km^2): " + bbox.getArea() + "\n");
                bw.write("Number of cells of the grid: " + cellNumber + "\n");
                bw.write("Area of a cell (km^2): " + cellArea + "\n");
                bw.write("Categories number (1st level): " + categories_1st + "\n");
                if (secondLevel) {
                    bw.write("Categories number (2nd level): " + categories_2nd);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write the log file of clustering process ("clustering.log").
     *
     * <p>The eps, SSE, bounding box and cell number sections are appended to
     * {@code sb} itself (the caller's builder is modified) before its content is
     * written to the file.
     *
     * @param sb      log content built so far; it is extended by this method
     * @param eps     eps value
     * @param sse     SSE value
     * @param north   north coordinate of the bounding box
     * @param east    east coordinate of the bounding box
     * @param south   south coordinate of the bounding box
     * @param west    west coordinate of the bounding box
     * @param cellNum number of cells of the grid
     * @param output  output directory
     */
    public void writeClusteringLog(StringBuilder sb, double eps, double sse,
                                   Double north, Double east, Double south, Double west,
                                   int cellNum, String output) {
        sb.append("\n\neps value: " + eps);
        sb.append("\nSSE value: " + sse);
        sb.append("\n\nBounding Box");
        sb.append("\nnorth: " + north);
        sb.append("\neast: " + east);
        sb.append("\nsouth: " + south);
        sb.append("\nwest: " + west);
        sb.append("\n\nCells of the grid: " + cellNum);

        try {
            writeTextFile(prepareLogFile(output, "clustering.log"), sb.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write the log file of SSE values ("SSEs.log"): one "index,value" line per SSE,
     * followed by the PDF evaluation line.
     *
     * @param SSEs   SSE values to list
     * @param cl_sse point at which the PDF has been evaluated
     * @param pvalue value of the PDF at {@code cl_sse}
     * @param output output directory
     */
    public void writeSSELog(ArrayList<Double> SSEs, double cl_sse, double pvalue, String output) {
        try {
            File file = prepareLogFile(output, "SSEs.log");
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
                int index = 0;
                for (Double d : SSEs) {
                    bw.write(index + "," + d + "\n");
                    index++;
                }
                bw.write("\n\nPDF(x) evaluated at " + cl_sse + " is equal to: " + pvalue);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write a R script ("SSE_distribution.R") in order to get the SSEs' gaussian
     * distribution.
     *
     * @param SSEs   SSE values to plot; must not be empty
     * @param output output directory
     * @throws java.util.NoSuchElementException if {@code SSEs} is empty
     */
    public void writeSSEforR(ArrayList<Double> SSEs, String output) {
        Double min = Collections.min(SSEs);
        Double max = Collections.max(SSEs);

        StringBuilder sb = new StringBuilder("x=c(");
        boolean first = true;
        for (Double d : SSEs) {
            // separator goes BEFORE every value but the first, so that the vector
            // never ends with a dangling comma (R rejects "c(1.0, 2.0,)")
            if (!first) {
                sb.append(", ");
            }
            sb.append(d);
            first = false;
        }
        sb.append(");\n");
        sb.append("bins=seq(" + (min - 5) + "," + (max + 5) + ",by=0.5);\n");
        sb.append("hist(x,breaks=bins,xlab=\"SSE\",ylab=\"count\",main=\"Histogram of SSE for "
                + SSEs.size() + " random data sets\")");

        try {
            writeTextFile(prepareLogFile(output, "SSE_distribution.R"), sb.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write the log file of holdouts ("holdout_results.log").
     *
     * <p>The entries are appended to the existing file and closed by an
     * "_END_HO&lt;n&gt;" marker, where n is one more than the number that ends the
     * current last line of the file (1 when the file is empty or its last line does
     * not end with a number).
     *
     * @param holdout holdout values for each label
     * @param output  output directory
     */
    public void writeHoldoutLog(HashMap<String, Vector<Integer>> holdout, String output) {
        try {
            File file = prepareLogFile(output, "holdout_results.log");
            String lastLine = readLastLine(file);
            // the whole trailing number is parsed, so folds >= 10 are handled too
            int lastFold = trailingNumber(lastLine);

            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file, true))) { // true=append
                if (lastLine.length() > 0) {
                    // the previous marker has no line terminator: start on a new line
                    bw.write("\n");
                }
                writeHoldoutEntries(bw, holdout);
                bw.write("_END_HO" + (lastFold + 1));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write the log file of holdouts ("holdout_results.log"), marking the entries
     * with the given set identifier.
     *
     * <p>The entries are appended to the existing file and closed by an
     * "_END_HO&lt;set&gt;" marker. When {@code set} is 'B' a further "_END_F" marker
     * is written, followed by {@code index} (prefixed by "0" unless it is 10).
     *
     * @param holdout holdout values for each label
     * @param output  output directory
     * @param set     identifier of the set written in the end marker
     * @param index   number written in the "_END_F" marker
     */
    public void writeHoldoutLog2(HashMap<String, Vector<Integer>> holdout, String output,
                                 char set, int index) {
        try {
            File file = prepareLogFile(output, "holdout_results.log");
            String lastLine = readLastLine(file);

            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file, true))) { // true=append
                if (lastLine.length() > 0) {
                    bw.write("\n");
                }
                writeHoldoutEntries(bw, holdout);
                bw.write("_END_HO" + set);
                if (set == 'B') {
                    bw.write("\n");
                    if (index == 10) {
                        bw.write("_END_F" + index);
                    } else {
                        bw.write("_END_F0" + index);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write the log file of Jaccard evaluation ("jaccard_report.log").
     *
     * @param builder content of the report
     * @param output  output directory
     */
    public void writeJaccardLog(StringBuilder builder, String output) {
        try {
            writeTextFile(prepareLogFile(output, "jaccard_report.log"), builder.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write the log file of optimization process ("optimization.log").
     *
     * @param selected ids of the clusters selected by the optimization
     * @param map      f0 value for each cluster id; entries are listed in the
     *                 iteration order of the map
     * @param weights  weights used by the optimization
     * @param f1       spatial coverage values, one per cluster
     * @param f2       density values, read at the same indexes as {@code f1}
     * @param f3       heterogeneity values, read at the same indexes as {@code f1}
     * @param f0       total values, read at the same indexes as {@code f1}
     * @param output   output directory
     */
    public void writeOptimizationLog(List<Integer> selected, Map<Integer, Double> map,
                                     ArrayList<Double> weights,
                                     ArrayList<Double> f1, ArrayList<Double> f2,
                                     ArrayList<Double> f3, ArrayList<Double> f0,
                                     String output) {
        try {
            File file = prepareLogFile(output, "optimization.log");
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
                bw.write("Clusters before optimization: " + map.size() + "\n");
                bw.write("Clusters after optimization: " + selected.size() + "\n");
                bw.write("Top clusters selected (cluster_id): " + selected + "\n");
                bw.write("Weights: " + weights + "\n");

                bw.write("\n---------------------------------------------\n");
                bw.write("Ranking\n");
                bw.write("---------------------------------------------\n");
                for (Map.Entry<Integer, Double> entry : map.entrySet()) {
                    bw.write("cluster_id : " + entry.getKey()
                            + "\t\tf0_value : " + entry.getValue() + "\n");
                }

                for (int i = 0; i < f1.size(); i++) {
                    bw.write("\n---------------\n");
                    bw.write("CLUSTER " + (i + 1) + "\n");
                    bw.write("---------------\n");
                    bw.write("Spatial coverage = " + f1.get(i) + "\n");
                    bw.write("Density = " + f2.get(i) + "\n");
                    bw.write("Heterogeneity = " + f3.get(i) + "\n");
                    bw.write("Total = " + f0.get(i) + "\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Write the log file of the Pareto optimization ("pareto.log"): for each selected
     * object, its comma-separated labels followed by its comma-separated cell ids.
     *
     * @param paretoPoints all the points considered; only their number is logged
     * @param labels       labels of each logged object
     * @param cellIDs      cell ids of each logged object
     * @param selected     selected objects; only its size is used
     * @param output       output directory
     */
    public void writeParetoLog(Collection<ParetoPoint> paretoPoints,
                               ArrayList<String[]> labels,
                               ArrayList<ArrayList<Integer>> cellIDs,
                               List<Integer> selected,
                               String output) {
        try {
            File file = prepareLogFile(output, "pareto.log");
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
                bw.write("Clusters before optimization: " + paretoPoints.size() + "\n");
                bw.write("Object on the Pareto Efficient Frontier: " + selected.size() + "\n");

                // NOTE(review): labels and cellIDs are read at positions 0..selected.size()-1,
                // not at the ids stored in 'selected' - presumably the caller passes lists
                // already aligned with 'selected'; confirm against the caller.
                for (int i = 0; i < selected.size(); i++) {
                    String name = joinWithCommas(labels.get(i));
                    String ids = joinWithCommas(cellIDs.get(i).toArray());
                    bw.write(name + "\n\t" + ids + "\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Adds to {@code target} the trimmed text that follows the first ':' of {@code line}, if any. */
    private static void addValue(List<String> target, String line) {
        Matcher matcher = VALUE_PATTERN.matcher(line);
        if (matcher.find()) {
            target.add(matcher.group(1).trim());
        }
    }

    /** Creates the output directory if it doesn't exist and returns the file {@code fileName} inside it. */
    private static File prepareLogFile(String output, String fileName) {
        File dir = new File(output);
        dir.mkdirs();
        return new File(dir, fileName);
    }

    /** Overwrites {@code file} with {@code content}, always releasing the writer. */
    private static void writeTextFile(File file, String content) throws IOException {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
            bw.write(content);
        }
    }

    /** Returns the last line of {@code file}, or an empty string when the file is missing or empty. */
    private static String readLastLine(File file) throws IOException {
        String lastLine = "";
        if (!file.exists()) {
            return lastLine;
        }
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String currentLine;
            while ((currentLine = br.readLine()) != null) {
                lastLine = currentLine;
            }
        }
        return lastLine;
    }

    /** Returns the number made of the digits that end {@code line}, or 0 when there is none. */
    private static int trailingNumber(String line) {
        int start = line.length();
        while (start > 0 && Character.isDigit(line.charAt(start - 1))) {
            start--;
        }
        if (start == line.length()) {
            return 0;
        }
        try {
            return Integer.parseInt(line.substring(start));
        } catch (NumberFormatException e) {
            // digit run too long for an int: treat the line as carrying no fold number
            return 0;
        }
    }

    /** Writes one "label;v1 v2 ... " line for each entry of {@code holdout}. */
    private static void writeHoldoutEntries(BufferedWriter bw,
                                            HashMap<String, Vector<Integer>> holdout)
            throws IOException {
        for (Map.Entry<String, Vector<Integer>> entry : holdout.entrySet()) {
            bw.write(entry.getKey() + ";");
            for (Integer value : entry.getValue()) {
                bw.write(value + " ");
            }
            bw.write("\n");
        }
    }

    /** Joins {@code items} with "," separators; an empty array gives an empty string. */
    private static String joinWithCommas(Object[] items) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < items.length; i++) {
            if (i > 0) {
                joined.append(",");
            }
            joined.append(items[i]);
        }
        return joined.toString();
    }
}