/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package di.uniba.it.tri.script; import java.io.BufferedReader; import java.io.FileReader; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; /** * * @author pierpaolo */ public class WCScorer { private static String[] levels = new String[]{"10", "20", "50", "100", "250", "500", "1000", "all"}; /** * @param args the command line arguments */ public static void main(String[] args) { try { if (args.length == 2) { //load rank List<TimeWord> rank = new ArrayList<>(); BufferedReader reader = new BufferedReader(new FileReader(args[0])); while (reader.ready()) { String[] split = reader.readLine().split("\\t"); TimeWord w = new TimeWord(split[0], Double.parseDouble(split[1]), Integer.parseInt(split[2])); w.setFreq(Double.parseDouble(split[3])); rank.add(w); } reader.close(); //load word set Map<String, Integer> cwmap = new HashMap<>(); reader = new BufferedReader(new FileReader(args[1])); while (reader.ready()) { String[] split = reader.readLine().split("\\t"); String[] words = split[0].split(","); String[] years = split[1].split(","); int min = Integer.MAX_VALUE; for (String y : years) { int v = Integer.parseInt(y.trim()); if (v < min) { min = v; } } for (String s : words) { cwmap.put(s, min); } } Collections.sort(rank, new TimeWordSorter()); System.out.println(); double[] map = new double[levels.length]; double[] acc = new double[levels.length]; double[] avgDist = new double[levels.length]; double[] varDist = new double[levels.length]; for (int j = 0; j < levels.length; j++) { String level = levels[j]; int k; if (level.equalsIgnoreCase("all")) { k = rank.size(); } else { k = Math.min(Integer.parseInt(level), rank.size()); } double correct = 0; double ap = 0; for (int i = 0; i < k; i++) { Integer y = cwmap.get(rank.get(i).getWord()); if (y != null && rank.get(i).getCp() >= y) { avgDist[j] += rank.get(i).getCp() - y; correct++; } ap += correct / (double) (i + 1); } ap /= (double) k; map[j] = ap; acc[j] = correct / (double) cwmap.size(); avgDist[j] /= correct; for (int i = 0; i < k; i++) { Integer y = cwmap.get(rank.get(i).getWord()); if (y != null && rank.get(i).getCp() >= y) { varDist[j] += Math.pow(avgDist[j] - (rank.get(i).getCp() - y), 2); } } varDist[j] = Math.sqrt(varDist[j] / (correct - 1)); } System.out.print("MAP\t"); for (double v : map) { System.out.print(v); System.out.print("\t"); } System.out.println(); System.out.print("Acc.\t"); for (double v : acc) { System.out.print(v); System.out.print("\t"); } System.out.println(); System.out.print("avgDist.\t"); for (double v : avgDist) { System.out.print(v); System.out.print("\t"); } System.out.println(); System.out.print("varDist.\t"); for (double v : varDist) { System.out.print(v); System.out.print("\t"); } System.out.println(); } } catch (Exception ex) { Logger.getLogger(WCScorer.class.getName()).log(Level.SEVERE, null, ex); } } static private class TimeWordSorter implements Comparator<TimeWord> { @Override public int compare(TimeWord o1, TimeWord o2) { int c1 = Double.compare(o1.getPvalue(), o2.getPvalue()); if (c1 == 0) { return -Double.compare(o1.getFreq(), o2.getFreq()); } else { return c1; } } } }