package edu.uncc.cs.watsonsim;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentSkipListMap;

import edu.uncc.cs.watsonsim.scorers.Merge;

/**
 * Describes one registered score: its name, the value to assume when an
 * answer does not carry it, and how two values of it are combined.
 * This is mostly autogenerated.
 * @author Sean
 */
final class Meta implements Comparable<Meta> {
    public final String name;
    public final double default_value;
    public final Merge merge_type;

    public Meta(String name, double default_value, Merge merge_type) {
        this.name = name;
        this.default_value = default_value;
        this.merge_type = merge_type;
    }

    /**
     * Orders by name, in REVERSE of the natural String order
     * (the argument's name is compared against this one's).
     *
     * NOTE(review): returning 0 for null and sorting in reverse both differ
     * from the usual Comparable conventions. They are kept as-is because
     * code outside this file may rely on the current ordering.
     */
    @Override
    public int compareTo(Meta o) {
        if (o == null) {
            return 0;
        }
        return o.name.compareTo(name);
    }

    /** Two Metas are equal when they are the same class and have equal names. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        Meta other = (Meta) obj;
        return name == null ? other.name == null : name.equals(other.name);
    }

    /** Hash of the name only, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return 31 + (name == null ? 0 : name.hashCode());
    }
}

/**
 * A score vector: a map from score name to value, plus a static registry
 * ("template") of the known score names with their defaults and merge modes.
 *
 * Scores that are registered but absent from a particular vector read as
 * their registered default through {@link #get(String)}.
 * @author Sean
 */
public class Score extends HashMap<String, Double> implements Map<String, Double> {
    private static final long serialVersionUID = 3368114859528405852L;

    /** Registry of known scores, sorted by name. Shared by all vectors. */
    private static final SortedMap<String, Meta> template = new ConcurrentSkipListMap<>();

    static {
        // COUNT is always registered: merge() reads it as the weight of each
        // side, and it defaults to 1 for a vector that never set it.
        register("COUNT", 1, Merge.Sum);
    }

    public Score() {
        super();
    }

    /**
     * View a score vector as a plain map.
     * This returns the very same instance, not a copy, so changes made
     * through the result are changes to {@code scores}.
     * @param scores The score vector
     * @return {@code scores} itself
     */
    public static Map<String, Double> asMap(Score scores) {
        return scores;
    }

    /**
     * Get a "blank" vector (all defaults)
     */
    public static Score empty() {
        return new Score();
    }

    /**
     * Get a specific score.
     *
     * Note this is an overload taking a String, distinct from
     * {@link HashMap#get(Object)}: it is only chosen when the receiver is
     * typed as Score and the argument as String.
     *
     * @param name The name of the score
     * @return the stored value; the registered default if the score is
     *         registered but not stored (or stored as null); 0.0 if the
     *         name is not registered at all, even if a value is stored
     */
    public double get(String name) {
        // One lookup instead of containsKey + get.
        Meta meta = template.get(name);
        if (meta == null) {
            return 0.0;
        }
        // Treat a null mapping like a missing one rather than failing to unbox.
        Double stored = super.get(name);
        return stored != null ? stored : meta.default_value;
    }

    /**
     * Get a bunch of scores as a plain array, in the iteration order of
     * {@code names}. The array is detached from this vector.
     * @param names The names of the scores to read
     * @return one value per name, each read as by {@link #get(String)}
     */
    public double[] getEach(Collection<String> names) {
        double[] outgoing = new double[names.size()];
        int i = 0;
        for (String name : names) {
            outgoing[i++] = get(name);
        }
        return outgoing;
    }

    /**
     * The names of all currently registered scores, in sorted order.
     * This is a live view of the registry's key set, not a snapshot;
     * callers should treat it as read-only.
     */
    public static Set<String> latestSchema() {
        return template.keySet();
    }

    /**
     * Merge two scores.
     *
     * Every registered score is combined according to its merge mode.
     * Mean is weighted by each side's COUNT. If the two COUNTs do not add up
     * to a positive number, the result is simply a copy of {@code left}.
     * @param left  One score vector
     * @param right The other score vector
     * @return a new vector; neither input is modified
     */
    public static Score merge(Score left, Score right) {
        final double left_count = left.get("COUNT");
        final double right_count = right.get("COUNT");

        if (left_count + right_count > 0) {
            Score center = new Score();
            for (Meta m : template.values()) {
                final double l = left.get(m.name);
                final double r = right.get(m.name);
                switch (m.merge_type) {
                case Mean:
                    center.put(m.name,
                            (left_count * l + right_count * r) / (left_count + right_count));
                    break;
                case Or:
                    center.put(m.name, l + r > 0 ? 1.0 : 0.0);
                    break;
                case Min:
                    center.put(m.name, Math.min(l, r));
                    break;
                case Max:
                    center.put(m.name, Math.max(l, r));
                    break;
                case Sum:
                    center.put(m.name, l + r);
                    break;
                }
            }
            return center;
        } else {
            Score nscore = new Score();
            nscore.putAll(left);
            return nscore;
        }
    }

    /**
     * Normalize a set of scores against one another, in place.
     * This is intended to be run once per question.
     * Afterward, each registered score has mean 0 and (population) standard
     * deviation 1 across the group. Scores that are constant across the
     * group, and the score named "CORRECT", are left untouched.
     * @param mat The answers whose scores should be normalized
     * @return {@code mat}, for convenience
     */
    public static List<Answer> normalizeGroup(List<Answer> mat) {
        if (mat.isEmpty()) {
            return mat;
        }

        // Snapshot the schema once so every array below has the same length
        // even if another thread registers a score meanwhile.
        final String[] keysarr = template.keySet().toArray(new String[0]);
        final int len = keysarr.length;
        final int rows = mat.size();
        // The registry is sorted by name, so binary search is valid here.
        // A negative result (not registered) never matches a column index.
        final int preserve_attr = Arrays.binarySearch(keysarr, "CORRECT");

        // Mean of each column
        double[] mean = new double[len];
        for (Answer row : mat) {
            for (int i = 0; i < len; i++) {
                mean[i] += row.scores.get(keysarr[i]);
            }
        }
        for (int i = 0; i < len; i++) {
            mean[i] /= rows;
        }

        // Standard deviation of each column
        double[] stdev = new double[len];
        for (Answer row : mat) {
            for (int i = 0; i < len; i++) {
                double diff = mean[i] - row.scores.get(keysarr[i]);
                stdev[i] += diff * diff;
            }
        }
        for (int i = 0; i < len; i++) {
            // Divide the summed squares by the row count before the root;
            // without it the result is sqrt(rows) times too large and the
            // scaled scores would not have unit deviation.
            stdev[i] = Math.sqrt(stdev[i] / rows);
        }

        // Center and scale in place
        for (Answer row : mat) {
            for (int col = 0; col < len; col++) {
                if (col != preserve_attr && stdev[col] != 0) {
                    row.scores.put(keysarr[col],
                            (row.scores.get(keysarr[col]) - mean[col]) / stdev[col]);
                }
            }
        }
        return mat;
    }

    /** Register the answer score for automatically generated model data
     *
     * This function is idempotent: the first registration of a name wins
     * and later calls with the same name change nothing.
     * @param name The name of the score as it will be presented to Weka
     * @param default_value What the value of the score should be if it is missing
     * @param merge_mode How to merge two scores of the same name
     */
    public static void register(String name, double default_value, Merge merge_mode) {
        template.putIfAbsent(name, new Meta(name, default_value, merge_mode));
    }

    /**
     * Copy this vector into a new, independent Score holding the same entries.
     */
    @Override
    public Score clone() {
        Score s = new Score();
        s.putAll(this);
        return s;
    }
}