package project.utils.statistics.impl;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import project.client.persistence.Message;
import project.utils.statistics.MessageSimilarity;

/**
 * Similarity measures between two {@link Message}s, based on how often each
 * ';'-separated token occurs in their formatted content.
 *
 * <p>Every measure returns {@code 0.0f} when either message (or its formatted
 * content) is {@code null}, and {@code 1.0f} when both formatted contents are
 * equal strings.
 */
public class MessageSimilarity_Impl implements MessageSimilarity {

    /**
     * Builds the token-frequency vector of a message.
     *
     * <p>The formatted content is trimmed and split on {@code ';'}; empty
     * tokens are ignored. Individual tokens are not trimmed.
     *
     * @param m the message to analyse; may be {@code null}
     * @return a new mutable map from token to occurrence count; empty when the
     *         message or its formatted content is {@code null} or empty
     */
    private Map<String, Integer> getMessageFrequency(Message m) {
        Map<String, Integer> frequencies = new HashMap<String, Integer>();
        if (m == null) {
            return frequencies;
        }
        String content = m.getFormattedContent();
        if (content == null || content.length() == 0) {
            return frequencies;
        }

        for (String token : content.trim().split(";")) {
            if (token.length() == 0) {
                continue;
            }
            Integer seen = frequencies.get(token);
            int updated = (seen == null) ? 1 : seen.intValue() + 1;
            frequencies.put(token, Integer.valueOf(updated));
        }
        return frequencies;
    }

    /**
     * Computes the squared Euclidean norm (sum of squared counts) of a
     * frequency vector.
     *
     * @param v the frequency vector
     * @return the sum of the squares of all counts; {@code 0} for an empty map
     */
    private double squaredLength(Map<String, Integer> v) {
        double total = 0.0;
        for (Integer count : v.values()) {
            double c = count.intValue();
            total += c * c;
        }
        return total;
    }

    /**
     * Computes the squared Euclidean distance between two frequency vectors.
     * A token missing from one vector counts as frequency zero there.
     *
     * @param v1 first frequency vector
     * @param v2 second frequency vector
     * @return the sum over all tokens of the squared count difference
     */
    private float distance(Map<String, Integer> v1, Map<String, Integer> v2) {
        float dist = 0.0f;

        // Tokens present in v1 (shared tokens are handled here, once).
        for (Map.Entry<String, Integer> entry : v1.entrySet()) {
            Integer other = v2.get(entry.getKey());
            int otherCount = (other == null) ? 0 : other.intValue();
            float diff = entry.getValue().intValue() - otherCount;
            dist += diff * diff;
        }

        // Tokens that occur only in v2.
        for (Map.Entry<String, Integer> entry : v2.entrySet()) {
            if (!v1.containsKey(entry.getKey())) {
                float count = entry.getValue().intValue();
                dist += count * count;
            }
        }
        return dist;
    }

    /**
     * Computes the cosine similarity of two frequency vectors: their dot
     * product divided by the product of their Euclidean norms.
     *
     * @param v1 first frequency vector
     * @param v2 second frequency vector
     * @return a value in {@code [0, 1]}; {@code 0.0f} if either vector is empty
     */
    private float dot(Map<String, Integer> v1, Map<String, Integer> v2) {
        double squaredNorms = squaredLength(v1) * squaredLength(v2);
        if (squaredNorms == 0.0) {
            return 0.0f;
        }

        // Iterate the smaller map and probe the larger one.
        Map<String, Integer> smaller = (v1.size() <= v2.size()) ? v1 : v2;
        Map<String, Integer> larger = (smaller == v1) ? v2 : v1;

        double product = 0.0;
        for (Map.Entry<String, Integer> entry : smaller.entrySet()) {
            Integer other = larger.get(entry.getKey());
            if (other != null) {
                product += (double) entry.getValue().intValue() * other.intValue();
            }
        }

        // sqrt(|v1|^2 * |v2|^2) == |v1| * |v2|; a single square root keeps
        // vectors with identical counts at exactly 1. The clamp guards against
        // rounding pushing the ratio marginally above 1.
        double cosine = product / Math.sqrt(squaredNorms);
        return (float) Math.min(1.0, cosine);
    }

    /**
     * Similarity computed as {@code exp(-d * d)}, where {@code d} is the value
     * returned by {@link #distance(Map, Map)} for the two frequency vectors.
     *
     * @param m1 first message; may be {@code null}
     * @param m2 second message; may be {@code null}
     * @return {@code 0.0f} if a message or its content is {@code null},
     *         {@code 1.0f} if both contents are equal, otherwise
     *         {@code exp(-d * d)}
     */
    public float sim1(Message m1, Message m2) {
        if (m1 == null || m2 == null) {
            return 0.0f;
        }
        if (m1.getFormattedContent() == null || m2.getFormattedContent() == null) {
            return 0.0f;
        }
        if (m1.getFormattedContent().equals(m2.getFormattedContent())) {
            return 1.0f;
        }

        float dist = this.distance(this.getMessageFrequency(m1), this.getMessageFrequency(m2));
        return (float) Math.exp(-dist * dist);
    }

    /**
     * Similarity computed as {@code 1 / (1 + d)}, where {@code d} is the value
     * returned by {@link #distance(Map, Map)} for the two frequency vectors.
     *
     * @param m1 first message; may be {@code null}
     * @param m2 second message; may be {@code null}
     * @return {@code 0.0f} if a message or its content is {@code null},
     *         {@code 1.0f} if both contents are equal, otherwise
     *         {@code 1 / (1 + d)}
     */
    public float sim2(Message m1, Message m2) {
        if (m1 == null || m2 == null) {
            return 0.0f;
        }
        if (m1.getFormattedContent() == null || m2.getFormattedContent() == null) {
            return 0.0f;
        }
        if (m1.getFormattedContent().equals(m2.getFormattedContent())) {
            return 1.0f;
        }

        float dist = this.distance(this.getMessageFrequency(m1), this.getMessageFrequency(m2));
        return 1.0f / (1.0f + dist);
    }

    /**
     * Cosine similarity of the two messages' token-frequency vectors.
     *
     * @param m1 first message; may be {@code null}
     * @param m2 second message; may be {@code null}
     * @return {@code 0.0f} if a message or its content is {@code null} or
     *         empty, {@code 1.0f} if both contents are equal, otherwise the
     *         cosine similarity in {@code [0, 1]}
     */
    public float sim3(Message m1, Message m2) {
        if (m1 == null || m2 == null) {
            return 0.0f;
        }
        if (m1.getFormattedContent() == null
                || m2.getFormattedContent() == null
                || m1.getFormattedContent().length() == 0
                || m2.getFormattedContent().length() == 0) {
            return 0.0f;
        }
        if (m1.getFormattedContent().equals(m2.getFormattedContent())) {
            return 1.0f;
        }

        return this.dot(this.getMessageFrequency(m1), this.getMessageFrequency(m2));
    }

    /**
     * Not implemented yet.
     *
     * @param m1 first message (ignored)
     * @param m2 second message (ignored)
     * @return always {@code 0}
     */
    public float sim4(Message m1, Message m2) {
        // TODO: implement this similarity measure.
        return 0;
    }

    /**
     * Not implemented yet.
     *
     * @param m1 first message (ignored)
     * @param m2 second message (ignored)
     * @return always {@code 0}
     */
    public float sim5(Message m1, Message m2) {
        // TODO: implement this similarity measure.
        return 0;
    }
}