package uk.ac.cam.echo.server.analysis; import org.hibernate.criterion.Order; import org.hibernate.criterion.Restrictions; import uk.ac.cam.echo.data.*; import uk.ac.cam.echo.server.HibernateUtil; import uk.ac.cam.echo.server.analysis.cmp.*; import uk.ac.cam.echo.server.analysis.internal.*; import uk.ac.cam.echo.server.models.ConferenceModel; import uk.ac.cam.echo.server.models.ConversationModel; import uk.ac.cam.echo.server.models.MessageModel; import uk.ac.cam.echo.server.models.UserModel; import java.util.*; /** Author: Petar 'PetarV' Veličković An implementation of the ServerDataAnalyst interface. Note that many implementations are possible, depending on the particular analysis criteria we're after. */ public class DataAnalyst implements ServerDataAnalyst { long parentID; String dictionary = this.getClass().getResource("/dictionaries/en_GB/en_GB.dic").getPath(); String affix = this.getClass().getResource("/dictionaries/en_GB/en_GB.aff").getPath(); String stopWords = this.getClass().getResource("/stop_lists/stop_list.txt").getPath(); public DataAnalyst(long conferenceID) { parentID = conferenceID; } @Override public Map<String, Long> getKeywords(Conversation conversation, long lastTimeStamp) { List<Message> msgs = (List<Message>)conversation.getSortedMessages(); Collections.reverse(msgs); // because the query returns them in the opposite order Map<String, Long> ret = new HashMap<String, Long>(); long lastTS = lastTimeStamp; for (Message M : msgs) { long TS = M.getTimeStamp(); if (TS < lastTimeStamp) break; if (TS > lastTS) lastTS = TS; List<String> kwds = MessageLexer.lexAnalyse(M.getContents(), dictionary, affix, stopWords); for (String kwd : kwds) { if (!ret.containsKey(kwd)) ret.put(kwd, 1L); else { long prevValue = ret.get(kwd); ret.put(kwd, prevValue+1); } } } ret.put("TS", lastTS); return ret; } @Override public Map<String, Long> getKeywords(long lastTimeStamp) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); Map<String, Long> ret = new HashMap<String, Long>(); long lastTS = lastTimeStamp; for (Conversation C : conversations) { List<Message> msgs = (List<Message>)C.getSortedMessages(); Collections.reverse(msgs); // because the query returns them in the opposite order for (Message M : msgs) { long TS = M.getTimeStamp(); if (TS < lastTimeStamp) break; if (TS > lastTS) lastTS = TS; List<String> kwds = MessageLexer.lexAnalyse(M.getContents(), dictionary, affix, stopWords); for (String kwd : kwds) { if (!ret.containsKey(kwd)) ret.put(kwd, 1L); else { long prevValue = ret.get(kwd); ret.put(kwd, prevValue+1); } } } } ret.put("TS", lastTS); return ret; } @Override public void updateGraph() { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); long nextLastTS = GraphUtil.lastTS; for (Conversation C : conversations) { List<Message> msgs = (List<Message>)C.getSortedMessages(); Collections.reverse(msgs); // because the query returns them in the opposite order for (Message M : msgs) { if (M.getTimeStamp() > nextLastTS) nextLastTS = M.getTimeStamp(); if (M.getTimeStamp() <= GraphUtil.lastTS) break; List<String> keywords = MessageLexer.lexAnalyse(M.getContents(), dictionary, affix, stopWords); ListIterator<String> it1 = keywords.listIterator(); while (it1.hasNext()) { String u = it1.next(); String U = u.substring(0, 1).concat(".").concat(u); ListIterator<String> it2 = keywords.listIterator(it1.nextIndex()); while (it2.hasNext()) { String v = it2.next(); String V = v.substring(0, 1).concat(".").concat(v); GraphUtil.addEdge(U, V); } } } } GraphUtil.lastTS = nextLastTS; } @Override public void updateGraph(int lim) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); long nextLastTS = GraphUtil2.lastTS; for (Conversation C : conversations) { if (lim <= 0) break; List<Message> msgs = (List<Message>)C.getSortedMessages(); Collections.reverse(msgs); // because the query returns them in the opposite order for (Message M : msgs) { if (M.getTimeStamp() > nextLastTS) nextLastTS = M.getTimeStamp(); if (M.getTimeStamp() <= GraphUtil2.lastTS) break; List<String> keywords = MessageLexer.lexAnalyse(M.getContents(), dictionary, affix, stopWords); ListIterator<String> it1 = keywords.listIterator(); while (it1.hasNext()) { String u = it1.next(); String U = u.substring(0, 1).concat(".").concat(u); ListIterator<String> it2 = keywords.listIterator(it1.nextIndex()); while (it2.hasNext()) { String v = it2.next(); String V = v.substring(0, 1).concat(".").concat(v); GraphUtil2.addEdge(U, V); } } lim--; if (lim <= 0) break; } } GraphUtil2.lastTS = nextLastTS; } @Override public void updateFGraph() { /* Multi-pass method. 1. Get Conference (type 0). 2. For each Conversation (type 1): 3. Connect to Conference. 4. Connect to Tags (type 4). 5. For each Message (type 2) (**incrementally**): 6. Connect to Conversation. 7. Connect to User (type 3). 8. For each User: 9. Connect to Conversation. 10. Connect to Interests (type 5). */ Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); List<User> users = HibernateUtil.getTransaction().createCriteria(UserModel.class).list(); long nextLastTS = NPForceGraph.lastTS; String confName = parentConference.getName(); long confIid = parentConference.getId(); NPForceGraph.addNode(confName, 0, confIid); for (Conversation C : conversations) { String convName = C.getName(); long convIid = C.getId(); NPForceGraph.addEdge(confName, 0, confIid, convName, 1, convIid); Collection<Tag> tags = C.getTags(); if (tags != null) { for (Tag T : tags) { String tagName = T.getName(); long tagIid = T.getId(); NPForceGraph.addEdge(convName, 1, convIid, tagName, 4, tagIid); } } /* List<Message> messages = (List<Message>)C.getSortedMessages(); Collections.reverse(messages); // because query outputs them in reverse order for (Message M : messages) { //if (M.getTimeStamp() > nextLastTS) nextLastTS = M.getTimeStamp(); //if (M.getTimeStamp() <= GraphUtil.lastTS) break; String msgFullContents = M.getContents(); String msgName = msgFullContents.substring(0, Math.min(10, msgFullContents.length())).concat("..."); long msgIid = M.getId(); NPForceGraph.addEdge(convName, 1, convIid, msgName, 2, msgIid); User sender = M.getSender(); if (sender != null) { String senderName = sender.getUsername(); long senderIid = sender.getId(); NPForceGraph.addEdge(msgName, 2, msgIid, senderName, 3, senderIid); } }*/ } for (User U : users) { String userName = U.getUsername(); long userIid = U.getId(); NPForceGraph.addNode(userName, 3, userIid); Conversation convo = U.getCurrentConversation(); if (convo != null) { String convoName = convo.getName(); long convoIid = convo.getId(); NPForceGraph.addEdge(userName, 3, userIid, convoName, 1, convoIid); } Collection<Interest> interests = U.getInterests(); if (interests != null) { for (Interest I : interests) { String interestName = I.getName(); long interestIid = I.getId(); NPForceGraph.addEdge(userName, 3, userIid, interestName, 5, interestIid); } } } NPForceGraph.lastTS = nextLastTS; } @Override public List<Conversation> search(String keyword, int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); keyword = keyword.toLowerCase(Locale.ENGLISH); String[] keywords = keyword.split("\\s+"); List<Conversation> ret = new LinkedList<Conversation>(); List<Conversation> matchesByName = new LinkedList<Conversation>(); List<Conversation> matchesByTag = new LinkedList<Conversation>(); Set<Conversation> processed = new HashSet<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); for (Conversation C : conversations) { boolean foundByName = false; for (String kwd : keywords) { if (C.getName().toLowerCase(Locale.ENGLISH).contains(kwd)) { matchesByName.add(C); processed.add(C); foundByName = true; break; } } if (!foundByName) { boolean foundByTag = false; Collection<Tag> tags = C.getTags(); for (Tag t : tags) { for (String kwd : keywords) { if (t.getName().toLowerCase(Locale.ENGLISH).contains(kwd)) { matchesByTag.add(C); processed.add(C); foundByTag = true; break; } } if (foundByTag) break; } } } ListIterator<Conversation> it1 = matchesByName.listIterator(); ListIterator<Conversation> it2 = matchesByTag.listIterator(); while (n > 0 && it1.hasNext()) { ret.add(it1.next()); n--; } while (n > 0 && it2.hasNext()) { ret.add(it2.next()); n--; } if (n > 0) { PriorityQueue<DoubleConversationPair> pq = new PriorityQueue<DoubleConversationPair>(11, new ConversationComparatorByMatchFrequency()); for (Conversation C : conversations) { if (processed.contains(C)) continue; Collection<Message> messages = C.getMessages(); double totalScore = 0.0; for (Message msg : messages) { List<String> baseWords = MessageLexer.lexAnalyse(msg.getContents(), dictionary, affix, stopWords); if (baseWords.isEmpty()) continue; double total = (double)baseWords.size(); for (String word : baseWords) { for (String kwd : keywords) { totalScore += StringMatcher.Match(kwd, word) / total; } } } pq.offer(new DoubleConversationPair(totalScore, C)); } while (n > 0 && !pq.isEmpty()) { ret.add(pq.poll().getConvo()); n--; } } return ret; } @Override public List<Conversation> onlyKeywordSearch(String keyword, int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); keyword = keyword.toLowerCase(Locale.ENGLISH); String[] keywords = keyword.split("\\s+"); List<Conversation> ret = new LinkedList<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); PriorityQueue<DoubleConversationPair> pq = new PriorityQueue<DoubleConversationPair>(11, new ConversationComparatorByMatchFrequency()); for (Conversation C : conversations) { Collection<Message> messages = C.getMessages(); double totalScore = 0.0; for (Message msg : messages) { List<String> baseWords = MessageLexer.lexAnalyse(msg.getContents(), dictionary, affix, stopWords); if (baseWords.isEmpty()) continue; double total = (double)baseWords.size(); for (String word : baseWords) { for (String kwd : keywords) { totalScore += StringMatcher.Match(kwd, word) / total; } } } pq.offer(new DoubleConversationPair(totalScore, C)); } while (n > 0 && !pq.isEmpty()) { ret.add(pq.poll().getConvo()); n--; } return ret; } @Override public List<Conversation> onlyTagSearch(String keyword, int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); keyword = keyword.toLowerCase(Locale.ENGLISH); String[] keywords = keyword.split("\\s+"); List<Conversation> ret = new LinkedList<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); for (Conversation C : conversations) { boolean foundByTag = false; Collection<Tag> tags = C.getTags(); for (Tag t : tags) { for (String kwd : keywords) { if (t.getName().toLowerCase(Locale.ENGLISH).contains(kwd)) { ret.add(C); n--; foundByTag = true; break; } } if (foundByTag) break; } if (n == 0) break; } return ret; } @Override public List<Conversation> onlyNameSearch(String keyword, int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); keyword = keyword.toLowerCase(Locale.ENGLISH); String[] keywords = keyword.split("\\s+"); List<Conversation> ret = new LinkedList<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); for (Conversation C : conversations) { for (String kwd : keywords) { if (C.getName().toLowerCase(Locale.ENGLISH).contains(kwd)) { ret.add(C); break; } } if (ret.size() == n) break; } return ret; } @Override public List<Conversation> nameAndTagSearch(String keyword, int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); keyword = keyword.toLowerCase(Locale.ENGLISH); String[] keywords = keyword.split("\\s+"); List<Conversation> ret = new LinkedList<Conversation>(); List<Conversation> matchesByName = new LinkedList<Conversation>(); List<Conversation> matchesByTag = new LinkedList<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); for (Conversation C : conversations) { boolean foundByName = false; for (String kwd : keywords) { if (C.getName().toLowerCase(Locale.ENGLISH).contains(kwd)) { matchesByName.add(C); foundByName = true; break; } } if (!foundByName) { boolean foundByTag = false; Collection<Tag> tags = C.getTags(); for (Tag t : tags) { for (String kwd : keywords) { if (t.getName().toLowerCase(Locale.ENGLISH).contains(kwd)) { matchesByTag.add(C); foundByTag = true; break; } } if (foundByTag) break; } } } ListIterator<Conversation> it1 = matchesByName.listIterator(); ListIterator<Conversation> it2 = matchesByTag.listIterator(); while (n > 0 && it1.hasNext()) { ret.add(it1.next()); n--; } while (n > 0 && it2.hasNext()) { ret.add(it2.next()); n--; } return ret; } @Override public List<Conversation> mostUsers(int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); List<Conversation> ret = new LinkedList<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); PriorityQueue<IntegerConversationPair> pq = new PriorityQueue<IntegerConversationPair>(11, new ConversationComparatorByUserCount()); for (Conversation C : conversations) pq.offer(new IntegerConversationPair(C.getUsers().size(), C)); while (n > 0 && !pq.isEmpty()) { ret.add(pq.poll().getConvo()); n--; } return ret; } @Override public List<Conversation> mostActiveRecently(long millis, int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); List<Conversation> ret = new LinkedList<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); PriorityQueue<IntegerConversationPair> pq = new PriorityQueue<IntegerConversationPair>(11, new ConversationComparatorByActivity()); long now = new Date().getTime(); for (Conversation C : conversations) { int cnt = 0; List<Message> msgs = (List<Message>)C.getSortedMessages(); Collections.reverse(msgs); // because the query returns them in the opposite order // PRECONDITION: msgs is sorted descending by timestamp. for (Message M : msgs) { if ((now - M.getTimeStamp()) > millis) break; else cnt++; } pq.offer(new IntegerConversationPair(cnt, C)); } while (n > 0 && !pq.isEmpty()) { ret.add(pq.poll().getConvo()); n--; } return ret; } @Override public List<Conversation> recommend(User user, int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Map<String, Integer> keywords = UserKeyworder.extractKeywords(user); if (keywords.isEmpty()) return null; // NO DATA TO QUERY UPON; should not happen! Collection<Conversation> conversations = parentConference.getConversationSet(); long now = new Date().getTime(); PriorityQueue<DoubleConversationPair> pq = new PriorityQueue<DoubleConversationPair>(11, new ConversationComparatorByMatchFrequency()); List<Conversation> ret = new LinkedList<Conversation>(); for (Conversation C : conversations) { String normalisedName = C.getName().toLowerCase(Locale.ENGLISH).replaceAll("[^a-zA-Z ]"," "); List<String> normalisedTags = new LinkedList<String>(); Collection<Tag> tags = C.getTags(); for (Tag T : tags) { normalisedTags.add(T.getName().toLowerCase(Locale.ENGLISH).replaceAll("[^a-zA-Z ]"," ")); } double currScore = 0.0; double total = (double)normalisedTags.size(); for (String kwd : keywords.keySet()) { currScore += StringMatcher.Match(kwd, normalisedName) * (double)keywords.get(kwd); if (normalisedTags.isEmpty()) continue; for (String ntg : normalisedTags) { currScore += StringMatcher.Match(kwd, ntg) * (double)keywords.get(kwd) / total; } } // get the amount of minutes since conversation was last active double time = (double)(now - ((List<Message>)C.getMessages(1)).get(0).getTimeStamp()) / 60000.0; if (time == 0.0) time = 0.0000000001; currScore /= time; pq.offer(new DoubleConversationPair(currScore, C)); } while (n > 0 && !pq.isEmpty()) { ret.add(pq.poll().getConvo()); n--; } return ret; } @Override public Message notify(User user, long currentId, long millis) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Map<String, Integer> keywords = UserKeyworder.extractKeywords(user); if (keywords.isEmpty()) return null; // NO DATA TO QUERY UPON; should not happen! Collection<Conversation> conversations = parentConference.getConversationSet(); long now = new Date().getTime(); Message ret = null; double maxScore = -1.0; for (Conversation C : conversations) { if (C.getId() == currentId) continue; List<Message> mostRecent = ((List<Message>)C.getMessages(1)); if (mostRecent.isEmpty()) continue; Message latest = mostRecent.get(0); if (now - millis > latest.getTimeStamp()) continue; List<String> baseWords = MessageLexer.lexAnalyse(latest.getContents(), dictionary, affix, stopWords); if (baseWords.isEmpty()) continue; double currScore = 0.0; double total = (double)baseWords.size(); for (String word : baseWords) { for (String kwd : keywords.keySet()) { currScore += StringMatcher.Match(kwd, word) * ((double)keywords.get(kwd)) / total; } } if (currScore > maxScore) { maxScore = currScore; ret = latest; } } return ret; } @Override public List<Conversation> mostMessages(int n) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); List<Conversation> ret = new LinkedList<Conversation>(); Collection<Conversation> conversations = parentConference.getConversationSet(); PriorityQueue<IntegerConversationPair> pq = new PriorityQueue<IntegerConversationPair>(11, new ConversationComparatorByMessageCount()); for (Conversation C : conversations) pq.offer(new IntegerConversationPair(C.getMessages().size(), C)); while (n > 0 && !pq.isEmpty()) { ret.add(pq.poll().getConvo()); n--; } return ret; } @Override public List<User> mostActiveUsers(int n) { List<User> users = HibernateUtil.getTransaction().createCriteria(UserModel.class).list(); List<User> ret = new LinkedList<User>(); PriorityQueue<IntegerUserPair> pq = new PriorityQueue<IntegerUserPair>(11, new UserComparatorByActivity()); for (User U : users) { int msgCnt = HibernateUtil.getTransaction().createCriteria(MessageModel.class) .add(Restrictions.eq("sender", U)).list().size(); pq.offer(new IntegerUserPair(msgCnt, U)); } while (n > 0 && !pq.isEmpty()) { ret.add(pq.poll().getUser()); n--; } return ret; } @Override public int hail(long millis) { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); long now = new Date().getTime(); int cnt = 0; for (Conversation C : conversations) { List<Message> msgs = (List<Message>) C.getSortedMessages(); Collections.reverse(msgs); for (Message M : msgs) { if ((now - M.getTimeStamp()) > millis) break; else cnt++; } } return cnt; } @Override public double maleToFemaleRatio() { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); double maleCount = 0, femaleCount = 0; for (Conversation C : conversations) { Collection<User> users = C.getUsers(); for (User U : users) { if (U.getGender() == null) continue; if (U.getGender().equals("M") || U.getGender().equals("Male")) maleCount++; if (U.getGender().equals("F") || U.getGender().equals("Female")) femaleCount++; } } if (femaleCount == 0.0) return Double.POSITIVE_INFINITY; return maleCount / femaleCount; } @Override public double maleToFemaleRatio(Conversation conversation) { double maleCount = 0, femaleCount=0; Collection<User> users = conversation.getUsers(); for (User U : users) { if (U.getGender() == null) continue; if (U.getGender().equals("M") || U.getGender().equals("Male")) maleCount++; if (U.getGender().equals("F") || U.getGender().equals("Female")) femaleCount++; } if (femaleCount == 0.0) return Double.POSITIVE_INFINITY; return maleCount / femaleCount; } @Override public int messageCount(long convoId) { Conversation convo = (Conversation) HibernateUtil.getTransaction().get(ConversationModel.class, convoId); return convo.getMessages().size(); } @Override public int messageCount() { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); int ret = 0; for (Conversation C : conversations) { ret += C.getMessages().size(); } return ret; } @Override public int userCount(long convoId) { Conversation convo = (Conversation) HibernateUtil.getTransaction().get(ConversationModel.class, convoId); return convo.getUsers().size(); } @Override public int userCount() { Conference parentConference = (Conference) HibernateUtil.getTransaction().get(ConferenceModel.class, parentID); Collection<Conversation> conversations = parentConference.getConversationSet(); int ret = 0; for (Conversation C : conversations) { ret += C.getUsers().size(); } return ret; } @Override public int contributingUsers(long convoId, boolean current) { Conversation convo = (Conversation) HibernateUtil.getTransaction().get(ConversationModel.class, convoId); Set<User> users = (Set<User>)convo.getUsers(); Collection<Message> msgs = convo.getMessages(); Set<Long> ret = new HashSet<Long>(); for (Message M : msgs) { long id = M.getSender().getId(); if (!current || users.contains(M.getSender())) ret.add(id); } return ret.size(); } @Override public long lastTimeActive(User user) { List<Message> sols = HibernateUtil.getTransaction().createCriteria(MessageModel.class) .add(Restrictions.eq("sender", user)).addOrder(Order.desc("timeStamp")).list(); long now = new Date().getTime(); if (sols == null) return now; if (sols.size() == 0) return now; return now - sols.get(0).getTimeStamp(); } }