package common;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.MoreLikeThisParams;

import processing.hashtag.solr.Tweet;

/**
 * Thin query facade over one Solr core holding tweet documents.
 *
 * <p>The documents are read through the fields {@code id}, {@code userid}, {@code text},
 * {@code timestamp} and {@code hashtags}. Query failures are not propagated: every method
 * prints the stack trace and returns whatever it had collected up to that point.
 */
public class SolrConnector {

    /** Query matching every document of the core. */
    private static final String MATCH_ALL = "*:*";

    /** Number of seconds per hour, kept as a long so the threshold arithmetic cannot overflow. */
    private static final long SECONDS_PER_HOUR = 60L * 60L;

    private final SolrServer server;

    /**
     * Creates a connector for the core reachable at {@code solrUrl + "/solr/" + core}.
     *
     * @param solrUrl base URL of the Solr host, without the {@code /solr} path segment
     * @param core    name of the core to query
     */
    public SolrConnector(String solrUrl, String core) {
        this.server = new HttpSolrServer(solrUrl + "/solr/" + core);
    }

    /**
     * Loads the text and hashtags of every document in the core.
     *
     * <p>All rows are requested in a single response. Documents sharing the same text
     * overwrite each other, so only the hashtags of the last such document are kept.
     *
     * @return map from tweet text to its hashtags, in result order; empty or partial if the
     *         query failed
     */
    public Map<String, Set<String>> getTweets() {
        Map<String, Set<String>> tweets = new LinkedHashMap<String, Set<String>>();
        SolrQuery query = new SolrQuery();
        query.set("q", MATCH_ALL);
        query.set("fl", "text,hashtags");
        query.set("rows", Integer.MAX_VALUE);
        try {
            for (SolrDocument d : fetch(query)) {
                tweets.put((String) d.get("text"), new LinkedHashSet<String>(hashtagsOf(d)));
            }
        } catch (SolrServerException e) {
            e.printStackTrace();
        }
        return tweets;
    }

    /**
     * Loads every document of the core as a {@link Tweet}.
     *
     * @param ignoreRetweets if {@code true}, documents matching {@code text:"RT @*"} are
     *                       excluded by the query
     * @return the tweets in result order; empty or partial if the query failed
     */
    public List<Tweet> getTweetObjects(boolean ignoreRetweets) {
        List<Tweet> tweetObjects = new ArrayList<Tweet>();
        SolrQuery query = new SolrQuery();
        query.set("q", ignoreRetweets ? "-text:\"RT @*\"" : MATCH_ALL);
        query.set("rows", Integer.MAX_VALUE);
        try {
            for (SolrDocument d : fetch(query)) {
                tweetObjects.add(new Tweet(
                        (String) d.get("id"),
                        (String) d.get("userid"),
                        (String) d.get("text"),
                        (String) d.get("timestamp"),
                        new LinkedHashSet<String>(hashtagsOf(d))));
            }
        } catch (SolrServerException e) {
            e.printStackTrace();
        }
        return tweetObjects;
    }

    /**
     * Loads every document of this core as a {@link Tweet} whose text is replaced by the
     * text that {@code trainConnector} returns from
     * {@link #getTweetTextOfLastHours(String, int)} for the document's user.
     *
     * <p>This issues one additional query against {@code trainConnector} per document.
     *
     * @param trainConnector connector used to look up the replacement text
     * @param hours          time window passed on to {@code getTweetTextOfLastHours}
     * @return the tweets in result order; empty or partial if the query on this core failed
     */
    public List<Tweet> getTrainTweetObjects(SolrConnector trainConnector, int hours) {
        List<Tweet> tweetObjects = new ArrayList<Tweet>();
        SolrQuery query = new SolrQuery();
        query.set("q", MATCH_ALL);
        query.set("rows", Integer.MAX_VALUE);
        try {
            for (SolrDocument d : fetch(query)) {
                String userId = (String) d.get("userid");
                String text = trainConnector.getTweetTextOfLastHours(userId, hours);
                tweetObjects.add(new Tweet(
                        (String) d.get("id"),
                        userId,
                        text,
                        (String) d.get("timestamp"),
                        new LinkedHashSet<String>(hashtagsOf(d))));
            }
        } catch (SolrServerException e) {
            e.printStackTrace();
        }
        return tweetObjects;
    }

    /**
     * Loads the user id and hashtags of every document in the core.
     *
     * <p>A user with several documents keeps only the hashtags of the last one returned,
     * because later entries overwrite earlier ones.
     *
     * @return map from user id to hashtags, in result order; empty or partial if the query
     *         failed
     */
    public Map<String, Set<String>> getUserIDs() {
        Map<String, Set<String>> hashtagsByUser = new LinkedHashMap<String, Set<String>>();
        SolrQuery query = new SolrQuery();
        query.set("q", MATCH_ALL);
        query.set("fl", "userid,hashtags");
        query.set("rows", Integer.MAX_VALUE);
        try {
            for (SolrDocument d : fetch(query)) {
                hashtagsByUser.put((String) d.get("userid"), new HashSet<String>(hashtagsOf(d)));
            }
        } catch (SolrServerException e) {
            e.printStackTrace();
        }
        return hashtagsByUser;
    }

    /**
     * Returns the id of the first document of {@code user} when sorted by
     * {@code timestamp desc}.
     *
     * @param user user id to look up; it is escaped before being placed in the query
     * @return the document id, or {@code null} if there is no match or the query failed
     */
    public String getMostRecentTweetOfUser(String user) {
        SolrQuery query = new SolrQuery();
        query.set("q", fieldQuery("userid", user));
        query.set("sort", "timestamp desc");
        query.set("fl", "id");
        query.set("rows", 1);
        try {
            SolrDocumentList docs = fetch(query);
            if (!docs.isEmpty()) {
                return (String) docs.get(0).get("id");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Concatenates the text of the first {@code intValue} documents of {@code user} when
     * sorted by {@code timestamp desc}.
     *
     * @param user     user id to look up; it is escaped before being placed in the query
     * @param intValue maximum number of documents to request
     * @return the texts, each followed by a single space; empty or partial if the query failed
     */
    public String getTweetTextOfRecentTweets(String user, int intValue) {
        StringBuilder tweetText = new StringBuilder();
        SolrQuery query = new SolrQuery();
        query.set("q", fieldQuery("userid", user));
        query.set("sort", "timestamp desc");
        query.set("fl", "text");
        query.set("rows", intValue);
        try {
            for (SolrDocument d : fetch(query)) {
                tweetText.append((String) d.get("text")).append(' ');
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return tweetText.toString();
    }

    /**
     * Concatenates the text of the user's documents whose timestamp lies within
     * {@code hours} of the user's newest document.
     *
     * <p>At most 100 documents are requested, sorted by {@code timestamp desc}. The window
     * is anchored at the first (newest) document, not at the current time, and the lower
     * bound is exclusive, so {@code hours <= 0} yields an empty string. The arithmetic
     * treats the integer part of the timestamp as a number of seconds.
     *
     * @param user  user id to look up; it is escaped before being placed in the query
     * @param hours size of the time window
     * @return the texts, each followed by a single space; empty or partial if the query
     *         failed or a timestamp could not be parsed
     */
    public String getTweetTextOfLastHours(String user, int hours) {
        StringBuilder tweetText = new StringBuilder();
        SolrQuery query = new SolrQuery();
        query.set("q", fieldQuery("userid", user));
        query.set("sort", "timestamp desc");
        query.set("fl", "text,timestamp");
        query.set("rows", 100);
        try {
            boolean thresholdKnown = false;
            long threshold = 0L;
            for (SolrDocument d : fetch(query)) {
                long timestamp = parseWholeSeconds((String) d.get("timestamp"));
                if (!thresholdKnown) {
                    // Computed in long: hours * 60 * 60 in int arithmetic can overflow.
                    threshold = timestamp - hours * SECONDS_PER_HOUR;
                    thresholdKnown = true;
                }
                if (timestamp <= threshold) {
                    // Results are sorted newest first, so everything after this is older.
                    break;
                }
                tweetText.append((String) d.get("text")).append(' ');
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return tweetText.toString();
    }

    /**
     * Collects hashtags of documents that the {@code /mlt} handler reports for the given
     * text, which is sent as {@code stream.body} and compared on the {@code text} field.
     *
     * <p>Up to 50 documents are requested. Each hashtag is lower-cased and mapped to the
     * score of the first document it appears in, until {@code limit} distinct hashtags have
     * been collected.
     *
     * @param tweetText text to compare against
     * @param limit     maximum number of distinct hashtags to collect
     * @return the collected hashtags passed through {@code MapUtil.sortByValue}; an empty
     *         map if {@code tweetText} is {@code null} or empty
     */
    @SuppressWarnings("deprecation")
    public Map<String, Double> getTopHashtagsForTweetText(String tweetText, int limit) {
        Map<String, Double> hashtagMap = new LinkedHashMap<String, Double>();
        String cleanedTweetText = tweetText; // getCleanedTweetText(tweetText);
        if (cleanedTweetText == null || cleanedTweetText.isEmpty()) {
            return hashtagMap;
        }
        SolrQuery query = new SolrQuery();
        // query version
        // query.set("q", "text:" + cleanedTweetText);
        // mlt version
        query.setQueryType("/mlt");
        query.set("stream.body", cleanedTweetText);
        query.set("mlt.fl", "text");
        // additional parameters
        query.set("fl", "hashtags,score");
        query.set("mlt.count", 50);
        query.set("rows", 50);
        try {
            for (SolrDocument d : fetch(query)) {
                if (hashtagMap.size() >= limit) {
                    break; // nothing more can be added
                }
                processSolrDocument(d, hashtagMap, limit);
            }
        } catch (Exception e) {
            System.out.println("Exception with tweet-text: " + cleanedTweetText);
            e.printStackTrace();
        }
        return MapUtil.sortByValue(hashtagMap);
    }

    /**
     * Collects hashtags of documents that the {@code /mlt} handler reports for the document
     * with the given id, compared on the {@code text} field.
     *
     * <p>Up to 50 documents are requested. Each hashtag is lower-cased and mapped to the
     * score of the first document it appears in, until {@code limit} distinct hashtags have
     * been collected. Unlike {@link #getTopHashtagsForTweetText(String, int)}, the map is
     * returned in collection order without further sorting.
     *
     * @param tweetID id of the reference document; it is escaped before being placed in the
     *                query
     * @param limit   maximum number of distinct hashtags to collect
     * @return the collected hashtags; an empty map if {@code tweetID} is {@code null} or empty
     */
    @SuppressWarnings("deprecation")
    public Map<String, Double> getTopHashtagsForTweetID(String tweetID, int limit) {
        Map<String, Double> hashtagMap = new LinkedHashMap<String, Double>();
        if (tweetID == null || tweetID.isEmpty()) {
            return hashtagMap;
        }
        SolrQuery query = new SolrQuery();
        // mlt version
        query.setQueryType("/mlt");
        query.set("q", fieldQuery("id", tweetID));
        query.set("mlt.fl", "text");
        // additional parameters
        query.set("fl", "hashtags,score");
        query.set("mlt.count", 50);
        query.set("rows", 50);
        try {
            for (SolrDocument d : fetch(query)) {
                if (hashtagMap.size() >= limit) {
                    break; // nothing more can be added
                }
                processSolrDocument(d, hashtagMap, limit);
            }
        } catch (Exception e) {
            System.out.println("Exception with tweet-id: " + tweetID);
            e.printStackTrace();
        }
        return hashtagMap;
    }

    /**
     * Adds the lower-cased hashtags of {@code d} to {@code hashtagMap} with the document's
     * score, keeping the score already stored for a hashtag seen earlier and stopping once
     * the map holds {@code limit} entries.
     *
     * <p>A document without a numeric {@code score} is skipped rather than failing the
     * whole result set.
     */
    private void processSolrDocument(SolrDocument d, Map<String, Double> hashtagMap, int limit) {
        Object rawScore = d.get("score");
        if (!(rawScore instanceof Number)) {
            return;
        }
        double score = ((Number) rawScore).doubleValue();
        for (String h : hashtagsOf(d)) {
            if (hashtagMap.size() >= limit) {
                break;
            }
            // Locale.ROOT keeps the key independent of the JVM's default locale.
            String key = h.toLowerCase(Locale.ROOT);
            if (!hashtagMap.containsKey(key)) {
                hashtagMap.put(key, score);
            }
        }
    }

    /** Runs {@code query} against the core and returns the documents of the response. */
    private SolrDocumentList fetch(SolrQuery query) throws SolrServerException {
        QueryResponse response = this.server.query(query);
        return response.getResults();
    }

    /**
     * Builds {@code field:value} with the query syntax characters of {@code value} escaped,
     * so that the value is always matched literally.
     */
    private static String fieldQuery(String field, String value) {
        return field + ":" + ClientUtils.escapeQueryChars(String.valueOf(value));
    }

    /**
     * Returns the {@code hashtags} values of {@code d} as strings: an empty list if the
     * field is absent, a one-element list if it holds a single value.
     */
    private static List<String> hashtagsOf(SolrDocument d) {
        Object value = d.get("hashtags");
        if (value == null) {
            return Collections.emptyList();
        }
        if (value instanceof Collection) {
            List<String> hashtags = new ArrayList<String>();
            for (Object item : (Collection<?>) value) {
                if (item != null) {
                    hashtags.add(item.toString());
                }
            }
            return hashtags;
        }
        return Collections.singletonList(value.toString());
    }

    /**
     * Parses the part of {@code timestamp} before the first {@code '.'}, or the whole
     * string if it contains none.
     *
     * @throws NumberFormatException if that part is not a valid long
     * @throws NullPointerException  if {@code timestamp} is {@code null}
     */
    private static long parseWholeSeconds(String timestamp) {
        int dot = timestamp.indexOf('.');
        String wholePart = dot < 0 ? timestamp : timestamp.substring(0, dot);
        return Long.parseLong(wholePart);
    }

    /** Strips every character except ASCII letters, digits and spaces, then trims. */
    private String getCleanedTweetText(String tweetText) {
        if (tweetText != null) {
            return tweetText.replaceAll("[^a-zA-Z0-9 ]+", "").trim();
        }
        return "";
    }
}