/*
 *  Copyright 2010 Peter Karich jetwick_@_pannous_._info
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package de.jetwick.es;

import de.jetwick.util.Helper;
import org.elasticsearch.action.search.SearchResponse;
import de.jetwick.config.Configuration;
import de.jetwick.data.JTweet;
import de.jetwick.data.JUser;
import de.jetwick.tw.TweetDetector;
import de.jetwick.tw.cmd.StringFreqMap;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.elasticsearch.action.get.GetRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.FilteredQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder.Operator;
import org.elasticsearch.search.facet.FacetBuilders;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Stores and looks up {@link JUser} documents in an Elasticsearch index.
 * The index name defaults to {@code "uindex"} and the document type is
 * {@code "user"}. Documents are deleted and fetched by screen name, which
 * is used as the document id.
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public class ElasticUserSearch extends AbstractElasticSearch<JUser> {

    private final Logger logger = LoggerFactory.getLogger(getClass());
    private static final String TOKEN = "token_s";
    private static final String QUERY_TERMS = "ss_qterms_mv_s";
    private static final String SCREEN_NAME = "name";
    private static final String ACTIVE = "active";
    private static final String LAST_VISIT_DT = "lastVisit_dt";
    private static final String CREATED_DT = "createdAt_dt";
    private static final String TOPICS = "topics";
    private static final String TWITTER_ID = "twitterId";
    private static final String EMAIL = "email";
    /** Terms extracted from a user's tweets become tags only if their count is strictly greater than this. */
    protected int termMinFrequency = 2;
    private String indexName = "uindex";

    /**
     * Creates a search instance using the tweet search URL of the given configuration.
     */
    public ElasticUserSearch(Configuration config) {
        this(config.getTweetSearchUrl());
    }

    public ElasticUserSearch(String url) {
        super(url);
    }

    public ElasticUserSearch(Client client) {
        super(client);
    }

    @Override
    public String getIndexName() {
        return indexName;
    }

    @Override
    public void setIndexName(String indexName) {
        this.indexName = indexName;
    }

    @Override
    public String getIndexType() {
        return "user";
    }

    /**
     * Deletes the document whose id is the screen name of the given user.
     *
     * @param user   the user to remove; its screen name must not be null
     * @param commit if true the index is refreshed after the deletion
     * @throws NullPointerException if the user has no screen name
     */
    void delete(JUser user, boolean commit) {
        if (user.getScreenName() == null) {
            throw new NullPointerException("Null " + JUser.SCREEN_NAME + " is not allowed! User:" + user);
        }

        deleteById(user.getScreenName());
        if (commit) {
            refresh();
        }
    }

    /**
     * Bulk-indexes the given users, passing {@code false} as the refresh flag.
     */
    public void update(Collection<JUser> users) {
        bulkUpdate(users, getIndexName(), false);
    }

    /**
     * Indexes a single user. Delegates to {@link #save(JUser, boolean)}.
     *
     * @param optimize ignored; kept for source compatibility with existing callers
     * @param refresh  forwarded to {@link #save(JUser, boolean)}
     */
    public void update(JUser user, boolean optimize, boolean refresh) {
        save(user, refresh);
    }

    /**
     * Indexes a single user via a one-element bulk update.
     *
     * @throws RuntimeException wrapping any exception raised by the bulk update
     */
    public void save(JUser user, boolean refresh) {
        try {
            bulkUpdate(Collections.singleton(user), getIndexName(), refresh);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Serializes a user into the JSON document that gets indexed.
     * The fields written here are the ones {@link #readDoc} reads back.
     *
     * @param user the user to serialize
     * @return the builder holding the (still open) JSON object
     * @throws IOException if the underlying content builder fails
     */
    @Override
    public XContentBuilder createDoc(JUser user) throws IOException {
        XContentBuilder b = JsonXContent.contentBuilder().startObject();
        // make sure that if we look for a specific user this user will show up first:
        b.field(SCREEN_NAME, user.getScreenName());
        if (user.getTwitterId() != null) {
            b.field(TWITTER_ID, user.getTwitterId());
        }

        b.field("realName", user.getRealName());
        b.field("protected", user.isProtected());
        b.field("weekFallback", user.isWeekFallback());
        b.field("iconUrl", user.getProfileImageUrl());
        b.field("webUrl", user.getWebUrl());
        b.field("bio", user.getDescription());
        b.field("mode", user.getMode());
        b.field(TOKEN, user.getTwitterToken());
        b.field("tokenSecret_s", user.getTwitterTokenSecret());

        b.field(CREATED_DT, user.getCreatedAt());
        b.field("twCreatedAt_dt", user.getTwitterCreatedAt());
        b.field("friendsUpdate_dt", user.getLastFriendsUpdate());
        b.field("friends", Helper.toStringArray(user.getFriends()));
        b.field(LAST_VISIT_DT, user.getLastVisit());
        b.field(EMAIL, user.getEmail());
        b.field(ACTIVE, user.isActive());
        b.field("role", user.getRole());
        b.field("followersCount", user.getFollowersCount());
        b.field("friendsCount", user.getFriendsCount());

        appendSavedSearches(b, user);
        appendTweetStats(b, user);
        appendTopics(b, user);
        return b;
    }

    /**
     * Writes one query/date field pair per saved search, numbered from 1.
     */
    private void appendSavedSearches(XContentBuilder b, JUser user) throws IOException {
        int counter = 1;
        for (SavedSearch ss : user.getSavedSearches()) {
            b.field("ss_" + counter + "_query_s", ss.getCleanQuery().toString());
            b.field("ss_" + counter + "_last_dt", ss.getLastQueryDate());

            if (ss.getQueryTerm() != null && !ss.getQueryTerm().isEmpty()) {
                // for tweetProducer (pick via facets) and stats:
                // NOTE(review): the same field name is emitted once per saved search;
                // whether repeated keys accumulate or replace each other depends on
                // the index - confirm.
                b.field(QUERY_TERMS, ss.getQueryTerm());
            }
            counter++;
        }
    }

    /**
     * Writes the "tag" and "lang" fields derived from the user's own tweets.
     * Nothing is written when the user has no own tweets.
     */
    private void appendTweetStats(XContentBuilder b, JUser user) throws IOException {
        // some users were only mentioned by others ...
        Collection<JTweet> tweets = user.getOwnTweets();
        if (tweets.isEmpty()) {
            return;
        }

        TweetDetector extractor = new TweetDetector(tweets);
        List<String> tagList = new ArrayList<String>();
        for (Entry<String, Integer> entry : extractor.run().getSortedTerms()) {
            if (entry.getValue() > termMinFrequency) {
                tagList.add(entry.getKey());
            }
        }
        b.field("tag", tagList);

        StringFreqMap langs = new StringFreqMap();
        for (JTweet tw : tweets) {
            langs.inc(tw.getLanguage(), 1);
        }

        List<String> langList = new ArrayList<String>();
        for (Entry<String, Integer> lang : langs.getSorted()) {
            langList.add(lang.getKey());
        }
        b.field("lang", langList);
    }

    /**
     * Writes the user's topics as an array of {name, lastRead} objects.
     */
    private void appendTopics(XContentBuilder b, JUser user) throws IOException {
        Map<String, Date> topics = user.getTopicsMap();
        List<Map<String, Object>> listOfMaps = new ArrayList<Map<String, Object>>(topics.size());
        for (Entry<String, Date> entry : topics.entrySet()) {
            Map<String, Object> map = new LinkedHashMap<String, Object>(2);
            map.put("name", entry.getKey());
            map.put("lastRead", entry.getValue());
            listOfMaps.add(map);
        }

        b.array(TOPICS, listOfMaps.toArray(new Map[listOfMaps.size()]));
    }

    /**
     * Rebuilds a user from an indexed document.
     *
     * @param idAsStr the document id, used as the user's screen name
     * @param version the document version (not used here)
     * @param doc     the document source as a map
     * @return the reconstructed user; a missing "active" field is read as active
     */
    @Override
    public JUser readDoc(String idAsStr, long version, Map<String, Object> doc) {
        JUser user = new JUser(idAsStr);
        if (doc.get(TWITTER_ID) != null) {
            user.setTwitterId(((Number) doc.get(TWITTER_ID)).longValue());
        }

        // documents without the flag are treated as active
        Boolean active = (Boolean) doc.get(ACTIVE);
        user.setActive(active == null || active.booleanValue());

        user.setRealName((String) doc.get("realName"));
        user.setProfileImageUrl((String) doc.get("iconUrl"));
        user.setWebUrl((String) doc.get("webUrl"));
        user.setDescription((String) doc.get("bio"));
        user.setTwitterToken((String) doc.get(TOKEN));
        user.setTwitterTokenSecret((String) doc.get("tokenSecret_s"));
        user.setMode((String) doc.get("mode"));

        @SuppressWarnings("unchecked")
        Collection<Map<String, Object>> topics = (Collection<Map<String, Object>>) doc.get(TOPICS);
        if (topics != null) {
            for (Map<String, Object> t : topics) {
                Date date = Helper.toDateNoNPE((String) t.get("lastRead"));
                user.updateTopic((String) t.get("name"), date);
            }
        }

        if (doc.get("protected") != null) {
            user.setProtected((Boolean) doc.get("protected"));
        }
        if (doc.get("weekFallback") != null) {
            user.setWeekFallback((Boolean) doc.get("weekFallback"));
        }

        user.setLastVisit(Helper.toDateNoNPE((String) doc.get(LAST_VISIT_DT)));
        user.setCreatedAt(Helper.toDateNoNPE((String) doc.get(CREATED_DT)));
        user.setTwitterCreatedAt(Helper.toDateNoNPE((String) doc.get("twCreatedAt_dt")));
        user.setLastFriendsUpdate(Helper.toDateNoNPE((String) doc.get("friendsUpdate_dt")));

        @SuppressWarnings("unchecked")
        Collection<String> friends = (Collection<String>) doc.get("friends");
        user.setFriends(friends);

        if (doc.get("followersCount") != null) {
            user.setFollowersCount(((Number) doc.get("followersCount")).intValue());
        }
        if (doc.get("friendsCount") != null) {
            user.setFriendsCount(((Number) doc.get("friendsCount")).intValue());
        }
        if (doc.get("role") != null) {
            user.setRole((String) doc.get("role"));
        }

        user.setEmail((String) doc.get(EMAIL));

        // saved searches are stored as ss_1_..., ss_2_..., ...; stop at the first gap
        for (long counter = 1;; counter++) {
            String qString = (String) doc.get("ss_" + counter + "_query_s");
            if (qString == null) {
                // backward compatibility
                break;
            }

            SavedSearch ss = new SavedSearch(counter, TweetQuery.parseQuery(qString));
            ss.setLastQueryDate(Helper.toDateNoNPE((String) doc.get("ss_" + counter + "_last_dt")));
            user.addSavedSearch(ss);
        }

        // only used for facet search?
        @SuppressWarnings("unchecked")
        Collection<Object> tags = (Collection<Object>) doc.get("tag");
        if (tags != null) {
            for (Object tag : tags) {
                user.addTag((String) tag);
            }
        }

        @SuppressWarnings("unchecked")
        Collection<Object> langs = (Collection<Object>) doc.get("lang");
        if (langs != null) {
            for (Object lang : langs) {
                user.addLanguage((String) lang);
            }
        }
        return user;
    }

    /**
     * Runs a match-all query filtered by an exact term on the given field.
     *
     * @param key   the field name to filter on
     * @param value the term the field must contain
     * @return the search response
     */
    SearchResponse prepareFindBy(String key, Object value) {
        SearchRequestBuilder srb = createSearchBuilder();
        // fastest method. we only expect one or two objects
        srb.setSearchType(SearchType.QUERY_AND_FETCH);
        srb.setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(),
                FilterBuilders.termFilter(key, value)));
        return srb.execute().actionGet();
    }

    /**
     * Deprecated. Use findById.
     *
     * @param token the twitter token to look up
     * @return the single matching user, or null if none matched, more than
     * one matched, or the lookup failed (failures are logged)
     */
    public JUser findByTwitterToken(String token) {
        try {
            Collection<JUser> res = collectObjects(prepareFindBy(TOKEN, token));
            if (res.isEmpty()) {
                return null;
            }
            if (res.size() == 1) {
                return res.iterator().next();
            }
            // caught and logged by the handler below, so callers get null
            throw new IllegalStateException("token search:" + token + " returns more than one users:" + res);
        } catch (Exception ex) {
            logger.error("Couldn't load user with token:" + token + " " + ex.getMessage(), ex);
            return null;
        }
    }

    /**
     * @return the user with the specified twitter id, or null if none matched,
     * more than one matched, or the lookup failed (failures are logged)
     */
    public JUser findById(long userTwitterId) {
        try {
            Collection<JUser> res = collectObjects(prepareFindBy(TWITTER_ID, userTwitterId));
            if (res.isEmpty()) {
                return null;
            }
            if (res.size() == 1) {
                JUser u = res.iterator().next();
                u.setTwitterId(userTwitterId);
                return u;
            }
            // caught and logged by the handler below, so callers get null
            throw new IllegalStateException("userId search:" + userTwitterId + " returns more than one users:" + res);
        } catch (Exception ex) {
            logger.error("Couldn't load user with userId:" + userTwitterId + " " + ex.getMessage(), ex);
            return null;
        }
    }

    /**
     * Fetches a user directly by document id, which is the lower-cased screen name.
     *
     * @param name the screen name; it is lower-cased before the lookup
     * @return the user, or null if no such document exists or the lookup
     * failed (failures, including a null name, are logged)
     */
    public JUser findByScreenName(String name) {
        try {
            // NOTE(review): uses the default locale; whether a fixed locale is safe
            // depends on how the ids were lower-cased when indexed - confirm.
            name = name.toLowerCase();
            GetRequestBuilder grb = client.prepareGet(getIndexName(), getIndexType(), name);
            GetResponse gr = grb.execute().actionGet();
            if (gr.isExists()) {
                return readDoc(gr.id(), gr.version(), gr.sourceAsMap());
            }
        } catch (Exception ex) {
            logger.error("Couldn't load user with screenName:" + name + " " + ex.getMessage(), ex);
        }
        return null;
    }

    /**
     * Looks up a user by exact (lower-cased) email term.
     *
     * @param email the email address; it is lower-cased before the lookup
     * @return the single matching user, or null if none matched, more than
     * one matched, or the lookup failed (failures are logged)
     */
    public JUser findByEmail(String email) {
        try {
            // NOTE(review): default-locale lower-casing, see findByScreenName
            email = email.toLowerCase();
            Collection<JUser> res = collectObjects(prepareFindBy(EMAIL, email));
            if (res.isEmpty()) {
                return null;
            }
            if (res.size() == 1) {
                return res.iterator().next();
            }
            // caught and logged by the handler below, so callers get null
            throw new IllegalStateException("email search:" + email + " returns more than one users:" + res);
        } catch (Exception ex) {
            logger.error("Couldn't load user with email:" + email + " " + ex.getMessage(), ex);
            return null;
        }
    }

    /**
     * Finds users whose topic names match the given query string.
     * The query is sorted by last visit (descending) and limited to
     * {@code size} hits; the returned list is then shuffled, so the sort
     * only influences which users are selected, not their order.
     *
     * @param topic query string matched against the topic name field
     * @param size  maximum number of users to fetch
     * @return the shuffled matches, or an empty list if the search failed
     * (the failure is logged)
     */
    public Collection<JUser> findByTopic(String topic, int size) {
        try {
            SearchRequestBuilder srb = createSearchBuilder();
            srb.addSort(LAST_VISIT_DT, SortOrder.DESC);
            srb.setQuery(QueryBuilders.queryString(topic).defaultOperator(Operator.AND).
                    defaultField(TOPICS + ".name").
                    allowLeadingWildcard(false).useDisMax(true));
            srb.setSize(size);
            SearchResponse rsp = srb.execute().actionGet();
            logger.info("[user.findByTopic] took:" + rsp.getTookInMillis() / 1000f
                    + " topic:" + topic + " hits:" + rsp.getHits().totalHits());
            List<JUser> list = collectObjects(rsp);
            Collections.shuffle(list);
            return list;
        } catch (Exception ex) {
            // previously swallowed silently; log it like the other finders do
            logger.error("Couldn't find users for topic:" + topic + " " + ex.getMessage(), ex);
            return Collections.emptyList();
        }
    }

    /**
     * Collects the saved-search query terms of all users that have a token,
     * using a terms facet limited to 1000 entries.
     *
     * @return the facet terms with a count greater than zero; never null
     */
    public Collection<String> getQueryTerms() {
        SearchRequestBuilder srb = createSearchBuilder();
        FilteredQueryBuilder fb = QueryBuilders.filteredQuery(
                QueryBuilders.matchAllQuery(), FilterBuilders.existsFilter(TOKEN));
        srb.addFacet(FacetBuilders.termsFacet(QUERY_TERMS).field(QUERY_TERMS).size(1000)).
                setQuery(fb);
        SearchResponse rsp = srb.execute().actionGet();

        Collection<String> res = new ArrayList<String>();
        TermsFacet tf = (TermsFacet) rsp.getFacets().facet(QUERY_TERMS);
        if (tf == null || tf.entries() == null) {
            return res;
        }

        for (TermsFacet.Entry cnt : tf.entries()) {
            if (cnt.getCount() > 0) {
                res.add(cnt.getTerm());
            }
        }
        return res;
    }

    // There is intentionally no getTopics() counterpart to getQueryTerms():
    // topics is analyzed!! (an earlier terms-facet implementation on TOPICS
    // was disabled for that reason)

    /**
     * Executes the query and discards the collected users.
     */
    @Override
    public SearchResponse query(JetwickQuery query) {
        return query(new ArrayList<JUser>(), query);
    }

    /**
     * Executes the query and appends the resulting users to the given collection.
     *
     * @param users receives the users found
     * @param query the query used to initialize the request
     * @return the raw search response
     */
    public SearchResponse query(Collection<JUser> users, JetwickQuery query) {
        SearchRequestBuilder srb = createSearchBuilder();
        SearchResponse response = query.initRequestBuilder(srb).execute().actionGet();
        users.addAll(collectObjects(response));
        return response;
    }

    /** use createQuery + search instead */
    @Deprecated
    Collection<JUser> search(String string) {
        Set<JUser> ret = new LinkedHashSet<JUser>();
        search(ret, string, 10, 0);
        return ret;
    }

    /** use createQuery + search instead */
    @Deprecated
    long search(Collection<JUser> users, String qStr, int hitsPerPage, int page) {
        JetwickQuery query = new UserQuery(qStr);
        query.attachPagability(page, hitsPerPage);
        SearchResponse rsp = query(users, query);
        return rsp.getHits().totalHits();
    }

    void setTermMinFrequency(int tmf) {
        termMinFrequency = tmf;
    }

    /**
     * Adds active users that have a token to the given set, paged by
     * {@code from}/{@code size}.
     *
     * @param users receives the users found
     * @param from  offset of the first hit
     * @param size  maximum number of hits
     */
    public void searchLastLoggedIn(Set<JUser> users, int from, int size) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.setQuery(QueryBuilders.filteredQuery(
                QueryBuilders.matchAllQuery(),
                FilterBuilders.andFilter(
                        FilterBuilders.existsFilter(TOKEN),
                        FilterBuilders.termFilter(ACTIVE, true))));
        srb.setFrom(from);
        srb.setSize(size);
        // prefer last logged in users
        // NOTE(review): the sort key is the creation date, not LAST_VISIT_DT,
        // although the stated intent is the last login - confirm which is wanted.
        srb.addSort(ElasticUserSearch.CREATED_DT, SortOrder.DESC);
        users.addAll(collectObjects(srb.execute().actionGet()));
    }
}