/**
 * Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.jetwick.tw;

import java.util.Date;
import com.google.inject.Module;
import de.jetwick.config.DefaultModule;
import de.jetwick.data.JTag;
import de.jetwick.snacktory.JResult;
import java.util.LinkedHashSet;
import java.util.Set;
import de.jetwick.JetwickTestClass;
import de.jetwick.es.ElasticTagSearchTest;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import de.jetwick.es.ElasticTweetSearch;
import de.jetwick.es.ElasticUserSearch;
import de.jetwick.es.ElasticTweetSearchTest;
import de.jetwick.es.ElasticUserSearchTest;
import de.jetwick.data.JTweet;
import de.jetwick.data.JUser;
import de.jetwick.data.UrlEntry;
import de.jetwick.es.TweetQuery;
import de.jetwick.snacktory.HtmlFetcher;
import de.jetwick.util.GenericUrlResolver;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;

/**
 * Integration tests that wire a {@link TweetProducer} and a {@link TweetConsumer}
 * to the search instances provided by the Elastic*SearchTest helpers and feed
 * them from a stubbed {@link TwitterSearch}.
 *
 * Each test starts the consumer and a producer thread, waits a fixed time,
 * stops both threads again and then checks what ended up in the tweet index.
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public class TweetCollectorIntegrationTestClass extends JetwickTestClass {

    /** Time in milliseconds the tests wait for producer and consumer to do their work. */
    private static final long WORKER_WAIT_MILLIS = 500;

    private final ElasticUserSearchTest userSearchTester = new ElasticUserSearchTest();
    private final ElasticTweetSearchTest tweetSearchTester = new ElasticTweetSearchTest();
    private final ElasticTagSearchTest tagSearchTester = new ElasticTagSearchTest();
    private final GenericUrlResolver urlResolver = new GenericUrlResolver(100).setResolveThreads(2);

    @BeforeClass
    public static void beforeClass() {
        ElasticTweetSearchTest.beforeClass();
    }

    @AfterClass
    public static void afterClass() {
        ElasticTweetSearchTest.afterClass();
    }

    /**
     * Sets up the three search test helpers before delegating to the base
     * class, so that {@link #createModule()} can hand out their search
     * instances.
     */
    @Override
    @Before
    public void setUp() throws Exception {
        tagSearchTester.setUp();
        userSearchTester.setUp();
        tweetSearchTester.setUp();
        super.setUp();
    }

    @After
    @Override
    public void tearDown() throws Exception {
        super.tearDown();
        tagSearchTester.tearDown();
        userSearchTester.tearDown();
        tweetSearchTester.tearDown();
    }

    /**
     * A tweet containing a short url is indexed with exactly one url entry
     * whose resolved url equals the original url (the stubbed
     * {@link HtmlFetcher} of {@link #createModule()} does not resolve).
     */
    @Test
    public void testUrlResolving() throws Exception {
        final Map<Thread, Throwable> exceptionMap = new HashMap<Thread, Throwable>();
        Thread.UncaughtExceptionHandler excHandler = createExceptionMapHandler(exceptionMap);
        indexDefaultTag();

        ElasticTweetSearch tweetSearch = tweetSearchTester.getSearch();
        ElasticUserSearch userSearch = userSearchTester.getSearch();
        TwitterSearch tws = new TwitterSearch() {

            @Override
            public boolean isInitialized() {
                return true;
            }

            @Override
            public long search(String q, Collection<JTweet> result, int tweets, long lastMillis) {
                JUser u = new JUser("timetabling");
                result.add(new JTweet(1L, "... Egypt. http://apne.ws/dERa4A - XY #tEst", u));
                return lastMillis;
            }

            @Override
            public List<JTweet> getTweets(JUser user, Collection<JUser> users, int twPerPage) {
                return Collections.<JTweet>emptyList();
            }
        };

        TweetConsumer tweetConsumer = startTweetConsumer(excHandler);
        Thread tweetProducerThread = startTweetProducer(tws, userSearch, tweetConsumer, excHandler);

        // wait so that the consumer can consume
        Thread.sleep(WORKER_WAIT_MILLIS);
        tweetConsumer.interrupt();
        tweetProducerThread.interrupt();
        checkExceptions(exceptionMap);

        tweetSearch.forceEmptyQueueAndRefresh();
        List<JTweet> res = tweetSearch.searchTweets(
                new TweetQuery().addFilterQuery(ElasticTweetSearch.USER, "timetabling"));
        assertEquals(1, res.size());
        assertEquals(1, res.get(0).getUrlEntries().size());

        // no resolving for now
        assertEquals("http://apne.ws/dERa4A",
                res.get(0).getUrlEntries().iterator().next().getResolvedUrl());
    }

    /**
     * Produced tweets are indexed per user and searchable by term, while a
     * tweet whose id is already stored keeps its previously stored text.
     */
    @Test
    public void testProduceTweets() throws InterruptedException, Exception {
        final Map<Thread, Throwable> exceptionMap = new HashMap<Thread, Throwable>();
        Thread.UncaughtExceptionHandler excHandler = createExceptionMapHandler(exceptionMap);
        indexDefaultTag();

        ElasticUserSearch userSearch = userSearchTester.getSearch();
        ElasticTweetSearch tweetSearch = tweetSearchTester.getSearch();

        // already existing tweets must not harm
        tweetSearch.store(new JTweet(3L, "duplication tweet", new JUser("tmp")), true);

        TwitterSearch tws = new TwitterSearch() {

            @Override
            public boolean isInitialized() {
                return true;
            }

            @Override
            public long search(String q, Collection<JTweet> result, int tweets, long lastMillis) {
                JUser u = new JUser("timetabling");
                result.add(new JTweet(1L, "test", u));
                result.add(new JTweet(2L, "java test", u));

                // this tweet will be ignored and so it won't be indexed!
                result.add(new JTweet(3L, "duplicate tweet", new JUser("anotheruser")));
                result.add(new JTweet(4L, "reference a user: @timetabling", new JUser("user3")));
                assertEquals(4, result.size());
                return lastMillis;
            }

            @Override
            public List<JTweet> getTweets(JUser user, Collection<JUser> users, int twPerPage) {
                return Collections.<JTweet>emptyList();
            }
        };

        TweetConsumer tweetConsumer = startTweetConsumer(excHandler);
        Thread tweetProducerThread = startTweetProducer(tws, userSearch, tweetConsumer, excHandler);

        Thread.sleep(WORKER_WAIT_MILLIS);
        tweetConsumer.interrupt();
        // stop the producer as well (as testUrlResolving does) so it does not outlive the test
        tweetProducerThread.interrupt();
        checkExceptions(exceptionMap);

        tweetSearch.forceEmptyQueueAndRefresh();
        Set<JUser> users = new LinkedHashSet<JUser>();
        tweetSearch.query(users, new TweetQuery().addFilterQuery(ElasticTweetSearch.USER, "timetabling"));
        assertEquals(2, users.iterator().next().getOwnTweets().size());

        List<JUser> res = new ArrayList<JUser>();
        tweetSearch.query(res, new TweetQuery("java"));
        assertEquals(1, res.size());

        Collection<JTweet> coll = tweetSearch.searchTweets(new TweetQuery("duplicate"));
        assertEquals(1, coll.size());
        assertEquals("duplication tweet", coll.iterator().next().getText());

        coll = tweetSearch.searchTweets(new TweetQuery("duplication"));
        assertEquals(1, coll.size());
        assertEquals("duplication tweet", coll.iterator().next().getText());
    }

    /**
     * A tweet and its retweet are both indexed and the original tweet ends up
     * with a retweet count of one.
     */
    @Test
    public void testArticleContains2Sources() throws InterruptedException, Exception {
        final Map<Thread, Throwable> exceptionMap = new HashMap<Thread, Throwable>();
        Thread.UncaughtExceptionHandler excHandler = createExceptionMapHandler(exceptionMap);
        indexDefaultTag();

        TwitterSearch tws = new TwitterSearch() {

            @Override
            public boolean isInitialized() {
                return true;
            }

            @Override
            public long search(String q, Collection<JTweet> result, int tweets, long lastMillis) {
                // the retweet gets a later timestamp than the original tweet,
                // otherwise no retweet detection!
                Date dt = new Date();
                JTweet tw1 = new JTweet(10L, "A new #browser performance test: Rendering the #linux kernel impact graph on #github: http://t.co/0NCINwv",
                        new JUser("jbandi")).setCreatedAt(dt);
                result.add(tw1);
                JTweet tw2 = new JTweet(11L, "RT @jbandi: A new #browser performance test: Rendering the #linux kernel impact graph on #github: http://t.co/0NCINwv",
                        new JUser("adietisheim")).setCreatedAt(new Date(dt.getTime() + 1));
                result.add(tw2);
                return lastMillis;
            }

            @Override
            public List<JTweet> getTweets(JUser user, Collection<JUser> users, int twPerPage) {
                return Collections.<JTweet>emptyList();
            }
        };

        ElasticTweetSearch tweetSearch = getInstance(ElasticTweetSearch.class);
        GenericUrlResolver resolver = getInstance(GenericUrlResolver.class);
        tweetSearch.addListener(resolver);

        TweetConsumer tweetConsumer = startTweetConsumer(excHandler);
        Thread tweetProducerThread = startTweetProducer(tws, getInstance(ElasticUserSearch.class),
                tweetConsumer, excHandler);

        // let tweetconsumer do its work
        Thread.sleep(WORKER_WAIT_MILLIS);
        // stop both workers and surface their failures, like the other tests do;
        // previously the threads kept running and their exceptions were never checked
        tweetConsumer.interrupt();
        tweetProducerThread.interrupt();
        checkExceptions(exceptionMap);

        tweetSearch.forceEmptyQueueAndRefresh();
        List<JTweet> tweets = tweetSearch.searchTweets(
                new TweetQuery().setSort(ElasticTweetSearch.RT_COUNT, "desc"));
        assertEquals(2, tweets.size());
        assertEquals(1, tweets.get(0).getRetweetCount());
        assertEquals(0, tweets.get(1).getRetweetCount());
    }

    /**
     * Creates the module used by the base class: the user and tweet searches
     * are bound to the instances of the test helpers, the url resolver is the
     * one of this test and the html fetcher is a stub without network access.
     */
    @Override
    public Module createModule() {
        return new DefaultModule() {

            @Override
            public void installSearchModule() {
                bind(ElasticUserSearch.class).toInstance(userSearchTester.getSearch());
                bind(ElasticTweetSearch.class).toInstance(tweetSearchTester.getSearch());
            }

            @Override
            public GenericUrlResolver createGenericUrlResolver() {
                return urlResolver;
            }

            @Override
            public HtmlFetcher createHtmlFetcher() {
                return new HtmlFetcher() {

                    @Override
                    public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
                        return UrlEntry.createSimpleResult(url);
                    }

                    @Override
                    public String getResolvedUrl(String urlAsString, int timeout) {
                        // TODO NOW resolved url can be different!!
                        return urlAsString;
                    }
                };
            }
        };
    }

    /** Fills the tag index with the single default tag "java". */
    private void indexDefaultTag() throws Exception {
        tagSearchTester.getSearch().bulkUpdate(Arrays.asList(new JTag("java")),
                tagSearchTester.getSearch().getIndexName(), true);
    }

    /** Obtains the tweet consumer, installs the exception handler and starts it. */
    private TweetConsumer startTweetConsumer(Thread.UncaughtExceptionHandler excHandler) throws Exception {
        TweetConsumer tweetConsumer = getInstance(TweetConsumer.class);
        tweetConsumer.setUncaughtExceptionHandler(excHandler);
        tweetConsumer.start();
        return tweetConsumer;
    }

    /**
     * Obtains the tweet producer, connects it to the given twitter search,
     * user search and consumer queue and starts it in a new thread.
     *
     * @return the started producer thread; the caller has to interrupt it
     */
    private Thread startTweetProducer(TwitterSearch tws, ElasticUserSearch userSearch,
            TweetConsumer tweetConsumer, Thread.UncaughtExceptionHandler excHandler) throws Exception {
        TweetProducer tweetProducer = getInstance(TweetProducer.class);
        tweetProducer.setTwitterSearch(tws);
        tweetProducer.setUserSearch(userSearch);
        tweetProducer.setTagSearch(tagSearchTester.getSearch());
        tweetProducer.setQueue(tweetConsumer.register("tweet-producer", Integer.MAX_VALUE, 100));

        Thread tweetProducerThread = new Thread(tweetProducer);
        tweetProducerThread.setUncaughtExceptionHandler(excHandler);
        tweetProducerThread.start();
        return tweetProducerThread;
    }
}