package qa.qcri.aidr.collector.collectors; import java.io.Closeable; import java.io.IOException; import java.util.Arrays; import java.util.List; import org.apache.log4j.Logger; import qa.qcri.aidr.collector.beans.CollectionTask; import qa.qcri.aidr.collector.beans.TwitterCollectionTask; import qa.qcri.aidr.collector.utils.CollectorConfigurator; import qa.qcri.aidr.collector.utils.CollectorConfigurationProperty; import qa.qcri.aidr.common.redis.LoadShedder; import twitter4j.FilterQuery; import twitter4j.TwitterStream; import twitter4j.TwitterStreamFactory; import twitter4j.conf.Configuration; import twitter4j.conf.ConfigurationBuilder; /** * This class is responsible for managing all the resources associated with the * thread which pulls tweets for the given collection task. * */ public class TwitterStreamTracker implements Closeable { private static Logger logger = Logger.getLogger(TwitterStreamTracker.class.getName()); private static CollectorConfigurator configProperties = CollectorConfigurator.getInstance(); private TwitterStream twitterStream; private FilterQuery query; private JedisPublisher publisherJedis; public TwitterStreamTracker(TwitterCollectionTask task) throws ParseException{ logger.info("Waiting to aquire Jedis connection for collection " + task.getCollectionCode()); this.query = task2query(task); Configuration config = task2configuration(task); this.publisherJedis = JedisPublisher.newInstance(); logger.info("Jedis connection acquired for collection " + task.getCollectionCode()); String channelName = configProperties.getProperty(CollectorConfigurationProperty.COLLECTOR_CHANNEL) + "." + task.getCollectionCode(); LoadShedder shedder = new LoadShedder( Integer.parseInt(configProperties.getProperty(CollectorConfigurationProperty.PERSISTER_LOAD_LIMIT)), Integer.parseInt(configProperties.getProperty(CollectorConfigurationProperty.PERSISTER_LOAD_CHECK_INTERVAL_MINUTES)), true,channelName); TwitterStatusListener listener = new TwitterStatusListener(task, channelName); listener.addFilter(new ShedderFilter(channelName, shedder)); if ("strict".equals(task.getGeoR())) { listener.addFilter(new StrictLocationFilter(task)); logger.info("Added StrictLocationFilter for collection = " + task.getCollectionCode() + ", BBox: " + task.getGeoLocation()); } // Added by koushik if (task.isToFollowAvailable()) { listener.addFilter(new FollowFilter(task)); logger.info("Added FollowFilter for collection = " + task.getCollectionCode() + ", toFollow: " + task.getToFollow()); } if (task.isToTrackAvailable() && (task.isGeoLocationAvailable() || task.isToFollowAvailable())) { // New default behavior: filter tweets received from geolocation and/or followed users using tracked keywords // Note: this override the default and old behavior of ORing the filter conditions by Twitter listener.addFilter(new TrackFilter(task)); logger.info("Added TrackFilter for collection = " + task.getCollectionCode() + ", toTrack: " + task.getToTrack()); } listener.addPublisher(publisherJedis); long threhold = Long.parseLong(configProperties.getProperty(CollectorConfigurationProperty.COLLECTOR_REDIS_COUNTER_UPDATE_THRESHOLD)); String cacheKey = task.getCollectionCode(); listener.addPublisher(new StatusPublisher(cacheKey, threhold)); twitterStream = new TwitterStreamFactory(config).getInstance(); twitterStream.addListener(listener); twitterStream.addConnectionLifeCycleListener(listener); } /** * This method internally creates a thread which manipulates TwitterStream * and calls these adequate listener methods continuously */ public void start() { twitterStream.filter(query); } public void close() throws IOException { twitterStream.cleanUp(); twitterStream.shutdown(); publisherJedis.close(); logger.info("AIDR-Fetcher: Collection stopped which was tracking " + query); } private static Configuration task2configuration(CollectionTask task) { ConfigurationBuilder configurationBuilder = new ConfigurationBuilder(); configurationBuilder.setDebugEnabled(false) .setJSONStoreEnabled(true) .setOAuthConsumerKey(configProperties.getProperty(CollectorConfigurationProperty.TWITTER_CONSUMER_KEY)) .setOAuthConsumerSecret(configProperties.getProperty(CollectorConfigurationProperty.TWITTER_CONSUMER_SECRET)) .setOAuthAccessToken(task.getAccessToken()) .setOAuthAccessTokenSecret(task.getAccessTokenSecret()); Configuration configuration = configurationBuilder.build(); return configuration; } /*default*/ static FilterQuery task2query(TwitterCollectionTask collectionTask) throws NumberFormatException { FilterQuery query; query = new FilterQuery(); String toTrack = collectionTask.getToTrack(); if (toTrack != null && !toTrack.isEmpty()) query.track(toTrack.split(",")); String toFollow = collectionTask.getToFollow(); if (toFollow != null && !toFollow.isEmpty()) { List<String> list = Arrays.asList(toFollow.split(",")); // TODO: Java 8 update. Replace the following block with one single line // query.follow(list.stream().mapToLong(Long::parseLong).toArray()); long[] tmp = new long[list.size()]; for (int i=0; i<list.size(); ++i) { long val = Long.parseLong(list.get(i)); tmp[i] = val; //System.out.println("Will follow twitter user ID: " + tmp[i]); } query.follow(tmp); // End of Java 8 Update } String locations = collectionTask.getGeoLocation(); if (locations != null && !locations.isEmpty()) { List<String> list = Arrays.asList(locations.split(",")); // TODO: Java 8 update. Replace the following block with one single line // double[] flat = list.stream().mapToDouble(Double::parseDouble).toArray(); double[] flat = new double[list.size()]; for (int i=0; i<list.size(); ++i) { double val = Double.parseDouble(list.get(i)); flat[i] = val; } // End of Java 8 Update assert flat.length % 4 == 0; double[][] square = new double[flat.length / 2][2]; for (int i = 0; i < flat.length; i = i + 2) { // Read 2 elements at a time, into each 2-element sub-array // of 'locations' square[i / 2][0] = flat[i]; square[i / 2][1] = flat[i + 1]; } query.locations(square); } String language = collectionTask.getLanguageFilter(); if (language != null && !language.isEmpty()) query.language(language.split(",")); return query; } }