/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.streams.twitter.provider; import org.apache.streams.config.ComponentConfigurator; import org.apache.streams.config.StreamsConfiguration; import org.apache.streams.config.StreamsConfigurator; import org.apache.streams.core.DatumStatus; import org.apache.streams.core.DatumStatusCountable; import org.apache.streams.core.DatumStatusCounter; import org.apache.streams.core.StreamsDatum; import org.apache.streams.core.StreamsProvider; import org.apache.streams.core.StreamsResultSet; import org.apache.streams.jackson.StreamsJacksonMapper; import org.apache.streams.twitter.TwitterStreamConfiguration; import org.apache.streams.twitter.converter.TwitterDateTimeFormat; import org.apache.streams.util.ComponentUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.util.concurrent.Uninterruptibles; import com.twitter.hbc.ClientBuilder; import com.twitter.hbc.core.Constants; import com.twitter.hbc.core.Hosts; import com.twitter.hbc.core.HttpHosts; import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint; import com.twitter.hbc.core.endpoint.StatusesFirehoseEndpoint; import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint; import com.twitter.hbc.core.endpoint.StreamingEndpoint; import com.twitter.hbc.core.endpoint.UserstreamEndpoint; import com.twitter.hbc.httpclient.BasicClient; import com.twitter.hbc.httpclient.auth.Authentication; import com.twitter.hbc.httpclient.auth.BasicAuth; import com.twitter.hbc.httpclient.auth.OAuth1; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import com.typesafe.config.ConfigParseOptions; import org.apache.commons.lang.NotImplementedException; import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintStream; import java.io.Serializable; import java.math.BigInteger; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Queue; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; /** * TwitterStreamProvider wraps a hosebird client and passes received documents * to subscribing components. */ public class TwitterStreamProvider implements StreamsProvider, Serializable, DatumStatusCountable { public static final String STREAMS_ID = "TwitterStreamProvider"; private static final Logger LOGGER = LoggerFactory.getLogger(TwitterStreamProvider.class); /** * To use from command line: * * <p/> * Supply (at least) the following required configuration in application.conf: * * <p/> * twitter.oauth.consumerKey * twitter.oauth.consumerSecret * twitter.oauth.accessToken * twitter.oauth.accessTokenSecret * * <p/> * Launch using: * * <p/> * mvn exec:java -Dexec.mainClass=org.apache.streams.twitter.provider.TwitterStreamProvider -Dexec.args="application.conf tweets.json" * * @param args */ public static void main(String[] args) { Preconditions.checkArgument(args.length >= 2); String configfile = args[0]; String outfile = args[1]; Config reference = ConfigFactory.load(); File file = new File(configfile); assert (file.exists()); Config testResourceConfig = ConfigFactory.parseFileAnySyntax(file, ConfigParseOptions.defaults().setAllowMissing(false)); Config typesafe = testResourceConfig.withFallback(reference).resolve(); StreamsConfiguration streamsConfiguration = StreamsConfigurator.detectConfiguration(typesafe); TwitterStreamConfiguration config = new ComponentConfigurator<>(TwitterStreamConfiguration.class).detectConfiguration(typesafe, "twitter"); TwitterStreamProvider provider = new TwitterStreamProvider(config); ObjectMapper mapper = StreamsJacksonMapper.getInstance(Collections.singletonList(TwitterDateTimeFormat.TWITTER_FORMAT)); PrintStream outStream; try { outStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(outfile))); } catch (FileNotFoundException ex) { LOGGER.error("FileNotFoundException", ex); return; } provider.prepare(config); provider.startStream(); do { Uninterruptibles.sleepUninterruptibly(streamsConfiguration.getBatchFrequencyMs(), TimeUnit.MILLISECONDS); for (StreamsDatum datum : provider.readCurrent()) { String json; try { json = mapper.writeValueAsString(datum.getDocument()); outStream.println(json); } catch (JsonProcessingException ex) { System.err.println(ex.getMessage()); } } } while ( provider.isRunning()); provider.cleanUp(); outStream.flush(); } private static final int MAX_BATCH = 1000; private TwitterStreamConfiguration config; public TwitterStreamConfiguration getConfig() { return config; } public void setConfig(TwitterStreamConfiguration config) { this.config = config; } private volatile Queue<Future<List<StreamsDatum>>> providerQueue; private Authentication auth; protected StreamingEndpoint endpoint; private BasicClient client; private AtomicBoolean running = new AtomicBoolean(false); protected TwitterStreamHelper processor = new TwitterStreamHelper(this); private DatumStatusCounter countersCurrent = new DatumStatusCounter(); private DatumStatusCounter countersTotal = new DatumStatusCounter(); public TwitterStreamProvider() { this.config = new ComponentConfigurator<>(TwitterStreamConfiguration.class).detectConfiguration(StreamsConfigurator.config, "twitter"); } public TwitterStreamProvider(TwitterStreamConfiguration config) { this.config = config; } @Override public String getId() { return STREAMS_ID; } @Override public void startStream() { client.connect(); running.set(true); } @Override public synchronized StreamsResultSet readCurrent() { StreamsResultSet current; synchronized (this) { Queue<StreamsDatum> drain = new LinkedBlockingDeque<>(); drainTo(drain); current = new StreamsResultSet(drain); current.setCounter(new DatumStatusCounter()); current.getCounter().add(countersCurrent); countersTotal.add(countersCurrent); countersCurrent = new DatumStatusCounter(); } return current; } @Override public StreamsResultSet readNew(BigInteger sequence) { throw new NotImplementedException(); } @Override public StreamsResultSet readRange(DateTime start, DateTime end) { throw new NotImplementedException(); } @Override public boolean isRunning() { return this.running.get() && !client.isDone(); } @Override public void prepare(Object configurationObject) { Objects.requireNonNull(config.getEndpoint()); Hosts hosebirdHosts; if (config.getEndpoint().equals("userstream") ) { hosebirdHosts = new HttpHosts(Constants.USERSTREAM_HOST); UserstreamEndpoint userstreamEndpoint = new UserstreamEndpoint(); userstreamEndpoint.withFollowings(true); userstreamEndpoint.withUser(false); userstreamEndpoint.allReplies(false); endpoint = userstreamEndpoint; } else if (config.getEndpoint().equals("sample") ) { hosebirdHosts = new HttpHosts(Constants.STREAM_HOST); boolean track = config.getTrack() != null && !config.getTrack().isEmpty(); boolean follow = config.getFollow() != null && !config.getFollow().isEmpty(); if ( track || follow ) { LOGGER.debug("***\tPRESENT\t***"); StatusesFilterEndpoint statusesFilterEndpoint = new StatusesFilterEndpoint(); if ( track ) { statusesFilterEndpoint.trackTerms(config.getTrack()); } if ( follow ) { statusesFilterEndpoint.followings(config.getFollow()); } this.endpoint = statusesFilterEndpoint; } else { endpoint = new StatusesSampleEndpoint(); } } else if ( config.getEndpoint().endsWith("firehose")) { hosebirdHosts = new HttpHosts(Constants.STREAM_HOST); endpoint = new StatusesFirehoseEndpoint(); } else { LOGGER.error("NO ENDPOINT RESOLVED"); return; } if ( config.getBasicauth() != null ) { Objects.requireNonNull(config.getBasicauth().getUsername()); Objects.requireNonNull(config.getBasicauth().getPassword()); auth = new BasicAuth( config.getBasicauth().getUsername(), config.getBasicauth().getPassword() ); } else if ( config.getOauth() != null ) { Objects.requireNonNull(config.getOauth().getConsumerKey()); Objects.requireNonNull(config.getOauth().getConsumerSecret()); Objects.requireNonNull(config.getOauth().getAccessToken()); Objects.requireNonNull(config.getOauth().getAccessTokenSecret()); auth = new OAuth1(config.getOauth().getConsumerKey(), config.getOauth().getConsumerSecret(), config.getOauth().getAccessToken(), config.getOauth().getAccessTokenSecret()); } else { LOGGER.error("NO AUTH RESOLVED"); return; } LOGGER.debug("host={}\tendpoint={}\taut={}", hosebirdHosts, endpoint, auth); providerQueue = new LinkedBlockingQueue<>(MAX_BATCH); client = new ClientBuilder() .name("apache/streams/streams-contrib/streams-provider-twitter") .hosts(hosebirdHosts) .endpoint(endpoint) .authentication(auth) .connectionTimeout(1200000) .processor(processor) .build(); } @Override public void cleanUp() { this.client.stop(); this.processor.cleanUp(); this.running.set(false); } @Override public DatumStatusCounter getDatumStatusCounter() { return countersTotal; } protected boolean addDatum(Future<List<StreamsDatum>> future) { try { ComponentUtils.offerUntilSuccess(future, providerQueue); countersCurrent.incrementStatus(DatumStatus.SUCCESS); return true; } catch (Exception ex) { countersCurrent.incrementStatus(DatumStatus.FAIL); LOGGER.warn("Unable to enqueue item from Twitter stream"); return false; } } protected void drainTo(Queue<StreamsDatum> drain) { int count = 0; while (!providerQueue.isEmpty() && count <= MAX_BATCH) { for (StreamsDatum datum : pollForDatum()) { ComponentUtils.offerUntilSuccess(datum, drain); count++; } } } protected List<StreamsDatum> pollForDatum() { try { return providerQueue.poll().get(); } catch (InterruptedException ex) { LOGGER.warn("Interrupted while waiting for future. Initiate shutdown."); this.cleanUp(); Thread.currentThread().interrupt(); return new ArrayList<>(); } catch (ExecutionException ex) { LOGGER.warn("Error getting tweet from future"); return new ArrayList<>(); } } }