/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.twitter; import java.io.IOException; import java.io.OutputStream; import java.net.MalformedURLException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.regex.Pattern; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.SupportsBatching; import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.annotation.lifecycle.OnStopped; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.components.Validator; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.flowfile.attributes.CoreAttributes; import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.ProcessorInitializationContext; import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.io.OutputStreamCallback; import org.apache.nifi.processor.util.StandardValidators; import com.twitter.hbc.ClientBuilder; import com.twitter.hbc.core.Client; import com.twitter.hbc.core.Constants; import com.twitter.hbc.core.endpoint.Location ; import com.twitter.hbc.core.endpoint.Location.Coordinate ; import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint; import com.twitter.hbc.core.endpoint.StatusesFirehoseEndpoint; import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint; import com.twitter.hbc.core.endpoint.StreamingEndpoint; import com.twitter.hbc.core.event.Event; import com.twitter.hbc.core.processor.StringDelimitedProcessor; import com.twitter.hbc.httpclient.auth.Authentication; import com.twitter.hbc.httpclient.auth.OAuth1; @SupportsBatching @InputRequirement(Requirement.INPUT_FORBIDDEN) @Tags({"twitter", "tweets", "social media", "status", "json"}) @CapabilityDescription("Pulls status changes from Twitter's streaming API") @WritesAttribute(attribute = "mime.type", description = "Sets mime type to application/json") public class GetTwitter extends AbstractProcessor { static final AllowableValue ENDPOINT_SAMPLE = new AllowableValue("Sample Endpoint", "Sample Endpoint", "The endpoint that provides public data, aka a 'garden hose'"); static final AllowableValue ENDPOINT_FIREHOSE = new AllowableValue("Firehose Endpoint", "Firehose Endpoint", "The endpoint that provides access to all tweets"); static final AllowableValue ENDPOINT_FILTER = new AllowableValue("Filter Endpoint", "Filter Endpoint", "Endpoint that allows the stream to be filtered by specific terms or User IDs"); public static final PropertyDescriptor ENDPOINT = new PropertyDescriptor.Builder() .name("Twitter Endpoint") .description("Specifies which endpoint data should be pulled from") .required(true) .allowableValues(ENDPOINT_SAMPLE, ENDPOINT_FIREHOSE, ENDPOINT_FILTER) .defaultValue(ENDPOINT_SAMPLE.getValue()) .build(); public static final PropertyDescriptor CONSUMER_KEY = new PropertyDescriptor.Builder() .name("Consumer Key") .description("The Consumer Key provided by Twitter") .required(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final PropertyDescriptor CONSUMER_SECRET = new PropertyDescriptor.Builder() .name("Consumer Secret") .description("The Consumer Secret provided by Twitter") .required(true) .sensitive(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final PropertyDescriptor ACCESS_TOKEN = new PropertyDescriptor.Builder() .name("Access Token") .description("The Access Token provided by Twitter") .required(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final PropertyDescriptor ACCESS_TOKEN_SECRET = new PropertyDescriptor.Builder() .name("Access Token Secret") .description("The Access Token Secret provided by Twitter") .required(true) .sensitive(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final PropertyDescriptor LANGUAGES = new PropertyDescriptor.Builder() .name("Languages") .description("A comma-separated list of languages for which tweets should be fetched") .required(false) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final PropertyDescriptor FOLLOWING = new PropertyDescriptor.Builder() .name("IDs to Follow") .description("A comma-separated list of Twitter User ID's to follow. Ignored unless Endpoint is set to 'Filter Endpoint'.") .required(false) .addValidator(new FollowingValidator()) .build(); public static final PropertyDescriptor LOCATIONS = new PropertyDescriptor.Builder() .name("Locations to Filter On") .description("A comma-separated list of coordinates specifying one or more bounding boxes to filter on." + "Each bounding box is specified by a pair of coordinates in the format: swLon,swLat,neLon,neLat. " + "Multiple bounding boxes can be specified as such: swLon1,swLat1,neLon1,neLat1,swLon2,swLat2,neLon2,neLat2." + "Ignored unless Endpoint is set to 'Filter Endpoint'.") .addValidator(new LocationValidator() ) .required(false) .build(); public static final PropertyDescriptor TERMS = new PropertyDescriptor.Builder() .name("Terms to Filter On") .description("A comma-separated list of terms to filter on. Ignored unless Endpoint is set to 'Filter Endpoint'." + " The filter works such that if any term matches, the status update will be retrieved; multiple terms" + " separated by a space function as an 'AND'. I.e., 'it was, hello' will retrieve status updates that" + " have either 'hello' or both 'it' AND 'was'") .required(false) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final Relationship REL_SUCCESS = new Relationship.Builder() .name("success") .description("All status updates will be routed to this relationship") .build(); private List<PropertyDescriptor> descriptors; private Set<Relationship> relationships; private final BlockingQueue<Event> eventQueue = new LinkedBlockingQueue<>(1000); private volatile Client client; private volatile BlockingQueue<String> messageQueue = new LinkedBlockingQueue<>(5000); @Override protected void init(final ProcessorInitializationContext context) { final List<PropertyDescriptor> descriptors = new ArrayList<>(); descriptors.add(ENDPOINT); descriptors.add(CONSUMER_KEY); descriptors.add(CONSUMER_SECRET); descriptors.add(ACCESS_TOKEN); descriptors.add(ACCESS_TOKEN_SECRET); descriptors.add(LANGUAGES); descriptors.add(TERMS); descriptors.add(FOLLOWING); descriptors.add(LOCATIONS); this.descriptors = Collections.unmodifiableList(descriptors); final Set<Relationship> relationships = new HashSet<>(); relationships.add(REL_SUCCESS); this.relationships = Collections.unmodifiableSet(relationships); } @Override public Set<Relationship> getRelationships() { return this.relationships; } @Override public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { return descriptors; } @Override protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { return new PropertyDescriptor.Builder() .name(propertyDescriptorName) .description("Adds a query parameter with name '" + propertyDescriptorName + "' to the Twitter query") .required(false) .dynamic(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); } @Override protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) { final List<ValidationResult> results = new ArrayList<>(); final String endpointName = validationContext.getProperty(ENDPOINT).getValue(); if (ENDPOINT_FILTER.getValue().equals(endpointName)) { if (!validationContext.getProperty(TERMS).isSet() && !validationContext.getProperty(FOLLOWING).isSet() && !validationContext.getProperty(LOCATIONS).isSet()) { results.add(new ValidationResult.Builder().input("").subject(FOLLOWING.getName()) .valid(false).explanation("When using the 'Filter Endpoint', at least one of '" + TERMS.getName() + "' or '" + FOLLOWING.getName() + "'" + "' or '" + LOCATIONS.getName() + " must be set").build()); } } return results; } @Override public void onPropertyModified(final PropertyDescriptor descriptor, final String oldValue, final String newValue) { // if any property is modified, the results are no longer valid. Destroy all messages in the queue. messageQueue.clear(); } @OnScheduled public void onScheduled(final ProcessContext context) throws MalformedURLException { final String endpointName = context.getProperty(ENDPOINT).getValue(); final Authentication oauth = new OAuth1(context.getProperty(CONSUMER_KEY).getValue(), context.getProperty(CONSUMER_SECRET).getValue(), context.getProperty(ACCESS_TOKEN).getValue(), context.getProperty(ACCESS_TOKEN_SECRET).getValue()); final ClientBuilder clientBuilder = new ClientBuilder(); clientBuilder.name("GetTwitter[id=" + getIdentifier() + "]") .authentication(oauth) .eventMessageQueue(eventQueue) .processor(new StringDelimitedProcessor(messageQueue)); final String languageString = context.getProperty(LANGUAGES).getValue(); final List<String> languages; if (languageString == null) { languages = null; } else { languages = new ArrayList<>(); for (final String language : context.getProperty(LANGUAGES).getValue().split(",")) { languages.add(language.trim()); } } final String host; final StreamingEndpoint streamingEndpoint; if (ENDPOINT_SAMPLE.getValue().equals(endpointName)) { host = Constants.STREAM_HOST; final StatusesSampleEndpoint sse = new StatusesSampleEndpoint(); streamingEndpoint = sse; if (languages != null) { sse.languages(languages); } } else if (ENDPOINT_FIREHOSE.getValue().equals(endpointName)) { host = Constants.STREAM_HOST; final StatusesFirehoseEndpoint firehoseEndpoint = new StatusesFirehoseEndpoint(); streamingEndpoint = firehoseEndpoint; if (languages != null) { firehoseEndpoint.languages(languages); } } else if (ENDPOINT_FILTER.getValue().equals(endpointName)) { host = Constants.STREAM_HOST; final StatusesFilterEndpoint filterEndpoint = new StatusesFilterEndpoint(); final String followingString = context.getProperty(FOLLOWING).getValue(); final List<Long> followingIds; if (followingString == null) { followingIds = Collections.emptyList(); } else { followingIds = new ArrayList<>(); for (final String split : followingString.split(",")) { final Long id = Long.parseLong(split.trim()); followingIds.add(id); } } final String termString = context.getProperty(TERMS).getValue(); final List<String> terms; if (termString == null) { terms = Collections.emptyList(); } else { terms = new ArrayList<>(); for (final String split : termString.split(",")) { terms.add(split.trim()); } } if (!terms.isEmpty()) { filterEndpoint.trackTerms(terms); } if (!followingIds.isEmpty()) { filterEndpoint.followings(followingIds); } if (languages != null) { filterEndpoint.languages(languages); } final String locationString = context.getProperty(LOCATIONS).getValue(); final List<Location> locations; if (locationString == null) { locations = Collections.emptyList(); } else { locations = LocationUtil.parseLocations(locationString); } if (!locations.isEmpty()) { filterEndpoint.locations(locations); } streamingEndpoint = filterEndpoint ; } else { throw new AssertionError("Endpoint was invalid value: " + endpointName); } clientBuilder.hosts(host).endpoint(streamingEndpoint); client = clientBuilder.build(); client.connect(); } @OnStopped public void shutdownClient() { if (client != null) { client.stop(); } } @Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final Event event = eventQueue.poll(); if (event != null) { switch (event.getEventType()) { case STOPPED_BY_ERROR: getLogger().error("Received error {}: {} due to {}. Will not attempt to reconnect", new Object[]{event.getEventType(), event.getMessage(), event.getUnderlyingException()}); break; case CONNECTION_ERROR: case HTTP_ERROR: getLogger().error("Received error {}: {}. Will attempt to reconnect", new Object[]{event.getEventType(), event.getMessage()}); client.reconnect(); break; default: break; } } final String tweet = messageQueue.poll(); if (tweet == null) { context.yield(); return; } FlowFile flowFile = session.create(); flowFile = session.write(flowFile, new OutputStreamCallback() { @Override public void process(final OutputStream out) throws IOException { out.write(tweet.getBytes(StandardCharsets.UTF_8)); } }); final Map<String, String> attributes = new HashMap<>(); attributes.put(CoreAttributes.MIME_TYPE.key(), "application/json"); attributes.put(CoreAttributes.FILENAME.key(), flowFile.getAttribute(CoreAttributes.FILENAME.key()) + ".json"); flowFile = session.putAllAttributes(flowFile, attributes); session.transfer(flowFile, REL_SUCCESS); session.getProvenanceReporter().receive(flowFile, Constants.STREAM_HOST + client.getEndpoint().getURI().toString()); } private static class FollowingValidator implements Validator { private static final Pattern NUMBER_PATTERN = Pattern.compile("\\d+"); @Override public ValidationResult validate(final String subject, final String input, final ValidationContext context) { final String[] splits = input.split(","); for (final String split : splits) { if (!NUMBER_PATTERN.matcher(split.trim()).matches()) { return new ValidationResult.Builder().input(input).subject(subject).valid(false).explanation("Must be comma-separted list of User ID's").build(); } } return new ValidationResult.Builder().subject(subject).input(input).valid(true).build(); } } private static class LocationValidator implements Validator { @Override public ValidationResult validate(final String subject, final String input, final ValidationContext context) { try { final List<Location> locations = LocationUtil.parseLocations(input); for (final Location location : locations) { final Coordinate sw = location.southwestCoordinate(); final Coordinate ne = location.northeastCoordinate(); if (sw.longitude() > ne.longitude()) { return new ValidationResult.Builder().input(input).subject(subject).valid(false) .explanation("SW Longitude (" + sw.longitude() + ") must be less than NE Longitude (" + ne.longitude() + ").").build(); } if (sw.longitude() == ne.longitude()) { return new ValidationResult.Builder().input(input).subject(subject).valid(false) .explanation("SW Longitude (" + sw.longitude() + ") can not be equal to NE Longitude (" + ne.longitude() + ").").build(); } if (sw.latitude() > ne.latitude()) { return new ValidationResult.Builder().input(input).subject(subject).valid(false) .explanation("SW Latitude (" + sw.latitude() + ") must be less than NE Latitude (" + ne.latitude() + ").").build(); } if (sw.latitude() == ne.latitude()) { return new ValidationResult.Builder().input(input).subject(subject).valid(false) .explanation("SW Latitude (" + sw.latitude() + ") can not be equal to NE Latitude (" + ne.latitude() + ").").build(); } } return new ValidationResult.Builder().subject(subject).input(input).valid(true).build(); } catch (IllegalStateException e) { return new ValidationResult.Builder() .input(input).subject(subject).valid(false) .explanation("Must be a comma-separated list of longitude,latitude pairs specifying one or more bounding boxes.") .build(); } } } }