/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.azure.eventhub;
import com.microsoft.azure.eventhubs.EventData;
import com.microsoft.azure.eventhubs.EventHubClient;
import com.microsoft.azure.eventhubs.PartitionReceiver;
import com.microsoft.azure.servicebus.ConnectionStringBuilder;
import com.microsoft.azure.servicebus.ServiceBusException;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.util.StopWatch;
import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
@Tags({"azure", "microsoft", "cloud", "eventhub", "events", "streaming", "streams"})
@CapabilityDescription("Receives messages from a Microsoft Azure Event Hub, writing the contents of the Azure message to the content of the FlowFile")
@InputRequirement(Requirement.INPUT_FORBIDDEN)
@WritesAttributes({
@WritesAttribute(attribute = "eventhub.enqueued.timestamp", description = "The time (in milliseconds since epoch, UTC) at which the message was enqueued in the Azure Event Hub"),
@WritesAttribute(attribute = "eventhub.offset", description = "The offset into the partition at which the message was stored"),
@WritesAttribute(attribute = "eventhub.sequence", description = "The Azure Sequence number associated with the message"),
@WritesAttribute(attribute = "eventhub.name", description = "The name of the Event Hub from which the message was pulled"),
@WritesAttribute(attribute = "eventhub.partition", description = "The name of the Azure Partition from which the message was pulled")
})
public class GetAzureEventHub extends AbstractProcessor {
static final PropertyDescriptor EVENT_HUB_NAME = new PropertyDescriptor.Builder()
.name("Event Hub Name")
.description("The name of the Azure Event Hub to pull messages from")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.required(true)
.build();
static final PropertyDescriptor NAMESPACE = new PropertyDescriptor.Builder()
.name("Event Hub Namespace")
.description("The Azure Namespace that the Event Hub is assigned to. This is generally equal to <Event Hub Name>-ns")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(false)
.required(true)
.build();
static final PropertyDescriptor ACCESS_POLICY = new PropertyDescriptor.Builder()
.name("Shared Access Policy Name")
.description("The name of the Event Hub Shared Access Policy. This Policy must have Listen permissions.")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(false)
.required(true)
.build();
static final PropertyDescriptor POLICY_PRIMARY_KEY = new PropertyDescriptor.Builder()
.name("Shared Access Policy Primary Key")
.description("The primary key of the Event Hub Shared Access Policy")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(false)
.sensitive(true)
.required(true)
.build();
static final PropertyDescriptor NUM_PARTITIONS = new PropertyDescriptor.Builder()
.name("Number of Event Hub Partitions")
.description("The number of partitions that the Event Hub has. Only this number of partitions will be used, "
+ "so it is important to ensure that if the number of partitions changes that this value be updated. Otherwise, some messages may not be consumed.")
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.expressionLanguageSupported(false)
.required(true)
.build();
static final PropertyDescriptor CONSUMER_GROUP = new PropertyDescriptor.Builder()
.name("Event Hub Consumer Group")
.description("The name of the Event Hub Consumer Group to use when pulling events")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(false)
.defaultValue("$Default")
.required(true)
.build();
static final PropertyDescriptor ENQUEUE_TIME = new PropertyDescriptor.Builder()
.name("Event Hub Message Enqueue Time")
.description("A timestamp (ISO-8061 Instant) formatted as YYYY-MM-DDThhmmss.sssZ (2016-01-01T01:01:01.000Z) from which messages "
+ "should have been enqueued in the EventHub to start reading from")
.addValidator(StandardValidators.ISO8061_INSTANT_VALIDATOR)
.expressionLanguageSupported(false)
.required(false)
.build();
static final PropertyDescriptor RECEIVER_FETCH_SIZE = new PropertyDescriptor.Builder()
.name("Partition Recivier Fetch Size")
.description("The number of events that a receiver should fetch from an EventHubs partition before returning. Default(100)")
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.expressionLanguageSupported(false)
.required(false)
.build();
static final PropertyDescriptor RECEIVER_FETCH_TIMEOUT = new PropertyDescriptor.Builder()
.name("Partiton Receiver Timeout (millseconds)")
.description("The amount of time a Partition Receiver should wait to receive the Fetch Size before returning. Default(60000)")
.addValidator(StandardValidators.POSITIVE_LONG_VALIDATOR)
.expressionLanguageSupported(false)
.required(false)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("Any FlowFile that is successfully received from the Azure Event Hub will be transferred to this Relationship.")
.build();
private final ConcurrentMap<String, PartitionReceiver> partitionToReceiverMap = new ConcurrentHashMap<>();
private volatile BlockingQueue<String> partitionNames = new LinkedBlockingQueue<>();
private volatile Instant configuredEnqueueTime;
private volatile int receiverFetchSize;
private volatile Duration receiverFetchTimeout;
private EventHubClient eventHubClient;
private final static List<PropertyDescriptor> propertyDescriptors;
private final static Set<Relationship> relationships;
/*
* Will ensure that the list of property descriptors is build only once.
* Will also create a Set of relationships
*/
static {
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
_propertyDescriptors.add(EVENT_HUB_NAME);
_propertyDescriptors.add(NAMESPACE);
_propertyDescriptors.add(ACCESS_POLICY);
_propertyDescriptors.add(POLICY_PRIMARY_KEY);
_propertyDescriptors.add(NUM_PARTITIONS);
_propertyDescriptors.add(CONSUMER_GROUP);
_propertyDescriptors.add(ENQUEUE_TIME);
_propertyDescriptors.add(RECEIVER_FETCH_SIZE);
_propertyDescriptors.add(RECEIVER_FETCH_TIMEOUT);
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
Set<Relationship> _relationships = new HashSet<>();
_relationships.add(REL_SUCCESS);
relationships = Collections.unmodifiableSet(_relationships);
}
@Override
public Set<Relationship> getRelationships() {
return relationships;
}
@Override
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return propertyDescriptors;
}
protected void setupReceiver(final String connectionString) throws ProcessException {
try {
eventHubClient = EventHubClient.createFromConnectionString(connectionString).get();
} catch (InterruptedException | ExecutionException | IOException | ServiceBusException e) {
throw new ProcessException(e);
}
}
PartitionReceiver getReceiver(final ProcessContext context, final String partitionId) throws IOException, ServiceBusException, ExecutionException, InterruptedException {
PartitionReceiver existingReceiver = partitionToReceiverMap.get(partitionId);
if (existingReceiver != null) {
return existingReceiver;
}
// we want to avoid allowing multiple threads to create Receivers simultaneously because that could result in
// having multiple Receivers for the same partition. So if the map does not contain a receiver for this partition,
// we will enter a synchronized block and check again (because once we enter the synchronized block, we know that no
// other thread is creating a client). If within the synchronized block, we still do not have an entry in the map,
// it is up to use to create the receiver, initialize it, and then put it into the map.
// We do not use the putIfAbsent method in order to do a CAS operation here because we want to also initialize the
// receiver if and only if it is not present in the map. As a result, we need to initialize the receiver and add it
// to the map atomically. Hence, the synchronized block.
synchronized (this) {
existingReceiver = partitionToReceiverMap.get(partitionId);
if (existingReceiver != null) {
return existingReceiver;
}
final String consumerGroupName = context.getProperty(CONSUMER_GROUP).getValue();
final PartitionReceiver receiver = eventHubClient.createReceiver(
consumerGroupName,
partitionId,
configuredEnqueueTime == null ? Instant.now() : configuredEnqueueTime).get();
receiver.setReceiveTimeout(receiverFetchTimeout == null ? Duration.ofMillis(60000) : receiverFetchTimeout);
partitionToReceiverMap.put(partitionId, receiver);
return receiver;
}
}
/**
* This method is here to try and isolate the Azure related code as the PartitionReceiver cannot be mocked
* with PowerMock due to it being final. Unfortunately it extends a base class and does not implement an interface
* so even if we create a MockPartitionReciver, it will not work as the two classes are orthogonal.
*
* @param context - The processcontext for this processor
* @param partitionId - The partition ID to retrieve a receiver by.
* @return - Returns the events received from the EventBus.
* @throws ProcessException -- If any exception is encountered, receiving events it is wrapped in a ProcessException
* and then that exception is thrown.
*/
protected Iterable<EventData> receiveEvents(final ProcessContext context, final String partitionId) throws ProcessException {
final PartitionReceiver receiver;
try {
receiver = getReceiver(context, partitionId);
return receiver.receive(receiverFetchSize).get();
} catch (final IOException | ServiceBusException | ExecutionException | InterruptedException e) {
throw new ProcessException(e);
}
}
@OnStopped
public void tearDown() throws ProcessException {
for (final PartitionReceiver receiver : partitionToReceiverMap.values()) {
if (null != receiver) {
receiver.close();
}
}
partitionToReceiverMap.clear();
try {
if (null != eventHubClient) {
eventHubClient.closeSync();
}
} catch (final ServiceBusException e) {
throw new ProcessException(e);
}
}
@OnScheduled
public void onScheduled(final ProcessContext context) throws ProcessException {
final BlockingQueue<String> partitionNames = new LinkedBlockingQueue<>();
for (int i = 0; i < context.getProperty(NUM_PARTITIONS).asInteger(); i++) {
partitionNames.add(String.valueOf(i));
}
this.partitionNames = partitionNames;
final String policyName = context.getProperty(ACCESS_POLICY).getValue();
final String policyKey = context.getProperty(POLICY_PRIMARY_KEY).getValue();
final String namespace = context.getProperty(NAMESPACE).getValue();
final String eventHubName = context.getProperty(EVENT_HUB_NAME).getValue();
if(context.getProperty(ENQUEUE_TIME).isSet()) {
configuredEnqueueTime = Instant.parse(context.getProperty(ENQUEUE_TIME).toString());
} else {
configuredEnqueueTime = null;
}
if(context.getProperty(RECEIVER_FETCH_SIZE).isSet()) {
receiverFetchSize = context.getProperty(RECEIVER_FETCH_SIZE).asInteger();
} else {
receiverFetchSize = 100;
}
if(context.getProperty(RECEIVER_FETCH_TIMEOUT).isSet()) {
receiverFetchTimeout = Duration.ofMillis(context.getProperty(RECEIVER_FETCH_TIMEOUT).asLong());
} else {
receiverFetchTimeout = null;
}
final String connectionString = new ConnectionStringBuilder(namespace, eventHubName, policyName, policyKey).toString();
setupReceiver(connectionString);
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final BlockingQueue<String> partitionIds = this.partitionNames;
final String partitionId = partitionIds.poll();
if (partitionId == null) {
getLogger().debug("No partitions available");
return;
}
final StopWatch stopWatch = new StopWatch(true);
try {
final Iterable<EventData> receivedEvents = receiveEvents(context, partitionId);
if (receivedEvents == null) {
return;
}
for (final EventData eventData : receivedEvents) {
if (null != eventData) {
final Map<String, String> attributes = new HashMap<>();
FlowFile flowFile = session.create();
EventData.SystemProperties systemProperties = eventData.getSystemProperties();
if (null != systemProperties) {
attributes.put("eventhub.enqueued.timestamp", String.valueOf(eventData.getSystemProperties().getEnqueuedTime()));
attributes.put("eventhub.offset", eventData.getSystemProperties().getOffset());
attributes.put("eventhub.sequence", String.valueOf(eventData.getSystemProperties().getSequenceNumber()));
}
attributes.put("eventhub.name", context.getProperty(EVENT_HUB_NAME).getValue());
attributes.put("eventhub.partition", partitionId);
flowFile = session.putAllAttributes(flowFile, attributes);
flowFile = session.write(flowFile, out -> {
out.write(eventData.getBody());
});
session.transfer(flowFile, REL_SUCCESS);
final String namespace = context.getProperty(NAMESPACE).getValue();
final String eventHubName = context.getProperty(EVENT_HUB_NAME).getValue();
final String consumerGroup = context.getProperty(CONSUMER_GROUP).getValue();
final String transitUri = "amqps://" + namespace + ".servicebus.windows.net" + "/" + eventHubName + "/ConsumerGroups/" + consumerGroup + "/Partitions/" + partitionId;
session.getProvenanceReporter().receive(flowFile, transitUri, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
}
}
} finally {
partitionIds.offer(partitionId);
}
}
}