/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.standard; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.Stateful; import org.apache.nifi.annotation.behavior.TriggerSerially; import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.behavior.WritesAttributes; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.PropertyValue; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.components.state.Scope; import org.apache.nifi.components.state.StateMap; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.util.StandardValidators; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.stream.Collectors; import static org.apache.commons.lang3.StringUtils.isBlank; @EventDriven @Tags({"sort", "order"}) @InputRequirement(Requirement.INPUT_REQUIRED) @TriggerSerially @CapabilityDescription("Enforces expected ordering of FlowFiles those belong to the same data group. " + " Although PriorityAttributePrioritizer can be used on a connection to ensure that flow files going through that connection are in priority order," + " depending on error-handling, branching, and other flow designs, it is possible for FlowFiles to get out-of-order." + " EnforceOrder can be used to enforce original ordering for those FlowFiles." + " [IMPORTANT] In order to take effect of EnforceOrder, FirstInFirstOutPrioritizer should be used at EVERY downstream relationship" + " UNTIL the order of FlowFiles physically get FIXED by operation such as MergeContent or being stored to the final destination.") @Stateful(scopes = Scope.LOCAL, description = "EnforceOrder uses following states per ordering group:" + " '<groupId>.target' is a order number which is being waited to arrive next." + " When a FlowFile with a matching order arrives, or a FlowFile overtakes the FlowFile being waited for because of wait timeout," + " target order will be updated to (FlowFile.order + 1)." + " '<groupId>.max is the maximum order number for a group." + " '<groupId>.updatedAt' is a timestamp when the order of a group was updated last time." + " These managed states will be removed automatically once a group is determined as inactive, see 'Inactive Timeout' for detail.") @WritesAttributes({ @WritesAttribute(attribute = EnforceOrder.ATTR_STARTED_AT, description = "All FlowFiles going through this processor will have this attribute. This value is used to determine wait timeout."), @WritesAttribute(attribute = EnforceOrder.ATTR_RESULT, description = "All FlowFiles going through this processor will have this attribute denoting which relationship it was routed to."), @WritesAttribute(attribute = EnforceOrder.ATTR_DETAIL, description = "FlowFiles routed to 'failure' or 'skipped' relationship will have this attribute describing details."), @WritesAttribute(attribute = EnforceOrder.ATTR_EXPECTED_ORDER, description = "FlowFiles routed to 'wait' or 'skipped' relationship will have this attribute denoting expected order when the FlowFile was processed.") }) public class EnforceOrder extends AbstractProcessor { public static final String ATTR_STARTED_AT = "EnforceOrder.startedAt"; public static final String ATTR_EXPECTED_ORDER = "EnforceOrder.expectedOrder"; public static final String ATTR_RESULT = "EnforceOrder.result"; public static final String ATTR_DETAIL = "EnforceOrder.detail"; private static final Function<String, String> STATE_TARGET_ORDER = groupId -> groupId + ".target"; private static final String STATE_SUFFIX_UPDATED_AT = ".updatedAt"; private static final Function<String, String> STATE_UPDATED_AT = groupId -> groupId + STATE_SUFFIX_UPDATED_AT; private static final Function<String, String> STATE_MAX_ORDER = groupId -> groupId + ".max"; public static final PropertyDescriptor GROUP_IDENTIFIER = new PropertyDescriptor.Builder() .name("group-id") .displayName("Group Identifier") .description("EnforceOrder is capable of multiple ordering groups." + " 'Group Identifier' is used to determine which group a FlowFile belongs to." + " This property will be evaluated with each incoming FlowFile." + " If evaluated result is empty, the FlowFile will be routed to failure.") .required(true) .addValidator(StandardValidators.NON_BLANK_VALIDATOR) .expressionLanguageSupported(true) .defaultValue("${filename}") .build(); public static final PropertyDescriptor ORDER_ATTRIBUTE = new PropertyDescriptor.Builder() .name("order-attribute") .displayName("Order Attribute") .description("A name of FlowFile attribute whose value will be used to enforce order of FlowFiles within a group." + " If a FlowFile does not have this attribute, or its value is not an integer, the FlowFile will be routed to failure.") .required(true) .addValidator(StandardValidators.NON_BLANK_VALIDATOR) .expressionLanguageSupported(false) .build(); public static final PropertyDescriptor INITIAL_ORDER = new PropertyDescriptor.Builder() .name("initial-order") .displayName("Initial Order") .description("When the first FlowFile of a group arrives, initial target order will be computed and stored in the managed state." + " After that, target order will start being tracked by EnforceOrder and stored in the state management store." + " If Expression Language is used but evaluated result was not an integer, then the FlowFile will be routed to failure," + " and initial order will be left unknown until consecutive FlowFiles provide a valid initial order.") .required(true) .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) .expressionLanguageSupported(true) .defaultValue("0") .build(); public static final PropertyDescriptor MAX_ORDER = new PropertyDescriptor.Builder() .name("maximum-order") .displayName("Maximum Order") .description("If specified, any FlowFiles that have larger order will be routed to failure." + " This property is computed only once for a given group." + " After a maximum order is computed, it will be persisted in the state management store and used for other FlowFiles belonging to the same group." + " If Expression Language is used but evaluated result was not an integer, then the FlowFile will be routed to failure," + " and maximum order will be left unknown until consecutive FlowFiles provide a valid maximum order.") .required(false) .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) .expressionLanguageSupported(true) .build(); public static final PropertyDescriptor WAIT_TIMEOUT = new PropertyDescriptor.Builder() .name("wait-timeout") .displayName("Wait Timeout") .description("Indicates the duration after which waiting FlowFiles will be routed to the 'overtook' relationship.") .required(true) .defaultValue("10 min") .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) .expressionLanguageSupported(false) .build(); public static final PropertyDescriptor INACTIVE_TIMEOUT = new PropertyDescriptor.Builder() .name("inactive-timeout") .displayName("Inactive Timeout") .description("Indicates the duration after which state for an inactive group will be cleared from managed state." + " Group is determined as inactive if any new incoming FlowFile has not seen for a group for specified duration." + " Inactive Timeout must be longer than Wait Timeout." + " If a FlowFile arrives late after its group is already cleared, it will be treated as a brand new group," + " but will never match the order since expected preceding FlowFiles are already gone." + " The FlowFile will eventually timeout for waiting and routed to 'overtook'." + " To avoid this, group states should be kept long enough, however, shorter duration would be helpful for reusing the same group identifier again.") .required(true) .defaultValue("30 min") .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) .expressionLanguageSupported(false) .build(); public static final PropertyDescriptor BATCH_COUNT = new PropertyDescriptor.Builder() .name("batch-count") .displayName("Batch Count") .description("The maximum number of FlowFiles that EnforceOrder can process at an execution.") .required(true) .defaultValue("1000") .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) .expressionLanguageSupported(false) .build(); public static final Relationship REL_SUCCESS = new Relationship.Builder() .name("success") .description("A FlowFile with a matching order number will be routed to this relationship.") .build(); public static final Relationship REL_FAILURE = new Relationship.Builder() .name("failure") .description("A FlowFiles which does not have required attributes, or fails to compute those will be routed to this relationship") .build(); public static final Relationship REL_WAIT = new Relationship.Builder() .name("wait") .description("A FlowFile with non matching order will be routed to this relationship") .build(); public static final Relationship REL_OVERTOOK = new Relationship.Builder() .name("overtook") .description("A FlowFile that waited for preceding FlowFiles longer than Wait Timeout and overtook those FlowFiles, will be routed to this relationship.") .build(); public static final Relationship REL_SKIPPED = new Relationship.Builder() .name("skipped") .description("A FlowFile that has an order younger than current, which means arrived too late and skipped, will be routed to this relationship.") .build(); private final Set<Relationship> relationships; public EnforceOrder() { final Set<Relationship> rels = new HashSet<>(); rels.add(REL_SUCCESS); rels.add(REL_WAIT); rels.add(REL_OVERTOOK); rels.add(REL_FAILURE); rels.add(REL_SKIPPED); relationships = Collections.unmodifiableSet(rels); } @Override protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { final List<PropertyDescriptor> descriptors = new ArrayList<>(); descriptors.add(GROUP_IDENTIFIER); descriptors.add(ORDER_ATTRIBUTE); descriptors.add(INITIAL_ORDER); descriptors.add(MAX_ORDER); descriptors.add(BATCH_COUNT); descriptors.add(WAIT_TIMEOUT); descriptors.add(INACTIVE_TIMEOUT); return descriptors; } @Override public Set<Relationship> getRelationships() { return relationships; } @Override protected Collection<ValidationResult> customValidate(ValidationContext validationContext) { final List<ValidationResult> results = new ArrayList<>(super.customValidate(validationContext)); final Long waitTimeoutMillis = validationContext.getProperty(WAIT_TIMEOUT).asTimePeriod(TimeUnit.MICROSECONDS); final Long inactiveTimeoutMillis = validationContext.getProperty(INACTIVE_TIMEOUT).asTimePeriod(TimeUnit.MICROSECONDS); if (waitTimeoutMillis >= inactiveTimeoutMillis) { results.add(new ValidationResult.Builder().input(validationContext.getProperty(INACTIVE_TIMEOUT).getValue()) .subject(INACTIVE_TIMEOUT.getDisplayName()) .explanation(String.format("%s should be longer than %s", INACTIVE_TIMEOUT.getDisplayName(), WAIT_TIMEOUT.getDisplayName())) .valid(false) .build()); } return results; } @Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final ComponentLog logger = getLogger(); final Integer batchCount = context.getProperty(BATCH_COUNT).asInteger(); List<FlowFile> flowFiles = session.get(batchCount); if (flowFiles == null || flowFiles.isEmpty()) { return; } final StateMap stateMap; try { stateMap = context.getStateManager().getState(Scope.LOCAL); } catch (final IOException e) { logger.error("Failed to retrieve state from StateManager due to {}" + e, e); context.yield(); return; } final OrderingContext oc = new OrderingContext(context, session); oc.groupStates.putAll(stateMap.toMap()); for (FlowFile flowFile : flowFiles) { oc.setFlowFile(flowFile); if (oc.flowFile == null) { break; } if (!oc.computeGroupId() || !oc.computeOrder() || !oc.computeInitialOrder() || !oc.computeMaxOrder()) { continue; } // At this point, the flow file is confirmed to be valid. oc.markFlowFileValid(); } oc.transferFlowFiles(); oc.cleanupInactiveStates(); try { context.getStateManager().setState(oc.groupStates, Scope.LOCAL); } catch (final IOException e) { throw new RuntimeException("Failed to update state due to " + e + ". Session will be rollback and processor will be yielded for a while.", e); } } private class OrderingContext { private final ComponentLog logger = getLogger(); private final ProcessSession processSession; private final ProcessContext processContext; // Following properties are static global setting for all groups. private final String orderAttribute; private final Long waitTimeoutMillis; private final Function<FlowFile, Integer> getOrder; private final Map<String, String> groupStates = new HashMap<>(); private final long now = System.currentTimeMillis(); // Following properties are computed per flow file. private final PropertyValue groupIdentifierProperty ; // Followings are per group objects. private final PropertyValue initOrderProperty; private final PropertyValue maxOrderProperty; private final Map<String, List<FlowFile>> flowFileGroups = new TreeMap<>(); // Current variables within incoming FlowFiles loop. private FlowFile flowFile; private String groupId; private Integer order; private OrderingContext(final ProcessContext processContext, final ProcessSession processSession) { this.processContext = processContext; this.processSession = processSession; orderAttribute = processContext.getProperty(ORDER_ATTRIBUTE).getValue(); waitTimeoutMillis = processContext.getProperty(WAIT_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS); getOrder = flowFile -> Integer.parseInt(flowFile.getAttribute(orderAttribute)); groupIdentifierProperty = processContext.getProperty(GROUP_IDENTIFIER); initOrderProperty = processContext.getProperty(INITIAL_ORDER); maxOrderProperty = processContext.getProperty(MAX_ORDER); } private void setFlowFile(final FlowFile flowFile) { this.flowFile = flowFile; this.groupId = null; this.order = null; } private boolean computeGroupId() { groupId = groupIdentifierProperty.evaluateAttributeExpressions(flowFile).getValue(); if (isBlank(groupId)) { transferToFailure(flowFile, "Failed to get Group Identifier."); return false; } return true; } private boolean computeOrder() { try { order = getOrder.apply(flowFile); } catch (final NumberFormatException e) { transferToFailure(flowFile, "Failed to parse order attribute due to " + e, e); return false; } return true; } private boolean computeMaxOrder() { if (maxOrderProperty.isSet()) { // Compute maxOrder for this group if it's not there yet. final String maxOrderStr = groupStates.computeIfAbsent(STATE_MAX_ORDER.apply(groupId), k -> maxOrderProperty.evaluateAttributeExpressions(flowFile).getValue()); if (isBlank(maxOrderStr)) { transferToFailure(flowFile, String.format("%s was specified but result was empty.", MAX_ORDER.getDisplayName())); return false; } final Integer maxOrder; try { maxOrder = Integer.parseInt(maxOrderStr); } catch (final NumberFormatException e) { final String msg = String.format("Failed to get Maximum Order for group [%s] due to %s", groupId, e); transferToFailure(flowFile, msg, e); return false; } // Check max order. if (order > maxOrder) { final String msg = String.format("Order (%d) is greater than the Maximum Order (%d) for Group [%s]", order, maxOrder, groupId); transferToFailure(flowFile, msg); return false; } } return true; } private boolean computeInitialOrder() { // Compute initial order. Use asInteger() to check if it's a valid integer. final String stateKeyOrder = STATE_TARGET_ORDER.apply(groupId); try { final AtomicReference<String> computedInitOrder = new AtomicReference<>(); groupStates.computeIfAbsent(stateKeyOrder, k -> { final String initOrderStr = initOrderProperty.evaluateAttributeExpressions(flowFile).getValue(); // Parse it to check if it is a valid integer. Integer.parseInt(initOrderStr); computedInitOrder.set(initOrderStr); return initOrderStr; }); // If these map modification is in the computeIfAbsent function, it causes this issue. // JDK-8071667 : HashMap.computeIfAbsent() adds entry that HashMap.get() does not find. // http://bugs.java.com/bugdatabase/view_bug.do?bug_id=8071667 if (!isBlank(computedInitOrder.get())) { groupStates.put(STATE_UPDATED_AT.apply(groupId), String.valueOf(now)); } } catch (final NumberFormatException e) { final String msg = String.format("Failed to get Initial Order for Group [%s] due to %s", groupId, e); transferToFailure(flowFile, msg, e); return false; } return true; } private void markFlowFileValid() { final List<FlowFile> groupedFlowFiles = flowFileGroups.computeIfAbsent(groupId, k -> new ArrayList<>()); final FlowFile validFlowFile; if (isBlank(flowFile.getAttribute(ATTR_STARTED_AT))) { validFlowFile = processSession.putAttribute(flowFile, ATTR_STARTED_AT, String.valueOf(now)); } else { validFlowFile = flowFile; } groupedFlowFiles.add(validFlowFile); } private void transferFlowFiles() { flowFileGroups.entrySet().stream().filter(entry -> !entry.getValue().isEmpty()).map(entry -> { // Sort flow files within each group. final List<FlowFile> groupedFlowFiles = entry.getValue(); groupedFlowFiles.sort(Comparator.comparing(getOrder)); return entry; }).forEach(entry -> { // Check current state. final String groupId = entry.getKey(); final String stateKeyOrder = STATE_TARGET_ORDER.apply(groupId); final int previousTargetOrder = Integer.parseInt(groupStates.get(stateKeyOrder)); final AtomicInteger targetOrder = new AtomicInteger(previousTargetOrder); final List<FlowFile> groupedFlowFiles = entry.getValue(); final String maxOrderStr = groupStates.get(STATE_MAX_ORDER.apply(groupId)); groupedFlowFiles.forEach(f -> { final Integer order = getOrder.apply(f); final boolean isMaxOrder = !isBlank(maxOrderStr) && order.equals(Integer.parseInt(maxOrderStr)); if (order == targetOrder.get()) { transferResult(f, REL_SUCCESS, null, null); if (!isMaxOrder) { // If max order is specified and this FlowFile has the max order, don't increment target anymore. targetOrder.incrementAndGet(); } } else if (order > targetOrder.get()) { if (now - Long.parseLong(f.getAttribute(ATTR_STARTED_AT)) > waitTimeoutMillis) { transferResult(f, REL_OVERTOOK, null, targetOrder.get()); targetOrder.set(isMaxOrder ? order : order + 1); } else { transferResult(f, REL_WAIT, null, targetOrder.get()); } } else { final String msg = String.format("Skipped, FlowFile order was %d but current target is %d", order, targetOrder.get()); logger.warn(msg + ". {}", new Object[]{f}); transferResult(f, REL_SKIPPED, msg, targetOrder.get()); } }); if (previousTargetOrder != targetOrder.get()) { groupStates.put(stateKeyOrder, String.valueOf(targetOrder.get())); groupStates.put(STATE_UPDATED_AT.apply(groupId), String.valueOf(now)); } }); } private void transferResult(final FlowFile flowFile, final Relationship result, final String detail, final Integer expectedOrder) { final Map<String, String> attributes = new HashMap<>(); attributes.put(ATTR_RESULT, result.getName()); if (expectedOrder != null) { attributes.put(ATTR_EXPECTED_ORDER, expectedOrder.toString()); } if (!isBlank(detail)) { attributes.put(ATTR_DETAIL, detail); } FlowFile resultFlowFile = processSession.putAllAttributes(flowFile, attributes); // Remove if (expectedOrder == null) { resultFlowFile = processSession.removeAttribute(resultFlowFile, ATTR_EXPECTED_ORDER); } if (detail == null) { resultFlowFile = processSession.removeAttribute(resultFlowFile, ATTR_DETAIL); } processSession.transfer(resultFlowFile, result); } private void transferToFailure(final FlowFile flowFile, final String message) { transferToFailure(flowFile, message, null); } private void transferToFailure(final FlowFile flowFile, final String message, final Throwable cause) { if (cause != null) { getLogger().warn(message + " {}", new Object[]{flowFile}, cause); } else { getLogger().warn(message + " {}", new Object[]{flowFile}); } transferResult(flowFile, REL_FAILURE, message, null); } private void cleanupInactiveStates() { final Long inactiveTimeout = processContext.getProperty(INACTIVE_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS); final List<String> inactiveGroups = groupStates.keySet().stream() .filter(k -> k.endsWith(STATE_SUFFIX_UPDATED_AT) && (now - Long.parseLong(groupStates.get(k)) > inactiveTimeout)) .map(k -> k.substring(0, k.length() - STATE_SUFFIX_UPDATED_AT.length())) .collect(Collectors.toList()); inactiveGroups.forEach(groupId -> { groupStates.remove(STATE_TARGET_ORDER.apply(groupId)); groupStates.remove(STATE_UPDATED_AT.apply(groupId)); groupStates.remove(STATE_MAX_ORDER.apply(groupId)); }); } } }