/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.cybersecurity; import com.idealista.tlsh.exceptions.InsufficientComplexityException; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.SideEffectFree; import org.apache.nifi.annotation.behavior.SupportsBatching; import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.behavior.WritesAttributes; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.SeeAlso; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.ProcessorInitializationContext; import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.io.InputStreamCallback; import org.apache.nifi.processor.util.StandardValidators; import org.apache.nifi.util.StringUtils; import org.apache.nifi.stream.io.StreamUtils; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; @EventDriven @SideEffectFree @SupportsBatching @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @Tags({"hashing", "fuzzy-hashing", "cyber-security"}) @CapabilityDescription("Calculates a fuzzy/locality-sensitive hash value for the Content of a FlowFile and puts that " + "hash value on the FlowFile as an attribute whose name is determined by the <Hash Attribute Name> property." + "Note: this processor only offers non-cryptographic hash algorithms. And it should be not be " + "seen as a replacement to the HashContent processor." + "Note: The underlying library loads the entirety of the streamed content into and performs result " + "evaluations in memory. Accordingly, it is important to consider the anticipated profile of content being " + "evaluated by this processor and the hardware supporting it especially when working against large files.") @SeeAlso(classNames = {"org.apache.nifi.processors.standard.HashContent"}, value = {CompareFuzzyHash.class}) @WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>", description = "This Processor adds an attribute whose value is the result of Hashing the " + "existing FlowFile content. The name of this attribute is specified by the <Hash Attribute Name> property")}) public class FuzzyHashContent extends AbstractFuzzyHashProcessor { public static final PropertyDescriptor HASH_ALGORITHM = new PropertyDescriptor.Builder() .name("HASH_ALGORITHM") .displayName("Hashing Algorithm") .description("The hashing algorithm utilised") .allowableValues(allowableValueSSDEEP, allowableValueTLSH) .required(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final Relationship REL_SUCCESS = new Relationship.Builder() .name("success") .description("Any FlowFile that is successfully hashed will be sent to this Relationship.") .build(); public static final Relationship REL_FAILURE = new Relationship.Builder() .name("failure") .description("Any FlowFile that is successfully hashed will be sent to this Relationship.") .build(); private List<PropertyDescriptor> descriptors; private Set<Relationship> relationships; @Override protected void init(final ProcessorInitializationContext context) { final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>(); descriptors.add(ATTRIBUTE_NAME); descriptors.add(HASH_ALGORITHM); this.descriptors = Collections.unmodifiableList(descriptors); final Set<Relationship> relationships = new HashSet<Relationship>(); relationships.add(REL_SUCCESS); relationships.add(REL_FAILURE); this.relationships = Collections.unmodifiableSet(relationships); } @Override public Set<Relationship> getRelationships() { return this.relationships; } @Override public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { return descriptors; } @OnScheduled public void onScheduled(final ProcessContext context) { } @Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { FlowFile flowFile = session.get(); if (flowFile == null) { return; } final ComponentLog logger = getLogger(); String algorithm = context.getProperty(HASH_ALGORITHM).getValue(); // Check if content matches minimum length requirement if (checkMinimumAlgorithmRequirements(algorithm, flowFile) == false) { logger.error("The content of '{}' is smaller than the minimum required by {}, routing to failure", new Object[]{flowFile, algorithm}); session.transfer(flowFile, REL_FAILURE); return; } final AtomicReference<String> hashValueHolder = new AtomicReference<>(null); try { session.read(flowFile, new InputStreamCallback() { @Override public void process(final InputStream in) throws IOException { try (ByteArrayOutputStream holder = new ByteArrayOutputStream()) { StreamUtils.copy(in,holder); String hashValue = generateHash(algorithm, holder.toString()); if (StringUtils.isBlank(hashValue) == false) { hashValueHolder.set(hashValue); } } } }); final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue(); flowFile = session.putAttribute(flowFile, attributeName, hashValueHolder.get()); logger.info("Successfully added attribute '{}' to {} with a value of {}; routing to success", new Object[]{attributeName, flowFile, hashValueHolder.get()}); session.getProvenanceReporter().modifyAttributes(flowFile); session.transfer(flowFile, REL_SUCCESS); } catch (final InsufficientComplexityException | ProcessException e) { logger.error("Failed to process {} due to {}; routing to failure", new Object[]{flowFile, e}); session.transfer(flowFile, REL_FAILURE); } } }