/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.cybersecurity;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher;
import org.apache.nifi.processors.cybersecurity.matchers.TLSHHashMatcher;
import org.apache.nifi.util.StringUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@EventDriven
@SideEffectFree
@SupportsBatching
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@SeeAlso({FuzzyHashContent.class})
@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
@CapabilityDescription("Compares an attribute containing a Fuzzy Hash against a file containing a list of fuzzy hashes, " +
"appending an attribute to the FlowFile in case of a successful match.")
@WritesAttributes({
@WritesAttribute(attribute = "XXXX.N.match", description = "The match that resembles the attribute specified " +
"by the <Hash Attribute Name> property. Note that: 'XXX' gets replaced with the <Hash Attribute Name>"),
@WritesAttribute(attribute = "XXXX.N.similarity", description = "The similarity score between this flowfile" +
"and its match of the same number N. Note that: 'XXX' gets replaced with the <Hash Attribute Name>")})
public class CompareFuzzyHash extends AbstractFuzzyHashProcessor {
public static final AllowableValue singleMatch = new AllowableValue(
"single",
"single",
"Send FlowFile to matched after the first match above threshold");
public static final AllowableValue multiMatch = new AllowableValue(
"multi-match",
"multi-match",
"Iterate full list of hashes before deciding to send FlowFile to matched or unmatched");
public static final PropertyDescriptor HASH_LIST_FILE = new PropertyDescriptor.Builder()
.name("HASH_LIST_FILE")
.displayName("Hash List source file")
.description("Path to the file containing hashes to be validated against")
.required(true)
.addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
.build();
// Note we add a PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class
public static final PropertyDescriptor MATCH_THRESHOLD = new PropertyDescriptor.Builder()
// Note that while both TLSH and SSDeep seems to return int, we treat them as double in code.
// The rationale behind being the expectation that other algorithms thatmay return double values
// may be added to the processor later on.
.name("MATCH_THRESHOLD")
.displayName("Match threshold")
.description("The similarity score must exceed or be equal to in order for" +
"match to be considered true. Refer to Additional Information for differences between TLSH " +
"and SSDEEP scores and how they relate to this property.")
.required(true)
.addValidator(StandardValidators.NUMBER_VALIDATOR)
.build();
public static final PropertyDescriptor MATCHING_MODE = new PropertyDescriptor.Builder()
.name("MATCHING_MODE")
.displayName("Matching mode")
.description("Defines if the Processor should try to match as many entries as possible (" + multiMatch.getDisplayName() +
") or if it should stop after the first match (" + singleMatch.getDisplayName() + ")")
.required(true)
.allowableValues(singleMatch,multiMatch)
.defaultValue(singleMatch.getValue())
.build();
public static final Relationship REL_FOUND = new Relationship.Builder()
.name("found")
.description("Any FlowFile that is successfully matched to an existing hash will be sent to this Relationship.")
.build();
public static final Relationship REL_NOT_FOUND = new Relationship.Builder()
.name("not-found")
.description("Any FlowFile that cannot be matched to an existing hash will be sent to this Relationship.")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("Any FlowFile that cannot be matched, e.g. (lacks the attribute) will be sent to this Relationship.")
.build();
@Override
protected void init(final ProcessorInitializationContext context) {
final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>();
descriptors.add(HASH_LIST_FILE);
// As mentioned above, add the PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME from parent class
descriptors.add(HASH_ALGORITHM);
descriptors.add(ATTRIBUTE_NAME);
descriptors.add(MATCH_THRESHOLD);
descriptors.add(MATCHING_MODE);
this.descriptors = Collections.unmodifiableList(descriptors);
final Set<Relationship> relationships = new HashSet<Relationship>();
relationships.add(REL_FOUND);
relationships.add(REL_NOT_FOUND);
relationships.add(REL_FAILURE);
this.relationships = Collections.unmodifiableSet(relationships);
}
@Override
public Set<Relationship> getRelationships() {
return this.relationships;
}
@Override
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return descriptors;
}
@OnScheduled
public void onScheduled(final ProcessContext context) {
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
String inputHash = flowFile.getAttribute(attributeName);
if (inputHash == null) {
getLogger().info("FlowFile {} lacks the required '{}' attribute, routing to failure.",
new Object[]{flowFile, attributeName});
session.transfer(flowFile, REL_FAILURE);
return;
}
FuzzyHashMatcher fuzzyHashMatcher = null;
switch (algorithm) {
case tlsh:
fuzzyHashMatcher = new TLSHHashMatcher(getLogger());
break;
case ssdeep:
fuzzyHashMatcher = new SSDeepHashMatcher(getLogger());
break;
default:
getLogger().error("Seems like the processor is configured to use unsupported algorithm '{}' ? Yielding.",
new Object[]{algorithm});
context.yield();
return;
}
if (fuzzyHashMatcher.isValidHash(inputHash) == false) {
// and if that is the case we log
logger.error("Invalid hash provided. Sending to failure");
// and send to failure
session.transfer(flowFile, REL_FAILURE);
session.commit();
return;
}
double similarity = 0;
double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();
try {
Map<String, Double> matched = new ConcurrentHashMap<String, Double>();
BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue());
String line = null;
iterateFile: while ((line = reader.readLine()) != null) {
if (line != null) {
similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
if (fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
String match = fuzzyHashMatcher.getMatch(line);
// A malformed file may cause a match with no filename
// Because this would simply look odd, we ignore such entry and log
if (!StringUtils.isEmpty(match)) {
matched.put(match, similarity);
} else {
logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of" +
"the {} file and ensure they are properly formatted",
new Object[]{line, HASH_LIST_FILE.getDisplayName()});
}
}
}
// Check if single match is desired and if a match has been made
if (context.getProperty(MATCHING_MODE).getValue() == singleMatch.getValue() && (matched.size() > 0)) {
// and save time by breaking the outer loop
break iterateFile;
}
}
// no matter if the break was called or not, Continue processing
// First by creating a new map to hold attributes
Map<String, String> attributes = new ConcurrentHashMap<String, String>();
// Then by iterating over the hashmap of matches
if (matched.size() > 0) {
int x = 0;
for (Map.Entry<String, Double> entry : matched.entrySet()) {
// defining attributes accordingly
attributes.put(
attributeName + "." + x + ".match",
entry.getKey());
attributes.put(
attributeName + "." + x + ".similarity",
String.valueOf(entry.getValue()));
x++;
}
// Finally, append the attributes to the flowfile and sent to match
flowFile = session.putAllAttributes(flowFile, attributes);
session.transfer(flowFile, REL_FOUND);
session.commit();
return;
} else {
// Otherwise send it to non-match
session.transfer(flowFile, REL_NOT_FOUND);
session.commit();
return;
}
} catch (IOException e) {
logger.error("Error while reading the hash input source" );
context.yield();
}
}
}