/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.enrich; import org.apache.commons.lang3.StringUtils; import org.apache.commons.net.whois.WhoisClient; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.SideEffectFree; import org.apache.nifi.annotation.behavior.SupportsBatching; import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.behavior.WritesAttributes; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.util.StandardValidators; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; @EventDriven @SideEffectFree @SupportsBatching @Tags({"whois", "enrich", "ip"}) @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @CapabilityDescription("A powerful whois query processor primary designed to enrich DataFlows with whois based APIs " + "(e.g. ShadowServer's ASN lookup) but that can be also used to perform regular whois lookups.") @WritesAttributes({ @WritesAttribute(attribute = "enrich.dns.record*.group*", description = "The captured fields of the Whois query response for each of the records received"), }) public class QueryWhois extends AbstractEnrichProcessor { public static final AllowableValue BEGIN_END = new AllowableValue("Begin/End", "Begin/End", "The evaluated input of each flowfile is enclosed within begin and end tags. Each row contains a delimited set of fields"); public static final AllowableValue BULK_NONE = new AllowableValue("None", "None", "Queries are made without any particular dialect"); public static final PropertyDescriptor WHOIS_QUERY_TYPE = new PropertyDescriptor.Builder() .name("WHOIS_QUERY_TYPE") .displayName("Whois Query Type") .description("The Whois query type to be used by the processor (if used)") .required(false) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final PropertyDescriptor WHOIS_SERVER = new PropertyDescriptor.Builder() .name("WHOIS_SERVER") .displayName("Whois Server") .description("The Whois server to be used") .required(true) .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) .build(); public static final PropertyDescriptor WHOIS_SERVER_PORT = new PropertyDescriptor.Builder() .name("WHOIS_SERVER_PORT") .displayName("Whois Server Port") .description("The TCP port of the remote Whois server") .required(true) .defaultValue("43") .addValidator(StandardValidators.PORT_VALIDATOR) .build(); public static final PropertyDescriptor WHOIS_TIMEOUT = new PropertyDescriptor.Builder() .name("WHOIS_TIMEOUT") .displayName("Whois Query Timeout") .description("The amount of time to wait until considering a query as failed") .required(true) .defaultValue("1500 ms") .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) .build(); public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder() .name("BATCH_SIZE") .displayName("Batch Size") .description("The number of incoming FlowFiles to process in a single execution of this processor. ") .required(true) .defaultValue("25") .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) .build(); public static final PropertyDescriptor BULK_PROTOCOL = new PropertyDescriptor.Builder() .name("BULK_PROTOCOL") .displayName("Bulk Protocol") .description("The protocol used to perform the bulk query. ") .required(true) .defaultValue(BULK_NONE.getValue()) .allowableValues(BEGIN_END, BULK_NONE) .build(); @Override public List<ValidationResult> customValidate(ValidationContext validationContext) { final List<ValidationResult> results = new ArrayList<>(super.customValidate(validationContext)); final String chosenQUERY_PARSER = validationContext.getProperty(QUERY_PARSER).getValue(); if (!chosenQUERY_PARSER.equals(NONE.getValue()) && !validationContext.getProperty(QUERY_PARSER_INPUT).isSet() ) { results.add(new ValidationResult.Builder().input("QUERY_PARSER_INPUT") .subject(QUERY_PARSER_INPUT.getDisplayName()) .explanation("Split and Regex parsers require a valid Regular Expression") .valid(false) .build()); } if (validationContext.getProperty(BATCH_SIZE).asInteger() > 1 && !validationContext.getProperty(KEY_GROUP).isSet() ) { results.add(new ValidationResult.Builder().input("KEY_GROUP") .subject(KEY_GROUP.getDisplayName()) .explanation("when operating in Batching mode, RegEx and Split parsers require a " + "valid capture group/matching column. Configure the processor batch size to 1" + " or enter a valid column / named capture value.") .valid(false) .build()); } if ( validationContext.getProperty(BATCH_SIZE).asInteger() > 1 && chosenQUERY_PARSER.equals(NONE.getValue()) ) { results.add(new ValidationResult.Builder().input(validationContext.getProperty(BATCH_SIZE).getValue()) .subject(QUERY_PARSER.getDisplayName()) .explanation("NONE parser does not support batching. Configure Batch Size to 1 or use another parser.") .valid(false) .build()); } if ( validationContext.getProperty(BATCH_SIZE).asInteger() == 1 && !validationContext.getProperty(BULK_PROTOCOL).getValue().equals(BULK_NONE.getValue()) ) { results.add(new ValidationResult.Builder().input("BULK_PROTOCOL") .subject(BATCH_SIZE.getDisplayName()) .explanation("Bulk protocol requirement requires batching. Configure Batch Size to more than 1 or " + "use another protocol.") .valid(false) .build()); } return results; } private final static List<PropertyDescriptor> propertyDescriptors; private final static Set<Relationship> relationships; private WhoisClient whoisClient; static { List<PropertyDescriptor> props = new ArrayList<>(); props.add(QUERY_INPUT); props.add(WHOIS_QUERY_TYPE); props.add(WHOIS_SERVER); props.add(WHOIS_SERVER_PORT); props.add(WHOIS_TIMEOUT); props.add(BATCH_SIZE); props.add(BULK_PROTOCOL); props.add(QUERY_PARSER); props.add(QUERY_PARSER_INPUT); props.add(KEY_GROUP); propertyDescriptors = Collections.unmodifiableList(props); Set<Relationship> rels = new HashSet<>(); rels.add(REL_FOUND); rels.add(REL_NOT_FOUND); relationships = Collections.unmodifiableSet(rels); } @Override protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { return propertyDescriptors; } @Override public Set<Relationship> getRelationships() { return relationships; } @Override public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { final int batchSize = context.getProperty(BATCH_SIZE).asInteger(); List<FlowFile> flowFiles = session.get(batchSize); if (flowFiles == null || flowFiles.isEmpty()) { context.yield(); return; } // Build query String buildString = ""; final String queryType = context.getProperty(WHOIS_QUERY_TYPE).getValue(); // Verify the the protocol mode and craft the "begin" pseudo-command, otherwise just the query type buildString = context.getProperty(BULK_PROTOCOL).getValue().equals(BEGIN_END.getValue()) ? buildString.concat("begin ") : buildString.concat(""); // Append the query type buildString = context.getProperty(WHOIS_QUERY_TYPE).isSet() ? buildString.concat(queryType + " " ) : buildString.concat(""); // A new line is required when working on Begin/End buildString = context.getProperty(BULK_PROTOCOL).getValue().equals(BEGIN_END.getValue()) ? buildString.concat("\n") : buildString.concat(""); // append the values for (FlowFile flowFile : flowFiles) { final String evaluatedInput = context.getProperty(QUERY_INPUT).evaluateAttributeExpressions(flowFile).getValue(); buildString = buildString + evaluatedInput + "\n"; } // Verify the the protocol mode and craft the "end" pseudo-command, otherwise just the query type buildString = context.getProperty(BULK_PROTOCOL).getValue().equals(BEGIN_END.getValue()) ? buildString.concat("end") : buildString.concat(""); final String queryParser = context.getProperty(QUERY_PARSER).getValue(); final String queryRegex = context.getProperty(QUERY_PARSER_INPUT).getValue(); final int keyLookup = context.getProperty(KEY_GROUP).asInteger(); final int whoisTimeout = context.getProperty(WHOIS_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue(); final String whoisServer = context.getProperty(WHOIS_SERVER).getValue(); final int whoisPort = context.getProperty(WHOIS_SERVER_PORT).asInteger(); final List<FlowFile> flowFilesMatched = new ArrayList<FlowFile>(); final List<FlowFile> flowFilesNotMatched = new ArrayList<FlowFile>(); String result = doLookup(whoisServer, whoisPort, whoisTimeout, buildString); if (StringUtils.isEmpty(result)) { // If nothing was returned, let the processor continue its life and transfer the batch to REL_NOT_FOUND session.transfer(flowFiles, REL_NOT_FOUND); return; } else { // Run as normal for (FlowFile flowFile : flowFiles) { // Check the batchSize. If 1, run normal parser if (batchSize == 1) { Map<String, String> parsedResults = parseResponse(null, result, queryParser, queryRegex, "whois"); if (parsedResults.isEmpty()) { // parsedResults didn't return anything valid, sending to not found. flowFilesNotMatched.add(flowFile); } else { // Still, extraction is needed flowFile = session.putAllAttributes(flowFile, parsedResults); flowFilesMatched.add(flowFile); // Finished processing single result } } else { // Otherwise call the multiline parser and get the row map; final Map<String, Map<String, String>> rowMap = parseBatchResponse(result, queryParser, queryRegex, keyLookup, "whois").rowMap(); // Identify the flowfile Lookupvalue and search against the rowMap String ffLookupValue = context.getProperty(QUERY_INPUT).evaluateAttributeExpressions(flowFile).getValue(); if (rowMap.containsKey(ffLookupValue)) { // flowfile Lookup Value is contained within the results, get the properties and add to matched list flowFile = session.putAllAttributes(flowFile, rowMap.get(ffLookupValue)); flowFilesMatched.add(flowFile); } else { // otherwise add to Not Matched flowFilesNotMatched.add(flowFile); } } } } // Finally prepare to send the data down the pipeline // Because batches may include matches and non-matches, test both and send // each to its relationship if (flowFilesMatched.size() > 0) { // Sending the resulting flowfile (with attributes) to REL_FOUND session.transfer(flowFilesMatched, REL_FOUND); } if (flowFilesNotMatched.size() > 0) { // Sending whatetver didn't match to REL_NOT_FOUND session.transfer(flowFilesNotMatched, REL_NOT_FOUND); } } /** * This method performs a simple Whois lookup * @param whoisServer Server to be queried; * @param whoisPort TCP port to be useed to connect to server * @param whoisTimeout How long to wait for a response (in ms); * @param query The query to be made; */ protected String doLookup(String whoisServer, int whoisPort, int whoisTimeout, String query) { // This is a simple WHOIS lookup attempt String result = null; whoisClient = createClient(); try { // Uses pre-existing context to resolve if (!whoisClient.isConnected()) { whoisClient.connect(whoisServer, whoisPort); whoisClient.setSoTimeout(whoisTimeout); result = whoisClient.query(query); // clean up... if (whoisClient.isConnected()) whoisClient.disconnect(); } } catch ( IOException e) { getLogger().error("Query failed due to {}", new Object[]{e.getMessage()}, e); throw new ProcessException("Error performing Whois Lookup", e); } return result; } /* Note createClient() was separated from the rest of code in order to allow powermock to inject a fake return during testing */ protected WhoisClient createClient() { return new WhoisClient(); } }