package com.thinkbiganalytics.nifi.v2.hdfs;

/*-
 * #%L
 * thinkbig-nifi-hadoop-processors
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.gson.Gson;
import com.google.gson.JsonSyntaxException;

import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Base64;
import java.util.List;
import java.util.Objects;
import java.util.Set;

import javax.annotation.Nonnull;

@CapabilityDescription("Computes HDFS checksums for a list of files")
@EventDriven
@Tags({"hadoop", "HDFS", "filesystem", "thinkbig", "checksum", "hash", "md5"})
public class ComputeHDFSChecksums extends AbstractHadoopProcessor {

    /**
     * Relationship for failure
     */
    public static final Relationship REL_FAILURE = new Relationship.Builder()
        .name("failure")
        .description("At least one of the provided checksums doesn't match the computed one")
        .build();

    /**
     * Relationship for success
     */
    public static final Relationship REL_SUCCESS = new Relationship.Builder()
        .name("success")
        .description("Flow files go to the success relationship")
        .build();

    /**
     * The absolute base directory for the files given by {@link #FILES}
     */
    public static final PropertyDescriptor DIRECTORY = new PropertyDescriptor.Builder()
        .name("absolute.path")
        .description("The absolute path to the HDFS directory containing the files to check. If not provided, file names "
                     + "will be treated as absolute paths")
        .required(false)
        .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR)
        .expressionLanguageSupported(true)
        .build();

    /**
     * Directs the processor to fail if any of the given files has a provided checksum that does not match the one computed by this processor
     */
    public static final PropertyDescriptor FAIL_IF_INCORRECT_CHECKSUM = new PropertyDescriptor.Builder()
        .name("failIfWrongChecksum")
        .description("Decides whether the flow should fail if a provided checksum doesn't match the computed one")
        .required(true)
        .defaultValue("True")
        .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
        .allowableValues(Sets.newHashSet("True", "False"))
        .build();

    /**
     * A JSON-encoded list of files and checksums. File names are either relative to {@link #DIRECTORY} or absolute paths.
     */
    public static final PropertyDescriptor FILES = new PropertyDescriptor.Builder()
        .name("files")
        .description("JSON-encoded list of files with their checksums, given like: " +
                     "[{\n" +
                     "  \"name\": \"example\",\n" +
                     "  \"size\": 123456,\n" +
                     "  \"checksum\": {\n" +
                     "    \"length\": 28,\n" +
                     "    \"value\": \"AAAAAAAAAAAAAAAAcLyPS3KoaSFGi/joRB3OUQAAAAA=\",\n" +
                     "    \"algorithm\": \"MD5-of-0MD5-of-0CRC32\"\n" +
                     "  }\n" +
                     "}]")
        .required(true)
        .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR)
        .expressionLanguageSupported(true)
        .build();

    /**
     * Output paths to other NiFi processors
     */
    private static final Set<Relationship> relationships = ImmutableSet.of(REL_FAILURE, REL_SUCCESS);

    /**
     * Gets the properties list.
     *
     * @return the list of properties supported by this processor
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return ImmutableList.<PropertyDescriptor>builder()
            .addAll(super.getSupportedPropertyDescriptors())
            .add(DIRECTORY)
            .add(FAIL_IF_INCORRECT_CHECKSUM)
            .add(FILES)
            .build();
    }

    /**
     * Gets the relationships for this processor.
     *
     * @return the set of relationships
     */
    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @Override
    public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        final FileSystem fs = getFileSystem(context);
        if (fs == null) {
            getLog().error("Couldn't initialize HDFS");
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        // Evaluate the processor properties against the incoming flow file
        String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
        String absolutePath = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
        Boolean failIfWrongChecksum = context.getProperty(FAIL_IF_INCORRECT_CHECKSUM)
            .evaluateAttributeExpressions(flowFile).asBoolean();

        Gson jsonParser = new Gson();
        File[] filesList;
        try {
            filesList = jsonParser.fromJson(filesJSON, File[].class);
            if (filesList == null) {
                filesList = new File[0];
            }
            for (File f : filesList) {
                // Resolve the file path: relative to the configured directory, or absolute if no directory was given
                String name = f.getName();
                Path filePath;
                if (absolutePath == null || absolutePath.isEmpty()) {
                    filePath = new Path(name);
                } else {
                    filePath = new Path(absolutePath, name);
                }

                FileChecksum computedChecksum = fs.getFileChecksum(filePath);
                if (computedChecksum == null) {
                    // getFileChecksum returns null when the underlying filesystem does not provide checksums
                    getLog().error("No checksum could be computed for file: " + filePath.toString());
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
                String base64Checksum = Base64.getEncoder().encodeToString(computedChecksum.getBytes());
                f.setComputedChecksum(new Checksum(base64Checksum.length(), base64Checksum, computedChecksum.getAlgorithmName()));

                if (failIfWrongChecksum && !Objects.equals(base64Checksum, f.getChecksum().getValue())) {
                    getLog().error("Checksums don't match! File: " + filePath.toString() +
                                   " checksum provided: " + f.getChecksum().getValue() +
                                   " checksum computed: " + base64Checksum);
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
            }
        } catch (JsonSyntaxException e) {
            getLog().error("Files list attribute does not contain a proper JSON array");
            session.transfer(flowFile, REL_FAILURE);
            return;
        } catch (FileNotFoundException e) {
            getLog().error("One of the provided files was not found.\n" + e.getMessage());
            session.transfer(flowFile, REL_FAILURE);
            return;
        } catch (IOException e) {
            throw new ProcessException(e);
        }

        // Write the file list, now enriched with computed checksums, back to the flow file attribute
        flowFile = session.putAttribute(flowFile, FILES.getName(), jsonParser.toJson(filesList));
        session.transfer(flowFile, REL_SUCCESS);
    }

    /**
     * Bean describing a single entry of the {@link #FILES} JSON array
     */
    class File {

        private String name;
        private Integer size;
        private Checksum checksum;
        private Checksum computedChecksum;

        public Checksum getComputedChecksum() {
            return computedChecksum;
        }

        public void setComputedChecksum(Checksum computedChecksum) {
            this.computedChecksum = computedChecksum;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public Integer getSize() {
            return size;
        }

        public void setSize(Integer size) {
            this.size = size;
        }

        public Checksum getChecksum() {
            return checksum;
        }

        public void setChecksum(Checksum checksum) {
            this.checksum = checksum;
        }
    }

    /**
     * Bean describing a checksum: its length, Base64-encoded value, and algorithm name
     */
    class Checksum {

        private Integer length;
        private String value;
        private String algorithm;

        public Checksum(Integer length, String value, String algorithm) {
            this.length = length;
            this.value = value;
            this.algorithm = algorithm;
        }

        public Integer getLength() {
            return length;
        }

        public void setLength(Integer length) {
            this.length = length;
        }

        public String getValue() {
            return value;
        }

        public void setValue(String value) {
            this.value = value;
        }

        public String getAlgorithm() {
            return algorithm;
        }

        public void setAlgorithm(String algorithm) {
            this.algorithm = algorithm;
        }
    }
}