package com.thinkbiganalytics.nifi.v2.hdfs;
/*-
* #%L
* thinkbig-nifi-hadoop-processors
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.util.StopWatch;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
/**
* This processor creates an HDFS folder
*/
@EventDriven
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"hadoop", "HDFS", "folder"})
@CapabilityDescription("Create a folder in Hadoop Distributed File System (HDFS)")
public class CreateHDFSFolder extends AbstractHadoopProcessor {
// relationships
public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("Files that have been successfully written to HDFS are transferred to this relationship")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description(
"Files that could not be written to HDFS for some reason are transferred to this relationship")
.build();
// properties
public static final PropertyDescriptor DIRECTORY = new PropertyDescriptor.Builder()
.name(DIRECTORY_PROP_NAME)
.description("The full HDFS directory(s) to create separated by newline")
.required(true)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(true)
.build();
public static final PropertyDescriptor UMASK = new PropertyDescriptor.Builder()
.name("Permissions umask")
.description(
"A umask represented as an octal number which determines the permissions of files written to HDFS. This overrides the Hadoop Configuration dfs.umaskmode")
.addValidator(createUmaskValidator())
.build();
public static final PropertyDescriptor REMOTE_OWNER = new PropertyDescriptor.Builder()
.name("Remote Owner")
.description(
"Changes the owner of the HDFS file to this value after it is written. This only works if NiFi is running as a user that has HDFS super user privilege to change owner")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(true)
.build();
public static final PropertyDescriptor REMOTE_GROUP = new PropertyDescriptor.Builder()
.name("Remote Group")
.description(
"Changes the group of the HDFS file to this value after it is written. This only works if NiFi is running as a user that has HDFS super user privilege to change group")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(true)
.build();
private static final Set<Relationship> relationships;
static {
final Set<Relationship> rels = new HashSet<>();
rels.add(REL_SUCCESS);
rels.add(REL_FAILURE);
relationships = Collections.unmodifiableSet(rels);
}
/*
* Validates that a property is a valid umask, i.e. a short octal number that is not negative.
*/
static Validator createUmaskValidator() {
return new Validator() {
@Override
public ValidationResult validate(final String subject, final String value, final ValidationContext context) {
String reason = null;
try {
final short shortVal = Short.parseShort(value, 8);
if (shortVal < 0) {
reason = "octal umask [" + value + "] cannot be negative";
} else if (shortVal > 511) {
// HDFS umask has 9 bits: rwxrwxrwx ; the sticky bit cannot be umasked
reason = "octal umask [" + value + "] is not a valid umask";
}
} catch (final NumberFormatException e) {
reason = "[" + value + "] is not a valid short octal number";
}
return new ValidationResult.Builder().subject(subject).input(value).explanation(reason).valid(reason == null)
.build();
}
};
}
static short resolveUMask(final PropertyValue umaskProp) {
final short dfsUmask;
if (umaskProp.isSet()) {
dfsUmask = Short.parseShort(umaskProp.getValue(), 8);
} else {
dfsUmask = FsPermission.DEFAULT_UMASK;
}
return dfsUmask;
}
@Override
public Set<Relationship> getRelationships() {
return relationships;
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
List<PropertyDescriptor> props = new ArrayList<>(super.getSupportedPropertyDescriptors());
props.add(DIRECTORY);
props.add(UMASK);
props.add(REMOTE_OWNER);
props.add(REMOTE_GROUP);
return Collections.unmodifiableList(props);
}
/**
* @param context The context provides configuration properties from the processor
* @throws IOException in the event
* @see OnScheduled
*/
@OnScheduled
public void onScheduled(ProcessContext context) throws IOException {
super.abstractOnScheduled(context);
// Set umask once, to avoid thread safety issues doing it in onTrigger
final PropertyValue umaskProp = context.getProperty(UMASK);
final short dfsUmask = resolveUMask(umaskProp);
final Configuration conf = getConfiguration();
FsPermission.setUMask(conf, new FsPermission(dfsUmask));
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final StopWatch stopWatch = new StopWatch(true);
try {
final FileSystem hdfs = getFileSystem(context);
if (hdfs == null) {
getLog().error("HDFS not configured properly");
session.transfer(flowFile, REL_FAILURE);
context.yield();
return;
}
String owner = context.getProperty(REMOTE_OWNER).evaluateAttributeExpressions(flowFile).getValue();
String group = context.getProperty(REMOTE_GROUP).evaluateAttributeExpressions(flowFile).getValue();
HDFSSupport hdfsSupport = new HDFSSupport(hdfs);
String pathString = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
String[] paths = pathString.split("\\r?\\n");
// Create for each path defined
for (String path : paths) {
getLog().info("Creating folder " + path);
final Path folderPath = new Path(path.trim());
hdfsSupport.createFolder(folderPath, owner, group);
}
stopWatch.stop();
final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
getLog().info("created folders {} in {} milliseconds",
new Object[]{pathString, millis});
session.transfer(flowFile, REL_SUCCESS);
} catch (Exception e) {
getLog().error("failed folder creation {}",
new Object[]{e});
session.transfer(flowFile, REL_FAILURE);
}
}
}