package com.thinkbiganalytics.nifi.v2.ingest;
/*-
* #%L
* thinkbig-nifi-core-processors
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.ingest.StripHeaderSupport;
import com.thinkbiganalytics.nifi.processor.AbstractNiFiProcessor;
import org.apache.commons.lang3.mutable.MutableLong;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.util.StandardValidators;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * NiFi processor that separates the leading header lines of a text FlowFile from the rest of its content.
 *
 * <p>Routing performed by {@link #onTrigger(ProcessContext, ProcessSession)}:</p>
 * <ul>
 *   <li>processing disabled, header line count of zero, or empty FlowFile: an unmodified clone goes to
 *       {@code content} and the incoming FlowFile goes to {@code original};</li>
 *   <li>header located: the header bytes go to {@code header}, the remaining bytes go to {@code content} and the
 *       incoming FlowFile goes to {@code original};</li>
 *   <li>header cannot be located, or the evaluated header line count is not a usable non-negative integer: the
 *       incoming FlowFile goes to {@code failure} and nothing is routed elsewhere.</li>
 * </ul>
 */
@EventDriven
@SideEffectFree
@Tags({"header", "text"})
@InputRequirement(Requirement.INPUT_REQUIRED)
@CapabilityDescription("Splits a text file(s) content from its header. The content of the header is passed through a separate relationship for validation")
public class StripHeader extends AbstractNiFiProcessor {

    /** Toggle for header stripping; supports expression language evaluated against the incoming FlowFile. */
    public static final PropertyDescriptor ENABLED = new PropertyDescriptor.Builder()
        .name("Enable processing")
        .description("Whether to strip the header")
        .required(true)
        .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
        .defaultValue("false")
        .expressionLanguageSupported(true)
        .build();

    /** Number of leading lines treated as the header; supports expression language. */
    public static final PropertyDescriptor HEADER_LINE_COUNT = new PropertyDescriptor.Builder()
        .name("Header Line Count")
        .description("The number of lines that should be considered part of the header")
        .required(true)
        .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
        .defaultValue("1")
        .expressionLanguageSupported(true)
        .build();

    /** Receives the untouched incoming FlowFile whenever processing succeeds. */
    public static final Relationship REL_ORIGINAL = new Relationship.Builder()
        .name("original")
        .description("The original input file will be routed to this destination")
        .build();

    /** Receives the content, with the header removed when stripping is enabled. */
    public static final Relationship REL_CONTENT = new Relationship.Builder()
        .name("content")
        .description("The content (stripped of header if enabled) will be routed to this destination")
        .build();

    /** Receives the header bytes when a header was stripped. */
    public static final Relationship REL_HEADER = new Relationship.Builder()
        .name("header")
        .description("The header will be routed to this destination when header is stripped")
        .build();

    /** Receives the incoming FlowFile when it could not be split. */
    public static final Relationship REL_FAILURE = new Relationship.Builder()
        .name("failure")
        .description("If a file cannot be split for some reason, the original file will be routed to this destination and nothing will be routed elsewhere")
        .build();

    private List<PropertyDescriptor> properties;
    private Set<Relationship> relationships;

    /**
     * Registers the supported properties and relationships as unmodifiable collections.
     *
     * @param context the initialization context, forwarded to the superclass
     */
    @Override
    protected void init(final ProcessorInitializationContext context) {
        super.init(context);

        final List<PropertyDescriptor> descriptors = new ArrayList<>();
        descriptors.add(ENABLED);
        descriptors.add(HEADER_LINE_COUNT);
        this.properties = Collections.unmodifiableList(descriptors);

        final Set<Relationship> rels = new HashSet<>();
        rels.add(REL_ORIGINAL);
        rels.add(REL_CONTENT);
        rels.add(REL_HEADER);
        rels.add(REL_FAILURE);
        this.relationships = Collections.unmodifiableSet(rels);
    }

    /**
     * @return the unmodifiable set of relationships built in {@link #init(ProcessorInitializationContext)}
     */
    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * @return the unmodifiable list of properties built in {@link #init(ProcessorInitializationContext)}
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return properties;
    }

    /**
     * Pulls one FlowFile from the session and routes it as described in the class documentation.
     *
     * @param context provides the processor property values
     * @param session the session used to read, clone and transfer FlowFiles
     */
    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) {
        final FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        // Boolean.TRUE.equals guards against a null evaluation result, which would otherwise fail on unboxing.
        final boolean isEnabled = Boolean.TRUE.equals(context.getProperty(ENABLED).evaluateAttributeExpressions(flowFile).asBoolean());

        // Empty files and no work to do will simply pass along content
        if (!isEnabled || flowFile.getSize() == 0L) {
            passThrough(session, flowFile);
            return;
        }

        // The line count is only evaluated once it is actually needed. Because the property supports expression
        // language, the evaluated value may be non-numeric or negative even though the static validator passed.
        final Integer headerCount = resolveHeaderCount(context, flowFile);
        if (headerCount == null) {
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        if (headerCount == 0) {
            passThrough(session, flowFile);
            return;
        }

        final long headerBytes = findHeaderBoundary(session, flowFile, headerCount);
        if (headerBytes < 0) {
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        // Transfer header
        final FlowFile headerFlowFile = session.clone(flowFile, 0, headerBytes);
        session.transfer(headerFlowFile, REL_HEADER);

        // Transfer content
        final long contentBytes = flowFile.getSize() - headerBytes;
        final FlowFile contentFlowFile = session.clone(flowFile, headerBytes, contentBytes);
        session.transfer(contentFlowFile, REL_CONTENT);

        session.transfer(flowFile, REL_ORIGINAL);
    }

    /**
     * Routes an unmodified clone of the FlowFile to {@code content} and the FlowFile itself to {@code original}.
     */
    private void passThrough(final ProcessSession session, final FlowFile flowFile) {
        final FlowFile contentFlowFile = session.clone(flowFile);
        session.transfer(contentFlowFile, REL_CONTENT);
        session.transfer(flowFile, REL_ORIGINAL);
    }

    /**
     * Evaluates {@link #HEADER_LINE_COUNT} against the FlowFile.
     *
     * @return the non-negative header line count, or {@code null} (after logging an error) when the evaluated value
     * is missing, negative or not an integer
     */
    private Integer resolveHeaderCount(final ProcessContext context, final FlowFile flowFile) {
        try {
            final Integer count = context.getProperty(HEADER_LINE_COUNT).evaluateAttributeExpressions(flowFile).asInteger();
            if (count == null || count < 0) {
                getLog().error("Unable to strip header {} because Header Line Count must be a non-negative integer but was {}; routing to failure",
                               new Object[]{flowFile, count});
                return null;
            }
            return count;
        } catch (final NumberFormatException e) {
            getLog().error("Unable to strip header {} because Header Line Count is not a valid integer: {}; routing to failure",
                           new Object[]{flowFile, e.getLocalizedMessage()}, e);
            return null;
        }
    }

    /**
     * Reads the FlowFile content and asks {@link StripHeaderSupport} for the byte offset at which the header ends.
     *
     * @return the value reported by {@code StripHeaderSupport.findHeaderBoundary}, or {@code -1} when reading failed;
     * any negative value is treated by the caller as "header not found"
     */
    private long findHeaderBoundary(final ProcessSession session, final FlowFile flowFile, final int headerCount) {
        final StripHeaderSupport headerSupport = new StripHeaderSupport();
        // Stays at -1 when the callback fails, so an I/O error is handled the same way as a missing header.
        final MutableLong headerBoundaryInBytes = new MutableLong(-1);

        session.read(flowFile, false, rawIn -> {
            try {
                // Identify the byte boundary of the header
                final long bytes = headerSupport.findHeaderBoundary(headerCount, rawIn);
                headerBoundaryInBytes.setValue(bytes);

                if (bytes < 0) {
                    getLog().error("Unable to strip header {} expecting at least {} lines in file", new Object[]{flowFile, headerCount});
                }
            } catch (IOException e) {
                getLog().error("Unable to strip header {} due to {}; routing to failure", new Object[]{flowFile, e.getLocalizedMessage()}, e);
            }
        });

        return headerBoundaryInBytes.longValue();
    }
}