/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.email;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.FlowFileHandlingException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.stream.io.BufferedInputStream;
import org.apache.poi.hmef.Attachment;
import org.apache.poi.hmef.HMEFMessage;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@SupportsBatching
@EventDriven
@SideEffectFree
@Tags({"split", "email"})
@InputRequirement(Requirement.INPUT_REQUIRED)
@CapabilityDescription("Extract attachments from a mime formatted email file, splitting them into individual flowfiles.")
@WritesAttributes({
@WritesAttribute(attribute = "filename ", description = "The filename of the attachment"),
@WritesAttribute(attribute = "email.tnef.attachment.parent.filename ", description = "The filename of the parent FlowFile"),
@WritesAttribute(attribute = "email.tnef.attachment.parent.uuid", description = "The UUID of the original FlowFile.")})
public class ExtractTNEFAttachments extends AbstractProcessor {
public static final String ATTACHMENT_ORIGINAL_FILENAME = "email.tnef.attachment.parent.filename";
public static final String ATTACHMENT_ORIGINAL_UUID = "email.tnef.attachment.parent.uuid";
public static final Relationship REL_ATTACHMENTS = new Relationship.Builder()
.name("attachments")
.description("Each individual attachment will be routed to the attachments relationship")
.build();
public static final Relationship REL_ORIGINAL = new Relationship.Builder()
.name("original")
.description("Each original flowfile (i.e. before extraction) will be routed to the original relationship")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("Each individual flowfile that could not be parsed will be routed to the failure relationship")
.build();
private final static Set<Relationship> RELATIONSHIPS;
private final static List<PropertyDescriptor> DESCRIPTORS;
static {
final Set<Relationship> _relationships = new HashSet<>();
_relationships.add(REL_ATTACHMENTS);
_relationships.add(REL_ORIGINAL);
_relationships.add(REL_FAILURE);
RELATIONSHIPS = Collections.unmodifiableSet(_relationships);
final List<PropertyDescriptor> _descriptors = new ArrayList<>();
DESCRIPTORS = Collections.unmodifiableList(_descriptors);
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
final ComponentLog logger = getLogger();
final FlowFile originalFlowFile = session.get();
if (originalFlowFile == null) {
return;
}
final List<FlowFile> attachmentsList = new ArrayList<>();
final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
final List<FlowFile> originalFlowFilesList = new ArrayList<>();
session.read(originalFlowFile, new InputStreamCallback() {
@Override
public void process(final InputStream rawIn) throws IOException {
try (final InputStream in = new BufferedInputStream(rawIn)) {
Properties props = new Properties();
HMEFMessage hmefMessage = null;
// This will trigger an exception in case content is not a TNEF.
hmefMessage = new HMEFMessage(in);
// Add otiginal flowfile (may revert later on in case of errors) //
originalFlowFilesList.add(originalFlowFile);
if (hmefMessage != null) {
// Attachments isn empty, proceeding.
if (!hmefMessage.getAttachments().isEmpty()) {
final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
try {
for (final Attachment attachment : hmefMessage.getAttachments()) {
FlowFile split = session.create(originalFlowFile);
final Map<String, String> attributes = new HashMap<>();
if (StringUtils.isNotBlank(attachment.getLongFilename())) {
attributes.put(CoreAttributes.FILENAME.key(), attachment.getFilename());
}
String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
// TODO: Extract Mime Type (HMEF doesn't seem to be able to get this info.
split = session.append(split, new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write(attachment.getContents());
}
});
split = session.putAllAttributes(split, attributes);
attachmentsList.add(split);
}
} catch (FlowFileHandlingException e) {
// Something went wrong
// Removing splits that may have been created
session.remove(attachmentsList);
// Removing the original flow from its list
originalFlowFilesList.remove(originalFlowFile);
logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[]{originalFlowFile, e});
invalidFlowFilesList.add(originalFlowFile);
}
}
}
} catch (Exception e) {
// Another error hit...
// Removing the original flow from its list
originalFlowFilesList.remove(originalFlowFile);
logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[]{originalFlowFile, e});
// Message is invalid or triggered an error during parsing
invalidFlowFilesList.add(originalFlowFile);
}
}
});
session.transfer(attachmentsList, REL_ATTACHMENTS);
// As per above code, originalFlowfile may be routed to invalid or
// original depending on RFC2822 compliance.
session.transfer(invalidFlowFilesList, REL_FAILURE);
session.transfer(originalFlowFilesList, REL_ORIGINAL);
// check if attachments have been extracted
if (attachmentsList.size() != 0) {
if (attachmentsList.size() > 10) {
// If more than 10, summarise log
logger.info("Split {} into {} files", new Object[]{originalFlowFile, attachmentsList.size()});
} else {
// Otherwise be more verbose and list each individual split
logger.info("Split {} into {} files: {}", new Object[]{originalFlowFile, attachmentsList.size(), attachmentsList});
}
}
}
@Override
public Set<Relationship> getRelationships() {
return this.RELATIONSHIPS;
}
@Override
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return DESCRIPTORS;
}
}