/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.camel.component.file;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.regex.Pattern;
import org.apache.camel.AsyncCallback;
import org.apache.camel.Exchange;
import org.apache.camel.Message;
import org.apache.camel.Processor;
import org.apache.camel.ShutdownRunningTask;
import org.apache.camel.impl.ScheduledBatchPollingConsumer;
import org.apache.camel.util.CastUtils;
import org.apache.camel.util.StopWatch;
import org.apache.camel.util.StringHelper;
import org.apache.camel.util.TimeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Base class for file consumers.
*/
public abstract class GenericFileConsumer<T> extends ScheduledBatchPollingConsumer {
protected final Logger log = LoggerFactory.getLogger(getClass());
protected GenericFileEndpoint<T> endpoint;
protected GenericFileOperations<T> operations;
protected String fileExpressionResult;
protected volatile ShutdownRunningTask shutdownRunningTask;
protected volatile int pendingExchanges;
protected Processor customProcessor;
protected boolean eagerLimitMaxMessagesPerPoll = true;
protected volatile boolean prepareOnStartup;
private final Pattern includePattern;
private final Pattern excludePattern;
public GenericFileConsumer(GenericFileEndpoint<T> endpoint, Processor processor, GenericFileOperations<T> operations) {
super(endpoint, processor);
this.endpoint = endpoint;
this.operations = operations;
this.includePattern = endpoint.getIncludePattern();
this.excludePattern = endpoint.getExcludePattern();
}
public Processor getCustomProcessor() {
return customProcessor;
}
/**
* Use a custom processor to process the exchange.
* <p/>
* Only set this if you need to do custom processing, instead of the regular processing.
* <p/>
* This is for example used to browse file endpoints by leveraging the file consumer to poll
* the directory to gather the list of exchanges. But to avoid processing the files regularly
* we can use a custom processor.
*
* @param processor a custom processor
*/
public void setCustomProcessor(Processor processor) {
this.customProcessor = processor;
}
public boolean isEagerLimitMaxMessagesPerPoll() {
return eagerLimitMaxMessagesPerPoll;
}
public void setEagerLimitMaxMessagesPerPoll(boolean eagerLimitMaxMessagesPerPoll) {
this.eagerLimitMaxMessagesPerPoll = eagerLimitMaxMessagesPerPoll;
}
/**
* Poll for files
*/
protected int poll() throws Exception {
// must prepare on startup the very first time
if (!prepareOnStartup) {
// prepare on startup
endpoint.getGenericFileProcessStrategy().prepareOnStartup(operations, endpoint);
prepareOnStartup = true;
}
// must reset for each poll
fileExpressionResult = null;
shutdownRunningTask = null;
pendingExchanges = 0;
// before we poll is there anything we need to check?
// such as are we connected to the FTP Server still?
if (!prePollCheck()) {
log.debug("Skipping poll as pre poll check returned false");
return 0;
}
// gather list of files to process
List<GenericFile<T>> files = new ArrayList<GenericFile<T>>();
String name = endpoint.getConfiguration().getDirectory();
// time how long it takes to poll
StopWatch stop = new StopWatch();
boolean limitHit;
try {
limitHit = !pollDirectory(name, files, 0);
} catch (Exception e) {
// during poll directory we add files to the in progress repository, in case of any exception thrown after this work
// we must then drain the in progress files before rethrowing the exception
log.debug("Error occurred during poll directory: " + name + " due " + e.getMessage() + ". Removing " + files.size() + " files marked as in-progress.");
removeExcessiveInProgressFiles(files);
throw e;
}
long delta = stop.stop();
if (log.isDebugEnabled()) {
log.debug("Took {} to poll: {}", TimeUtils.printDuration(delta), name);
}
// log if we hit the limit
if (limitHit) {
log.debug("Limiting maximum messages to poll at {} files as there were more messages in this poll.", maxMessagesPerPoll);
}
// sort files using file comparator if provided
if (endpoint.getSorter() != null) {
files.sort(endpoint.getSorter());
}
// sort using build in sorters so we can use expressions
// use a linked list so we can dequeue the exchanges
LinkedList<Exchange> exchanges = new LinkedList<Exchange>();
for (GenericFile<T> file : files) {
Exchange exchange = endpoint.createExchange(file);
endpoint.configureExchange(exchange);
endpoint.configureMessage(file, exchange.getIn());
exchanges.add(exchange);
}
// sort files using exchange comparator if provided
if (endpoint.getSortBy() != null) {
exchanges.sort(endpoint.getSortBy());
}
if (endpoint.isShuffle()) {
Collections.shuffle(exchanges);
}
// use a queue for the exchanges
Deque<Exchange> q = exchanges;
// we are not eager limiting, but we have configured a limit, so cut the list of files
if (!eagerLimitMaxMessagesPerPoll && maxMessagesPerPoll > 0) {
if (files.size() > maxMessagesPerPoll) {
log.debug("Limiting maximum messages to poll at {} files as there were more messages in this poll.", maxMessagesPerPoll);
// must first remove excessive files from the in progress repository
removeExcessiveInProgressFiles(q, maxMessagesPerPoll);
}
}
// consume files one by one
int total = exchanges.size();
if (total > 0) {
log.debug("Total {} files to consume", total);
}
int polledMessages = processBatch(CastUtils.cast(q));
postPollCheck(polledMessages);
return polledMessages;
}
public int processBatch(Queue<Object> exchanges) {
int total = exchanges.size();
int answer = total;
// limit if needed
if (maxMessagesPerPoll > 0 && total > maxMessagesPerPoll) {
log.debug("Limiting to maximum messages to poll {} as there were {} messages in this poll.", maxMessagesPerPoll, total);
total = maxMessagesPerPoll;
}
for (int index = 0; index < total && isBatchAllowed(); index++) {
// only loop if we are started (allowed to run)
// use poll to remove the head so it does not consume memory even after we have processed it
Exchange exchange = (Exchange) exchanges.poll();
// add current index and total as properties
exchange.setProperty(Exchange.BATCH_INDEX, index);
exchange.setProperty(Exchange.BATCH_SIZE, total);
exchange.setProperty(Exchange.BATCH_COMPLETE, index == total - 1);
// update pending number of exchanges
pendingExchanges = total - index - 1;
// process the current exchange
boolean started;
if (customProcessor != null) {
// use a custom processor
started = customProcessExchange(exchange, customProcessor);
} else {
// process the exchange regular
started = processExchange(exchange);
}
// if we did not start process the file then decrement the counter
if (!started) {
answer--;
}
}
// drain any in progress files as we are done with this batch
removeExcessiveInProgressFiles(CastUtils.cast((Deque<?>) exchanges, Exchange.class), 0);
return answer;
}
/**
* Drain any in progress files as we are done with this batch
*
* @param exchanges the exchanges
* @param limit the limit
*/
protected void removeExcessiveInProgressFiles(Deque<Exchange> exchanges, int limit) {
// remove the file from the in progress list in case the batch was limited by max messages per poll
while (exchanges.size() > limit) {
// must remove last
Exchange exchange = exchanges.removeLast();
GenericFile<?> file = exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE, GenericFile.class);
String key = file.getAbsoluteFilePath();
endpoint.getInProgressRepository().remove(key);
}
}
/**
* Drain any in progress files as we are done with the files
*
* @param files the files
*/
protected void removeExcessiveInProgressFiles(List<GenericFile<T>> files) {
for (GenericFile file : files) {
String key = file.getAbsoluteFilePath();
endpoint.getInProgressRepository().remove(key);
}
}
/**
* Whether or not we can continue polling for more files
*
* @param fileList the current list of gathered files
* @return <tt>true</tt> to continue, <tt>false</tt> to stop due hitting maxMessagesPerPoll limit
*/
public boolean canPollMoreFiles(List<?> fileList) {
// at this point we should not limit if we are not eager
if (!eagerLimitMaxMessagesPerPoll) {
return true;
}
if (maxMessagesPerPoll <= 0) {
// no limitation
return true;
}
// then only poll if we haven't reached the max limit
return fileList.size() < maxMessagesPerPoll;
}
/**
* Override if required. Perform some checks (and perhaps actions) before we poll.
*
* @return <tt>true</tt> to poll, <tt>false</tt> to skip this poll.
*/
protected boolean prePollCheck() throws Exception {
return true;
}
/**
* Override if required. Perform some checks (and perhaps actions) after we have polled.
*
* @param polledMessages number of polled messages
*/
protected void postPollCheck(int polledMessages) {
// noop
}
/**
* Polls the given directory for files to process
*
* @param fileName current directory or file
* @param fileList current list of files gathered
* @param depth the current depth of the directory (will start from 0)
* @return whether or not to continue polling, <tt>false</tt> means the maxMessagesPerPoll limit has been hit
*/
protected abstract boolean pollDirectory(String fileName, List<GenericFile<T>> fileList, int depth);
/**
* Sets the operations to be used.
* <p/>
* Can be used to set a fresh operations in case of recovery attempts
*
* @param operations the operations
*/
public void setOperations(GenericFileOperations<T> operations) {
this.operations = operations;
}
/**
* Whether to ignore if the file cannot be retrieved.
* <p/>
* By default an {@link GenericFileOperationFailedException} is thrown if the file cannot be retrieved.
* <p/>
* This method allows to suppress this and just ignore that.
*
* @param name the file name
* @param exchange the exchange
* @param cause optional exception occurred during retrieving file
* @return <tt>true</tt> to ignore, <tt>false</tt> is the default.
*/
protected boolean ignoreCannotRetrieveFile(String name, Exchange exchange, Exception cause) {
return false;
}
/**
* Processes the exchange
*
* @param exchange the exchange
* @return <tt>true</tt> if the file was started to be processed, <tt>false</tt> if the file was not started
* to be processed, for some reason (not found, or aborted etc)
*/
protected boolean processExchange(final Exchange exchange) {
GenericFile<T> file = getExchangeFileProperty(exchange);
log.trace("Processing file: {}", file);
// must extract the absolute name before the begin strategy as the file could potentially be pre moved
// and then the file name would be changed
String absoluteFileName = file.getAbsoluteFilePath();
// check if we can begin processing the file
final GenericFileProcessStrategy<T> processStrategy = endpoint.getGenericFileProcessStrategy();
Exception beginCause = null;
boolean begin = false;
try {
begin = processStrategy.begin(operations, endpoint, exchange, file);
} catch (Exception e) {
beginCause = e;
}
if (!begin) {
// no something was wrong, so we need to abort and remove the file from the in progress list
Exception abortCause = null;
log.debug("{} cannot begin processing file: {}", endpoint, file);
try {
// abort
processStrategy.abort(operations, endpoint, exchange, file);
} catch (Exception e) {
abortCause = e;
} finally {
// begin returned false, so remove file from the in progress list as its no longer in progress
endpoint.getInProgressRepository().remove(absoluteFileName);
}
if (beginCause != null) {
String msg = endpoint + " cannot begin processing file: " + file + " due to: " + beginCause.getMessage();
handleException(msg, beginCause);
}
if (abortCause != null) {
String msg2 = endpoint + " cannot abort processing file: " + file + " due to: " + abortCause.getMessage();
handleException(msg2, abortCause);
}
return false;
}
// must use file from exchange as it can be updated due the
// preMoveNamePrefix/preMoveNamePostfix options
final GenericFile<T> target = getExchangeFileProperty(exchange);
// we can begin processing the file so update file headers on the Camel message
// in case it took some time to acquire read lock, and file size/timestamp has been updated since etc
updateFileHeaders(target, exchange.getIn());
// must use full name when downloading so we have the correct path
final String name = target.getAbsoluteFilePath();
try {
if (isRetrieveFile()) {
// retrieve the file using the stream
log.trace("Retrieving file: {} from: {}", name, endpoint);
// retrieve the file and check it was a success
boolean retrieved;
Exception cause = null;
try {
retrieved = operations.retrieveFile(name, exchange);
} catch (Exception e) {
retrieved = false;
cause = e;
}
if (!retrieved) {
if (ignoreCannotRetrieveFile(name, exchange, cause)) {
log.trace("Cannot retrieve file {} maybe it does not exists. Ignoring.", name);
// remove file from the in progress list as we could not retrieve it, but should ignore
endpoint.getInProgressRepository().remove(absoluteFileName);
return false;
} else {
// throw exception to handle the problem with retrieving the file
// then if the method return false or throws an exception is handled the same in here
// as in both cases an exception is being thrown
if (cause != null && cause instanceof GenericFileOperationFailedException) {
throw cause;
} else {
throw new GenericFileOperationFailedException("Cannot retrieve file: " + file + " from: " + endpoint, cause);
}
}
}
log.trace("Retrieved file: {} from: {}", name, endpoint);
} else {
log.trace("Skipped retrieval of file: {} from: {}", name, endpoint);
exchange.getIn().setBody(null);
}
// register on completion callback that does the completion strategies
// (for instance to move the file after we have processed it)
exchange.addOnCompletion(new GenericFileOnCompletion<T>(endpoint, operations, target, absoluteFileName));
log.debug("About to process file: {} using exchange: {}", target, exchange);
if (endpoint.isSynchronous()) {
// process synchronously
getProcessor().process(exchange);
} else {
// process the exchange using the async consumer to support async routing engine
// which can be supported by this file consumer as all the done work is
// provided in the GenericFileOnCompletion
getAsyncProcessor().process(exchange, new AsyncCallback() {
public void done(boolean doneSync) {
// noop
if (log.isTraceEnabled()) {
log.trace("Done processing file: {} {}", target, doneSync ? "synchronously" : "asynchronously");
}
}
});
}
} catch (Exception e) {
// remove file from the in progress list due to failure
// (cannot be in finally block due to GenericFileOnCompletion will remove it
// from in progress when it takes over and processes the file, which may happen
// by another thread at a later time. So its only safe to remove it if there was an exception)
endpoint.getInProgressRepository().remove(absoluteFileName);
String msg = "Error processing file " + file + " due to " + e.getMessage();
handleException(msg, e);
}
return true;
}
/**
* Updates the information on {@link Message} after we have acquired read-lock and
* can begin process the file.
*
* @param file the file
* @param message the Camel message to update its headers
*/
protected abstract void updateFileHeaders(GenericFile<T> file, Message message);
/**
* Override if required. Files are retrieved / returns true by default
*
* @return <tt>true</tt> to retrieve files, <tt>false</tt> to skip retrieval of files.
*/
protected boolean isRetrieveFile() {
return true;
}
/**
* Processes the exchange using a custom processor.
*
* @param exchange the exchange
* @param processor the custom processor
*/
protected boolean customProcessExchange(final Exchange exchange, final Processor processor) {
GenericFile<T> file = getExchangeFileProperty(exchange);
log.trace("Custom processing file: {}", file);
// must extract the absolute name before the begin strategy as the file could potentially be pre moved
// and then the file name would be changed
String absoluteFileName = file.getAbsoluteFilePath();
try {
// process using the custom processor
processor.process(exchange);
} catch (Exception e) {
if (log.isDebugEnabled()) {
log.debug(endpoint + " error custom processing: " + file + " due to: " + e.getMessage() + ". This exception will be ignored.", e);
}
handleException(e);
} finally {
// always remove file from the in progress list as its no longer in progress
// use the original file name that was used to add it to the repository
// as the name can be different when using preMove option
endpoint.getInProgressRepository().remove(absoluteFileName);
}
return true;
}
/**
* Strategy for validating if the given remote file should be included or not
*
* @param file the file
* @param isDirectory whether the file is a directory or a file
* @param files files in the directory
* @return <tt>true</tt> to include the file, <tt>false</tt> to skip it
*/
protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, List<T> files) {
String absoluteFilePath = file.getAbsoluteFilePath();
if (!isMatched(file, isDirectory, files)) {
log.trace("File did not match. Will skip this file: {}", file);
return false;
}
// directory is always valid
if (isDirectory) {
return true;
}
// check if file is already in progress
if (endpoint.getInProgressRepository().contains(absoluteFilePath)) {
if (log.isTraceEnabled()) {
log.trace("Skipping as file is already in progress: {}", file.getFileName());
}
return false;
}
// if its a file then check we have the file in the idempotent registry already
if (endpoint.isIdempotent()) {
// use absolute file path as default key, but evaluate if an expression key was configured
String key = file.getAbsoluteFilePath();
if (endpoint.getIdempotentKey() != null) {
Exchange dummy = endpoint.createExchange(file);
key = endpoint.getIdempotentKey().evaluate(dummy, String.class);
}
if (key != null && endpoint.getIdempotentRepository().contains(key)) {
log.trace("This consumer is idempotent and the file has been consumed before matching idempotentKey: {}. Will skip this file: {}", key, file);
return false;
}
}
// okay so final step is to be able to add atomic as in-progress, so we are the
// only thread processing this file
return endpoint.getInProgressRepository().add(absoluteFilePath);
}
/**
* Strategy to perform file matching based on endpoint configuration.
* <p/>
* Will always return <tt>false</tt> for certain files/folders:
* <ul>
* <li>Starting with a dot</li>
* <li>lock files</li>
* </ul>
* And then <tt>true</tt> for directories.
*
* @param file the file
* @param isDirectory whether the file is a directory or a file
* @param files files in the directory
* @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
*/
protected boolean isMatched(GenericFile<T> file, boolean isDirectory, List<T> files) {
String name = file.getFileNameOnly();
// folders/names starting with dot is always skipped (eg. ".", ".camel", ".camelLock")
if (name.startsWith(".")) {
return false;
}
// lock files should be skipped
if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) {
return false;
}
if (endpoint.getFilter() != null) {
if (!endpoint.getFilter().accept(file)) {
return false;
}
}
if (endpoint.getAntFilter() != null) {
if (!endpoint.getAntFilter().accept(file)) {
return false;
}
}
if (isDirectory && endpoint.getFilterDirectory() != null) {
// create a dummy exchange as Exchange is needed for expression evaluation
Exchange dummy = endpoint.createExchange(file);
boolean matches = endpoint.getFilterDirectory().matches(dummy);
if (!matches) {
return false;
}
}
// directories are regarded as matched if filter accepted them
if (isDirectory) {
return true;
}
// exclude take precedence over include
if (excludePattern != null) {
if (excludePattern.matcher(name).matches()) {
return false;
}
}
if (includePattern != null) {
if (!includePattern.matcher(name).matches()) {
return false;
}
}
// use file expression for a simple dynamic file filter
if (endpoint.getFileName() != null) {
fileExpressionResult = evaluateFileExpression();
if (fileExpressionResult != null) {
if (!name.equals(fileExpressionResult)) {
return false;
}
}
}
if (endpoint.getFilterFile() != null) {
// create a dummy exchange as Exchange is needed for expression evaluation
Exchange dummy = endpoint.createExchange(file);
boolean matches = endpoint.getFilterFile().matches(dummy);
if (!matches) {
return false;
}
}
// if done file name is enabled, then the file is only valid if a done file exists
if (endpoint.getDoneFileName() != null) {
// done file must be in same path as the file
String doneFileName = endpoint.createDoneFileName(file.getAbsoluteFilePath());
StringHelper.notEmpty(doneFileName, "doneFileName", endpoint);
// is it a done file name?
if (endpoint.isDoneFile(file.getFileNameOnly())) {
log.trace("Skipping done file: {}", file);
return false;
}
if (!isMatched(file, doneFileName, files)) {
return false;
}
}
return true;
}
/**
* Strategy to perform file matching based on endpoint configuration in terms of done file name.
*
* @param file the file
* @param doneFileName the done file name (without any paths)
* @param files files in the directory
* @return <tt>true</tt> if the file is matched, <tt>false</tt> if not
*/
protected abstract boolean isMatched(GenericFile<T> file, String doneFileName, List<T> files);
/**
* Is the given file already in progress.
*
* @param file the file
* @return <tt>true</tt> if the file is already in progress
* @deprecated no longer in use, use {@link org.apache.camel.component.file.GenericFileEndpoint#getInProgressRepository()} instead.
*/
@Deprecated
protected boolean isInProgress(GenericFile<T> file) {
String key = file.getAbsoluteFilePath();
// must use add, to have operation as atomic
return !endpoint.getInProgressRepository().add(key);
}
protected String evaluateFileExpression() {
if (fileExpressionResult == null && endpoint.getFileName() != null) {
// create a dummy exchange as Exchange is needed for expression evaluation
Exchange dummy = endpoint.createExchange();
fileExpressionResult = endpoint.getFileName().evaluate(dummy, String.class);
}
return fileExpressionResult;
}
@SuppressWarnings("unchecked")
private GenericFile<T> getExchangeFileProperty(Exchange exchange) {
return (GenericFile<T>) exchange.getProperty(FileComponent.FILE_EXCHANGE_FILE);
}
@Override
protected void doStart() throws Exception {
super.doStart();
}
@Override
protected void doStop() throws Exception {
prepareOnStartup = false;
super.doStop();
}
}