package com.cloudera.flume.source;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.conf.Configurable;
import org.apache.flume.source.AbstractSource;
import org.apache.flume.PollableSource;
import org.apache.flume.EventDeliveryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.sshHelper.SshClientJ;
public class SshSpoolDirectorySource extends AbstractSource
implements Configurable, PollableSource
{
private static final Logger logger =
LoggerFactory.getLogger(SshSpoolDirectorySource.class);
private String hostName, userName, userPass;
private String remoteSpoolPath;
private String localPersistPath;
private SshClientJ sshClient ;
private SshSpoolStateManager filesState;
@Override
public void configure(Context context) {
hostName = context.getString( SshSpoolDirectorySourceConstants.HOST_NAME );
userName = context.getString( SshSpoolDirectorySourceConstants.USER_NAME );
userPass = context.getString( SshSpoolDirectorySourceConstants.USER_PASS );
remoteSpoolPath = context.getString( SshSpoolDirectorySourceConstants.REMOTE_DIR_PATH );
localPersistPath = context.getString( SshSpoolDirectorySourceConstants.LOCAL_PERSIST_PATH );
sshClient = new SshClientJ( hostName, userName, userPass );
filesState = new SshSpoolStateManager( localPersistPath );
}
@Override
public void start() { }
@Override
public void stop () {
filesState.saveState();
}
@Override
public Status process() throws EventDeliveryException {
// Get pending files
ArrayList< String > pendingFiles;
try {
ArrayList< String > files = sshClient.getFilesInPath( remoteSpoolPath );
filesState.addProcessingList( files );
pendingFiles = filesState.getPending();
} catch (Exception e) {
logger.error( e.toString() );
return Status.BACKOFF;
}
// Start transaction
for( String file: pendingFiles )
{
try {
filesState.markInProcess( file );
File tempFile = sshClient.getTempLocalInstance( file );
if( tempFile == null ) {
logger.error( "Unable to retrieve contents: " + file );
logger.error( "Marking file in error state: " + file );
filesState.markError( file );
continue;
}
Map< String, String > headers = new HashMap< String, String >();
headers.put( "filename", file );
int line_counter = 1;
Scanner s = new Scanner( tempFile )
.useDelimiter( SshSpoolDirectorySourceConstants.RECORD_DELIMITER);
while( s.hasNext() ) {
String record = s.next();
logger.debug("f:" + file + ", l: " + line_counter + ", r: " + record );
headers.put( "line_number", Integer.toString(line_counter++) );
Event e = EventBuilder.withBody( record,
SshSpoolDirectorySourceConstants.FILE_CHARSET, headers );
// Store the Event into this Source's associated Channel(s)
getChannelProcessor().processEvent(e);
}
s.close();
filesState.markFinished( file );
logger.info("Successfully parsed: " + file );
} catch (Throwable t) {
// Log exception, handle individual exceptions as needed
logger.error( "While processing: " + file + " - " + t.toString() );
filesState.markError( file );
}
}
return Status.READY;
}
}