package mil.nga.giat.geowave.core.ingest.hdfs;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import org.apache.avro.file.DataFileWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import mil.nga.giat.geowave.core.ingest.avro.AvroFormatPlugin;
import mil.nga.giat.geowave.core.ingest.local.AbstractLocalFileDriver;
import mil.nga.giat.geowave.core.ingest.local.LocalInputCommandLineOptions;
/**
* This class actually executes the staging of data to HDFS based on the
* available type plugin providers that are discovered through SPI.
*/
public class StageToHdfsDriver extends
AbstractLocalFileDriver<AvroFormatPlugin<?, ?>, StageRunData>
{
private final static Logger LOGGER = LoggerFactory.getLogger(StageToHdfsDriver.class);
private final Map<String, AvroFormatPlugin<?, ?>> ingestPlugins;
private final String hdfsHostPort;
private final String basePath;
public StageToHdfsDriver(
Map<String, AvroFormatPlugin<?, ?>> ingestPlugins,
String hdfsHostPort,
String basePath,
LocalInputCommandLineOptions inputOptions ) {
super(
inputOptions);
this.ingestPlugins = ingestPlugins;
this.hdfsHostPort = hdfsHostPort;
this.basePath = basePath;
}
@Override
protected void processFile(
final File file,
final String typeName,
final AvroFormatPlugin<?, ?> plugin,
final StageRunData runData ) {
final DataFileWriter writer = runData.getWriter(
typeName,
plugin);
if (writer != null) {
final Object[] objs = plugin.toAvroObjects(file);
for (final Object obj : objs) {
try {
writer.append(obj);
}
catch (final IOException e) {
LOGGER.error(
"Cannot append data to sequence file",
e);
}
}
}
}
public boolean runOperation(
String inputPath ) {
// first collect the stage to hdfs plugins
final Map<String, AvroFormatPlugin<?, ?>> stageToHdfsPlugins = ingestPlugins;
final Configuration conf = new Configuration();
conf.set(
"fs.defaultFS",
hdfsHostPort);
conf.set(
"fs.hdfs.impl",
org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
final Path hdfsBaseDirectory = new Path(
basePath);
try {
try (final FileSystem fs = FileSystem.get(conf)) {
if (!fs.exists(hdfsBaseDirectory)) {
fs.mkdirs(hdfsBaseDirectory);
}
try {
final StageRunData runData = new StageRunData(
hdfsBaseDirectory,
fs);
processInput(
inputPath,
stageToHdfsPlugins,
runData);
runData.close();
return true;
}
catch (final IOException e) {
LOGGER.error(
"Unexpected I/O exception when reading input files",
e);
return false;
}
}
}
catch (final IOException e) {
LOGGER.error(
"Unable to create remote HDFS directory",
e);
return false;
}
}
}