package mil.nga.giat.geowave.core.ingest.hdfs;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import mil.nga.giat.geowave.core.ingest.avro.AvroFormatPlugin;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Holds intermediate data that must be shared throughout the life of the HDFS
 * staging process, most notably the Avro file writers that are cached per
 * type name. An illustrative usage sketch appears at the end of this file.
 */
public class StageRunData
{
private final static Logger LOGGER = LoggerFactory.getLogger(StageRunData.class);
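// one Avro DataFileWriter is lazily created and cached per type name; a
// failed creation is cached as a null value so it is not retried on every call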
private final Map<String, DataFileWriter> cachedWriters = new HashMap<String, DataFileWriter>();
private final Path hdfsBaseDirectory;
private final FileSystem fs;
public StageRunData(
final Path hdfsBaseDirectory,
final FileSystem fs ) {
this.hdfsBaseDirectory = hdfsBaseDirectory;
this.fs = fs;
}
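/**
 * Gets the Avro writer for the given type name, lazily creating and caching
 * it if necessary. Returns null if the writer could not be created.
 */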
public DataFileWriter getWriter(
final String typeName,
final AvroFormatPlugin plugin ) {
return getDataWriterCreateIfNull(
typeName,
plugin);
}
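// synchronized so that concurrent callers staging different files never
// create two writers (or two HDFS output streams) for the same type name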
private synchronized DataFileWriter getDataWriterCreateIfNull(
final String typeName,
final AvroFormatPlugin plugin ) {
if (!cachedWriters.containsKey(typeName)) {
FSDataOutputStream out = null;
final DataFileWriter dfw = new DataFileWriter(
new GenericDatumWriter());
cachedWriters.put(
typeName,
dfw);
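// compress the Avro data blocks with Snappy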
dfw.setCodec(CodecFactory.snappyCodec());
try {
// TODO: we should probably clean up the type name to make it
// HDFS path safe in case there are invalid characters
// also, if a file already exists do we want to delete it or
// append to it?
out = fs.create(new Path(
hdfsBaseDirectory,
typeName));
dfw.create(
plugin.getAvroSchema(),
out);
}
catch (final IOException e) {
LOGGER.error(
"Unable to create Avro data file writer for type '" + typeName + "'",
e);
// close any partially opened stream, then cache a null value so we
// don't continually try to recreate the writer for this type
IOUtils.closeStream(out);
cachedWriters.put(
typeName,
null);
return null;
}
}
return cachedWriters.get(typeName);
}
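/**
 * Closes every cached Avro writer, flushing any buffered records to HDFS,
 * and clears the cache. Should be called once staging is complete.
 */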
public synchronized void close() {
for (final DataFileWriter dfw : cachedWriters.values()) {
// writers that failed to be created are cached as null; skip them
if (dfw == null) {
continue;
}
try {
dfw.close();
}
catch (final IOException e) {
LOGGER.warn(
"Unable to close Avro data file writer",
e);
}
}
cachedWriters.clear();
}
}
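/*
 * Illustrative usage only (a sketch, not the actual GeoWave driver code; the
 * variable names and the source of the Avro records are assumptions): the
 * staging driver creates one StageRunData per run, fetches a writer per type
 * name, appends the plugin's Avro records, and closes everything when the run
 * is finished.
 *
 * final StageRunData runData = new StageRunData(hdfsBaseDirectory, fs);
 * final DataFileWriter writer = runData.getWriter(typeName, avroPlugin);
 * if (writer != null) {
 *     for (final Object avroRecord : avroRecordsFromPlugin) {
 *         writer.append(avroRecord);
 *     }
 * }
 * runData.close();
 */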