package mil.nga.giat.geowave.core.ingest.hdfs.mapreduce;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.Collections;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.PersistenceUtils;
import mil.nga.giat.geowave.core.index.StringUtils;
import mil.nga.giat.geowave.core.ingest.GeoWaveData;
import mil.nga.giat.geowave.core.ingest.avro.AbstractStageWholeFileToAvro;
import mil.nga.giat.geowave.core.ingest.avro.WholeFile;
import mil.nga.giat.geowave.core.ingest.local.LocalFileIngestPlugin;
import mil.nga.giat.geowave.core.store.CloseableIterator;
import mil.nga.giat.geowave.core.store.adapter.WritableDataAdapter;
import mil.nga.giat.geowave.core.store.index.CommonIndexValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Iterators;
/**
* This class can be sub-classed as a general-purpose recipe for parallelizing
* ingestion of files either locally or by directly staging the binary of the
* file to HDFS and then ingesting it within the map phase of a map-reduce job.
*/
abstract public class AbstractLocalIngestWithMapper<T> extends
		AbstractStageWholeFileToAvro implements
		LocalFileIngestPlugin<T>,
		IngestFromHdfsPlugin<WholeFile, T>
{
	private final static Logger LOGGER = LoggerFactory.getLogger(AbstractLocalIngestWithMapper.class);

	@Override
	public boolean isUseReducerPreferred() {
		// ingestion here is fully parallelizable per file within the map
		// phase, so no reduce phase is required
		return false;
	}

	@Override
	public IngestWithMapper<WholeFile, T> ingestWithMapper() {
		return new InternalIngestWithMapper<T>(
				this);
	}

	/**
	 * Opens the given local file and delegates parsing to
	 * {@link #toGeoWaveDataInternal}. The underlying stream is kept open
	 * until the returned iterator is closed, so implementations of
	 * toGeoWaveDataInternal that read lazily remain safe. Callers are
	 * responsible for closing the returned iterator.
	 *
	 * @param input
	 *            the local file to ingest
	 * @param primaryIndexIds
	 *            the IDs of the indices the data should be written to
	 * @param globalVisibility
	 *            a visibility expression applied to all ingested entries
	 * @return an iterator over GeoWave entries; an empty iterator if the
	 *         file cannot be opened (a warning is logged)
	 */
	@Override
	public CloseableIterator<GeoWaveData<T>> toGeoWaveData(
			final File input,
			final Collection<ByteArrayId> primaryIndexIds,
			final String globalVisibility ) {
		try {
			final InputStream inputStream = new FileInputStream(
					input);
			final CloseableIterator<GeoWaveData<T>> internalIterator = toGeoWaveDataInternal(
					inputStream,
					primaryIndexIds,
					globalVisibility);
			// NOTE: the stream must NOT be closed here (a try-with-resources
			// would close it before a lazily-reading iterator is consumed);
			// instead its lifetime is tied to the iterator's close()
			return new CloseableIterator<GeoWaveData<T>>() {
				@Override
				public boolean hasNext() {
					return internalIterator.hasNext();
				}

				@Override
				public GeoWaveData<T> next() {
					return internalIterator.next();
				}

				@Override
				public void remove() {
					internalIterator.remove();
				}

				@Override
				public void close() {
					try {
						internalIterator.close();
						inputStream.close();
					}
					catch (final Exception e) {
						LOGGER.warn(
								"Cannot close file '" + input.getAbsolutePath() + "'",
								e);
					}
				}
			};
		}
		catch (final IOException e) {
			LOGGER.warn(
					"Cannot open file, unable to ingest",
					e);
		}
		// typed empty iterator from the JDK (previously a raw Wrapper over
		// guava's deprecated Iterators.emptyIterator())
		return new CloseableIterator.Wrapper<GeoWaveData<T>>(
				Collections.<GeoWaveData<T>> emptyIterator());
	}

	/**
	 * Parses a single file's content into GeoWave entries. Implementations
	 * may read the stream lazily; the stream stays open until the returned
	 * iterator is closed.
	 *
	 * @param file
	 *            an open stream over the file content
	 * @param primaryIndexIds
	 *            the IDs of the indices the data should be written to
	 * @param globalVisibility
	 *            a visibility expression applied to all ingested entries
	 * @return an iterator over the GeoWave entries parsed from the stream
	 */
	abstract protected CloseableIterator<GeoWaveData<T>> toGeoWaveDataInternal(
			final InputStream file,
			final Collection<ByteArrayId> primaryIndexIds,
			final String globalVisibility );

	@Override
	public IngestWithReducer<WholeFile, ?, ?, T> ingestWithReducer() {
		// reducer-based ingest is not supported by this recipe (see
		// isUseReducerPreferred); null signals "no reducer available"
		return null;
	}

	/**
	 * Mapper-side adapter that serializes only the parent plugin's class
	 * name, re-instantiates the plugin reflectively on the mapper, and
	 * delegates all ingest calls to it.
	 */
	private static class InternalIngestWithMapper<T> implements
			IngestWithMapper<WholeFile, T>
	{
		// parameterized (was a raw type) so delegation stays type-safe
		private AbstractLocalIngestWithMapper<T> parentPlugin;

		// no-arg constructor required for reflective instantiation prior
		// to fromBinary()
		public InternalIngestWithMapper() {}

		public InternalIngestWithMapper(
				final AbstractLocalIngestWithMapper<T> parentPlugin ) {
			this.parentPlugin = parentPlugin;
		}

		@Override
		public WritableDataAdapter<T>[] getDataAdapters(
				final String globalVisibility ) {
			return parentPlugin.getDataAdapters(globalVisibility);
		}

		@Override
		public CloseableIterator<GeoWaveData<T>> toGeoWaveData(
				final WholeFile input,
				final Collection<ByteArrayId> primaryIndexIds,
				final String globalVisibility ) {
			// the whole file is already buffered in memory, so no extra
			// stream-lifetime management is needed here
			final InputStream inputStream = new ByteBufferBackedInputStream(
					input.getOriginalFile());
			return parentPlugin.toGeoWaveDataInternal(
					inputStream,
					primaryIndexIds,
					globalVisibility);
		}

		@Override
		public byte[] toBinary() {
			// only the class name is persisted; the plugin is rebuilt
			// reflectively in fromBinary()
			return StringUtils.stringToBinary(parentPlugin.getClass().getName());
		}

		@SuppressWarnings("unchecked")
		@Override
		public void fromBinary(
				final byte[] bytes ) {
			// reflective factory returns the raw type; cast is safe because
			// toBinary() recorded this plugin's own class name
			parentPlugin = PersistenceUtils.classFactory(
					StringUtils.stringFromBinary(bytes),
					AbstractLocalIngestWithMapper.class);
		}

		@Override
		public Class<? extends CommonIndexValue>[] getSupportedIndexableTypes() {
			return parentPlugin.getSupportedIndexableTypes();
		}
	}
}