package mil.nga.giat.geowave.mapreduce.input;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import mil.nga.giat.geowave.core.store.DataStore;
import mil.nga.giat.geowave.core.store.GeoWaveStoreFinder;
import mil.nga.giat.geowave.core.store.StoreFactoryFamilySpi;
import mil.nga.giat.geowave.core.store.adapter.AdapterIndexMappingStore;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.adapter.statistics.DataStatisticsStore;
import mil.nga.giat.geowave.core.store.index.IndexStore;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.core.store.query.DistributableQuery;
import mil.nga.giat.geowave.core.store.query.QueryOptions;
import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase;
import mil.nga.giat.geowave.mapreduce.JobContextAdapterStore;
import mil.nga.giat.geowave.mapreduce.JobContextIndexStore;
import mil.nga.giat.geowave.mapreduce.MapReduceDataStore;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputConfigurator.InputConfig;
public class GeoWaveInputFormat<T> extends
InputFormat<GeoWaveInputKey, T>
{
private static final Class<?> CLASS = GeoWaveInputFormat.class;
protected static final Logger LOGGER = LoggerFactory.getLogger(CLASS);
public static void setStoreOptionsMap(
final Configuration config,
final Map<String, String> storeConfigOptions ) {
GeoWaveConfiguratorBase.setStoreOptionsMap(
CLASS,
config,
storeConfigOptions);
}
public static void setStoreOptions(
final Configuration config,
final DataStorePluginOptions storeOptions ) {
if (storeOptions != null) {
GeoWaveConfiguratorBase.setStoreOptionsMap(
CLASS,
config,
storeOptions.getOptionsAsMap());
}
else {
GeoWaveConfiguratorBase.setStoreOptionsMap(
CLASS,
config,
null);
}
}
public static IndexStore getJobContextIndexStore(
final JobContext context ) {
return GeoWaveConfiguratorBase.getJobContextIndexStore(
CLASS,
context);
}
public static AdapterIndexMappingStore getJobContextAdapterIndexMappingStore(
final JobContext context ) {
return GeoWaveConfiguratorBase.getJobContextAdapterIndexMappingStore(
CLASS,
context);
}
public static AdapterStore getJobContextAdapterStore(
final JobContext context ) {
return GeoWaveConfiguratorBase.getJobContextAdapterStore(
CLASS,
context);
}
public static DataStatisticsStore getJobContextDataStatisticsStore(
final JobContext context ) {
// TODO, this doesn't create a data statistics store wrapping a
// jobcontext as the name implies, need to either wrap a job context or
// rename this (for adapter and index store, adapters and indices are
// stored in the job context rather than multiple processes needing to
// look it up, this doesn't seem to be happening for stats)
return GeoWaveConfiguratorBase.getDataStatisticsStore(
CLASS,
context);
}
public static void setMinimumSplitCount(
final Configuration config,
final Integer minSplits ) {
GeoWaveInputConfigurator.setMinimumSplitCount(
CLASS,
config,
minSplits);
}
public static void setMaximumSplitCount(
final Configuration config,
final Integer maxSplits ) {
GeoWaveInputConfigurator.setMaximumSplitCount(
CLASS,
config,
maxSplits);
}
public static void setIsOutputWritable(
final Configuration config,
final Boolean isOutputWritable ) {
config.setBoolean(
GeoWaveConfiguratorBase.enumToConfKey(
CLASS,
InputConfig.OUTPUT_WRITABLE),
isOutputWritable);
}
public static void setQuery(
final Configuration config,
final DistributableQuery query ) {
GeoWaveInputConfigurator.setQuery(
CLASS,
config,
query);
}
protected static DistributableQuery getQuery(
final JobContext context ) {
return GeoWaveInputConfigurator.getQuery(
CLASS,
context);
}
public static void setQueryOptions(
final Configuration config,
final QueryOptions queryOptions ) {
final PrimaryIndex index = queryOptions.getIndex();
if (index != null) {
// make available to the context index store
JobContextIndexStore.addIndex(
config,
index);
}
try {
// THIS SHOULD GO AWAY, and assume the adapters in the Persistent
// Data Store
// instead. It will fail, due to the 'null', if the query options
// does not
// contain the adapters
for (final DataAdapter<?> adapter : queryOptions.getAdaptersArray(null)) {
// Also store for use the mapper and reducers
JobContextAdapterStore.addDataAdapter(
config,
adapter);
}
}
catch (final Exception e) {
LOGGER
.warn(
"Adapter Ids witih adapters are included in the query options.This, the adapter must be accessible from the data store for use by the consumer/Mapper.",
e);
}
GeoWaveInputConfigurator.setQueryOptions(
CLASS,
config,
queryOptions);
}
protected static QueryOptions getQueryOptions(
final JobContext context ) {
final QueryOptions options = GeoWaveInputConfigurator.getQueryOptions(
CLASS,
context);
return options == null ? new QueryOptions() : options;
}
protected static PrimaryIndex getIndex(
final JobContext context ) {
return GeoWaveInputConfigurator.getIndex(
CLASS,
GeoWaveConfiguratorBase.getConfiguration(context));
}
protected static Boolean isOutputWritable(
final JobContext context ) {
return GeoWaveConfiguratorBase.getConfiguration(
context).getBoolean(
GeoWaveConfiguratorBase.enumToConfKey(
CLASS,
InputConfig.OUTPUT_WRITABLE),
false);
}
protected static Integer getMinimumSplitCount(
final JobContext context ) {
return GeoWaveInputConfigurator.getMinimumSplitCount(
CLASS,
context);
}
protected static Integer getMaximumSplitCount(
final JobContext context ) {
return GeoWaveInputConfigurator.getMaximumSplitCount(
CLASS,
context);
}
@Override
public RecordReader<GeoWaveInputKey, T> createRecordReader(
final InputSplit split,
final TaskAttemptContext context )
throws IOException,
InterruptedException {
final Map<String, String> configOptions = getStoreOptionsMap(context);
final DataStore dataStore = GeoWaveStoreFinder.createDataStore(configOptions);
final AdapterStore adapterStore = getJobContextAdapterStore(context);
if ((dataStore != null) && (dataStore instanceof MapReduceDataStore)) {
final QueryOptions queryOptions = getQueryOptions(context);
final QueryOptions rangeQueryOptions = new QueryOptions(
queryOptions);
return (RecordReader<GeoWaveInputKey, T>) ((MapReduceDataStore) dataStore).createRecordReader(
getQuery(context),
rangeQueryOptions,
adapterStore,
getJobContextDataStatisticsStore(context),
getJobContextIndexStore(context),
isOutputWritable(
context).booleanValue(),
split);
}
LOGGER.error("Data Store does not support map reduce");
throw new IOException(
"Data Store does not support map reduce");
}
/**
* Check whether a configuration is fully configured to be used with an
* Accumulo {@link org.apache.hadoop.mapreduce.InputFormat}.
*
* @param context
* the Hadoop context for the configured job
* @throws IOException
* if the context is improperly configured
* @since 1.5.0
*/
protected static void validateOptions(
final JobContext context )
throws IOException {// attempt to get each of the GeoWave
// stores
// from the job context
try {
final Map<String, String> configOptions = getStoreOptionsMap(context);
final StoreFactoryFamilySpi factoryFamily = GeoWaveStoreFinder.findStoreFamily(configOptions);
if (factoryFamily == null) {
final String msg = "Unable to find GeoWave data store";
LOGGER.warn(msg);
throw new IOException(
msg);
}
}
catch (final Exception e) {
LOGGER.warn(
"Error finding GeoWave stores",
e);
throw new IOException(
"Error finding GeoWave stores",
e);
}
}
public static DataStorePluginOptions getStoreOptions(
final JobContext context ) {
return GeoWaveConfiguratorBase.getStoreOptions(
CLASS,
context);
}
public static Map<String, String> getStoreOptionsMap(
final JobContext context ) {
return GeoWaveConfiguratorBase.getStoreOptionsMap(
CLASS,
context);
}
@Override
public List<InputSplit> getSplits(
final JobContext context )
throws IOException,
InterruptedException {
final Map<String, String> configOptions = getStoreOptionsMap(context);
final DataStore dataStore = GeoWaveStoreFinder.createDataStore(configOptions);
final AdapterStore adapterStore = getJobContextAdapterStore(context);
if ((dataStore != null) && (dataStore instanceof MapReduceDataStore)) {
final QueryOptions queryOptions = getQueryOptions(context);
final QueryOptions rangeQueryOptions = new QueryOptions(
queryOptions);
return ((MapReduceDataStore) dataStore).getSplits(
getQuery(context),
rangeQueryOptions,
adapterStore,
getJobContextDataStatisticsStore(context),
getJobContextIndexStore(context),
getMinimumSplitCount(context),
getMaximumSplitCount(context));
}
LOGGER.error("Data Store does not support map reduce");
throw new IOException(
"Data Store does not support map reduce");
}
}