package mil.nga.giat.geowave.adapter.vector.export;
import java.io.IOException;
import java.util.List;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.geotools.filter.text.cql2.CQLException;
import com.beust.jcommander.JCommander;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import mil.nga.giat.geowave.adapter.vector.GeotoolsFeatureDataAdapter;
import mil.nga.giat.geowave.adapter.vector.avro.AvroSimpleFeatureCollection;
import mil.nga.giat.geowave.adapter.vector.query.cql.CQLQuery;
import mil.nga.giat.geowave.core.cli.operations.config.options.ConfigOptions;
import mil.nga.giat.geowave.core.cli.parser.CommandLineOperationParams;
import mil.nga.giat.geowave.core.cli.parser.OperationParser;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.index.Index;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.core.store.query.DistributableQuery;
import mil.nga.giat.geowave.core.store.query.QueryOptions;
import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputFormat;
/**
 * Hadoop {@link Tool} that configures and runs a map-only MapReduce job which
 * reads vector data through {@link GeoWaveInputFormat} and writes it to HDFS
 * as Avro ({@link AvroSimpleFeatureCollection}) files.
 */
public class VectorMRExportJobRunner extends
        Configured implements
        Tool
{
    private static final Logger LOGGER = LoggerFactory.getLogger(VectorMRExportJobRunner.class);

    /**
     * Configuration key under which {@link #runJob()} stores the configured
     * batch size (presumably read back by the mapper - verify against
     * VectorExportMapper).
     */
    public static final String BATCH_SIZE_KEY = "BATCH_SIZE";

    private final DataStorePluginOptions storeOptions;
    private final VectorMRExportOptions mrOptions;
    private final String hdfsHostPort;
    private final String hdfsPath;

    /**
     * @param storeOptions
     *            options used to create the adapter store and index store and
     *            passed on to the input format
     * @param mrOptions
     *            export options (adapter ids, index id, CQL filter, batch
     *            size, split counts, resource manager host/port)
     * @param hdfsHostPort
     *            HDFS host and port handed to the remote invocation
     *            configuration
     * @param hdfsPath
     *            output path of the export job
     */
    public VectorMRExportJobRunner(
            final DataStorePluginOptions storeOptions,
            final VectorMRExportOptions mrOptions,
            final String hdfsHostPort,
            final String hdfsPath ) {
        this.storeOptions = storeOptions;
        this.mrOptions = mrOptions;
        this.hdfsHostPort = hdfsHostPort;
        this.hdfsPath = hdfsPath;
    }

    /**
     * Configures the export job from the supplied options, submits it and
     * waits for completion.
     *
     * @return {@code 0} if the job completed successfully, {@code 1} if the
     *         job failed (including an {@link IOException} while waiting for
     *         completion, which is logged), or {@code -1} if the options are
     *         invalid (unknown or non-primary index, or a CQL filter used
     *         without exactly one known vector type)
     * @throws CQLException
     *             if the CQL filter cannot be turned into a query
     * @throws IOException
     *             if the job cannot be created or configured
     * @throws InterruptedException
     *             if interrupted while waiting for the job to complete
     * @throws ClassNotFoundException
     *             propagated from {@link Job#waitForCompletion(boolean)}
     */
    public int runJob()
            throws CQLException,
            IOException,
            InterruptedException,
            ClassNotFoundException {
        Configuration conf = super.getConf();
        if (conf == null) {
            conf = new Configuration();
            setConf(conf);
        }
        GeoWaveConfiguratorBase.setRemoteInvocationParams(
                hdfsHostPort,
                mrOptions.getResourceManagerHostPort(),
                conf);

        final QueryOptions options = new QueryOptions();
        final List<String> adapterIds = mrOptions.getAdapterIds();
        final AdapterStore adapterStore = storeOptions.createAdapterStore();
        if ((adapterIds != null) && !adapterIds.isEmpty()) {
            // Lists.transform returns a lazy view: adapters are looked up in
            // the store when the list is actually read
            options.setAdapters(Lists.transform(
                    adapterIds,
                    new Function<String, DataAdapter<?>>() {
                        @Override
                        public DataAdapter<?> apply(
                                final String input ) {
                            return adapterStore.getAdapter(new ByteArrayId(
                                    input));
                        }
                    }));
        }

        conf.setInt(
                BATCH_SIZE_KEY,
                mrOptions.getBatchSize());

        if (mrOptions.getIndexId() != null) {
            final Index index = storeOptions.createIndexStore().getIndex(
                    new ByteArrayId(
                            mrOptions.getIndexId()));
            if (index == null) {
                JCommander.getConsole().println(
                        "Unable to find index '" + mrOptions.getIndexId() + "' in store");
                return -1;
            }
            if (index instanceof PrimaryIndex) {
                options.setIndex((PrimaryIndex) index);
            }
            else {
                JCommander.getConsole().println(
                        "Index '" + mrOptions.getIndexId() + "' is not a primary index");
                return -1;
            }
        }

        if (mrOptions.getCqlFilter() != null) {
            if ((adapterIds == null) || (adapterIds.size() != 1)) {
                JCommander.getConsole().println(
                        "Exactly one type is expected when using CQL filter");
                return -1;
            }
            final String adapterId = adapterIds.get(0);
            // reuse the adapter store created above instead of creating a
            // second one
            final DataAdapter<?> adapter = adapterStore.getAdapter(new ByteArrayId(
                    adapterId));
            if (adapter == null) {
                JCommander.getConsole().println(
                        "Type '" + adapterId + "' not found");
                return -1;
            }
            if (!(adapter instanceof GeotoolsFeatureDataAdapter)) {
                JCommander.getConsole().println(
                        "Type '" + adapterId + "' does not support vector export");
                return -1;
            }
            // options.getIndex() is null here when no index id was supplied
            GeoWaveInputFormat.setQuery(
                    conf,
                    (DistributableQuery) CQLQuery.createOptimalQuery(
                            mrOptions.getCqlFilter(),
                            (GeotoolsFeatureDataAdapter) adapter,
                            options.getIndex(),
                            null));
        }

        GeoWaveInputFormat.setStoreOptions(
                conf,
                storeOptions);
        // the above code is a temporary placeholder until this gets merged with
        // the new commandline options
        GeoWaveInputFormat.setQueryOptions(
                conf,
                options);

        final Job job = createExportJob(conf);

        boolean retVal = false;
        try {
            retVal = job.waitForCompletion(true);
        }
        catch (final IOException ex) {
            LOGGER.error(
                    "Error waiting for map reduce vector export job: ",
                    ex);
        }
        return retVal ? 0 : 1;
    }

    /**
     * Creates the map-only export job: GeoWave input, compressed Avro output
     * written to {@code hdfsPath}, and the configured split count bounds.
     */
    private Job createExportJob(
            final Configuration conf )
            throws IOException {
        final Job job = Job.getInstance(conf);
        job.setJarByClass(this.getClass());
        job.setJobName("Exporting to " + hdfsPath);

        FileOutputFormat.setCompressOutput(
                job,
                true);
        FileOutputFormat.setOutputPath(
                job,
                new Path(
                        hdfsPath));

        job.setMapperClass(VectorExportMapper.class);
        job.setInputFormatClass(GeoWaveInputFormat.class);
        job.setOutputFormatClass(AvroKeyOutputFormat.class);
        job.setMapOutputKeyClass(AvroKey.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(AvroKey.class);
        job.setOutputValueClass(NullWritable.class);
        // no reducers: mapper output is written directly by the output format
        job.setNumReduceTasks(0);

        AvroJob.setOutputKeySchema(
                job,
                AvroSimpleFeatureCollection.SCHEMA$);
        AvroJob.setMapOutputKeySchema(
                job,
                AvroSimpleFeatureCollection.SCHEMA$);

        // split counts are set on the job's own configuration rather than on
        // the conf it was created from
        GeoWaveInputFormat.setMinimumSplitCount(
                job.getConfiguration(),
                mrOptions.getMinSplits());
        GeoWaveInputFormat.setMaximumSplitCount(
                job.getConfiguration(),
                mrOptions.getMaxSplits());
        return job;
    }

    /**
     * Command-line entry point: parses the arguments into a
     * {@link VectorMRExportCommand}, runs the runner it creates through
     * {@link ToolRunner} and exits the JVM with the resulting code.
     */
    public static void main(
            final String[] args )
            throws Exception {
        final ConfigOptions opts = new ConfigOptions();
        final OperationParser parser = new OperationParser();
        parser.addAdditionalObject(opts);
        final VectorMRExportCommand command = new VectorMRExportCommand();
        final CommandLineOperationParams params = parser.parse(
                command,
                args);
        opts.prepare(params);
        final int res = ToolRunner.run(
                new Configuration(),
                command.createRunner(params),
                args);
        System.exit(res);
    }

    /**
     * {@link Tool} entry point; ignores {@code args} and delegates to
     * {@link #runJob()}.
     */
    @Override
    public int run(
            final String[] args )
            throws Exception {
        return runJob();
    }
}