package mil.nga.giat.geowave.cli.osm.mapreduce.Ingest;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Mutation;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.commons.cli.MissingArgumentException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import mil.nga.giat.geowave.cli.osm.accumulo.osmschema.Schema;
import mil.nga.giat.geowave.cli.osm.operations.options.OSMIngestCommandArgs;
import mil.nga.giat.geowave.cli.osm.types.generated.Node;
import mil.nga.giat.geowave.cli.osm.types.generated.Relation;
import mil.nga.giat.geowave.cli.osm.types.generated.Way;
import mil.nga.giat.geowave.core.cli.parser.CommandLineOperationParams;
import mil.nga.giat.geowave.core.cli.parser.OperationParser;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.datastore.accumulo.AccumuloDataStoreFactory;
import mil.nga.giat.geowave.datastore.accumulo.BasicAccumuloOperations;
import mil.nga.giat.geowave.datastore.accumulo.operations.config.AccumuloRequiredOptions;

/**
 * Map-only MapReduce job that reads OSM Avro files (nodes, ways, or relations)
 * and writes the resulting mutations directly into the GeoWave OSM Accumulo
 * table via {@link AccumuloOutputFormat}.
 */
public class OSMRunner extends Configured implements Tool {
    private static final Logger log = LoggerFactory.getLogger(OSMRunner.class);

    private org.apache.avro.Schema avroSchema = null;
    private String inputAvroFile = null;

    private final OSMIngestCommandArgs ingestOptions;
    private final AccumuloRequiredOptions accumuloOptions;

    public static void main(
            String[] args )
            throws Exception {
        OSMIngestCommandArgs argv = new OSMIngestCommandArgs();
        DataStorePluginOptions opts = new DataStorePluginOptions();
        opts.selectPlugin(new AccumuloDataStoreFactory().getType());

        OperationParser parser = new OperationParser();
        parser.addAdditionalObject(argv);
        parser.addAdditionalObject(opts);

        CommandLineOperationParams params = parser.parse(args);
        if (params.getSuccessCode() == 0) {
            OSMRunner runner = new OSMRunner(
                    argv,
                    opts);
            int res = ToolRunner.run(
                    new Configuration(),
                    runner,
                    args);
            System.exit(res);
        }

        System.out.println(params.getSuccessMessage());
        System.exit(params.getSuccessCode());
    }

    public OSMRunner(
            OSMIngestCommandArgs ingestOptions,
            DataStorePluginOptions inputStoreOptions ) {
        this.ingestOptions = ingestOptions;

        // this job writes through the Accumulo-specific output format, so any
        // other data store type cannot be used
        if (!inputStoreOptions.getType().equals(
                new AccumuloDataStoreFactory().getType())) {
            throw new RuntimeException(
                    "Expected accumulo data store");
        }
        this.accumuloOptions = (AccumuloRequiredOptions) inputStoreOptions.getFactoryOptions();
    }

    public void configureSchema(
            org.apache.avro.Schema avroSchema ) {
        this.avroSchema = avroSchema;
    }

    /**
     * Creates the OSM table if it does not exist and assigns locality groups
     * for the node, way, and relation column families.
     */
    private void enableLocalityGroups(
            OSMIngestCommandArgs argv )
            throws AccumuloSecurityException, AccumuloException, TableNotFoundException {
        BasicAccumuloOperations bao = new BasicAccumuloOperations(
                accumuloOptions.getZookeeper(),
                accumuloOptions.getInstance(),
                accumuloOptions.getUser(),
                accumuloOptions.getPassword(),
                accumuloOptions.getGeowaveNamespace());
        bao.createTable(
                argv.getOsmTableName(),
                true,
                true,
                null);
        bao.addLocalityGroup(
                argv.getOsmTableName(),
                Schema.CF.NODE);
        bao.addLocalityGroup(
                argv.getOsmTableName(),
                Schema.CF.WAY);
        bao.addLocalityGroup(
                argv.getOsmTableName(),
                Schema.CF.RELATION);
    }

    @Override
    public int run(
            String[] args )
            throws Exception {
        Configuration conf = this.getConf();
        conf.set(
                "tableName",
                ingestOptions.getQualifiedTableName());
        conf.set(
                "osmVisibility",
                ingestOptions.getVisibilityOptions().getVisibility());

        // job settings
        Job job = Job.getInstance(
                conf,
                ingestOptions.getJobName());
        job.setJarByClass(OSMRunner.class);

        // select the Avro schema, input path, and mapper for the requested OSM
        // entity type
        switch (ingestOptions.getMapperType()) {
            case "NODE": {
                configureSchema(Node.getClassSchema());
                inputAvroFile = ingestOptions.getNameNode() + "/" + ingestOptions.getNodesBasePath();
                job.setMapperClass(OSMNodeMapper.class);
                break;
            }
            case "WAY": {
                configureSchema(Way.getClassSchema());
                inputAvroFile = ingestOptions.getNameNode() + "/" + ingestOptions.getWaysBasePath();
                job.setMapperClass(OSMWayMapper.class);
                break;
            }
            case "RELATION": {
                configureSchema(Relation.getClassSchema());
                inputAvroFile = ingestOptions.getNameNode() + "/" + ingestOptions.getRelationsBasePath();
                job.setMapperClass(OSMRelationMapper.class);
                break;
            }
            default:
                break;
        }
        if (avroSchema == null || inputAvroFile == null) {
            throw new MissingArgumentException(
                    "argument for mapper type must be one of: NODE, WAY, or RELATION");
        }

        enableLocalityGroups(ingestOptions);

        // input format
        job.setInputFormatClass(AvroKeyInputFormat.class);
        FileInputFormat.setInputPaths(
                job,
                inputAvroFile);
        AvroJob.setInputKeySchema(
                job,
                avroSchema);

        // mapper output: Accumulo mutations written directly through
        // AccumuloOutputFormat
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Mutation.class);
        job.setOutputFormatClass(AccumuloOutputFormat.class);
        AccumuloOutputFormat.setConnectorInfo(
                job,
                accumuloOptions.getUser(),
                new PasswordToken(
                        accumuloOptions.getPassword()));
        AccumuloOutputFormat.setCreateTables(
                job,
                true);
        AccumuloOutputFormat.setDefaultTableName(
                job,
                ingestOptions.getQualifiedTableName());
        AccumuloOutputFormat.setZooKeeperInstance(
                job,
                new ClientConfiguration().withInstance(
                        accumuloOptions.getInstance()).withZkHosts(
                        accumuloOptions.getZookeeper()));

        // reducer: none needed; this is a map-only job
        job.setNumReduceTasks(0);

        return job.waitForCompletion(true) ? 0 : -1;
    }
}
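/*
 * Minimal programmatic usage sketch (illustrative only; assumes the
 * OSMIngestCommandArgs and DataStorePluginOptions instances have already been
 * populated, e.g. by the GeoWave CLI parser shown in main() above):
 *
 *   OSMIngestCommandArgs ingestArgs = ...;   // mapper type, table name, Avro base paths
 *   DataStorePluginOptions storeOpts = ...;  // must wrap an Accumulo data store
 *   OSMRunner runner = new OSMRunner(ingestArgs, storeOpts);
 *   int exitCode = ToolRunner.run(new Configuration(), runner, new String[] {});
 */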