package mil.nga.giat.geowave.cli.osm.parser; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Paths; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumWriter; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.openstreetmap.osmosis.osmbinary.BinaryParser; import org.openstreetmap.osmosis.osmbinary.Osmformat; import org.openstreetmap.osmosis.osmbinary.file.BlockInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import mil.nga.giat.geowave.cli.osm.types.generated.MemberType; import mil.nga.giat.geowave.cli.osm.types.generated.Node; import mil.nga.giat.geowave.cli.osm.types.generated.Primitive; import mil.nga.giat.geowave.cli.osm.types.generated.Relation; import mil.nga.giat.geowave.cli.osm.types.generated.RelationMember; import mil.nga.giat.geowave.cli.osm.types.generated.Way; public class OsmPbfParser { private static Logger LOGGER = LoggerFactory.getLogger(OsmPbfParser.class); public Configuration stageData( OsmPbfParserOptions args ) throws IOException { final OsmPbfParserOptions arg = args; final Configuration conf = new Configuration(); conf.set( "fs.default.name", args.getNameNode()); conf.set( "fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); FileSystem fs = FileSystem.get(conf); Path basePath = new Path( arg.getHdfsBasePath()); if (!fs.exists(basePath)) { if (!fs.mkdirs(basePath)) { throw new IOException( "Unable to create staging directory: " + arg.getNameNode() + arg.getHdfsBasePath()); } } Path nodesPath = new Path( arg.getNodesBasePath()); Path waysPath = new Path( arg.getWaysBasePath()); Path relationsPath = new Path( arg.getRelationsBasePath()); final DataFileWriter nodeWriter = new DataFileWriter( new GenericDatumWriter()); final DataFileWriter wayWriter = new DataFileWriter( new GenericDatumWriter()); final DataFileWriter relationWriter = new DataFileWriter( new GenericDatumWriter()); nodeWriter.setCodec(CodecFactory.snappyCodec()); wayWriter.setCodec(CodecFactory.snappyCodec()); relationWriter.setCodec(CodecFactory.snappyCodec()); FSDataOutputStream nodeOut = null; FSDataOutputStream wayOut = null; FSDataOutputStream relationOut = null; final OsmAvroBinaryParser parser = new OsmAvroBinaryParser(); try { nodeOut = fs.create(nodesPath); wayOut = fs.create(waysPath); relationOut = fs.create(relationsPath); nodeWriter.create( Node.getClassSchema(), nodeOut); wayWriter.create( Way.getClassSchema(), wayOut); relationWriter.create( Relation.getClassSchema(), relationOut); parser.setupWriter( nodeWriter, wayWriter, relationWriter); Files.walkFileTree( Paths.get(args.getIngestDirectory()), new SimpleFileVisitor<java.nio.file.Path>() { @Override // I couldn't figure out how to get rid of the findbugs // issue. @SuppressFBWarnings(value = "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE") public FileVisitResult visitFile( java.nio.file.Path file, BasicFileAttributes attrs ) throws IOException { if (file.getFileName().toString().endsWith( arg.getExtension())) { loadFileToHdfs( file, parser); } return FileVisitResult.CONTINUE; } }); } catch (IOException ex) { LOGGER.error( "Unable to crrate the FSDataOutputStream", ex); } finally { IOUtils.closeQuietly(nodeWriter); IOUtils.closeQuietly(wayWriter); IOUtils.closeQuietly(relationWriter); IOUtils.closeQuietly(nodeOut); IOUtils.closeQuietly(wayOut); IOUtils.closeQuietly(relationOut); fs.close(); } return conf; } private static void loadFileToHdfs( java.nio.file.Path file, OsmAvroBinaryParser parser ) { InputStream is = null; try { is = new FileInputStream( file.toFile()); new BlockInputStream( is, parser).process(); } catch (FileNotFoundException e) { LOGGER.error( "Unable to load file: " + file.toString(), e); } catch (IOException e1) { LOGGER.error( "Unable to process file: " + file.toString(), e1); } finally { IOUtils.closeQuietly(is); } } private static class OsmAvroBinaryParser extends BinaryParser { private static Logger LOGGER = LoggerFactory.getLogger(OsmAvroBinaryParser.class); private DataFileWriter nodeWriter = null; private DataFileWriter wayWriter = null; private DataFileWriter relationWriter = null; public void setupWriter( DataFileWriter nodeWriter, DataFileWriter wayWriter, DataFileWriter relationWriter ) { this.nodeWriter = nodeWriter; this.wayWriter = wayWriter; this.relationWriter = relationWriter; } @Override protected void parseRelations( List<Osmformat.Relation> rels ) { for (Osmformat.Relation r : rels) { Relation r2 = new Relation(); Primitive p = getPrimitive(r.getInfo()); p.setId(r.getId()); p.setTags(getTags( r.getKeysList(), r.getValsList())); r2.setCommon(p); List<RelationMember> members = new ArrayList<>( r.getRolesSidCount()); for (int i = 0; i < r.getRolesSidCount(); i++) { RelationMember rm = new RelationMember(); rm.setMember(r.getMemids(i)); rm.setRole(getStringById(r.getRolesSid(i))); switch (r.getTypes( i).toString()) { case "NODE": { rm.setMemberType(MemberType.NODE); break; } case "WAY": { rm.setMemberType(MemberType.WAY); break; } case "RELATION": { rm.setMemberType(MemberType.RELATION); break; } default: break; } } r2.setMembers(members); try { relationWriter.append(r2); } catch (IOException e) { LOGGER.error( "Unable to write relation", e); } } } @Override protected void parseDense( Osmformat.DenseNodes nodes ) { long lastId = 0; long lastLat = 0; long lastLon = 0; long lastTimestamp = 0; long lastChangeset = 0; int lastUid = 0; int lastSid = 0; int tagLocation = 0; for (int i = 0; i < nodes.getIdCount(); i++) { Node n = new Node(); Primitive p = new Primitive(); lastId += nodes.getId(i); lastLat += nodes.getLat(i); lastLon += nodes.getLon(i); p.setId(lastId); n.setLatitude(parseLat(lastLat)); n.setLongitude(parseLon(lastLon)); // Weird spec - keys and values are mashed sequentially, and end // of data for a particular node is denoted by a value of 0 if (nodes.getKeysValsCount() > 0) { Map<String, String> tags = new HashMap<>( nodes.getKeysValsCount()); while (nodes.getKeysVals(tagLocation) > 0) { String k = getStringById(nodes.getKeysVals(tagLocation)); tagLocation++; String v = getStringById(nodes.getKeysVals(tagLocation)); tagLocation++; tags.put( k, v); } p.setTags(tags); } if (nodes.hasDenseinfo()) { Osmformat.DenseInfo di = nodes.getDenseinfo(); lastTimestamp += di.getTimestamp(i); lastChangeset += di.getChangeset(i); lastUid += di.getUid(i); lastSid += di.getUserSid(i); p.setTimestamp(lastTimestamp); p.setChangesetId(lastChangeset); p.setUserId((long) lastUid); p.setUserName(getStringById(lastSid)); if (di.getVisibleCount() > 0) { p.setVisible(di.getVisible(i)); } } n.setCommon(p); try { nodeWriter.append(n); } catch (IOException e) { LOGGER.error( "Unable to write dense node", e); } } } @Override protected void parseNodes( List<Osmformat.Node> nodes ) { for (Osmformat.Node n : nodes) { Node n2 = new Node(); Primitive p = getPrimitive(n.getInfo()); p.setId(n.getId()); p.setTags(getTags( n.getKeysList(), n.getValsList())); n2.setCommon(p); n2.setLatitude(parseLat(n.getLat())); n2.setLongitude(parseLon(n.getLon())); try { nodeWriter.append(n2); } catch (IOException e) { LOGGER.error( "Unable to write node", e); } } } @Override protected void parseWays( List<Osmformat.Way> ways ) { for (Osmformat.Way w : ways) { Way w2 = new Way(); Primitive p = getPrimitive(w.getInfo()); p.setId(w.getId()); p.setTags(getTags( w.getKeysList(), w.getValsList())); w2.setCommon(p); long lastRef = 0; List<Long> nodes = new ArrayList<>( w.getRefsCount()); for (Long ref : w.getRefsList()) { lastRef += ref; nodes.add(lastRef); } w2.setNodes(nodes); try { wayWriter.append(w2); } catch (IOException e) { LOGGER.error( "Unable to write way", e); } } } @Override protected void parse( Osmformat.HeaderBlock header ) { } public void complete() { System.out.println("Complete!"); } private Map<String, String> getTags( List<Integer> k, List<Integer> v ) { Map<String, String> tags = new HashMap<String, String>( k.size()); for (int i = 0; i < k.size(); i++) { tags.put( getStringById(k.get(i)), getStringById(v.get(i))); } return tags; } private Primitive getPrimitive( Osmformat.Info info ) { Primitive p = new Primitive(); p.setVersion((long) info.getVersion()); p.setTimestamp(info.getTimestamp()); p.setUserId((long) info.getUid()); try { p.setUserName(getStringById(info.getUid())); } catch (Exception ex) { LOGGER.warn( "Error, input file doesn't contain a valid string table for user id: " + info.getUid(), ex); p.setUserName(String.valueOf(info.getUid())); } p.setChangesetId(info.getChangeset()); p.setVisible(info.getVisible()); return p; } } }