package me.osm.gazetter.join.out_handlers;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_ADDRESS;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_ADDR_LEVEL;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_CENTER_POINT;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_FEATURE_ID;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_FULL_GEOMETRY;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_ID;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_NEARBY_PLACES;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_NEARBY_STREETS;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_NEAREST_NEIGHBOUR;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_NEAREST_PLACE;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_POI_ADDR_MATCH;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_POI_CLASS;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_POI_KEYWORDS;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_POI_TYPE_NAMES;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_REFS;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_TAGS;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_TIMESTAMP;
import static me.osm.gazetter.join.out_handlers.GazetteerSchemeConstants.GAZETTEER_SCHEME_TYPE;
import gnu.trove.impl.sync.TSynchronizedLongSet;
import gnu.trove.set.TLongSet;
import gnu.trove.set.hash.TLongHashSet;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import me.osm.gazetter.Options;
import me.osm.gazetter.addresses.AddressesUtils;
import me.osm.gazetter.join.util.ExportTagsStatisticCollector;
import me.osm.gazetter.out.AddrRowValueExctractorImpl;
import me.osm.gazetter.striper.FeatureTypes;
import me.osm.gazetter.striper.GeoJsonWriter;
import me.osm.gazetter.striper.JSONFeature;
import me.osm.gazetter.utils.FileUtils;
import me.osm.gazetter.utils.JSONHash;
import me.osm.gazetter.utils.LocatePoint;
import me.osm.osmdoc.localization.L10n;
import me.osm.osmdoc.model.Feature;
import me.osm.osmdoc.model.Tag.Val;
import me.osm.osmdoc.read.DOCFileReader;
import me.osm.osmdoc.read.DOCFolderReader;
import me.osm.osmdoc.read.DOCReader;
import me.osm.osmdoc.read.OSMDocFacade;
import me.osm.osmdoc.read.tagvalueparsers.LogTagsStatisticCollector;
import me.osm.osmdoc.read.tagvalueparsers.TagsStatisticCollector;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.code.externalsorting.ExternalSort;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.LineString;
/**
* Writes data as JSON, out-gazetteer out handler.
* */
public class GazetteerOutWriter extends AddressPerRowJOHBase {
private static final Logger log = LoggerFactory.getLogger(GazetteerOutWriter.class);
// Name of the option which switches export of poi types translations on.
private static final String TRANSLATE_POI_TYPES_OPTION = "translate_poi_types";
// Option names of this handler; appended to the default options in
// getHandlerArguments() and listed by printUsage().
private static final List<String> OPTIONS = Arrays.asList(
"out", "local_admin", "locality",
"neighborhood", "poi_catalog",
TRANSLATE_POI_TYPES_OPTION,
"fill_addresses", "export_all_names",
"full_geometry",
"usage", "tag-stat",
"sort", "isort");
// Tag statistics collector handed to osm-doc while parsing poi tags;
// the concrete type is chosen in initialize() depending on "tag-stat".
private TagsStatisticCollector tagStatistics;
/**
* Name, used to call this handler from command line.
*
* @see Options#getPredefinedOutHandlers()
* */
public static final String NAME = "out-gazetteer";
// Stays null when poi_catalog=no; fillPOI() skips poi processing in that case.
private OSMDocFacade osmDocFacade = null;
// Address level keys (for instance "boundary:6", "place:city") matched as
// local_admin / locality / neighborhood in putAddrParts().
private List<String> localAdminKeys;
private List<String> localityKeys;
private List<String> neighborhoodKeys;
// Flags read from the handler options in initialize().
private boolean exportAllNames = false;
private boolean translatePOITypes = false;
private boolean fullGeometry = true;
// osm-doc catalog reader, created in initialize() unless poi_catalog=no.
private DOCReader reader = null;
// Values of the fill_addresses option (ref, levels, nearest, obj, trans, alt_names).
private Set<String> fillAddrOpts = null;
// Value of the "tag-stat" option, null when the option is missed.
private String tagStatPath;
// Path of the out file, set in initializeWriter().
private String outFile;
// Sort mode and inverse order flag, used by sortResults() and mergeAndSortHghnets().
private OutGazetteerSort sort;
private boolean isort;
// Counters of written objects per type, logged at the end of allDone().
private AtomicInteger poipntc = new AtomicInteger();
private AtomicInteger adrpntc = new AtomicInteger();
private AtomicInteger plcpntc = new AtomicInteger();
private AtomicInteger admbndc = new AtomicInteger();
private AtomicInteger plcbndc = new AtomicInteger();
private AtomicInteger hghwayc = new AtomicInteger();
private AtomicInteger hghnetc = new AtomicInteger();
// Hashes of already written row ids, used to skip duplicates.
// Allocated only for the UNIQUE sort mode, otherwise null.
private TLongSet plcbndIdSet;
private TLongSet hghwayIdSet;
// Writer and file for highway network rows; created lazily in
// getHgNetWriter(), closed and deleted in allDone().
private PrintWriter hghnetWriter;
private File hghNetFile;
/**
 * Reads handler options and prepares the writer, the osm-doc catalog,
 * the tag statistics collector and the duplicates filters.
 *
 * Prints usage and exits the JVM if the "usage" option is present.
 *
 * @param parsedOpts parsed handler options
 * @return this handler
 * */
@Override
public JoinOutHandler initialize(HandlerOptions parsedOpts) {
    if(parsedOpts.has("usage")) {
        printUsage();
        System.exit(0);
    }

    super.initialize(parsedOpts);

    translatePOITypes = parsedOpts.getFlag(TRANSLATE_POI_TYPES_OPTION, true, false);

    fillAddrOpts = new HashSet<String>(parsedOpts.getList("fill_addresses",
            Arrays.asList("ref", "levels", "nearest", "obj", "trans", "alt_names")));

    exportAllNames = parsedOpts.getFlag("export_all_names", true, false);
    fullGeometry = parsedOpts.getFlag("full_geometry", true, true);

    localAdminKeys = parsedOpts.getList("local_admin", Arrays.asList("boundary:6"));

    localityKeys = parsedOpts.getList("locality",
            Arrays.asList("place:city", "place:town", "place:village", "place:hamlet", "boundary:8"));

    // Read the "neighborhood" option here: reading "locality" again made
    // the documented neighborhood option a no-op and replaced the
    // neighborhood defaults with the locality keys whenever locality was set.
    neighborhoodKeys = parsedOpts.getList("neighborhood",
            Arrays.asList("place:town", "place:village", "place:hamlet", "place:neighbour", "boundary:9", "boundary:10"));

    String poiCatalogPath = parsedOpts.getString("poi_catalog", "jar");

    if ("no".equals(poiCatalogPath)) {
        log.info("No osm-doc, skipping poi processing");
    }
    else {
        // A single xml file (or the embedded "jar" catalog) is read by the
        // file reader, anything else is treated as a folder.
        if(poiCatalogPath.endsWith(".xml") || poiCatalogPath.equals("jar")) {
            reader = new DOCFileReader(poiCatalogPath);
        }
        else {
            reader = new DOCFolderReader(poiCatalogPath);
        }
        osmDocFacade = new OSMDocFacade(reader, null);
    }

    // The out file may be given either under the null key or as out=<file>.
    if(parsedOpts.has(null)) {
        initializeWriter(parsedOpts.getString(null, null));
    }
    else {
        initializeWriter(parsedOpts.getString("out", null));
    }

    tagStatPath = parsedOpts.getString("tag-stat", null);
    if(tagStatPath == null) {
        tagStatistics = new LogTagsStatisticCollector();
    }
    else {
        tagStatistics = new ExportTagsStatisticCollector();
    }

    sort = OutGazetteerSort.valueOf(StringUtils.upperCase(
            parsedOpts.getString("sort", OutGazetteerSort.UNIQUE.name())));
    isort = parsedOpts.getFlag("isort", true, false);

    // Duplicates filters are needed for the UNIQUE mode only.
    if(sort == OutGazetteerSort.UNIQUE) {
        plcbndIdSet = new TSynchronizedLongSet(new TLongHashSet());
        hghwayIdSet = new TSynchronizedLongSet(new TLongHashSet());
    }

    return this;
}
/**
 * Appends option names of this handler to the given default options.
 *
 * @param defOptions default options, modified in place
 * @return the same collection, extended with options of this handler
 * */
@Override
protected Collection<String> getHandlerArguments(
        Collection<String> defOptions) {
    for(String option : OPTIONS) {
        defOptions.add(option);
    }
    return defOptions;
}
/**
 * Prints handler usage (list of options with short descriptions)
 * to the standard output.
 * */
protected void printUsage() {
    StringBuilder usage = new StringBuilder();

    // Space before [out_file], otherwise it is glued to the handler name.
    usage.append("Usage: join --handlers ").append(NAME).append(" [out_file]");

    int i = 0;
    for(String opt : OPTIONS) {
        usage.append(" ").append("[").append(opt).append("[=<val>]]");
        // wrap the options list
        if(i%3 == 0 && i > 0) {
            usage.append("\n\t");
        }
        i++;
    }
    usage.append("\n");
    usage.append("\n");

    usage.append("\tout=<file | - > - File where to store results. Use - for stdout. ")
        .append("Files with .gz or .bz2 extensions will be compressed.");
    usage.append("\n");
    usage.append("\n");

    usage.append("\tlocal_admin=boundary:1 boundary:2 place:town - Boundary levels, which will be matched as local_admin addr level.");
    usage.append("\n");
    usage.append("\n");

    usage.append("\tlocality=... Same as local_admin. Boundary levels, which will be matched as locality addr level.");
    usage.append("\n");
    usage.append("\n");

    usage.append("\tneighborhood=... Same as local_admin. Boundary levels, which will be matched as neighborhood addr level.");
    usage.append("\n");
    usage.append("\n");

    // Space between the two sentences was missing.
    usage.append("\tpoi_catalog=<jar | no | path> Path to file .xml or folder with osm-doc poi classificator. ")
        .append("jar - default value, (use embedded catalog), no - skip poi classificator processing and poi readig.");
    usage.append("\n");
    usage.append("\n");

    usage.append("\ttranslate_poi_types [=true|false] Export or not poi types translations. False if missed.");
    usage.append("\n");
    usage.append("\n");

    usage.append("\texport_all_names [=true|false] Export or not all object tags with 'name' in key name. False if missed.");
    usage.append("\n");
    usage.append("\n");

    usage.append("\tfull_geometry [=true|false] Export objects' full geometry. True if missed.");
    usage.append("\n");
    usage.append("\n");

    usage.append("\tfill_addresses=[ref] [levels] [nearest] [obj] [trans] [alt_names] Which parts of addresses to fill.");
    usage.append("\n");
    usage.append("\t\tref - add ref object with ids of mathced objects for each addr level as ref.level");
    usage.append("\n");
    usage.append("\t\tlevels - Match addresses to levels or not.");
    usage.append("\n");
    usage.append("\t\tnearest - Export nearest and nearby places neighbours and streets.");
    usage.append("\n");
    usage.append("\t\tobj - Export original address row object.");
    usage.append("\n");
    usage.append("\t\ttrans - Translate or not names for addr parts.");
    usage.append("\n");
    usage.append("\t\talt_names - Export addr parts alternative names.");
    usage.append("\n");
    usage.append("\n");

    // sort and isort are options of their own, not values of
    // fill_addresses, so they are printed on the top level and
    // each one on its own line.
    usage.append("\tsort - Type of sort [NONE (Skip sorting) | ID (Sort by id type-ghash-osmid) | "
            + "HIERARCHICAL (Sort with dependencies) | UNIQUE (Do not sort results, but skip duplicates) ].");
    usage.append("\n");
    usage.append("\n");

    usage.append("\tisort - Inverse order");
    usage.append("\n");
    usage.append("\n");

    usage.append("\tusage Print this message and exit.");
    usage.append("\n");

    System.out.println(usage.toString());
}
/**
 * Writes poi point with its address row.
 * Pois without address row are skipped.
 * */
@Override
protected void handlePoiPointAddrRow(JSONObject object, JSONObject address,
        String stripe) {

    // nothing to export for poi without address
    if(address == null) {
        return;
    }

    JSONFeature feature = new JSONFeature();
    boolean filled = fillObject(feature, address, object);
    if(!filled) {
        return;
    }

    fillPOI(feature, object, address.getString("poiAddrMatch"));

    println(feature.toString());
    flush();

    poipntc.incrementAndGet();
}
/**
 * Writes address point with its address row.
 * */
@Override
protected void handleAddrNodeAddrRow(JSONObject object, JSONObject address,
        String stripe) {

    JSONFeature feature = new JSONFeature();
    if(!fillObject(feature, address, object)) {
        return;
    }

    println(feature.toString());
    flush();

    adrpntc.incrementAndGet();
}
/**
 * Writes highway segment with its address row.
 * If the duplicates filter is on (hghwayIdSet is not null),
 * rows with already seen uid are skipped.
 * */
@Override
protected void handleHighwayAddrRow(JSONObject object, JSONObject address,
        String stripe) {

    if(hghwayIdSet != null) {
        String uid = AddrRowValueExctractorImpl.getUID(
                object, address, object.getString("ftype"));

        // first 8 bytes of md5 are used as a compact key of the uid
        byte[] digest = DigestUtils.md5(uid);
        long key = ByteBuffer.wrap(digest).getLong();

        boolean firstTime = hghwayIdSet.add(key);
        if(!firstTime) {
            return;
        }
    }

    JSONFeature feature = new JSONFeature();
    if(!fillObject(feature, address, object)) {
        return;
    }

    println(feature.toString());
    flush();

    hghwayc.incrementAndGet();
}
/**
 * Stores highway network row into the temporary file as
 * "id TAB json" line. Stored rows are merged later in allDone().
 * */
@Override
protected synchronized void handleHighwayNetAddrRow(JSONObject object, JSONObject address,
        String stripe) {

    JSONFeature feature = new JSONFeature();
    if(!fillObject(feature, address, object)) {
        return;
    }

    String line = object.getString("id") + "\t" + feature.toString();
    getHgNetWriter().println(line);
}
/**
 * Returns writer for highway networks temporary file.
 * The file and the writer are created on the first call.
 *
 * @return writer for "hghnets.tmp.gz", closed in allDone()
 * @throws RuntimeException if the file can't be created,
 *         original exception is kept as the cause
 * */
private PrintWriter getHgNetWriter() {
    try {
        if(this.hghnetWriter == null) {
            this.hghNetFile = new File("hghnets.tmp.gz");
            // close in allDone
            this.hghnetWriter = FileUtils.getPrintWriter(hghNetFile, false);
        }
    }
    catch (Exception e) {
        // keep the cause, otherwise the real reason of the failure is lost
        throw new RuntimeException("Can't create file for highways networks", e);
    }
    return this.hghnetWriter;
}
/**
 * Writes place boundary with its address row.
 * Boundaries without address row are skipped. If the duplicates
 * filter is on (plcbndIdSet is not null), rows with already
 * seen uid are skipped too.
 * */
@Override
protected void handlePlaceBoundaryAddrRow(JSONObject object, JSONObject address,
        String stripe) {

    if(address == null) {
        return;
    }

    if(plcbndIdSet != null) {
        String uid = AddrRowValueExctractorImpl.getUID(
                object, address, object.getString("ftype"));

        // first 8 bytes of md5 are used as a compact key of the uid
        byte[] digest = DigestUtils.md5(uid);
        long key = ByteBuffer.wrap(digest).getLong();

        boolean firstTime = plcbndIdSet.add(key);
        if(!firstTime) {
            return;
        }
    }

    JSONFeature feature = new JSONFeature();
    if(!fillObject(feature, address, object)) {
        return;
    }

    println(feature.toString());
    flush();

    plcbndc.incrementAndGet();
}
/**
 * Writes place point with its address row.
 * */
@Override
protected void handlePlacePointAddrRow(JSONObject object,
        JSONObject address, String stripe) {

    JSONFeature feature = new JSONFeature();
    if(!fillObject(feature, address, object)) {
        return;
    }

    println(feature.toString());
    flush();

    plcpntc.incrementAndGet();
}
/**
 * Writes administrative boundary with its address row.
 * Only rows from stripes with "binx" in the stripe name are written.
 * */
@Override
protected void handleAdminBoundaryAddrRow(JSONObject object,
        JSONObject address, String stripe) {

    if(!StringUtils.contains(stripe, "binx")) {
        return;
    }

    JSONFeature feature = new JSONFeature();
    if(!fillObject(feature, address, object)) {
        return;
    }

    println(feature.toString());
    flush();

    admbndc.incrementAndGet();
}
/**
 * Fill fields, common for all kind of objects
 *
 * Any exception is logged and reported with the false result,
 * so one broken object doesn't stop the whole export.
 *
 * @param result feature to fill
 * @param addrRow address row of the object, may be null
 * @param jsonObject source object
 * @return true if result was filled, false if filling has failed
 * */
protected boolean fillObject(JSONFeature result, JSONObject addrRow, JSONObject jsonObject) {
    try {
        String ftype = jsonObject.getString("ftype");
        String rowId = AddrRowValueExctractorImpl.getUID(jsonObject, addrRow, ftype);

        result.put(GAZETTEER_SCHEME_ID, rowId);
        result.put(GAZETTEER_SCHEME_FEATURE_ID, jsonObject.getString("id"));
        result.put(GAZETTEER_SCHEME_TYPE, ftype);
        result.put(GAZETTEER_SCHEME_TIMESTAMP, jsonObject.getString("timestamp"));

        if(fillAddrOpts.contains("obj")) {
            result.put(GAZETTEER_SCHEME_ADDRESS, addrRow);
        }

        Set<String> langs = getLangs(addrRow);
        Map<String, JSONObject> mapLevels = mapLevels(addrRow);

        putName(result, ftype, mapLevels, jsonObject, addrRow);
        putAltNames(result, ftype, mapLevels, jsonObject, addrRow);
        putNameTranslations(result, ftype, mapLevels, jsonObject, addrRow, langs);

        if(exportAllNames) {
            result.put("all_names", new JSONObject(AddressesUtils.filterNameTags(jsonObject)));
        }

        if(fillAddrOpts.contains("nearest")) {
            putNearbyStreets(result, ftype, mapLevels, jsonObject, langs);
            putNearbyPlaces(result, ftype, mapLevels, jsonObject, langs);
        }

        // refs are collected even if they are not exported,
        // they are used for the hierarchy hash below
        JSONObject refs = new JSONObject();

        if(fillAddrOpts.contains("levels") || fillAddrOpts.contains("ref")) {
            String minLVL = putAddrParts(result, refs, addrRow, mapLevels, langs);

            if(fillAddrOpts.contains("ref")) {
                result.put(GAZETTEER_SCHEME_REFS, refs);
            }

            if(minLVL != null) {
                result.put(GAZETTEER_SCHEME_ADDR_LEVEL, minLVL);
            }
        }

        JSONObject properties = jsonObject.optJSONObject("properties");
        if(properties != null) {
            result.put(GAZETTEER_SCHEME_TAGS, properties);
        }

        JSONObject centroid = getCentroid(jsonObject, ftype);
        if(centroid != null) {
            result.put(GAZETTEER_SCHEME_CENTER_POINT, centroid);
        }

        if(FeatureTypes.HIGHWAY_NET_FEATURE_TYPE.equals(ftype)) {
            result.put("members", jsonObject.get("members"));
            result.put("geometries", jsonObject.get("geometries"));
        }

        if(fullGeometry) {
            JSONObject geom = getFullGeometry(jsonObject, ftype);
            if(geom != null) {
                Geometry g = GeoJsonWriter.parseGeometry(geom);
                // invalid geometries are not exported
                if(g != null && g.isValid()) {
                    // Locale.ROOT keeps the lower case type name independent
                    // from the default locale of the JVM
                    String esGeomType = geom.getString(GAZETTEER_SCHEME_TYPE).toLowerCase(Locale.ROOT);
                    geom.put(GAZETTEER_SCHEME_TYPE, esGeomType);
                    result.put(GAZETTEER_SCHEME_FULL_GEOMETRY, geom);
                }
            }
        }

        result.put("hhash", getHierarchyHash(rowId, addrRow, mapLevels, refs));

        // timestamp is excluded from the hash of the content
        String md5Hex = DigestUtils.md5Hex(JSONHash.asCanonicalString(result, new HashSet<String>(Arrays.asList("timestamp"))));
        result.put("md5", md5Hex);

        return true;
    }
    catch (Exception e) {
        // Take the id from the source object: result may have no id yet,
        // if the failure has happened before it was filled.
        String featureId = jsonObject != null ? jsonObject.optString("id") : "";
        log.error("Can't write {}", featureId, e);
        return false;
    }
}
/**
 * Builds hierarchy path of the object: refs of address levels from
 * admin0 down to the street, followed by the row id, joined with "/".
 * If there is no ref for the street, street_name of the address row
 * is used instead.
 *
 * @return hierarchy path or null, if there are no levels at all
 * */
protected String getHierarchyHash(String rowId, JSONObject addrRow,
        Map<String, JSONObject> mapLevels, JSONObject refs) {

    String[] upperLevels = new String[] {
            "admin0", "admin1", "admin2",
            "local_admin", "locality", "neighborhood"};

    List<String> path = new ArrayList<>();

    for(String level : upperLevels) {
        String ref = refs.optString(level, null);
        if(ref != null) {
            path.add(ref);
        }
    }

    String street = refs.optString("street", null);
    if(street == null) {
        street = addrRow.optString("street_name", null);
    }
    if(street != null) {
        path.add(street);
    }

    if(path.isEmpty()) {
        return null;
    }

    path.add(rowId);

    return StringUtils.join(path, "/");
}
/**
 * Fills poi specific fields: poi classes, translations of poi types,
 * refs to matched addresses, parsed additional tags and keywords.
 *
 * Does nothing if there is no osm-doc catalog. If none of poi types
 * is known to the catalog, only the list of poi classes is written.
 * */
protected void fillPOI(JSONFeature result, JSONObject jsonObject,
        String poiAddrMatch) {

    // Skip poi processing if we don't have osm-doc
    if (osmDocFacade == null) {
        return;
    }

    JSONArray typesArray = jsonObject.getJSONArray("poiTypes");
    JSONObject tags = jsonObject.getJSONObject("properties");

    result.put(GAZETTEER_SCHEME_POI_CLASS, typesArray);

    // resolve type names into catalog features, unknown types are dropped
    List<Feature> knownClasses = new ArrayList<Feature>();
    int typesCount = typesArray.length();
    for(int idx = 0; idx < typesCount; idx++) {
        Feature feature = osmDocFacade.getFeature(typesArray.getString(idx));
        if(feature != null) {
            knownClasses.add(feature);
        }
    }

    if(knownClasses.isEmpty()) {
        return;
    }

    if(translatePOITypes) {
        JSONObject typeNames = new JSONObject();
        for(Feature feature : knownClasses) {
            String title = feature.getTitle();
            JSONObject byLang = new JSONObject();
            for(String lang : L10n.supported) {
                byLang.put(lang, L10n.tr(title, Locale.forLanguageTag(lang)));
            }
            typeNames.put(feature.getName(), byLang);
        }
        result.put(GAZETTEER_SCHEME_POI_TYPE_NAMES, typeNames);
    }

    fillPoiAddrRefs(result, jsonObject, poiAddrMatch);

    Map<String, List<Val>> parsedValues = new HashMap<String, List<Val>>();
    JSONObject moreTags = osmDocFacade.parseMoreTags(
            knownClasses, tags, tagStatistics, parsedValues);
    result.put("more_tags", moreTags);

    LinkedHashSet<String> keywords = new LinkedHashSet<String>();
    osmDocFacade.collectKeywords(knownClasses, parsedValues, keywords, null);
    result.put(GAZETTEER_SCHEME_POI_KEYWORDS, new JSONArray(keywords));
}
/**
 * Writes the kind of poi to address match and ids of matched
 * addresses (as refs.poi_addresses).
 *
 * Does nothing if poiAddrMatch is null. For the "boundaries" match
 * the list of matched addresses stays empty.
 *
 * @param result feature to fill
 * @param jsonObject source poi, with joinedAddresses
 * @param poiAddrMatch kind of the match, key inside of joinedAddresses
 * */
protected void fillPoiAddrRefs(JSONFeature result, JSONObject jsonObject,
        String poiAddrMatch) {

    if(poiAddrMatch == null) {
        return;
    }

    result.put(GAZETTEER_SCHEME_POI_ADDR_MATCH, poiAddrMatch);

    JSONArray poiAddrRefs = new JSONArray();

    if(!"boundaries".equals(poiAddrMatch)) {
        Object matchedAddresses = jsonObject.getJSONObject("joinedAddresses").opt(poiAddrMatch);

        // matched addresses are stored either as one object or as an array
        if(matchedAddresses instanceof JSONObject) {
            poiAddrRefs.put(((JSONObject)matchedAddresses).getString("id"));
        }
        else if(matchedAddresses instanceof JSONArray) {
            JSONArray maa = (JSONArray) matchedAddresses;
            for(int i = 0; i < maa.length(); i++) {
                poiAddrRefs.put(maa.getJSONObject(i).getString("id"));
            }
        }
    }

    // refs are written by fillObject only if the "ref" fill option is on,
    // so getJSONObject("refs") used to throw for every matched poi when
    // that option was off. Create the object on demand instead.
    JSONObject refs = result.optJSONObject("refs");
    if(refs == null) {
        refs = new JSONObject();
        result.put("refs", refs);
    }
    refs.put("poi_addresses", poiAddrRefs);
}
/**
 * Returns center point of the object.
 *
 * For highways it is the point in the middle of the line, for objects
 * with center_point that point is returned as is, for others the
 * coordinates of the geometry are used.
 *
 * @param jsonObject source object
 * @param ftype feature type of the object
 * @return object with lon and lat, or null if the computed coordinates
 *         are not finite numbers
 * */
protected JSONObject getCentroid(JSONObject jsonObject, String ftype) {

    double lon;
    double lat;

    if(FeatureTypes.HIGHWAY_FEATURE_TYPE.equals(ftype)) {
        LineString ls = GeoJsonWriter.getLineStringGeometry(
                jsonObject.getJSONObject(GeoJsonWriter.GEOMETRY)
                    .getJSONArray(GeoJsonWriter.COORDINATES));

        Coordinate c = new LocatePoint(ls, ls.getLength() * 0.5).getPoint();
        lon = c.x;
        lat = c.y;
    }
    else if (jsonObject.has("center_point")) {
        return jsonObject.getJSONObject("center_point");
    }
    else {
        JSONArray coords = jsonObject.getJSONObject(GeoJsonWriter.GEOMETRY)
                .getJSONArray(GeoJsonWriter.COORDINATES);

        lon = coords.getDouble(0);
        lat = coords.getDouble(1);
    }

    // The same check for both branches: highways were checked for
    // infinity only, so NaN went into JSONObject.put, which rejects
    // non-finite numbers, and the whole object was lost.
    if(Double.isInfinite(lon) || Double.isNaN(lon) ||
            Double.isInfinite(lat) || Double.isNaN(lat)) {
        return null;
    }

    JSONObject result = new JSONObject();
    result.put("lon", lon);
    result.put("lat", lat);

    return result;
}
/**
 * Returns full geometry of the object, stored in its meta.
 * For place points the geometry of the matched boundary is used.
 *
 * @return full geometry or null, if there is no one
 * */
protected JSONObject getFullGeometry(JSONObject jsonObject, String ftype) {

    if(!FeatureTypes.PLACE_POINT_FTYPE.equals(ftype)) {
        return jsonObject.getJSONObject(GeoJsonWriter.META)
                .optJSONObject("fullGeometry");
    }

    JSONObject boundary = jsonObject.optJSONObject("matchedBoundary");
    if(boundary == null) {
        return null;
    }

    JSONObject boundaryMeta = boundary.getJSONObject(GeoJsonWriter.META);
    return boundaryMeta.optJSONObject(GeoJsonWriter.FULL_GEOMETRY);
}
/**
 * Writes address levels, from admin0 down to the housenumber,
 * into result and refs.
 *
 * @return name of the last level in that order which is present
 *         in mapLevels, or null if mapLevels is null or has none of them
 * */
protected String putAddrParts(JSONObject result, JSONObject refs,
        JSONObject addrRow, Map<String, JSONObject> mapLevels,
        Set<String> langs) {

    if(mapLevels == null) {
        return null;
    }

    String deepest = null;

    // levels with fixed keys: {key in mapLevels, name of the level}
    String[][] adminLevels = new String[][] {
            {"boundary:2", "admin0"},
            {"boundary:3", "admin1"},
            {"boundary:4", "admin2"}};

    for(String[] pair : adminLevels) {
        JSONObject part = mapLevels.get(pair[0]);
        putAddrLevel(result, refs, langs, part, pair[1]);
        if(part != null) {
            deepest = pair[1];
        }
    }

    // configurable levels, each one should differ from the upper one
    JSONObject localAdmin = getNotNull(mapLevels, localAdminKeys, null);
    putAddrLevel(result, refs, langs, localAdmin, "local_admin");
    if(localAdmin != null) {
        deepest = "local_admin";
    }

    JSONObject locality = getNotNull(mapLevels, localityKeys, localAdmin);
    putAddrLevel(result, refs, langs, locality, "locality");
    if(locality != null) {
        deepest = "locality";
    }

    JSONObject neighborhood = getNotNull(mapLevels, neighborhoodKeys, locality);
    putAddrLevel(result, refs, langs, neighborhood, "neighborhood");
    if(neighborhood != null) {
        deepest = "neighborhood";
    }

    JSONObject street = mapLevels.get("street");
    putAddrLevel(result, refs, langs, street, "street");
    if(street != null) {
        deepest = "street";
    }

    JSONObject hn = mapLevels.get("hn");
    if(hn != null) {
        if(fillAddrOpts.contains("levels")) {
            result.put("housenumber", hn.optString("name"));
        }
        deepest = "housenumber";
    }

    return deepest;
}
/**
 * Returns the first address part, in order of the given levels, which
 * is present in mapLevels and whose id differs from the id of the
 * upper part.
 *
 * @param upper part of the upper level, may be null (no comparison then)
 * @return found part or null
 * */
protected JSONObject getNotNull(Map<String, JSONObject> mapLevels,
        List<String> levels, JSONObject upper) {

    for(String lvl : levels) {
        JSONObject candidate = mapLevels.get(lvl);
        if(candidate == null) {
            continue;
        }

        boolean sameAsUpper = upper != null
                && getAddrPartId(upper).equals(getAddrPartId(candidate));

        if(!sameAsUpper) {
            return candidate;
        }
    }

    return null;
}
/**
 * Returns id of the address part: its lnk, or its name
 * if lnk is missed or blank.
 * */
protected String getAddrPartId(JSONObject upper) {
    String lnk = upper.optString("lnk");
    boolean hasLnk = StringUtils.stripToNull(lnk) != null;
    return hasLnk ? lnk : upper.optString("name");
}
/**
 * Writes one address level: its name, alternative names and
 * translations into result, and its link into refs.
 *
 * Names are written only if the "levels" fill option is on.
 * The link is written independently from it.
 *
 * @param result feature to write names to
 * @param refs object to write the link of the level to
 * @param langs languages of the address row
 * @param admin0 address part of the level, nothing is done for null
 * @param key name of the level, used as a prefix for result keys
 * */
protected void putAddrLevel(JSONObject result, JSONObject refs,
        Set<String> langs, JSONObject admin0, String key) {

    if(admin0 == null) {
        return;
    }

    if(fillAddrOpts.contains("levels")) {
        String name = admin0.optString("name");
        if(StringUtils.isNotBlank(name)) {
            result.put(key + "_name", name);

            JSONObject namesHash = admin0.optJSONObject("names");
            Map<String, String> names = AddressesUtils.filterNameTags(namesHash);
            names.remove("name");

            filterNamesByLangs(names, langs);

            if(!names.isEmpty() && fillAddrOpts.contains("alt_names")) {
                result.put(key + "_alternate_names", new JSONArray(names.values()));
            }

            JSONObject translations = AddressesUtils.getNamesTranslations(namesHash, langs);
            if(translations != null && translations.length() > 0 && fillAddrOpts.contains("trans")) {
                result.put(key + "_name_trans", translations);
            }
        }
    }

    // The link was written only together with names, so
    // fill_addresses=ref without levels produced empty refs, although
    // address parts are processed for "levels" or "ref".
    String lnk = admin0.optString("lnk");
    if(StringUtils.isNotEmpty(lnk)) {
        refs.put(key, lnk);
    }
}
/**
 * Removes names for languages which were not asked for.
 *
 * Keys without ":" are always kept. Key with ":" (for instance
 * name:ru) is kept only if one of its parts is in langs.
 *
 * @param names names by tag key, modified in place, may be null
 * @param langs languages to keep, null is treated as an empty set
 * */
protected void filterNamesByLangs(Map<String, String> names, Set<String> langs) {
    if(names == null) {
        return;
    }

    Iterator<Entry<String, String>> iterator = names.entrySet().iterator();
    while (iterator.hasNext()) {
        String key = iterator.next().getKey();

        if(!key.contains(":")) {
            continue;
        }

        // "continue" inside of the inner loop used to skip to the next
        // part of the key only, so every localized name was removed.
        boolean requested = false;
        if(langs != null) {
            for(String part : key.split(":")) {
                if(langs.contains(part)) {
                    requested = true;
                    break;
                }
            }
        }

        if(!requested) {
            iterator.remove();
        }
    }
}
/**
 * Collects not blank values of the langs array of the address row.
 *
 * @param addrRow address row, may be null
 * @return set of languages, empty if there are no languages
 * */
protected Set<String> getLangs(JSONObject addrRow) {
    Set<String> collected = new HashSet<String>();

    JSONArray langs = addrRow == null ? null : addrRow.optJSONArray("langs");
    if(langs == null) {
        return collected;
    }

    int count = langs.length();
    for(int idx = 0; idx < count; idx++) {
        String lang = langs.optString(idx);
        if(!StringUtils.isBlank(lang)) {
            collected.add(lang);
        }
    }

    return collected;
}
/**
 * Writes nearest place, nearest neighbour and nearby places
 * (nearestCity, nearestNeighbour and neighbourCities of the source).
 *
 * Nearest place and nearest neighbour are written only if they have
 * not blank place tag and a name. Nearby places are written if they
 * have a name.
 * */
protected void putNearbyPlaces(JSONObject result, String ftype,
        Map<String, JSONObject> mapLevels, JSONObject jsonObject, Set<String> langs) {

    if(jsonObject.has("nearestCity")) {
        JSONObject place = asNearbyPlace(
                jsonObject.getJSONObject("nearestCity"), langs, true);
        if(place != null) {
            result.put(GAZETTEER_SCHEME_NEAREST_PLACE, place);
        }
    }

    if(jsonObject.has("nearestNeighbour")) {
        JSONObject place = asNearbyPlace(
                jsonObject.getJSONObject("nearestNeighbour"), langs, true);
        if(place != null) {
            result.put(GAZETTEER_SCHEME_NEAREST_NEIGHBOUR, place);
        }
    }

    if(jsonObject.has("neighbourCities")) {
        JSONArray neighbours = jsonObject.getJSONArray("neighbourCities");
        List<JSONObject> places = new ArrayList<JSONObject>();

        for(int idx = 0; idx < neighbours.length(); idx++) {
            JSONObject place = asNearbyPlace(neighbours.getJSONObject(idx), langs, false);
            if(place != null) {
                places.add(place);
            }
        }

        result.put(GAZETTEER_SCHEME_NEARBY_PLACES, new JSONArray(places));
    }
}

/**
 * Converts source place into id, name, names and place type.
 * Feature type of place voronoi cells in the id is replaced with
 * the feature type of place points.
 *
 * @param placeTagRequired return null for places with blank place tag
 * @return converted place or null, if the place should be skipped
 * */
private JSONObject asNearbyPlace(JSONObject placeSRC, Set<String> langs,
        boolean placeTagRequired) {

    String placeString = placeSRC.getJSONObject("properties").optString("place");
    if(placeTagRequired && StringUtils.isBlank(placeString)) {
        return null;
    }

    JSONObject place = asIdNameNames(placeSRC, langs);
    if(place == null) {
        return null;
    }

    place.put("place", placeString);

    if(place.has(GAZETTEER_SCHEME_ID)) {
        String id = place.getString(GAZETTEER_SCHEME_ID);
        place.put(GAZETTEER_SCHEME_ID, StringUtils.replace(id,
                FeatureTypes.PLACE_DELONEY_FTYPE, FeatureTypes.PLACE_POINT_FTYPE));
    }

    return place;
}
/**
 * Writes nearby streets (nearbyStreets of the source) with their
 * ids, names and highway types. Streets without name are skipped.
 * Nothing is written if the source has no nearby streets.
 * */
protected void putNearbyStreets(JSONObject result, String ftype,
        Map<String, JSONObject> mapLevels, JSONObject jsonObject, Set<String> langs) {

    if(!jsonObject.has("nearbyStreets")) {
        return;
    }

    JSONArray streetsSRC = jsonObject.getJSONArray("nearbyStreets");
    if(streetsSRC.length() == 0) {
        return;
    }

    JSONArray streets = new JSONArray();
    for(int idx = 0; idx < streetsSRC.length(); idx++) {
        JSONObject streetSRC = streetsSRC.getJSONObject(idx);

        JSONObject street = asIdNameNames(streetSRC, langs);
        if(street == null) {
            continue;
        }

        JSONObject properties = streetSRC.optJSONObject("properties");
        street.put("highway", properties.optString("highway"));
        streets.put(street);
    }

    result.put(GAZETTEER_SCHEME_NEARBY_STREETS, streets);
}
/**
 * Converts source object into a short form: id, name, alternative
 * names (if the alt_names fill option is on) and translations of the
 * name (if the trans fill option is on).
 *
 * @return short form of the object or null, if it has no name
 * */
protected JSONObject asIdNameNames(JSONObject src, Set<String> langs) {
    JSONObject brief = new JSONObject();
    brief.put(GAZETTEER_SCHEME_ID, src.getString("id"));

    JSONObject properties = src.optJSONObject("properties");
    Map<String, String> nameTags = AddressesUtils.filterNameTags(properties);

    // objects without name are not exported
    if(!nameTags.containsKey("name")) {
        return null;
    }

    brief.put("name", nameTags.get("name"));
    nameTags.remove("name");

    boolean withAltNames = !nameTags.isEmpty() && fillAddrOpts.contains("alt_names");
    if(withAltNames) {
        brief.put("alt_names", new JSONArray(nameTags.values()));
    }

    JSONObject translations = AddressesUtils.getNamesTranslations(properties, langs);
    boolean withTranslations = translations != null
            && translations.length() > 0
            && fillAddrOpts.contains("trans");
    if(withTranslations) {
        brief.put("name_trans", translations);
    }

    return brief;
}
/**
 * Writes translations of the object name as name_trans.
 * Address points are skipped.
 * */
protected void putNameTranslations(JSONObject result, String ftype,
        Map<String, JSONObject> mapLevels, JSONObject jsonObject, JSONObject addrRow, Set<String> langs) {

    if(FeatureTypes.ADDR_POINT_FTYPE.equals(ftype)) {
        return;
    }

    JSONObject translations = AddressesUtils.getNamesTranslations(
            jsonObject.optJSONObject("properties"), langs);

    boolean hasTranslations = translations != null && translations.length() > 0;
    if(hasTranslations) {
        result.put("name_trans", translations);
    }
}
/**
 * Writes values of name tags of the object, except of the name itself,
 * as alt_names. Address points are skipped.
 * */
protected void putAltNames(JSONObject result, String ftype,
        Map<String, JSONObject> mapLevels, JSONObject jsonObject, JSONObject addrRow) {

    if(FeatureTypes.ADDR_POINT_FTYPE.equals(ftype)) {
        return;
    }

    Map<String, String> nameTags = AddressesUtils.filterNameTags(
            jsonObject.optJSONObject("properties"));
    nameTags.remove("name");

    if(nameTags.isEmpty()) {
        return;
    }

    result.put("alt_names", new JSONArray(nameTags.values()));
}
/**
 * Writes name of the object, if the object has name
 * in its properties.
 * */
protected void putName(JSONObject result, String ftype,
        Map<String, JSONObject> mapLevels, JSONObject jsonObject, JSONObject addrRow) {

    JSONObject tags = jsonObject.optJSONObject("properties");
    if(tags == null || !tags.has("name")) {
        return;
    }

    result.put("name", tags.getString("name"));
}
/**
 * Maps parts of the address row by their levels (lvl).
 *
 * @param addrRow address row, may be null
 * @return parts by level, or null if there is no address row
 *         or its parts can't be read
 * */
protected Map<String, JSONObject> mapLevels(JSONObject addrRow) {
    if(addrRow == null) {
        return null;
    }

    try {
        JSONArray parts = addrRow.getJSONArray("parts");
        Map<String, JSONObject> byLevel = new HashMap<String, JSONObject>();

        int count = parts.length();
        for(int idx = 0; idx < count; idx++) {
            JSONObject part = parts.getJSONObject(idx);
            byLevel.put(part.getString("lvl"), part);
        }

        return byLevel;
    }
    catch (JSONException e) {
        // broken address row is treated as a row without levels
        return null;
    }
}
/**
 * Remembers path of the out file and initializes the writer for it.
 * Without the out file usage is printed and the JVM exits with code 1.
 * */
@Override
protected void initializeWriter(String file) {
    boolean noOutFile = (file == null);
    if(noOutFile) {
        System.out.println("There is no out file");
        printUsage();
        System.exit(1);
    }

    this.outFile = file;
    super.initializeWriter(this.outFile);
}
/**
 * Finishes the export: merges stored highway networks into the out
 * file, closes the out writer, sorts results, writes tags statistics
 * and logs counters of written objects.
 * */
@Override
public void allDone() {

    // Highway networks are written into the default writer,
    // so merge them before super.allDone() call which will close it.
    if(this.hghnetWriter != null) {
        finishHighwayNetworks();
    }

    // Flush and close out writer
    super.allDone();

    sortResults();
    writeTagStat();

    log.info("Wrote poi points: {}", poipntc.get());
    log.info("Wrote address points: {}", adrpntc.get());
    log.info("Wrote highway segments: {}", hghwayc.get());
    log.info("Wrote highway networks: {}", hghnetc.get());
    log.info("Wrote place boundaries: {}", plcbndc.get());
    log.info("Wrote place points: {}", plcpntc.get());
    log.info("Wrote admin boundaries: {}", admbndc.get());
}

/**
 * Closes the temporary file with highway networks, merges its
 * content into the out writer and removes the file.
 * */
private void finishHighwayNetworks() {
    this.hghnetWriter.flush();
    this.hghnetWriter.close();

    log.info("Merge higway networks");
    mergeAndSortHghnets();

    // cleanup
    this.hghNetFile.delete();
    log.info("Done merge highway networks");
}
/**
 * Sorts the out file with the external sort and writes sorted
 * lines back into the same file. Nothing is done for NONE and
 * UNIQUE sort modes.
 *
 * @throws RuntimeException if sorting has failed
 * */
private void sortResults() {
    if(OutGazetteerSort.NONE == sort || OutGazetteerSort.UNIQUE == sort) {
        log.info("Skip sort.");
        return;
    }

    log.info("Sorting results. Sort: {}, inverse order: {}", sort, isort);
    try {
        Charset utf8 = Charset.forName("utf-8");
        File file = new File(this.outFile);

        // Read with the same charset which is used for sorting and merging,
        // reader without charset depends on the platform default encoding.
        // NOTE(review): assumes the out file is written as utf-8 - confirm
        // against the writer created by initializeWriter.
        BufferedReader fbr = new BufferedReader(
                new InputStreamReader(FileUtils.getFileIS(file), utf8));

        Comparator<String> cmp = OutGazetteerSort.HIERARCHICAL == sort
                ? new JSONHComparator(isort) : new JSONByIdComparator(isort);

        List<File> batch = ExternalSort.sortInBatch(fbr, file.length(), cmp,
                ExternalSort.DEFAULTMAXTEMPFILES, ExternalSort.estimateAvailableMemory(),
                utf8, null, true, 0, true);

        log.trace("Done ExternalSort.sortInBatch");

        // reopen the out file, sorted lines replace its content
        initializeWriter(outFile);

        ExternalSort.mergeSortedFiles(batch, new BufferedWriter(writer),
                cmp, utf8, true, true);

        log.trace("Done ExternalSort.mergeSortedFiles");
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Sorts stored highway networks rows with the external sort and
 * passes sorted lines to HgnetMergerFakeWriter, created over
 * this handler.
 *
 * @throws RuntimeException if sorting or merging has failed
 * */
private void mergeAndSortHghnets() {
    try {
        Charset utf8 = Charset.forName("utf-8");

        // Read with the same charset which is used for sorting and merging,
        // reader without charset depends on the platform default encoding.
        // NOTE(review): assumes the temporary file is written as utf-8 -
        // confirm against FileUtils.getPrintWriter.
        BufferedReader fbr = new BufferedReader(
                new InputStreamReader(FileUtils.getFileIS(this.hghNetFile), utf8));

        Comparator<String> cmp = OutGazetteerSort.HIERARCHICAL == sort
                ? new JSONHComparator(isort) : new JSONByIdComparator(isort);

        // NOTE(review): batches are sorted with the default comparator, but
        // merged with cmp. If there is more than one batch, a merge with
        // a different order may break the order of lines - confirm that
        // it is intended.
        List<File> batch = ExternalSort.sortInBatch(
                fbr, this.hghNetFile.length(),
                ExternalSort.defaultcomparator,
                ExternalSort.DEFAULTMAXTEMPFILES,
                ExternalSort.estimateAvailableMemory(),
                utf8, null,
                false, 0, true);

        ExternalSort.mergeSortedFiles(batch, new HgnetMergerFakeWriter(this),
                cmp, utf8, false, true);

        log.trace("Done ExternalSort.mergeSortedFiles");
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Writes collected tags usage statistics, one json object per line,
 * into the file given with the tag-stat option.
 * Nothing is done if the option is missed or blank.
 *
 * @throws RuntimeException if the file can't be written,
 *         original exception is kept as the cause
 * */
private void writeTagStat() {
    if(StringUtils.isBlank(tagStatPath)) {
        return;
    }

    log.info("Write tag usage statistics to {}", tagStatPath);

    Collection<JSONObject> usage = ((ExportTagsStatisticCollector)tagStatistics).asJson();

    // try-with-resources closes the writer even if writing fails
    try (PrintWriter printwriter = FileUtils.getPrintWriter(new File(tagStatPath), false)) {
        for(JSONObject jo : usage) {
            printwriter.println(jo.toString());
        }
        printwriter.flush();
    }
    catch (IOException e) {
        // exception without message and cause hides the reason of the failure
        throw new RuntimeException("Can't write tag usage statistics to " + tagStatPath, e);
    }
}
/**
 * Counts and writes one merged highway network into the out writer.
 * Default access.
 *
 * @param string line to write
 * */
void writeMergedHGHNET(String string) {
    hghnetc.incrementAndGet();
    println(string);
}
}