/* * Data Hub Service (DHuS) - For Space data distribution. * Copyright (C) 2013,2014,2015 GAEL Systems * * This file is part of DHuS software sources. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package fr.gael.dhus.search.geocoder.impl; import com.vividsolutions.jts.geom.Geometry; import com.vividsolutions.jts.geom.Point; import com.vividsolutions.jts.io.ParseException; import com.vividsolutions.jts.io.WKTReader; import com.vividsolutions.jts.io.WKTWriter; import com.vividsolutions.jts.simplify.TopologyPreservingSimplifier; import fr.gael.dhus.database.object.config.search.GeocoderConfiguration; import fr.gael.dhus.database.object.config.search.NominatimConfiguration; import fr.gael.dhus.search.geocoder.Geocoder; import fr.gael.drb.DrbAttribute; import fr.gael.drb.DrbNode; import fr.gael.drb.DrbSequence; import fr.gael.drb.impl.xml.XmlDocument; import fr.gael.drb.query.Query; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.StringTokenizer; /** * A Geocoder implementation based on Nominatim Web Service, a service * provided and hosted by Open Street Map organization. * <p> * This geocoder can be parameterized through the * "geocoder.nominatim.boundingbox" system property. If absent or set to * "true" (non case sensitive), this geocoder will query only the bounding * box of the matching place from the Nominatim Web Service i.e. the four * corners encompassing the place. Any other value, a priori "false", will * make this geocoder querying the complete polygon boundaries from the * Nominatim Web Service. This latter option may have lower performance than * simple bounding box option depending on the number of vertices composing * the place's boundaries. * </p> */ public class NominatimGeocoder implements Geocoder { /** * A logger for this class. */ private static final Logger LOGGER = LogManager.getLogger(NominatimGeocoder.class); /** * Arbitrarily small degree * Following a uncontrolled behavior of the current use of Solr Spatial, * polygons matching a strict bounding box i.e. perfect rectangle, the * coordinates have to be arbitrarily shifted */ private static final double EPSILON_DEG = 0.0001; /** * URL string of the Nominatim Web Service. */ private String nominatimUrl = "http://nominatim.openstreetmap.org"; /** * System property toggling bounding box mode (rectangle) or full * boundaries mode. */ private boolean boundingBoxFlag=false; /** * The actual maximum number of points that can be returned in a WKT * geometry. The default is 50. */ private int maxPointNumber = 50; /** * Default constructor. * @param conf configuration for geocoders, may be null. */ public NominatimGeocoder(GeocoderConfiguration conf) { if (conf == null) { LOGGER.warn("Context not present: using default values"); return; } NominatimConfiguration n_conf = conf.getNominatimConfiguration(); init(conf.getUrl(), n_conf.getMaxPointNumber(), n_conf.isBoundingBox()); } /** * Outside any context, build nominatim with user defined settings. * @param nomi_url * @param max_point_number * @param bounding_box_flag */ public NominatimGeocoder (String nomi_url, Integer max_point_number, Boolean bounding_box_flag) { init (nomi_url, max_point_number, bounding_box_flag); } /** * Initialize nominatim settings without configuration manager * @param nomi_url * @param max_point_number * @param bounding_box_flag */ private void init (String nomi_url, Integer max_point_number, Boolean bounding_box_flag) { if ((nomi_url!=null) && !nomi_url.trim ().isEmpty ()) nominatimUrl = nomi_url.trim (); if (bounding_box_flag != null) boundingBoxFlag = bounding_box_flag; if (max_point_number!=null) maxPointNumber = max_point_number; } // End NominatimGeocoder() @Override public String getName() { return this.getClass().getName(); } /** * In this implementation, the first place with a "place_rank" attribute * strictly lower that 18 and with a "geotext" not starting by "POINT" * will be considered. This avoids locations limited to a point or most * of points of low interest e.g. ENVISAT scale model at ESTEC. * * @see <a href= * "http://wiki.openstreetmap.org/wiki/Nominatim/Development_overview" * >Nominatim - Development_overview</a> for more information about * place_rank levels. */ @Override public String getBoundariesWKT(final String address) { // Prepare search URL URL nominatim_search_url; try { nominatim_search_url = new URL(nominatimUrl + "/search?format=xml&polygon_text=1&q=" + address); } catch (MalformedURLException exception) { LOGGER.warn("Malformed Nominatim request URL", exception); return null; } // Get stream result from Nominatim service XmlDocument searchresults_document = null; // Open URL stream try (InputStream input_stream = nominatim_search_url.openStream()) { // Parse result string as an XML document searchresults_document = new XmlDocument(input_stream); } catch (Exception exception) { LOGGER.warn("Cannot get response from Nominatim service: " + nominatim_search_url, exception); return null; } return computeWKT (searchresults_document); } // End getBoundariesWKT(String) /** * Compute WKT format string from Nominatim XML place list. * If boundingBoxFlag property is set to true, the bounding box of the area * will be retained for the WKT area otherwise the exact footprint is kept. * @See {@link #getBoundariesWKT(String)} */ String computeWKT (XmlDocument document) { if ((document == null) || (document.getChildrenCount() <= 0)) { LOGGER.warn("Null or empty document"); return null; } // Query places from result document DrbSequence places_sequence = new Query("(*/place[xs:int(@place_rank) < 20]" + "[@geotext]" + "[fn:not(fn:matches(@geotext, 'POINT.*'))])[1]").evaluate( document); // Return immediately if no place has been found if ((places_sequence == null) || (places_sequence.getLength() <= 0)) { return null; } // Get place node DrbNode place_node = (DrbNode)places_sequence.getItem(0); // Case of bounding box request (depends on system property) if (boundingBoxFlag) { // Get bounding box attribute DrbAttribute boundingbox_value = place_node.getAttribute("boundingbox"); // Return immediately if no bounding box attribute has been found if (boundingbox_value == null) { LOGGER.warn("Returned place \"" + place_node.getAttribute("display_name") + "\" has no \"" + "boundingbox\" attribute"); return null; } // Extract latitude and longitude extents StringTokenizer boundingbox_tokenizer = new StringTokenizer(""+boundingbox_value.getValue (), ","); double min_lat, max_lat, min_lon, max_lon; try { min_lat = Double.parseDouble(boundingbox_tokenizer.nextToken()); max_lat = Double.parseDouble(boundingbox_tokenizer.nextToken()); min_lon = Double.parseDouble(boundingbox_tokenizer.nextToken()); max_lon = Double.parseDouble(boundingbox_tokenizer.nextToken()); } catch (Exception exception) { LOGGER.warn("Error while parsing bouding box"); return null; } // Return WKT polygon // The returned polygon is slightly randomized with an epsilon // added or subtracted to the coordinates in order to avoid // perfect rectangles that seems to behave incorrectly in the // current usage of Solr Spatial. It is not said that this comes // form Solr at the current level of understanding. return "POLYGON ((" + min_lon + " " + max_lat + ", " + max_lon + " " + (max_lat + EPSILON_DEG) + ", " + (max_lon + EPSILON_DEG) + " " + min_lat + ", " + (min_lon - EPSILON_DEG) + " " + (min_lat - EPSILON_DEG) + ", " + min_lon + " " + max_lat + "))"; } // Case of full polygon boundaries request else { // Get geotext attribute DrbAttribute geotext_attribute = place_node.getAttribute("geotext"); // Return immediately if no geotext attribute has been found if (geotext_attribute == null) { LOGGER.warn("Returned place \"" + place_node.getAttribute("display_name") + "\" has no \"" + "geotext\" attribute"); return null; } // Get "geotext" WKT entry String geotext = geotext_attribute.getValue().toString(); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Retrieved footprint:" + geotext); } // Return simplified WKT if not null if (geotext != null) { try { Geometry boundaries = new WKTReader().read(geotext); boundaries = simplifyGeometry(boundaries, this.maxPointNumber); geotext = new WKTWriter().write(boundaries); } catch (ParseException exception) { LOGGER.error("Error while parsing WKT: \"" + geotext + "\"", exception); return null; } return geotext; } // Return null if no entry was found and log as necessary if (LOGGER.isDebugEnabled()) { LOGGER.warn("No boundaries found"); } return null; } } /* computeWKT */ public void setUrl (String url) { this.nominatimUrl = url; } /** * Returns a simplified version of the input geometry with a number of * points reduced to a maximum provided in parameter. * * The simplification algorithm can be outlined as follow: * <ul> * <li>returns null if the input geometry is null;</li> * <li>returns the input geometry if the input one has already a number * of points lower or equal to the maximum allowed;</li> * <li></li> * </ul> * * @param input_geometry the geometry to be simplified. * @param max_output_points the maximum number of points of output * geometry. * * @return the simplified geometry or null if the input is null. */ private static Geometry simplifyGeometry(final Geometry input_geometry, final int max_output_points) { Geometry geometry = input_geometry; double cluster_factor = 0.2; int current_point_number = -1; int previous_point_number = -2; while ((current_point_number != previous_point_number) && (geometry.getNumPoints() > max_output_points)) { previous_point_number = current_point_number; current_point_number = geometry.getNumPoints(); geometry = simplifyGeometry(geometry, max_output_points, cluster_factor); cluster_factor += 0.05; } return geometry; } private static Geometry simplifyGeometry(final Geometry input_geometry, final int max_output_points, final double cluster_factor) { // Return null if the input geometry is null if (input_geometry == null) { return null; } // Return the input geometry if the number of points is already lower // or equal to the maximum allowed if (input_geometry.getNumPoints() <= max_output_points) { return input_geometry; } // Assign local geometry to refine Geometry geometry = input_geometry; if (geometry.getNumGeometries() > 1) { geometry = convexHullOneLevel(geometry).union(); geometry = clusterizeGeometry(geometry, cluster_factor); } int current_point_number = geometry.getNumPoints(); int previous_point_number = -1; int iteration_count = 0; double tolerance = 0.005; while ((current_point_number > max_output_points) && (iteration_count < 10)) { previous_point_number = current_point_number; current_point_number = geometry.getNumPoints(); if (current_point_number == previous_point_number) { iteration_count += 1; } else { iteration_count = 0; } geometry = TopologyPreservingSimplifier.simplify(geometry, tolerance); tolerance += 0.005; } return geometry; } private static Geometry convexHullOneLevel(final Geometry geometry) { if (geometry.getNumGeometries() > 1) { Geometry [] convex_hulls = new Geometry [geometry.getNumGeometries()]; for (int igeom=0; igeom<geometry.getNumGeometries(); igeom++) { convex_hulls[igeom] = geometry.getGeometryN(igeom).convexHull(); } return geometry.getFactory().createGeometryCollection(convex_hulls); } else { return geometry.convexHull(); } } private static Geometry clusterizeGeometry(final Geometry geometry, final double distance_ratio) { if (geometry == null) { return null; } int number_geometries = geometry.getNumGeometries(); if (number_geometries > 1) { Geometry [] clustered_geometries = new Geometry [number_geometries]; for (int igeom=0; igeom<number_geometries-1; igeom++) { Geometry current_geometry = geometry.getGeometryN(igeom); Point current_centroid = current_geometry.getCentroid(); if ((current_geometry == null) || (current_centroid == null)) { // TODO Warning continue; } ArrayList<Geometry> current_cluster = new ArrayList<Geometry>(); current_cluster.add(current_geometry); for (int jgeom=igeom+1; jgeom<number_geometries; jgeom++) { Geometry next_geometry = geometry.getGeometryN(jgeom); Point next_centroid = next_geometry.getCentroid(); if ((next_geometry == null) || (next_centroid == null)) { // TODO Warning continue; } double distance = current_geometry.distance(next_geometry); double centroids_distance = current_centroid.distance(next_centroid); if (distance < (centroids_distance * distance_ratio)) { current_cluster.add(next_geometry); } } Geometry [] current_cluster_array = new Geometry [current_cluster.size()]; clustered_geometries[igeom] = geometry.getFactory().createGeometryCollection( current_cluster.toArray(current_cluster_array)); } clustered_geometries[number_geometries-1] = geometry.getGeometryN(number_geometries-1); return convexHullOneLevel( geometry.getFactory().createGeometryCollection( clustered_geometries)).union(); } else { return geometry; } } } // End NominatimGeocoder class