/******************************************************************************* * Gisgraphy Project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA * * Copyright 2008 Gisgraphy project * David Masclet <davidmasclet@gisgraphy.com> * * *******************************************************************************/ package com.gisgraphy.importer; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.hibernate.FlushMode; import org.hibernate.exception.ConstraintViolationException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Required; import com.gisgraphy.addressparser.format.BasicAddressFormater; import com.gisgraphy.domain.geoloc.entity.AlternateName; import com.gisgraphy.domain.geoloc.entity.City; import com.gisgraphy.domain.geoloc.entity.GisFeature; import com.gisgraphy.domain.geoloc.entity.OpenStreetMap; import com.gisgraphy.domain.geoloc.entity.PostOffice; import com.gisgraphy.domain.geoloc.entity.ZipCode; import com.gisgraphy.domain.repository.ICityDao; import com.gisgraphy.domain.repository.IGisFeatureDao; import com.gisgraphy.domain.repository.IIdGenerator; import com.gisgraphy.domain.repository.ISolRSynchroniser; import com.gisgraphy.domain.valueobject.AlternateNameSource; import com.gisgraphy.domain.valueobject.GISSource; import com.gisgraphy.domain.valueobject.NameValueDTO; import com.gisgraphy.domain.valueobject.Output; import com.gisgraphy.domain.valueobject.Output.OutputStyle; import com.gisgraphy.fulltext.FullTextSearchEngine; import com.gisgraphy.helper.GeolocHelper; import com.gisgraphy.helper.StringHelper; import com.vividsolutions.jts.geom.Point; /** * Import the POI from an (pre-processed) openStreet map data file. * The goal of this importer is to cross information between geonames and Openstreetmap. * * * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a> */ public class OpenStreetMapPoisSimpleImporter extends AbstractSimpleImporterProcessor { public static final int DISTANCE = 40000; protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapPoisSimpleImporter.class); public static final Output MINIMUM_OUTPUT_STYLE = Output.withDefaultFormat().withStyle(OutputStyle.SHORT); private static final Pattern pattern = Pattern.compile("(\\w+)\\s\\d+.*",Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); BasicAddressFormater formater = BasicAddressFormater.getInstance(); LabelGenerator labelGenerator = LabelGenerator.getInstance(); @Autowired protected IIdGenerator idGenerator; @Autowired protected IGisFeatureDao gisFeatureDao; @Autowired protected ISolRSynchroniser solRSynchroniser; OsmAmenityToPlacetype osmAmenityToPlacetype = new OsmAmenityToPlacetype(); @Autowired protected ICityDao cityDao; protected boolean shouldFillIsInField(){ return importerConfig.isOpenStreetMapFillIsIn(); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear() */ @Override protected void flushAndClear() { gisFeatureDao.flushAndClear(); } @Override protected void setup() { super.setup(); //temporary disable logging when importing FullTextSearchEngine.disableLogging=true; idGenerator.sync(); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles() */ @Override protected File[] getFiles() { return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenStreetMapPoisDir()); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns() */ @Override protected int getNumberOfColumns() { return 7; } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String) */ @Override protected void processData(String line) throws ImporterException { String[] fields = line.split("\t"); String amenityFields = null; // // Line table has the following fields : // --------------------------------------------------- //0 : Node type; 1 : id; 2 : name; 3 : countrycode;4 : alternatenames; //5 : location, 6 : amenity; // // checkNumberOfColumn(fields); //amenity if (!isEmptyField(fields, 6, true)) { amenityFields=fields[6].trim(); } List<GisFeature> pois = createAndpopulatePoi(fields,amenityFields); if (pois == null){ return; } try { for (GisFeature poi:pois){ gisFeatureDao.save(poi); } } catch (ConstraintViolationException e) { logger.error("Can not save "+dumpFields(fields)+"(ConstraintViolationException) we continue anyway but you should consider this",e); }catch (Exception e) { logger.error("Can not save "+dumpFields(fields)+" we continue anyway but you should consider this",e); } } List<GisFeature> createAndpopulatePoi(String[] fields, String amenity) { String[] tags = splitTags(amenity); List<GisFeature> pois = osmAmenityToPlacetype.getObjectsFromTags(tags); for (GisFeature poi:pois){ poi.setSource(GISSource.OSM); //osmId if (!isEmptyField(fields, 1, true)) { String osmIdAsString =fields[1].trim(); Long osmId; try { osmId = Long.parseLong(osmIdAsString); poi.setOpenstreetmapId(osmId); } catch (NumberFormatException e) { logger.error("can not parse openstreetmap id "+osmIdAsString); return null; } } // name if (!isEmptyField(fields, 2, false)) { String name=fields[2].trim(); if (name.length()>=GisFeature.NAME_MAX_LENGTH){ logger.warn(name+ " is a too long"); return null; } if (name==null || "".equals(name.trim())|| "\"\"".equals(name.trim())){ poi.setName(StringHelper.splitCamelCase(PostOffice.class.getSimpleName()).toLowerCase());//set a default name } poi.setName(name); }else { poi.setName(StringHelper.splitCamelCase(PostOffice.class.getSimpleName()).toLowerCase());//set a default name } //countrycode if (!isEmptyField(fields, 3, true)) { String countryCode=fields[3].trim().toUpperCase(); poi.setCountryCode(countryCode); } //populate alternatenames if (!isEmptyField(fields, 4, false)) { String alternateNamesAsString=fields[4].trim(); populateAlternateNames(poi,alternateNamesAsString); } //location if (!isEmptyField(fields, 5, false)) { try { Point location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[5]); poi.setLocation(location); } catch (RuntimeException e) { logger.warn("can not parse location for "+fields[6]+" : "+e); return null; } } else { return null; } if (shouldFillIsInField()) { //we try to process is_in fields, because we want to fill adm and zip too setIsInFields(poi); } //featureId poi.setFeatureId(idGenerator.getNextFeatureId()); //labels poi.setLabel(labelGenerator.generateLabel(poi)); poi.setAlternateLabels(labelGenerator.generateLabels(poi)); poi.setFullyQualifiedName(labelGenerator.getFullyQualifiedName(poi)); } return pois; } protected String[] splitTags(String amenity) { String[] tags= new String[14]; String[] tagsvalues = amenity.split("___"); for (int j =0;j<tagsvalues.length;j++){ if (!"".equals(tagsvalues[j].trim())){ tags[j]=tagsvalues[j]; } } return tags; } private void setBestZip(GisFeature gisgeature) { //we set the zipcode as the best one if (gisgeature.getIsInZip()!=null && gisgeature.getIsInZip().size() >0 && gisgeature.getZipCode()==null){ gisgeature.setZipCode(labelGenerator.getBestZipString(gisgeature.getIsInZip())); } } protected void setIsInFields(GisFeature poi) { if (poi != null && poi.getLocation() != null) { //first searchByShape because it is the more reliable : City cityByShape = cityDao.getByShape(poi.getLocation(),poi.getCountryCode(),true); if (cityByShape != null){ poi.setIsIn(cityByShape.getName()); poi.setCityId(cityByShape.getId()); poi.setCityConfident(true); poi.setPopulation(cityByShape.getPopulation()); if (cityByShape.getZipCodes() != null) { for (ZipCode zip:cityByShape.getZipCodes()){ poi.addZip(zip.getCode()); } } if (cityByShape.getAlternateNames()!=null){ for (AlternateName name : cityByShape.getAlternateNames() ){ if (name!=null && name.getName()!=null){ poi.addIsInCitiesAlternateName(name.getName()); } } } if (cityByShape.getAdm()!=null){ poi.setIsInAdm(cityByShape.getAdm().getName()); } setBestZip(poi); return; } City city = getNearestCity(poi.getLocation(),poi.getCountryCode(), true); if (city != null) { poi.setPopulation(city.getPopulation()); poi.setIsInAdm(getDeeperAdmName(city)); if (city.getZipCodes() != null) { for (ZipCode zip:city.getZipCodes()){ if (zip != null && zip.getCode()!=null){ poi.addZip(zip.getCode()); } } } if (city.getName() != null && poi.getIsIn()==null) {//only if it has not be set by the openstreetmap is_in field //we can here have some concordance problem if the city found is not the one populate in the osm is_in fields. poi.setIsIn(pplxToPPL(city.getName())); } if (city.getAlternateNames()!=null){ for (AlternateName name : city.getAlternateNames() ){ if (name!=null && name.getName()!=null){ poi.addIsInCitiesAlternateName(name.getName()); } } } } City city2 = getNearestCity(poi.getLocation(),poi.getCountryCode(), false); if (city2 != null) { if (city != null){ if (city.getFeatureId() == city2.getFeatureId()) { setBestZip(poi); return; } if (city2.getLocation()!=null && city.getLocation()!=null && GeolocHelper.distance(poi.getLocation(),city2.getLocation())>GeolocHelper.distance(poi.getLocation(),city.getLocation())){ setBestZip(poi); return; } } //we got a non municipality that is nearest, we set isinPlace tag and update is_in if needed if (city2.getPopulation() != null && city2.getPopulation() != 0 && (poi.getPopulation() == null || poi.getPopulation() == 0)) { poi.setPopulation(city2.getPopulation()); } if (poi.getIsIn() == null) { poi.setIsIn(pplxToPPL(city2.getName())); } else { poi.setIsInPlace(pplxToPPL(city2.getName())); } if (poi.getIsInAdm() == null) { poi.setIsInAdm(getDeeperAdmName(city2)); } if (city2.getZipCodes() != null ) {//we merge the zipcodes for is_in and is_in_place, so we don't check //if zipcodes are already filled for (ZipCode zip:city2.getZipCodes()){ if (zip!=null && zip.getCode()!=null){ poi.addZip(zip.getCode()); } } } if (city==null && city2!=null){//add AN only if there are not added yet if (city2.getAlternateNames()!=null){ for (AlternateName name : city2.getAlternateNames() ){ if (name!=null && name.getName()!=null){ poi.addIsInCitiesAlternateName(name.getName()); } } } } } setBestZip(poi); } } /** * tests if city is a paris district, if so it is probably a pplx that is newly considered as ppl http://forum.geonames.org/gforum/posts/list/2063.page */ protected String pplxToPPL(String cityName){ if (cityName!=null){ Matcher matcher = pattern.matcher(cityName); if (matcher.find()) { return matcher.group(1); } else { return cityName; } } else { return cityName; } } protected City getNearestCity(Point location, String countryCode, boolean filterMunicipality) { if (location ==null){ return null; } return cityDao.getNearest(location, countryCode, filterMunicipality, DISTANCE); } protected String getDeeperAdmName(City city) { if (city != null) { if (city.getAdm5Name() != null) { return city.getAdm5Name(); } else if (city.getAdm4Name() != null) { return city.getAdm4Name(); } else if (city.getAdm3Name() != null) { return city.getAdm3Name(); } else if (city.getAdm2Name() != null) { return city.getAdm2Name(); } else if (city.getAdm1Name() != null) { return city.getAdm1Name(); } else { return null; } } else { return null; } } GisFeature populateAlternateNames(GisFeature poi, String alternateNamesAsString) { return ImporterHelper.populateAlternateNames(poi,alternateNamesAsString); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped() */ @Override public boolean shouldBeSkipped() { return !importerConfig.isOpenstreetmapImporterEnabled(); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode() */ @Override protected void setCommitFlushMode() { this.gisFeatureDao.setFlushMode(FlushMode.COMMIT); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments() */ @Override protected boolean shouldIgnoreComments() { return true; } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine() */ @Override protected boolean shouldIgnoreFirstLine() { return false; } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback() */ public List<NameValueDTO<Integer>> rollback() { List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>(); logger.info("reseting openstreetmap cities..."); //TODO only POI that have source openstreetmap deletedObjectInfo .add(new NameValueDTO<Integer>(City.class.getSimpleName(), 0)); resetStatus(); return deletedObjectInfo; } @Override //TODO test protected void tearDown() { super.tearDown(); String savedMessage = this.statusMessage; /*try { FullTextSearchEngine.disableLogging=true; this.statusMessage = internationalisationService.getString("import.fulltext.optimize"); solRSynchroniser.optimize(); } finally { // we restore message in case of error this.statusMessage = savedMessage; }*/ } @Required public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) { this.solRSynchroniser = solRSynchroniser; } @Required public void setIdGenerator(IIdGenerator idGenerator) { this.idGenerator = idGenerator; } public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) { this.gisFeatureDao = gisFeatureDao; } @Required public void setCityDao(ICityDao cityDao) { this.cityDao = cityDao; } }