/******************************************************************************* * Gisgraphy Project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA * * Copyright 2008 Gisgraphy project * David Masclet <davidmasclet@gisgraphy.com> * * *******************************************************************************/ package com.gisgraphy.importer; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.hibernate.FlushMode; import org.hibernate.exception.ConstraintViolationException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Required; import com.gisgraphy.addressparser.format.BasicAddressFormater; import com.gisgraphy.domain.geoloc.entity.AlternateName; import com.gisgraphy.domain.geoloc.entity.City; import com.gisgraphy.domain.geoloc.entity.CitySubdivision; import com.gisgraphy.domain.geoloc.entity.GisFeature; import com.gisgraphy.domain.geoloc.entity.OpenStreetMap; import com.gisgraphy.domain.geoloc.entity.ZipCode; import com.gisgraphy.domain.repository.ICityDao; import com.gisgraphy.domain.repository.ICitySubdivisionDao; import com.gisgraphy.domain.repository.IIdGenerator; import com.gisgraphy.domain.repository.IOpenStreetMapDao; import com.gisgraphy.domain.repository.ISolRSynchroniser; import com.gisgraphy.domain.valueobject.GISSource; import com.gisgraphy.domain.valueobject.NameValueDTO; import com.gisgraphy.domain.valueobject.SpeedMode; import com.gisgraphy.fulltext.FullTextSearchEngine; import com.gisgraphy.geoloc.GeolocSearchEngine; import com.gisgraphy.helper.GeolocHelper; import com.gisgraphy.helper.StringHelper; import com.gisgraphy.street.StreetType; import com.gisgraphy.util.StringUtil; import com.vividsolutions.jts.geom.LineString; import com.vividsolutions.jts.geom.Point; /** * Import the street from an (pre-processed) openStreet map data file . * * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a> */ public class OpenStreetMapSimpleImporter extends AbstractSimpleImporterProcessor { protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapSimpleImporter.class); public static final int DISTANCE = 40000; BasicAddressFormater formater = BasicAddressFormater.getInstance(); LabelGenerator labelGenerator = LabelGenerator.getInstance(); @Autowired protected IIdGenerator idGenerator; @Autowired protected IOpenStreetMapDao openStreetMapDao; @Autowired protected ISolRSynchroniser solRSynchroniser; @Autowired protected IMunicipalityDetector municipalityDetector; @Autowired protected ICityDao cityDao; @Autowired protected ICitySubdivisionDao citySubdivisionDao; private static final Pattern pattern = Pattern.compile("(\\w+)\\s\\d+.*",Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); public static final Float SUBURB_MAX_DISTANCE = 5000f; /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear() */ @Override protected void flushAndClear() { openStreetMapDao.flushAndClear(); } @Override protected void setup() { super.setup(); //temporary disable logging when importing FullTextSearchEngine.disableLogging=true; GeolocSearchEngine.disableLogging=true; logger.info("reseting Openstreetmap generatedId"); idGenerator.sync(); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles() */ @Override protected File[] getFiles() { return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenStreetMapDir()); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns() */ @Override protected int getNumberOfColumns() { return 10; } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String) */ @Override protected void processData(String line) throws ImporterException { String[] fields = line.split("\t"); // // old Line table has the following fields : // --------------------------------------------------- //0: id; 1 name; 2 location; 3 length ;4 countrycode; 5 : gid ; //6 type; 7 oneway; 8 : shape; 9 : Alternate names // // new table has the following fields : // --------------------------------------------------- //0: id; 1: name; 2: location; 3: length ; 4: countrycode; 5 : is_in; 6: postcode; 7: is_in_adm; // 8: type; 9: oneway;10: shape; 11: max_speed; 12: lanes; 13: toll; 14: surface; 15 azimuth start ; 16 azimut end; 17 : street ref (http://wiki.openstreetmap.org/wiki/Key:ref) 18: alternatenames; // checkNumberOfColumn(fields); OpenStreetMap street = new OpenStreetMap(); street.setSource(GISSource.OSM); // set id if (!isEmptyField(fields, 0, false)) { Long openstreetmapId= null; try { openstreetmapId = new Long(fields[0].trim()); } catch (NumberFormatException e) { logger.warn("can not get openstreetmap id for "+fields[0]); } street.setOpenstreetmapId(openstreetmapId); } // set name if (!isEmptyField(fields, 1, false)) { street.setName(fields[1].trim()); StringHelper.updateOpenStreetMapEntityForIndexation(street); } //location if (!isEmptyField(fields, 2, false)) { try { Point location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[2]); street.setLocation(location); } catch (RuntimeException e) { logger.warn("can not parse location for "+fields[1]+" : "+e); return; } } if (!isEmptyField(fields, 3, false)) { Double length; try { length = new Double(fields[3].trim()); street.setLength(length); } catch (NumberFormatException e) { e.printStackTrace(); } } if (!isEmptyField(fields, 4, false)) { street.setCountryCode(fields[4].trim()); } //5 is_in see behind //6 zip if (!isEmptyField(fields, 6, false)) { populateZip(fields[6].trim(),street); } //7 is_in_adm for future use //bypass //8 streettype if (!isEmptyField(fields, 8, false)) { StreetType type; try { type = StreetType.valueOf(fields[8].toUpperCase()); street.setStreetType(type); } catch (Exception e) { logger.warn("can not determine streetType for "+fields[0]+"/"+fields[8]+" : "+e); street.setStreetType(StreetType.UNCLASSIFIED); } } //9 one way if (!isEmptyField(fields, 9, false)) { boolean oneWay = false; try { oneWay = fields[9].equals("t"); street.setOneWay(oneWay); } catch (Exception e) { logger.warn("can not determine oneway for "+fields[1]+"/"+fields[9]+" : "+e); } } //10 shape if (!isEmptyField(fields, 10, true)) { try { street.setShape((LineString)GeolocHelper.convertFromHEXEWKBToGeometry(fields[10])); } catch (RuntimeException e) { logger.warn("can not parse shape for "+fields[0]+"/"+fields[10] +" : "+e); return; } } //11 max speed if (!isEmptyField(fields,11 , false)) { PopulateMaxSpeed(street,fields[11]); } //12 lanes if (!isEmptyField(fields,12 , false)) { try { Integer lanes = Integer.parseInt(fields[12]); street.setLanes(lanes); } catch (NumberFormatException e) { logger.warn("can not parse lanes for "+fields[0]+"/"+fields[12] +" : "+e); } } //13 toll if (!isEmptyField(fields, 13, false)) { if (fields[13].equalsIgnoreCase("yes") || fields[13].equalsIgnoreCase("snowmobile")){ street.setToll(true); } } //14 surface if (!isEmptyField(fields, 14, false)) { street.setSurface(fields[14].trim()); } //5 is_in /*if (!isEmptyField(fields, 5, false)) { street.setIsIn(fields[5].trim()); }*/ if (shouldFillIsInField()) { //we try to process is_in fields, because we want to fill adm and zip too setIsInFields(street); } long generatedId= idGenerator.getNextGId(); street.setGid(new Long(generatedId)); //azimuth *2 if (!isEmptyField(fields, 15, false)){ street.setAzimuthStart(parseAzimuth(fields[15])); } if (!isEmptyField(fields, 16, false)){ street.setAzimuthEnd(parseAzimuth(fields[16])); } if (!isEmptyField(fields, 17, false)){ street.setStreetRef(fields[17]); } //alternate names if (fields.length == 19 && !isEmptyField(fields, 18, false)){ populateAlternateNames(street,fields[18]); } //labels if (street.getName() !=null){ street.setAlternateLabels(labelGenerator.generateLabels(street)); street.setLabel(labelGenerator.generateLabel(street)); street.setFullyQualifiedName(labelGenerator.getFullyQualifiedName(street, false)); street.setLabelPostal(labelGenerator.generatePostal(street)); } try { openStreetMapDao.save(street); } catch (ConstraintViolationException e) { logger.error("Can not save "+dumpFields(fields)+"(ConstraintViolationException) we continue anyway but you should consider this",e); }catch (Exception e) { logger.error("Can not save "+dumpFields(fields)+" we continue anyway but you should consider this",e); } } protected void populateZip(String zipAsString, OpenStreetMap osm) { String[] zips = zipAsString.split(";|\\||,"); for (int i = 0;i<zips.length;i++){ osm.addIsInZip(zips[i]); } if (osm.getIsInZip()!=null && osm.getIsInZip().size() >0){ osm.setZipCode(labelGenerator.getBestZipString(osm.getIsInZip())); } } private void setBestZip(OpenStreetMap street) { //we set the zipcode as the best one (when necessary) if (street.getIsInZip()!=null && street.getIsInZip().size() >0 && street.getZipCode()==null){ street.setZipCode(labelGenerator.getBestZipString(street.getIsInZip())); } } protected Integer parseAzimuth(String azimutStr) { if (azimutStr==null){ return null; } Float azimuth = null; try { azimuth = Float.parseFloat(azimutStr); if (azimuth == null || azimuth.intValue()<0 || azimuth >360){ return null; } return azimuth.intValue(); } catch (NumberFormatException e) { logger.warn("can not parse azimuth "+azimutStr +" : "+e); return null; } } protected void PopulateMaxSpeed(OpenStreetMap street, String string) { if (string!=null && string.trim()!=""){ String[] fields= string.split("___"); String trimField= ""; if (fields.length>=1){ trimField = fields[0].trim(); if (!"".equals(trimField) && StringUtil.containsDigit(trimField)){ street.setMaxSpeed(trimField); street.setSpeedMode(SpeedMode.OSM); } } if (fields.length>=2){ trimField = fields[1].trim(); if (!"".equals(trimField) && StringUtil.containsDigit(trimField)){ street.setMaxSpeedBackward(trimField); street.setSpeedMode(SpeedMode.OSM); } } if (fields.length==3){ trimField = fields[2].trim(); if (!"".equals(trimField) && street.getMaxSpeed()==null && StringUtil.containsDigit(trimField)){ street.setMaxSpeed(trimField); street.setSpeedMode(SpeedMode.OSM); } } } } OpenStreetMap populateAlternateNames(OpenStreetMap street, String alternateNamesAsString) { return ImporterHelper.populateAlternateNames(street, alternateNamesAsString); } protected void setIsInFields(OpenStreetMap street) { if (street != null && street.getLocation() != null) { //first search By Shape because it is the more reliable : City cityByShape = cityDao.getByShape(street.getLocation(),street.getCountryCode(),true); if (cityByShape != null){ street.setIsIn(cityByShape.getName()); street.setCityId(cityByShape.getId()); street.setCityConfident(true); street.setPopulation(cityByShape.getPopulation()); if (street.getZipCode()== null && cityByShape.getZipCodes() != null) {//only if the zipcode is not previously set with the value from CSV for (ZipCode zip:cityByShape.getZipCodes()){ street.addIsInZip(zip.getCode()); } } if (cityByShape.getAlternateNames()!=null){ for (AlternateName name : cityByShape.getAlternateNames() ){ if (name!=null && name.getName()!=null){ street.addIsInCitiesAlternateName(name.getName()); } } } //we add the name of the city as well as the alternatename, so we can search in one field (only is_in_city) if (cityByShape.getName()!=null & !"".equals(cityByShape.getName().trim())){ street.addIsInCitiesAlternateName(cityByShape.getName()); } setAdmNames(street, cityByShape); //AFTER setting admnames, we took the best one street.setIsInAdm(getBestAdmName(cityByShape));//cityByShape.getAdm().getName() //set the is_in_place CitySubdivision subdivision = citySubdivisionDao.getByShape(street.getLocation(),cityByShape.getCountryCode()); if (subdivision !=null){ street.setIsInPlace(subdivision.getName()); } setBestZip(street); return; } // List<City> cities = getNearestCities(street.getLocation(),street.getCountryCode()); City city = getNearestCityFromList(cities, true); if (city != null) { street.setPopulation(city.getPopulation()); setAdmNames(street, city); //AFTER setting admnames, we took the best one if (street.getIsInAdm()==null){ street.setIsInAdm(getBestAdmName(city)); } if (street.getZipCode()== null && city.getZipCodes() != null) {//only if the zipcode is not previously set with the value from CSV for (ZipCode zip:city.getZipCodes()){ if (zip != null && zip.getCode()!=null){ street.addIsInZip(zip.getCode()); } } } if (city.getName() != null && street.getIsIn()==null) {//only if it has not be set by the openstreetmap is_in field //we can here have some concordance problem if the city found is not the one populate in the osm is_in fields. street.setIsIn(pplxToPPL(city.getName())); street.setCityId(city.getId()); } if (city.getAlternateNames()!=null){ for (AlternateName name : city.getAlternateNames() ){ if (name!=null && name.getName()!=null){ street.addIsInCitiesAlternateName(name.getName()); } } } } // City city2 = getNearestCityFromList(cities, false); if (city2 != null) { if (city != null){ if (city.getFeatureId() == city2.getFeatureId()) { setBestZip(street); return; } if (city2.getLocation()!=null && city.getLocation()!=null && GeolocHelper.distance(street.getLocation(),city2.getLocation())>GeolocHelper.distance(street.getLocation(),city.getLocation())){ setBestZip(street); return; } } //we got a non municipality that is nearest, we set isinPlace tag and update is_in if needed if (city2.getPopulation() != null && city2.getPopulation() != 0 && (street.getPopulation() == null || street.getPopulation() == 0)) { street.setPopulation(city2.getPopulation()); } if (street.getIsIn() == null) { street.setIsIn(pplxToPPL(city2.getName())); street.setCityId(city2.getId()); } else { street.setIsInPlace(pplxToPPL(city2.getName())); } setAdmNames(street, city2); //AFTER setting admnames, we took the best one if (street.getIsInAdm()==null){ street.setIsInAdm(getBestAdmName(city2)); } if (street.getZipCode()== null && city2.getZipCodes() != null ) {//we merge the zipcodes for is_in and is_in_place, so we don't check //only if the zipcode is not previously set with the value from CSV //if zipcodes are already filled for (ZipCode zip:city2.getZipCodes()){ if (zip!=null && zip.getCode()!=null){ street.addIsInZip(zip.getCode()); } } /*if (street.getIsInZip()!=null && street.getIsInZip().size() >0){ street.setZipCode(labelGenerator.getBestZipString(street.getIsInZip())); }*/ } if (city==null && city2!=null){//add AN only if there are not added yet if (city2.getAlternateNames()!=null){ for (AlternateName name : city2.getAlternateNames() ){ if (name!=null && name.getName()!=null){ street.addIsInCitiesAlternateName(name.getName()); } } } } } setBestZip(street); } } protected void setAdmNames(OpenStreetMap street,City city) { if (city != null && street !=null) { //we only set admnames if it is not already filled if (city.getAdm5Name() != null && street.getAdm5Name()==null) { street.setAdm5Name(city.getAdm5Name()); } if (city.getAdm4Name() != null && street.getAdm4Name()==null) { street.setAdm4Name(city.getAdm4Name()); } if (city.getAdm3Name() != null && street.getAdm3Name()==null) { street.setAdm3Name(city.getAdm3Name()); } if (city.getAdm2Name() != null && street.getAdm2Name()==null) { street.setAdm2Name(city.getAdm2Name()); } if (city.getAdm1Name() != null && street.getAdm1Name()==null) { street.setAdm1Name(city.getAdm1Name()); } } } protected String getBestAdmName(GisFeature gisFeature) { if (gisFeature != null) { if (gisFeature.getCountryCode()!= null && formater.getAdmLevelByContryCode(gisFeature.getCountryCode())!=0){ int level = formater.getAdmLevelByContryCode(gisFeature.getCountryCode()); if (level == 1) { return gisFeature.getAdm1Name(); } else if (level == 2) { if (gisFeature.getAdm2Name()!=null){ return gisFeature.getAdm2Name(); } else { return gisFeature.getAdm1Name(); } } else if (level == 3) { if (gisFeature.getAdm3Name()!=null){ return gisFeature.getAdm3Name(); } else { return gisFeature.getAdm1Name(); } } else if (level == 4) { if (gisFeature.getAdm4Name()!=null){ return gisFeature.getAdm4Name(); } else { return gisFeature.getAdm1Name(); } }else if (level == 5) { if (gisFeature.getAdm5Name()!=null){ return gisFeature.getAdm5Name(); } else { return gisFeature.getAdm1Name(); } } else { return null; } } if (gisFeature.getAdm1Name() != null) { return gisFeature.getAdm1Name(); } else if (gisFeature.getAdm2Name() != null) { return gisFeature.getAdm2Name(); } else if (gisFeature.getAdm3Name() != null) { return gisFeature.getAdm3Name(); } else if (gisFeature.getAdm4Name() != null) { return gisFeature.getAdm4Name(); }else if (gisFeature.getAdm5Name() != null) { return gisFeature.getAdm5Name(); } else { return null; } } else { return null; } } /*protected City getNearestCity(Point location, String countryCode, boolean filterMunicipality) { if (location ==null){ return null; } return cityDao.getNearest(location, countryCode, filterMunicipality, DISTANCE); }*/ protected List<City> getNearestCities(Point location, String countryCode) { if (location ==null){ return null; } List<City> cities = cityDao.getNearests(location, countryCode, false, DISTANCE,10); if (cities ==null || cities.size()==0){ return null; } else { return cities; } } protected City getNearestCityFromList(List<City> cities, boolean filterMunicipality) { if (cities ==null){ return null; } for (City city:cities){ if (!filterMunicipality ){ return city; } else if (filterMunicipality) if (!city.isMunicipality()){ continue; } else { return city; } } return null; } /** * tests if city is a paris district, if so it is probably a pplx that is newly considered as ppl http://forum.geonames.org/gforum/posts/list/2063.page */ protected String pplxToPPL(String cityName){ if (cityName!=null){ Matcher matcher = pattern.matcher(cityName); if (matcher.find()) { return matcher.group(1); } else { return cityName; } } else { return cityName; } } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped() */ @Override public boolean shouldBeSkipped() { return !importerConfig.isOpenstreetmapImporterEnabled(); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode() */ @Override protected void setCommitFlushMode() { this.openStreetMapDao.setFlushMode(FlushMode.COMMIT); this.cityDao.setFlushMode(FlushMode.COMMIT); } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments() */ @Override protected boolean shouldIgnoreComments() { return true; } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine() */ @Override protected boolean shouldIgnoreFirstLine() { return false; } /* (non-Javadoc) * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback() */ public List<NameValueDTO<Integer>> rollback() { List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>(); logger.info("deleting openstreetmap entities..."); int deleted = openStreetMapDao.deleteAll(); if (deleted != 0) { deletedObjectInfo .add(new NameValueDTO<Integer>(openStreetMapDao.getPersistenceClass().getSimpleName(), deleted)); } logger.info(deleted + " openstreetmap entities have been deleted"); resetStatus(); return deletedObjectInfo; } @Override //TODO test protected void tearDown() { super.tearDown(); FullTextSearchEngine.disableLogging=false; GeolocSearchEngine.disableLogging=false; String savedMessage = this.statusMessage; try { this.statusMessage = internationalisationService.getString("import.message.createIndex"); openStreetMapDao.createSpatialIndexes(); this.statusMessage = internationalisationService.getString("import.fulltext.optimize"); //solRSynchroniser.optimize(); } catch (Exception e) { logger.error("an error occured during spatial index creation, we ignore it but you have to manually run it to have good performances : "+e.getMessage(),e); } finally{ this.statusMessage=savedMessage; } } /** * overidded because alternatenames can be null so number of fields can differ * * @see #getNumberOfColumns() * @param fields * The array to check */ @Override protected void checkNumberOfColumn(String[] fields) { if (fields.length != 19 && fields.length != 18) { throw new WrongNumberOfFieldsException( "The number of fields is not correct. expected : " + getNumberOfColumns() + ", founds : " + fields.length+ ". details :"+dumpFields(fields)); } } protected boolean shouldFillIsInField(){ return importerConfig.isOpenStreetMapFillIsIn(); } @Required public void setOpenStreetMapDao(IOpenStreetMapDao openStreetMapDao) { this.openStreetMapDao = openStreetMapDao; } @Required public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) { this.solRSynchroniser = solRSynchroniser; } @Required public void setIdGenerator(IIdGenerator idGenerator) { this.idGenerator = idGenerator; } @Required public void setMunicipalityDetector(IMunicipalityDetector municipalityDetector) { this.municipalityDetector = municipalityDetector; } @Required public void setCityDao(ICityDao cityDao) { this.cityDao = cityDao; } @Required public void setCitySubdivisionDao(ICitySubdivisionDao citySubdivisionDao) { this.citySubdivisionDao = citySubdivisionDao; } }