/*******************************************************************************
* Gisgraphy Project
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
*
* Copyright 2008 Gisgraphy project
* David Masclet <davidmasclet@gisgraphy.com>
*
*
*******************************************************************************/
package com.gisgraphy.importer;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.hibernate.FlushMode;
import org.hibernate.exception.ConstraintViolationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;
import com.gisgraphy.addressparser.format.BasicAddressFormater;
import com.gisgraphy.domain.geoloc.entity.AlternateName;
import com.gisgraphy.domain.geoloc.entity.City;
import com.gisgraphy.domain.geoloc.entity.GisFeature;
import com.gisgraphy.domain.geoloc.entity.OpenStreetMap;
import com.gisgraphy.domain.geoloc.entity.PostOffice;
import com.gisgraphy.domain.geoloc.entity.ZipCode;
import com.gisgraphy.domain.repository.ICityDao;
import com.gisgraphy.domain.repository.IGisFeatureDao;
import com.gisgraphy.domain.repository.IIdGenerator;
import com.gisgraphy.domain.repository.ISolRSynchroniser;
import com.gisgraphy.domain.valueobject.AlternateNameSource;
import com.gisgraphy.domain.valueobject.GISSource;
import com.gisgraphy.domain.valueobject.NameValueDTO;
import com.gisgraphy.domain.valueobject.Output;
import com.gisgraphy.domain.valueobject.Output.OutputStyle;
import com.gisgraphy.fulltext.FullTextSearchEngine;
import com.gisgraphy.helper.GeolocHelper;
import com.gisgraphy.helper.StringHelper;
import com.vividsolutions.jts.geom.Point;
/**
* Imports the POIs from a (pre-processed) OpenStreetMap data file.
* The goal of this importer is to cross-reference information between Geonames and OpenStreetMap.
*
*
* @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
*/
public class OpenStreetMapPoisSimpleImporter extends AbstractSimpleImporterProcessor {
/** Distance passed to {@code cityDao.getNearest(...)} when searching for the nearest city (unit not visible here, presumably meters - TODO confirm). */
public static final int DISTANCE = 40000;
/** Logger used by this importer. */
protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapPoisSimpleImporter.class);
/** Output with the default format and the SHORT style; not referenced by the code of this class. */
public static final Output MINIMUM_OUTPUT_STYLE = Output.withDefaultFormat().withStyle(OutputStyle.SHORT);
/**
 * Matches a word followed by one whitespace character and at least one digit (e.g. "Paris 01");
 * group 1 is that word. Used by {@code pplxToPPL}.
 * NOTE(review): {@code \w} is ASCII-only unless UNICODE_CHARACTER_CLASS is set, so names with
 * accented letters are only partially captured - confirm whether this is intended.
 */
private static final Pattern pattern = Pattern.compile("(\\w+)\\s\\d+.*",Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
/** Address formatter instance; not referenced by the code of this class. */
BasicAddressFormater formater = BasicAddressFormater.getInstance();
/** Generates the label, the alternate labels, the fully qualified name and the best zip code of a POI. */
LabelGenerator labelGenerator = LabelGenerator.getInstance();
/** Synchronized in {@code setup()} and then used to obtain the feature id of every created POI. */
@Autowired
protected IIdGenerator idGenerator;
/** DAO used to save the POIs, to flush/clear the session and to set the flush mode. */
@Autowired
protected IGisFeatureDao gisFeatureDao;
/** Injected synchroniser; only assigned by its setter in the active code of this class. */
@Autowired
protected ISolRSynchroniser solRSynchroniser;
/** Turns the amenity tags of a line into the {@link GisFeature} objects to import. */
OsmAmenityToPlacetype osmAmenityToPlacetype = new OsmAmenityToPlacetype();
/** DAO used to find the city that contains, or is the nearest to, a POI. */
@Autowired
protected ICityDao cityDao;
/**
 * Tells whether the is_in related fields of the imported POIs must be filled.
 *
 * @return the value of {@code importerConfig.isOpenStreetMapFillIsIn()}
 */
protected boolean shouldFillIsInField(){
    final boolean fillIsIn = importerConfig.isOpenStreetMapFillIsIn();
    return fillIsIn;
}
/**
 * Delegates the flush and clear to the {@link IGisFeatureDao} used to save the POIs.
 */
@Override
protected void flushAndClear() {
    this.gisFeatureDao.flushAndClear();
}
/**
 * Runs the setup of the super class, then switches off the logging of the
 * fulltext search engine and synchronizes the id generator.
 * NOTE(review): nothing in this class sets {@code disableLogging} back to false.
 */
@Override
protected void setup() {
    super.setup();
    // the fulltext engine logging is temporarily disabled while importing
    FullTextSearchEngine.disableLogging = true;
    this.idGenerator.sync();
}
/**
 * Lists the files to import.
 *
 * @return what {@code ImporterHelper.listCountryFilesToImport} returns for the
 *         configured OpenStreetMap POIs directory
 */
@Override
protected File[] getFiles() {
    return ImporterHelper.listCountryFilesToImport(
            importerConfig.getOpenStreetMapPoisDir());
}
/**
 * Gives the number of tab-separated columns of a line of the import file.
 *
 * @return 7 (node type, id, name, country code, alternate names, location, amenity)
 */
@Override
protected int getNumberOfColumns() {
    final int numberOfColumns = 7;
    return numberOfColumns;
}
/**
 * Imports one line of the file: builds the POIs it describes and saves them.
 * <p>
 * A line has the following tab-separated columns:
 * 0 : node type; 1 : id; 2 : name; 3 : countrycode; 4 : alternatenames;
 * 5 : location; 6 : amenity.
 * <p>
 * Errors raised while saving are logged and the import goes on; the POIs
 * of the line that follow the failing one are not saved.
 *
 * @param line the raw line to import
 * @throws ImporterException declared by the overridden method
 */
@Override
protected void processData(String line) throws ImporterException {
    final String[] columns = line.split("\t");
    checkNumberOfColumn(columns);

    // amenity : stays null when the column is empty
    final String amenity = isEmptyField(columns, 6, true) ? null : columns[6].trim();

    final List<GisFeature> features = createAndpopulatePoi(columns, amenity);
    if (features != null) {
        try {
            for (GisFeature feature : features) {
                gisFeatureDao.save(feature);
            }
        } catch (ConstraintViolationException constraintViolation) {
            logger.error("Can not save "+dumpFields(columns)+"(ConstraintViolationException) we continue anyway but you should consider this",constraintViolation);
        } catch (Exception failure) {
            logger.error("Can not save "+dumpFields(columns)+" we continue anyway but you should consider this",failure);
        }
    }
}
/**
 * Creates the POIs described by a line of the import file and populates them
 * with the content of its columns.
 * <p>
 * The objects to populate are obtained from the amenity tags (see
 * {@code osmAmenityToPlacetype.getObjectsFromTags}); each of them receives the
 * source, the OpenStreetMap id, the name, the country code, the alternate names,
 * the location, optionally the is_in fields, a feature id and its labels.
 *
 * @param fields the tab-separated columns of the line (0 : node type; 1 : id;
 *        2 : name; 3 : countrycode; 4 : alternatenames; 5 : location; 6 : amenity)
 * @param amenity the amenity tags separated by "___"; null is handled like an empty string
 * @return the populated POIs, or null if the line has to be ignored (no object list for
 *         the tags, OpenStreetMap id that is not a number, name too long, location
 *         missing or not parsable)
 */
List<GisFeature> createAndpopulatePoi(String[] fields, String amenity) {
    // a null amenity (empty column) must not make splitTags fail
    String[] tags = splitTags(amenity == null ? "" : amenity);
    List<GisFeature> pois = osmAmenityToPlacetype.getObjectsFromTags(tags);
    if (pois == null) {
        return null;
    }
    // NOTE(review): the default name is derived from PostOffice whatever the type of the poi is - confirm this is intended
    String defaultName = StringHelper.splitCamelCase(PostOffice.class.getSimpleName()).toLowerCase();
    for (GisFeature poi : pois) {
        poi.setSource(GISSource.OSM);
        // osmId
        if (!isEmptyField(fields, 1, true)) {
            String osmIdAsString = fields[1].trim();
            try {
                poi.setOpenstreetmapId(Long.parseLong(osmIdAsString));
            } catch (NumberFormatException e) {
                logger.error("can not parse openstreetmap id "+osmIdAsString);
                return null;
            }
        }
        // name
        if (!isEmptyField(fields, 2, false)) {
            String name = fields[2].trim();
            if (name.length() >= GisFeature.NAME_MAX_LENGTH) {
                logger.warn(name+ " is a too long");
                return null;
            }
            // the default name must not be overwritten by an empty (or "" quoted) name
            if ("".equals(name) || "\"\"".equals(name)) {
                poi.setName(defaultName);
            } else {
                poi.setName(name);
            }
        } else {
            poi.setName(defaultName);
        }
        // countrycode
        if (!isEmptyField(fields, 3, true)) {
            poi.setCountryCode(fields[3].trim().toUpperCase());
        }
        // populate alternatenames
        if (!isEmptyField(fields, 4, false)) {
            populateAlternateNames(poi, fields[4].trim());
        }
        // location : a poi without a valid location is useless, the line is ignored
        if (isEmptyField(fields, 5, false)) {
            return null;
        }
        try {
            Point location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[5]);
            poi.setLocation(location);
        } catch (RuntimeException e) {
            // the amenity column can be missing from the array, we don't want the log to fail
            String amenityForLog = fields.length > 6 ? fields[6] : amenity;
            logger.warn("can not parse location for "+amenityForLog+" : "+e);
            return null;
        }
        if (shouldFillIsInField()) {
            // we try to process is_in fields, because we want to fill adm and zip too
            setIsInFields(poi);
        }
        // featureId
        poi.setFeatureId(idGenerator.getNextFeatureId());
        // labels
        poi.setLabel(labelGenerator.generateLabel(poi));
        poi.setAlternateLabels(labelGenerator.generateLabels(poi));
        poi.setFullyQualifiedName(labelGenerator.getFullyQualifiedName(poi));
    }
    return pois;
}
/**
 * Splits the amenity column into its tags.
 * <p>
 * The tags are separated by "___". The returned array always has 14 slots
 * (NOTE(review): presumably the size expected by OsmAmenityToPlacetype - confirm);
 * a tag keeps the position it has in the amenity string, and the slots of
 * blank or missing tags are null. Tags beyond the 14th are ignored.
 *
 * @param amenity the tags separated by "___", may be null
 * @return an array of 14 elements, never null
 */
protected String[] splitTags(String amenity) {
    final int maxNumberOfTags = 14;
    String[] tags = new String[maxNumberOfTags];
    if (amenity == null) {
        return tags;
    }
    String[] tagsvalues = amenity.split("___");
    // never write past the end of the result, even if the line has more tags than expected
    int numberOfTags = Math.min(tagsvalues.length, maxNumberOfTags);
    for (int j = 0; j < numberOfTags; j++) {
        if (!"".equals(tagsvalues[j].trim())) {
            tags[j] = tagsvalues[j];
        }
    }
    return tags;
}
/**
 * Sets the zip code of the feature to the best one of its is_in zip codes
 * (as chosen by {@code labelGenerator.getBestZipString}), unless the feature
 * has no is_in zip code or already has a zip code.
 *
 * @param gisgeature the feature to update
 */
private void setBestZip(GisFeature gisgeature) {
    if (gisgeature.getIsInZip() == null) {
        return;
    }
    if (!(gisgeature.getIsInZip().size() > 0)) {
        return;
    }
    if (gisgeature.getZipCode() != null) {
        return;
    }
    gisgeature.setZipCode(labelGenerator.getBestZipString(gisgeature.getIsInZip()));
}
/**
 * Fills the is_in related fields of a POI (is_in, is_in place, is_in adm, zip codes,
 * alternate names of the city, population, city id) from the cities around it.
 * <ol>
 * <li>If a city is found by shape ({@code cityDao.getByShape}), it is used alone,
 * the city is flagged as confident and the method stops there.</li>
 * <li>Otherwise the nearest city searched with the municipality filter is used.</li>
 * <li>Then the nearest city searched without that filter is used to complete the
 * fields, unless it is the same city or it is farther than the one of the previous step.</li>
 * </ol>
 * In every case the zip code of the POI is finally set to the best is_in zip code.
 * Nothing is done if the POI or its location is null.
 *
 * @param poi the POI to update, may be null
 */
protected void setIsInFields(GisFeature poi) {
    if (poi == null || poi.getLocation() == null) {
        return;
    }
    // first searchByShape because it is the more reliable :
    City cityByShape = cityDao.getByShape(poi.getLocation(), poi.getCountryCode(), true);
    if (cityByShape != null) {
        poi.setIsIn(cityByShape.getName());
        poi.setCityId(cityByShape.getId());
        poi.setCityConfident(true);
        poi.setPopulation(cityByShape.getPopulation());
        copyZipCodesToPoi(cityByShape, poi);
        copyAlternateNamesToPoi(cityByShape, poi);
        if (cityByShape.getAdm() != null) {
            poi.setIsInAdm(cityByShape.getAdm().getName());
        }
        setBestZip(poi);
        return;
    }
    City city = getNearestCity(poi.getLocation(), poi.getCountryCode(), true);
    if (city != null) {
        poi.setPopulation(city.getPopulation());
        poi.setIsInAdm(getDeeperAdmName(city));
        copyZipCodesToPoi(city, poi);
        if (city.getName() != null && poi.getIsIn() == null) {// only if it has not be set by the openstreetmap is_in field
            // we can here have some concordance problem if the city found is not the one populate in the osm is_in fields.
            poi.setIsIn(pplxToPPL(city.getName()));
        }
        copyAlternateNamesToPoi(city, poi);
    }
    City city2 = getNearestCity(poi.getLocation(), poi.getCountryCode(), false);
    if (city2 != null) {
        if (city != null) {
            if (hasSameFeatureId(city, city2)) {
                setBestZip(poi);
                return;
            }
            if (city2.getLocation() != null && city.getLocation() != null
                    && GeolocHelper.distance(poi.getLocation(), city2.getLocation()) > GeolocHelper.distance(poi.getLocation(), city.getLocation())) {
                setBestZip(poi);
                return;
            }
        }
        // we got a non municipality that is nearest, we set isinPlace tag and update is_in if needed
        if (city2.getPopulation() != null && city2.getPopulation() != 0 && (poi.getPopulation() == null || poi.getPopulation() == 0)) {
            poi.setPopulation(city2.getPopulation());
        }
        if (poi.getIsIn() == null) {
            poi.setIsIn(pplxToPPL(city2.getName()));
        } else {
            poi.setIsInPlace(pplxToPPL(city2.getName()));
        }
        if (poi.getIsInAdm() == null) {
            poi.setIsInAdm(getDeeperAdmName(city2));
        }
        // we merge the zipcodes for is_in and is_in_place, so we don't check if zipcodes are already filled
        copyZipCodesToPoi(city2, poi);
        if (city == null) {// add AN only if there are not added yet
            copyAlternateNamesToPoi(city2, poi);
        }
    }
    setBestZip(poi);
}

/** Adds every non null zip code of the city to the POI. */
private void copyZipCodesToPoi(City city, GisFeature poi) {
    if (city.getZipCodes() != null) {
        for (ZipCode zip : city.getZipCodes()) {
            if (zip != null && zip.getCode() != null) {
                poi.addZip(zip.getCode());
            }
        }
    }
}

/** Adds every non null alternate name of the city to the is_in cities alternate names of the POI. */
private void copyAlternateNamesToPoi(City city, GisFeature poi) {
    if (city.getAlternateNames() != null) {
        for (AlternateName name : city.getAlternateNames()) {
            if (name != null && name.getName() != null) {
                poi.addIsInCitiesAlternateName(name.getName());
            }
        }
    }
}

/** Compares the feature ids of two cities by value (== on boxed ids would compare references). */
private boolean hasSameFeatureId(City first, City second) {
    Long firstId = first.getFeatureId();
    Long secondId = second.getFeatureId();
    return firstId == null ? secondId == null : firstId.equals(secondId);
}
/**
 * Tests if the city is a district (e.g. a Paris district, named with a word
 * followed by a number); if so it is probably a PPLX that is newly considered
 * as a PPL, and only the word that precedes the number is kept.
 * See http://forum.geonames.org/gforum/posts/list/2063.page
 *
 * @param cityName the name of the city, may be null
 * @return the first group captured by {@code pattern} if it is found in the name,
 *         the name itself otherwise (null for a null name)
 */
protected String pplxToPPL(String cityName){
    if (cityName == null) {
        return cityName;
    }
    final Matcher districtMatcher = pattern.matcher(cityName);
    return districtMatcher.find() ? districtMatcher.group(1) : cityName;
}
/**
 * Searches the city that is the nearest to a location, within {@link #DISTANCE}.
 *
 * @param location the point to search around, may be null
 * @param countryCode the country code given to the DAO
 * @param filterMunicipality the municipality filter flag given to the DAO
 * @return null if the location is null, the result of {@code cityDao.getNearest} otherwise
 */
protected City getNearestCity(Point location, String countryCode, boolean filterMunicipality) {
    return location == null
            ? null
            : cityDao.getNearest(location, countryCode, filterMunicipality, DISTANCE);
}
/**
 * Gives the name of the deepest administrative division known for a city :
 * the adm5 name if it is not null, else the adm4 name, and so on up to the adm1 name.
 *
 * @param city the city, may be null
 * @return the first non null adm name from level 5 to level 1, or null if the
 *         city is null or has no adm name
 */
protected String getDeeperAdmName(City city) {
    if (city == null) {
        return null;
    }
    if (city.getAdm5Name() != null) {
        return city.getAdm5Name();
    }
    if (city.getAdm4Name() != null) {
        return city.getAdm4Name();
    }
    if (city.getAdm3Name() != null) {
        return city.getAdm3Name();
    }
    if (city.getAdm2Name() != null) {
        return city.getAdm2Name();
    }
    if (city.getAdm1Name() != null) {
        return city.getAdm1Name();
    }
    return null;
}
/**
 * Populates the alternate names of a POI by delegating to
 * {@code ImporterHelper.populateAlternateNames}.
 *
 * @param poi the POI to populate
 * @param alternateNamesAsString the content of the alternate names column
 * @return what the helper returns
 */
GisFeature populateAlternateNames(GisFeature poi, String alternateNamesAsString) {
    final GisFeature populated = ImporterHelper.populateAlternateNames(poi, alternateNamesAsString);
    return populated;
}
/**
 * Tells whether this importer has to be skipped.
 *
 * @return true if the OpenStreetMap importer is not enabled in the importer configuration
 */
@Override
public boolean shouldBeSkipped() {
    final boolean importerEnabled = importerConfig.isOpenstreetmapImporterEnabled();
    return !importerEnabled;
}
/**
 * Sets the flush mode of the {@link IGisFeatureDao} to {@code FlushMode.COMMIT}.
 */
@Override
protected void setCommitFlushMode() {
    gisFeatureDao.setFlushMode(FlushMode.COMMIT);
}
/**
 * Tells whether the comments of the imported files are ignored.
 *
 * @return always true
 */
@Override
protected boolean shouldIgnoreComments() {
    final boolean ignoreComments = true;
    return ignoreComments;
}
/**
 * Tells whether the first line of the imported files is ignored.
 *
 * @return always false
 */
@Override
protected boolean shouldIgnoreFirstLine() {
    final boolean ignoreFirstLine = false;
    return ignoreFirstLine;
}
/**
 * Resets the status of this importer. No object is deleted : the returned
 * information only holds a count of 0 for the simple name of {@link City}.
 * NOTE(review): the log message and the returned entry mention cities although
 * this importer handles POIs - confirm whether callers rely on it.
 *
 * @return a list with a single entry (the simple name of City, 0)
 */
public List<NameValueDTO<Integer>> rollback() {
    logger.info("reseting openstreetmap cities...");
    //TODO only POI that have source openstreetmap
    final NameValueDTO<Integer> cityInfo = new NameValueDTO<Integer>(City.class.getSimpleName(), 0);
    final List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
    deletedObjectInfo.add(cityInfo);
    resetStatus();
    return deletedObjectInfo;
}
/**
 * Only runs the tear down of the super class.
 */
@Override
//TODO test
protected void tearDown() {
    super.tearDown();
    // TODO the optimization of the fulltext index (solRSynchroniser.optimize()) is
    // disabled for now; if it is restored, the status message has to be saved before
    // and restored in a finally block.
}
/**
 * Injects the Solr synchroniser.
 *
 * @param solRSynchroniser the synchroniser to set
 */
@Required
public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
    this.solRSynchroniser = solRSynchroniser;
}
/**
 * Injects the generator used to get the feature id of the created POIs.
 *
 * @param idGenerator the id generator to set
 */
@Required
public void setIdGenerator(IIdGenerator idGenerator) {
    this.idGenerator = idGenerator;
}
/**
 * Injects the DAO used to save the POIs.
 *
 * @param gisFeatureDao the DAO to set
 */
public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) {
    this.gisFeatureDao = gisFeatureDao;
}
/**
 * Injects the DAO used to find the cities around a POI.
 *
 * @param cityDao the DAO to set
 */
@Required
public void setCityDao(ICityDao cityDao) {
    this.cityDao = cityDao;
}
}