/******************************************************************************* * Gisgraphy Project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA * * Copyright 2008 Gisgraphy project * David Masclet <davidmasclet@gisgraphy.com> * * *******************************************************************************/ package com.gisgraphy.importer; import static com.gisgraphy.domain.geoloc.entity.GisFeature.NAME_MAX_LENGTH; import static com.gisgraphy.fulltext.FulltextQuerySolrHelper.MIN_SCORE; import java.io.File; import java.util.ArrayList; import java.util.List; import org.hibernate.FlushMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Required; import com.gisgraphy.domain.geoloc.entity.Adm; import com.gisgraphy.domain.geoloc.entity.City; import com.gisgraphy.domain.geoloc.entity.CitySubdivision; import com.gisgraphy.domain.geoloc.entity.GisFeature; import com.gisgraphy.domain.geoloc.entity.ZipCode; import com.gisgraphy.domain.repository.IAdmDao; import com.gisgraphy.domain.repository.ICityDao; import com.gisgraphy.domain.repository.ICitySubdivisionDao; import com.gisgraphy.domain.repository.IGisFeatureDao; import com.gisgraphy.domain.repository.IIdGenerator; import com.gisgraphy.domain.repository.ISolRSynchroniser; import com.gisgraphy.domain.repository.IZipCodeDao; import com.gisgraphy.domain.valueobject.GISSource; import com.gisgraphy.domain.valueobject.NameValueDTO; import com.gisgraphy.fulltext.FullTextSearchEngine; import com.gisgraphy.fulltext.FulltextQuery; import com.gisgraphy.fulltext.FulltextQuerySolrHelper; import com.gisgraphy.fulltext.FulltextResultsDto; import com.gisgraphy.fulltext.IFullTextSearchEngine; import com.gisgraphy.fulltext.SolrResponseDto; import com.gisgraphy.helper.GeolocHelper; import com.gisgraphy.helper.StringHelper; import com.gisgraphy.service.ServiceException; import com.vividsolutions.jts.geom.Point; /** * Import the zipcode from a Geonames dump file. * * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a> */ public class GeonamesZipCodeSimpleImporter extends AbstractSimpleImporterProcessor { protected static final Logger logger = LoggerFactory.getLogger(GeonamesZipCodeSimpleImporter.class); protected IGisFeatureDao gisFeatureDao; protected IAdmDao admDao; protected IFullTextSearchEngine fullTextSearchEngine; protected ISolRSynchroniser solRSynchroniser; protected ICityDao cityDao; protected ICitySubdivisionDao citySubdivisionDao; protected IZipCodeDao zipCodeDao; protected IIdGenerator IdGenerator; LabelGenerator labelGenerator = LabelGenerator.getInstance(); protected int[] accuracyToDistance = { 50000, 50000, 40000, 10000, 10000, 5000, 3000 }; /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData * (java.lang.String) */ @Override protected void processData(String line) { String[] fields = line.split("\t"); /* * line table has the following fields : * --------------------------------------------------- 0 country code : * 1 postal code 2 place name 3 admin1 name 4 admin1 code 5 admin2 name * 6 admin2 code2 7 admin3 name 8 admin3 code 9 latitude 10 longitude 11 * accuracy accuracy * * Accuracy is an integer, the higher the better : 1 : estimated as * average from numerically neigbouring postal codes 3 : same postal * code, other name 4 : place name from geonames db 6 : postal code area * centroid */ // check that the csv file line is in a correct format //checkNumberOfColumn(fields); String code = null; int accuracy = 0; Point zipPoint = null; String countryCode=null; //check required field if (!isEmptyField(fields, 0, true)) { countryCode= fields[0]; } if (!isEmptyField(fields, 1, true)) { code = fields[1]; if (ImporterHelper.isUnwantedZipCode(code)){ return; } } //check required field if (!isEmptyField(fields, 2, true)) { //nothing to do just check } if (!isEmptyField(fields, 11, false)) { accuracy = new Integer(fields[11]); } // Location if (!isEmptyField(fields, 10, true) && !isEmptyField(fields, 9, true)) { zipPoint = GeolocHelper.createPoint(new Float(fields[10]), new Float(fields[9])); } boolean found = getByShape(countryCode, code, zipPoint); if (found){ //we find a city, we return return; } Long featureId = findFeature(fields, zipPoint, getAccurateDistance(accuracy)); GisFeature gisFeature; if (featureId != null) { logger.info(dumpFields(fields) +" returns "+ featureId ); gisFeature = addAndSaveZipCodeToFeature(code, featureId); logger.info("Adding zip " + fields[1] +" to "+gisFeature); } else { logger.warn(dumpFields(fields) +" returns nothings "); gisFeature = addNewEntityAndZip(fields); logger.info("Adding new zip " + fields[1] +" to "+gisFeature); } } protected boolean getByShape(String countryCode, String code, Point zipPoint) { boolean found = false; GisFeature cityByShape = cityDao.getByShape(zipPoint,countryCode,true); if (cityByShape!=null){ ZipCode zipCode = new ZipCode(code); //if (feature.getZipCodes() == null || !feature.getZipCodes().contains(zipCode)) { cityByShape.addZipCode(zipCode); cityDao.save((City)cityByShape); found=true; } else { cityByShape = cityDao.getByShape(zipPoint,countryCode,false); if (cityByShape!=null){ ZipCode zipCode = new ZipCode(code); //if (feature.getZipCodes() == null || !feature.getZipCodes().contains(zipCode)) { cityByShape.addZipCode(zipCode); cityDao.save((City)cityByShape); found=true; } } //try with subdivision too (in addition) CitySubdivision citySubdivision = citySubdivisionDao.getByShape(zipPoint, countryCode); if (citySubdivision!=null){ ZipCode zipCode = new ZipCode(code); citySubdivision.addZipCode(zipCode); citySubdivisionDao.save(citySubdivision); found = true; } return found; } protected Long findFeature(String[] fields, Point zipPoint,int maxDistance) { String query; boolean extendedsearch; /*if (fields[3] != null) {//adm1Name query = fields[2] + " " + fields[3]; extendedsearch = true; } else {*/ query = fields[2];//name extendedsearch = false; //} FulltextResultsDto results = doAFulltextSearch(query,fields[0],zipPoint); if (results.getNumFound() == 0) { /* if (extendedsearch) { // do a basic search results = doAFulltextSearch(fields[2], fields[0],zipPoint); if (results.getResultsSize() == 0) { // oops, no results logger.error("find feature for "+query+" around "+zipPoint+" returns nothing"); return null; } else if (results.getNumFound() == 1) { // we found the one! logger.error("find feature for "+query+" around "+zipPoint+" returns (1) "+results.getResults().get(0).getName()); if (StringHelper.isSameName(fields[2], results.getResults().get(0).getName())){ return results.getResults().get(0).getFeature_id(); } else { return null; } } else { // more than one match iterate and calculate distance and // take the best one by score logger.error("find feature for "+query+" around "+zipPoint+" returns (+)"+results.getResults().get(0).getName()); if (StringHelper.isSameName(fields[2], results.getResults().get(0).getName())){ return results.getResults().get(0).getFeature_id(); } else { return null; } //return findNearest(zipPoint, maxDistance, results); } } else {*/ // no features matches in basic search! return null; // } } else if (results.getResults().size() == 1) { // we found the one! if (StringHelper.isSameName(fields[2], results.getResults().get(0).getName()) || results.getResults().get(0).getScore()> MIN_SCORE || StringHelper.isSameAlternateNames(fields[2], results.getResults().get(0).getName_alternates())){ return results.getResults().get(0).getFeature_id(); } else { return null; } } else { // more than one match iterate and calculate distance and // take the best one by score for (SolrResponseDto result : results.getResults()){ //score is important for case when we search Munchen and city name is Munich if (StringHelper.isSameName(fields[2], result.getName()) || result.getScore()> MIN_SCORE || StringHelper.isSameAlternateNames(fields[2], result.getName_alternates())){ return result.getFeature_id(); } //shortcut : if score is less than this, the next one will be automatically less, if (result.getScore()<15){ return null; } } //else { return null; //} // return findNearest(zipPoint, maxDistance, results); } } protected Long findNearest(Point zipPoint, int maxDistance, FulltextResultsDto results) { Long nearestFeatureId = null; double nearestDistance = 0; for (SolrResponseDto dto : results.getResults()) { Point dtoPoint = GeolocHelper.createPoint(new Float(dto.getLng()), new Float(dto.getLat())); if (nearestFeatureId == null) { nearestFeatureId = dto.getFeature_id(); nearestDistance = GeolocHelper.distance(zipPoint, dtoPoint); } else { double distance = GeolocHelper.distance(zipPoint, dtoPoint); if (distance > maxDistance) { logger.info(dto.getFeature_id() + " is too far and is not candidate"); } else { if (distance < nearestDistance) { logger.info(dto.getFeature_id() + "is nearest than " + nearestFeatureId); nearestFeatureId = dto.getFeature_id(); nearestDistance = distance; } } } } return nearestFeatureId; } protected int getAccurateDistance(int accuracyLevel) { if (accuracyLevel>accuracyToDistance.length-1){ accuracyLevel = accuracyToDistance.length - 1; } else if (accuracyLevel<0){ accuracyLevel = 0; } return accuracyToDistance[accuracyLevel]; } protected GisFeature addNewEntityAndZip(String[] fields) { City city = new City(); Point location = null; long nextFeatureId = IdGenerator.getNextFeatureId(); city.setFeatureId(nextFeatureId); String name = fields[2]; if (name.length() > NAME_MAX_LENGTH){ logger.warn(name + "is too long"); name= name.substring(0, NAME_MAX_LENGTH-1); } city.setName(name); // Location if (!isEmptyField(fields, 9, true) && !isEmptyField(fields, 10, true)) { location = GeolocHelper.createPoint(new Float(fields[10]), new Float(fields[9])); city.setLocation(location); city.setAdminCentreLocation(location); } city.setFeatureClass("P"); city.setFeatureCode("PPL"); city.setSource(GISSource.GEONAMES_ZIP); String countryCode=null; if (!isEmptyField(fields, 0, false)){ countryCode = fields[0]; } city.setCountryCode(countryCode); setAdmCodesWithCSVOnes(fields, city); Adm adm = null; /*if (importerConfig.isTryToDetectAdmIfNotFound()) { adm = this.admDao.suggestMostAccurateAdm(fields[0], fields[4], fields[6], fields[8], null, city); logger.info("suggestAdm=" + adm); } else { adm = this.admDao.getAdm(fields[0], fields[4], fields[6], fields[8], null); }*/ List<Adm > adms = admDao.ListByShape(location, countryCode); if (adms.size()>0){ adm = adms.get(adms.size()-1); } /*setAdmCodesWithLinkedAdmOnes(adm, gisFeature, importerConfig .isSyncAdmCodesWithLinkedAdmOnes());*/ setAdmNames(adms, city); city.setAdm(adm); setAdmCodesWithLinkedAdmOnes(adm, city, importerConfig.isSyncAdmCodesWithLinkedAdmOnes()); setAdmNames(adm, city); city.addZipCode(new ZipCode(fields[1])); city.setAlternateLabels(labelGenerator.generateLabels(city)); city.setLabel(labelGenerator.generateLabel(city)); city.setFullyQualifiedName(labelGenerator.getFullyQualifiedName(city)); cityDao.save(city); //we do not return the saved entity for test purpose return city; } protected GisFeature addAndSaveZipCodeToFeature(String code, Long featureId) { GisFeature feature = gisFeatureDao.getByFeatureId(featureId); if (feature == null) { return null; } ZipCode zipCode = new ZipCode(code); //if (feature.getZipCodes() == null || !feature.getZipCodes().contains(zipCode)) { feature.addZipCode(zipCode); return gisFeatureDao.save(feature); //} else { // logger.warn("the zipcode " + code + " already exists for feature " + featureId); //return feature; //} } private void setAdmNames(List<Adm> adms, GisFeature gisFeature) { if (adms == null) { return; } int level =1; for (Adm adm:adms){ if(adm!=null && level <=5){ gisFeature.setAdmName(level, adm.getName()); level=level+1; } } } protected FulltextResultsDto doAFulltextSearch(String query, String countryCode,Point location) { FulltextQuery fulltextQuery; try { fulltextQuery = new FulltextQuery(query); } catch (IllegalArgumentException e) { logger.error("can not create a fulltext query for "+query); return new FulltextResultsDto(); } fulltextQuery.limitToCountryCode(countryCode).around(location); fulltextQuery.withPlaceTypes(com.gisgraphy.fulltext.Constants.CITY_AND_CITYSUBDIVISION_PLACETYPE); FulltextResultsDto results; try { results = fullTextSearchEngine.executeQuery(fulltextQuery); } catch (ServiceException e) { logger.error("error when executing a fulltext search "+e.getMessage(),e); return new FulltextResultsDto(); } return results; } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped * () */ @Override public boolean shouldBeSkipped() { return !importerConfig.isGeonamesImporterEnabled(); } private void setAdmNames(Adm adm, GisFeature gisFeature) { if (adm == null) { return; } Adm admTemp = adm; do { if (admTemp.getLevel() == 1) { gisFeature.setAdm1Name(admTemp.getName()); } else if (admTemp.getLevel() == 2) { gisFeature.setAdm2Name(admTemp.getName()); } else if (admTemp.getLevel() == 3) { gisFeature.setAdm3Name(admTemp.getName()); } else if (admTemp.getLevel() == 4) { gisFeature.setAdm4Name(admTemp.getName()); } admTemp = admTemp.getParent(); } while (admTemp != null); } private void setAdmCodesWithLinkedAdmOnes(Adm adm, GisFeature gisFeature, boolean syncAdmCodesWithLinkedAdmOnes) { if (syncAdmCodesWithLinkedAdmOnes) { // reset adm code because we might link to an adm3 and adm4 code // have // been set setAdmCodesToNull(gisFeature); if (adm != null) { if (adm.getAdm1Code() != null) { gisFeature.setAdm1Code(adm.getAdm1Code()); } if (adm.getAdm2Code() != null) { gisFeature.setAdm2Code(adm.getAdm2Code()); } if (adm.getAdm3Code() != null) { gisFeature.setAdm3Code(adm.getAdm3Code()); } if (adm.getAdm4Code() != null) { gisFeature.setAdm4Code(adm.getAdm4Code()); } } } } private void setAdmCodesToNull(GisFeature gisFeature) { gisFeature.setAdm1Code(null); gisFeature.setAdm2Code(null); gisFeature.setAdm3Code(null); gisFeature.setAdm4Code(null); } private void setAdmCodesWithCSVOnes(String[] fields, GisFeature gisFeature) { logger.debug("in setAdmCodesWithCSVOnes"); if (!isEmptyField(fields, 4, false)) { gisFeature.setAdm1Code(fields[4]); } if (!isEmptyField(fields, 6, false)) { gisFeature.setAdm2Code(fields[6]); } if (!isEmptyField(fields, 8, false)) { gisFeature.setAdm3Code(fields[8]); } } /* * (non-Javadoc) * * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * shouldIgnoreFirstLine() */ @Override protected boolean shouldIgnoreFirstLine() { return false; } /* * (non-Javadoc) * * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * shouldIgnoreComments() */ @Override protected boolean shouldIgnoreComments() { return true; } /* * (non-Javadoc) * * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * setCommitFlushMode() */ @Override protected void setCommitFlushMode() { this.gisFeatureDao.setFlushMode(FlushMode.COMMIT); this.cityDao.setFlushMode(FlushMode.COMMIT); this.admDao.setFlushMode(FlushMode.COMMIT); this.zipCodeDao.setFlushMode(FlushMode.COMMIT); } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear * () */ @Override protected void flushAndClear() { this.gisFeatureDao.flushAndClear(); this.cityDao.flushAndClear(); this.admDao.flushAndClear(); this.zipCodeDao.flushAndClear(); } /* * (non-Javadoc) * * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * getNumberOfColumns() */ @Override protected int getNumberOfColumns() { return 12; } /** * @param cityDao * The CityDao to set */ @Required public void setCityDao(ICityDao cityDao) { this.cityDao = cityDao; } /** * @param citySubdivisionDao the citySubdivisionDao to set */ @Required public void setCitySubdivisionDao(ICitySubdivisionDao citySubdivisionDao) { this.citySubdivisionDao = citySubdivisionDao; } /** * @param gisFeatureDao * The GisFeatureDao to set */ @Required public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) { this.gisFeatureDao = gisFeatureDao; } /** * @param admDao * the admDao to set */ @Required public void setAdmDao(IAdmDao admDao) { this.admDao = admDao; } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setup() */ @Override protected void setup() { super.setup(); FullTextSearchEngine.disableLogging=true; IdGenerator.sync(); } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#tearDown() */ @Override protected void tearDown() { String savedMessage = this.statusMessage; FullTextSearchEngine.disableLogging=false; this.statusMessage = internationalisationService .getString("import.teardown"); try { super.tearDown(); if (!solRSynchroniser.commit()) { logger.warn("The commit in tearDown of " + this.getClass().getSimpleName() + " has failed, the uncommitted changes will be commited with the auto commit of solr in few minuts"); } //solRSynchroniser.optimize(); } finally { this.statusMessage = savedMessage; } } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles() */ @Override protected File[] getFiles() { return ImporterHelper.listCountryFilesToImport(importerConfig.getGeonamesZipCodeDir()); } @Override Integer getImportKey() { return 0; } /** * @param solRSynchroniser * the solRSynchroniser to set */ @Required public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) { this.solRSynchroniser = solRSynchroniser; } /* * (non-Javadoc) * * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback() */ public List<NameValueDTO<Integer>> rollback() { List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>(); // we first reset subClass int deletedgis = zipCodeDao.deleteAll(); logger.warn("deleting zipCodes..."); // we don't want to remove adm because some feature can be linked again if (deletedgis != 0) { deletedObjectInfo.add(new NameValueDTO<Integer>(GisFeature.class.getSimpleName(), deletedgis)); } resetStatus(); return deletedObjectInfo; } @Required public void setZipCodeDao(IZipCodeDao zipCodeDao) { this.zipCodeDao = zipCodeDao; } @Required public void setFullTextSearchEngine(IFullTextSearchEngine fullTextSearchEngine) { this.fullTextSearchEngine = fullTextSearchEngine; } @Required public void setIdGenerator(IIdGenerator idGenerator) { IdGenerator = idGenerator; } }