/******************************************************************************* * Gisgraphy Project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA * * Copyright 2008 Gisgraphy project * David Masclet <davidmasclet@gisgraphy.com> * * *******************************************************************************/ package com.gisgraphy.importer; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.hibernate.FlushMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Required; import com.gisgraphy.domain.geoloc.entity.HouseNumber; import com.gisgraphy.domain.geoloc.entity.OpenStreetMap; import com.gisgraphy.domain.repository.IIdGenerator; import com.gisgraphy.domain.repository.IOpenStreetMapDao; import com.gisgraphy.domain.repository.ISolRSynchroniser; import com.gisgraphy.domain.repository.IhouseNumberDao; import com.gisgraphy.domain.valueobject.GISSource; import com.gisgraphy.domain.valueobject.NameValueDTO; import com.gisgraphy.domain.valueobject.Output; import com.gisgraphy.domain.valueobject.Output.OutputStyle; import com.gisgraphy.domain.valueobject.Pagination; import com.gisgraphy.fulltext.FullTextSearchEngine; import com.gisgraphy.fulltext.FulltextQuery; import com.gisgraphy.fulltext.FulltextResultsDto; import com.gisgraphy.fulltext.IFullTextSearchEngine; import com.gisgraphy.fulltext.SolrResponseDto; import com.gisgraphy.helper.GeolocHelper; import com.vividsolutions.jts.geom.Point; /** * Import the street from an (pre-processed) openStreet map data file . * * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a> */ public class OpenAddressesSimpleImporter extends AbstractSimpleImporterProcessor { public static final long DEFAULT_SEARCH_DISTANCE = 1000L; protected static final Logger logger = LoggerFactory.getLogger(OpenAddressesSimpleImporter.class); protected IOpenStreetMapDao openStreetMapDao; protected IhouseNumberDao houseNumberDao; protected ISolRSynchroniser solRSynchroniser; protected IFullTextSearchEngine fullTextSearchEngine; @Autowired protected IIdGenerator idGenerator; protected final static Output MEDIUM_OUTPUT = Output.withDefaultFormat().withStyle(OutputStyle.MEDIUM); long cummulative_db_time = 0; long cummulative_fulltext_time = 0; long cummulative_db_nb_request = 0; long cummulative_fulltext_nb_request = 0; private static final Pattern ALL_ZERO = Pattern.compile("^0+$"); private static final Pattern NOT_VALID_LABEL = Pattern.compile("\\b(NULL|UNDEFINED|UNAVAILABLE)\\b",Pattern.CASE_INSENSITIVE); public static final int MAX_NAME_SIZE = 250; protected boolean isZeroHouseNumber(String houseNumber){ if (houseNumber!=null){ return ALL_ZERO.matcher(houseNumber).matches(); } return false; } protected boolean isUnWantedHouseNumber(String houseNumber){ if (houseNumber!=null){ return isUnWantedStreetName(houseNumber) || isZeroHouseNumber(houseNumber); } return true; } protected boolean isUnWantedStreetName(String streetname){ if (streetname!=null){ return NOT_VALID_LABEL.matcher(streetname).find() || streetname.trim().equals(""); } return true; } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear * () */ @Override protected void flushAndClear() { //openStreetMapDao.flushAndClear(); //houseNumberDao.flushAndClear(); } @Override protected void setup() { //temporary disable logging when importing FullTextSearchEngine.disableLogging=true; idGenerator.sync(); super.setup(); } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles() */ @Override protected File[] getFiles() { return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenAddressesDir()); } /* * (non-Javadoc) * * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * getNumberOfColumns() */ @Override protected int getNumberOfColumns() { return 11; } private String lastStreetName=null; private OpenStreetMap lastStreet=null; private Point lastPoint=null; private Pattern COUNTRY_EXTRACTION_PATTERN=Pattern.compile("(..):"); /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData * (java.lang.String) */ @Override protected void processData(String line) throws ImporterException { if (line==null || "".equals(line.trim())){ return; } //0:LON 1:LAT 2: NUMBER 3:STREET 4:UNIT 5:CITY 6:DISTRICT 7:REGION 8:POSTCODE 9:ID 10:HASH String[] fields = line.split(","); if (fields.length != getNumberOfColumns()) { logger.error("wrong number of column ("+fields.length+") for "+line); } if (!isAllRequiredFieldspresent(fields)){ logger.warn("some fields are not present for line "+line); }; if (isUnWantedHouseNumber(fields[2])){ logger.warn("invalid house number '"+fields[3]+"'for line "+line); } Point location; try { location = GeolocHelper.createPoint(new Float(fields[0]), new Float(fields[1])); } catch (NumberFormatException e) { logger.error("can not get location for "+line); return; } String countrycode =null; if (fields.length>=10 && isEmptyField(fields, 10, false)){ countrycode= extractCountrycode(fields[10]); } String streetname = null; if (!isUnWantedStreetName(fields[3])){ streetname = cleanupStreetName(fields[3]); } OpenStreetMap street =null; boolean newStreet = false; if (lastStreetName != null & lastStreetName.equals(streetname) && lastPoint!=null && GeolocHelper.distance(lastPoint, location)<1000 && lastStreet !=null){ street = lastStreet; } else { //save the last street openStreetMapDao.save(lastStreet); //search for the new One street = findNearestStreet(streetname, location); lastStreetName = streetname; lastStreet = street; lastPoint = location; newStreet = true; } if (street==null){ street = new OpenStreetMap(); street.setGid(idGenerator.getNextGId()); street.setSource(GISSource.OPENADDRESSES); street.setName(streetname); street.setLocation(lastPoint); if (!isEmptyField(fields,5, false)){ street.setIsIn(fields[5]); } if (!isEmptyField(fields,6, false)){ street.setIsInPlace(fields[6]); } if (!isEmptyField(fields,7, false)){ street.setIsInAdm(fields[7]); } lastStreet = street; lastStreetName = streetname; lastPoint = location; } if (!isEmptyField(fields, 8, false)){ street.setZipCode(fields[8]); } HouseNumber hn = new HouseNumber(fields[2],location); hn.setSource(GISSource.OPENADDRESSES); street.addHouseNumber(hn); street.setCountryCode(null);//todo if (newStreet){ openStreetMapDao.save(street); } } protected String extractCountrycode(String string) { if (string !=null){ Matcher m = COUNTRY_EXTRACTION_PATTERN.matcher(string); if (m.find()){ return m.group(1).toUpperCase(); } } return null; } protected String cleanupStreetName(String streetName){ if (streetName!=null){ if (streetName.length()>MAX_NAME_SIZE){ streetName = streetName.substring(0, MAX_NAME_SIZE); } return streetName.trim().replaceAll("[\\s']+", " ").replaceFirst("^0+(?!$)", "").trim(); } return streetName; } protected boolean isAllRequiredFieldspresent(String[] fields) { if (isEmptyField(fields, 0, false) || isEmptyField(fields, 1, false) || isEmptyField(fields, 2, false) ){ return false; } return true; } protected OpenStreetMap findNearestStreet(String streetName, Point location) { //Openstreetmap has sometimes, for a same street, several segment, so we do a fulltext search and then search for the nearest based on shape,not nearest point logger.error("findNearestStreet :streetname="+streetName+" and location = "+location); if (location == null){ logger.warn("findNearestStreet :location is null"); return null; } if (streetName==null || "".equals(streetName.trim()) || "\"\"".equals(streetName.trim()) || "-".equals(streetName.trim()) || "---".equals(streetName.trim()) || "--".equals(streetName.trim())){ logger.warn("findNearestStreet : no streetname, we search by location "+location); OpenStreetMap osm = openStreetMapDao.getNearestFrom(location,DEFAULT_SEARCH_DISTANCE); logger.error("findNearestStreet :getNearestFrom return "+osm); return osm; } long start = System.currentTimeMillis(); OpenStreetMap osmDB = openStreetMapDao.getNearestFromByName(location, DEFAULT_SEARCH_DISTANCE, streetName); long end = System.currentTimeMillis(); long duration = end - start; cummulative_db_nb_request++; cummulative_db_time+=duration; start = System.currentTimeMillis(); FulltextQuery query; try { query = new FulltextQuery(streetName, Pagination.DEFAULT_PAGINATION, MEDIUM_OUTPUT, com.gisgraphy.fulltext.Constants.STREET_PLACETYPE, null); } catch (IllegalArgumentException e) { logger.error("can not create a fulltext query for "+streetName+", will return the nearest"); return openStreetMapDao.getNearestFrom(location,2000L); } query.withAllWordsRequired(false).withoutSpellChecking(); query.around(location); query.withRadius(DEFAULT_SEARCH_DISTANCE); FulltextResultsDto results; try { results = fullTextSearchEngine.executeQuery(query); } catch (RuntimeException e) { logger.error("error during fulltext search : "+e.getMessage(),e); return null; } int resultsSize = results.getResultsSize(); // logger.warn(query + "returns "+resultsSize +" results"); OpenStreetMap osm =null; List<SolrResponseDto> resultsList = results.getResults(); if (resultsSize == 1) { SolrResponseDto street = resultsList.get(0); if (street!=null){ Long openstreetmapId = street.getOpenstreetmap_id(); //logger.warn("findNearestStreet : find a street with osmId "+openstreetmapId); if (openstreetmapId!=null){ osm = openStreetMapDao.getByOpenStreetMapId(openstreetmapId); if (osm == null) { logger.warn("can not find street for id "+openstreetmapId); } } } } if (resultsSize > 1) { osm = getNearestByIds(resultsList,location,streetName); //logger.warn("findNearestStreet : getNearestByIds returns "+osm+" for "+streetName); } end = System.currentTimeMillis(); duration = end - start; cummulative_fulltext_nb_request++; cummulative_fulltext_time+=duration; if (osmDB!=null && osm!=null && osmDB.getId()!= osm.getId()){ logger.error("notsame street : "+streetName+"/"+location+" returns "+osmDB+" and "+osm); } return osm; } protected OpenStreetMap getNearestByIds(List<SolrResponseDto> results,Point point,String streetname) { List<Long> ids = new ArrayList<Long>(); OpenStreetMap result = null; if (results!=null){ for (SolrResponseDto dto:results){ if (dto!=null && dto.getOpenstreetmap_id()!=null){ ids.add(dto.getOpenstreetmap_id()); } } String idsAsSTring=""; for (Long id:ids){ idsAsSTring = idsAsSTring+","+id; } //logger.warn("getNearestByIds : "+idsAsSTring); result = openStreetMapDao.getNearestByosmIds(point, ids); if (result==null){ logger.warn("getNearestByIds for"+streetname+" and ids "+idsAsSTring+" and point" +point+" return "+result); } } return result; } /* * (non-Javadoc) * * @see * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped * () */ @Override public boolean shouldBeSkipped() { return false; } /* * (non-Javadoc) * * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * setCommitFlushMode() */ @Override protected void setCommitFlushMode() { this.openStreetMapDao.setFlushMode(FlushMode.COMMIT); this.houseNumberDao.setFlushMode(FlushMode.COMMIT); } /* * (non-Javadoc) * * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * shouldIgnoreComments() */ @Override protected boolean shouldIgnoreComments() { return true; } /* * (non-Javadoc) * * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor# * shouldIgnoreFirstLine() */ @Override protected boolean shouldIgnoreFirstLine() { return true; } /* * (non-Javadoc) * * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback() */ public List<NameValueDTO<Integer>> rollback() { List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>(); return deletedObjectInfo; } @Override // TODO test protected void tearDown() { super.tearDown(); FullTextSearchEngine.disableLogging=false; } @Required public void setHouseNumberDao(IhouseNumberDao houseNumberDao) { this.houseNumberDao = houseNumberDao; } @Required public void setOpenStreetMapDao(IOpenStreetMapDao openStreetMapDao) { this.openStreetMapDao = openStreetMapDao; } @Required public void setFullTextSearchEngine(IFullTextSearchEngine fullTextSearchEngine) { this.fullTextSearchEngine = fullTextSearchEngine; } @Required public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) { this.solRSynchroniser = solRSynchroniser; } }