/*******************************************************************************
* Gisgraphy Project
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
*
* Copyright 2008 Gisgraphy project
*
* David Masclet <davidmasclet@gisgraphy.com>
******************************************************************************/
package com.gisgraphy.geocoding;
import static com.gisgraphy.helper.StringHelper.isEmptyString;
import static com.gisgraphy.helper.StringHelper.isNotEmptyString;
import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.gisgraphy.addressparser.Address;
import com.gisgraphy.addressparser.AddressQuery;
import com.gisgraphy.addressparser.AddressResultsDto;
import com.gisgraphy.addressparser.IAddressParserService;
import com.gisgraphy.addressparser.StructuredAddressQuery;
import com.gisgraphy.addressparser.commons.GeocodingLevels;
import com.gisgraphy.addressparser.exception.AddressParserException;
import com.gisgraphy.addressparser.format.BasicAddressFormater;
import com.gisgraphy.addressparser.format.DisplayMode;
import com.gisgraphy.compound.Decompounder;
import com.gisgraphy.compound.Decompounder.state;
import com.gisgraphy.domain.geoloc.entity.Adm;
import com.gisgraphy.domain.geoloc.entity.City;
import com.gisgraphy.domain.geoloc.entity.CitySubdivision;
import com.gisgraphy.domain.geoloc.entity.Street;
import com.gisgraphy.domain.valueobject.Constants;
import com.gisgraphy.domain.valueobject.GisgraphyConfig;
import com.gisgraphy.domain.valueobject.HouseNumberAddressDto;
import com.gisgraphy.domain.valueobject.Output;
import com.gisgraphy.domain.valueobject.Output.OutputStyle;
import com.gisgraphy.domain.valueobject.Pagination;
import com.gisgraphy.fulltext.FullTextSearchEngine;
import com.gisgraphy.fulltext.FulltextQuery;
import com.gisgraphy.fulltext.FulltextQuerySolrHelper;
import com.gisgraphy.fulltext.FulltextResultsDto;
import com.gisgraphy.fulltext.SmartStreetDetection;
import com.gisgraphy.fulltext.SolrResponseDto;
import com.gisgraphy.fulltext.SolrResponseDtoDistanceComparator;
import com.gisgraphy.helper.CountryDetector;
import com.gisgraphy.helper.CountryDetectorDto;
import com.gisgraphy.helper.GeolocHelper;
import com.gisgraphy.helper.StringHelper;
import com.gisgraphy.importer.ImporterConfig;
import com.gisgraphy.importer.LabelGenerator;
import com.gisgraphy.serializer.UniversalSerializer;
import com.gisgraphy.serializer.common.UniversalSerializerConstant;
import com.gisgraphy.service.IStatsUsageService;
import com.gisgraphy.stats.StatsUsageType;
import com.gisgraphy.street.HouseNumberDto;
import com.gisgraphy.street.HouseNumberUtil;
import com.vividsolutions.jts.geom.Point;
/**
*
* Geocodes international addresses via the Gisgraphy services.
*
* @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
*
*/
@Service
public class GeocodingService implements IGeocodingService {
// Message set on the results when they were obtained by a fuzzy search.
private static final String FUZZY_ACTIVE = "fuzzy:active";
//private static final Pattern GERMAN_SYNONYM_PATTEN = Pattern.compile("(?<=\\w)(str\\b)[\\.]?",Pattern.CASE_INSENSITIVE);
// NOTE(review): not used in the visible part of the file; presumably an angle
// tolerance for house number interpolation - confirm unit and usage.
private static final int INTERPOLATION_CURVE_TOLERANCE = 45;
// Collaborators. No assignment is visible in this part of the file; they are
// presumably injected (setter / @Autowired) further down - TODO confirm.
// Used to count each geocoding request (StatsUsageType.GEOCODING).
private IStatsUsageService statsUsageService;
// Provides the key that unlocks the parsed address in the response.
private ImporterConfig importerConfig;
// Parses a raw address into a structured Address before geocoding.
private IAddressParserService addressParser;
private FullTextSearchEngine fullTextSearchEngine;
// Read to know whether the address parser must be used when geocoding.
private GisgraphyConfig gisgraphyConfig;
private LabelGenerator labelGenerator = LabelGenerator.getInstance();
// Builds the comma separated "envelope" address sent to the fulltext engine.
private BasicAddressFormater addressFormater = BasicAddressFormater.getInstance();
// Detects a country in a raw address and removes it from the text.
CountryDetector countryDetector = new CountryDetector();
// Detects the street types (e.g. street, avenue) present in an address.
SmartStreetDetection smartStreetDetection = new SmartStreetDetection();
// NOTE(review): presumably a distance in meters - confirm against usages.
public final static int ACCEPT_DISTANCE_BETWEEN_CITY_AND_STREET = 15000;
// Pre-built outputs (default format) for each output style used by the searches.
public final static Output LONG_OUTPUT = Output.withDefaultFormat().withStyle(OutputStyle.LONG);
public final static Output MEDIUM_OUTPUT = Output.withDefaultFormat().withStyle(OutputStyle.MEDIUM);
public final static Output FULL_OUTPUT = Output.withDefaultFormat().withStyle(OutputStyle.FULL);
// Output used when nothing more specific is required : the LONG style.
public final static Output DEFAULT_OUTPUT = LONG_OUTPUT;
// Pre-built paginations, all starting at index 0.
public final static Pagination ONE_RESULT_PAGINATION = Pagination.paginate().from(0).to(1);
public final static Pagination FIVE_RESULT_PAGINATION = Pagination.paginate().from(0).to(5);
public final static Pagination TEN_RESULT_PAGINATION = Pagination.paginate().from(0).to(10);
// Shared comparator instance; judging by its type it orders results by distance.
public final static SolrResponseDtoDistanceComparator comparator = new SolrResponseDtoDistanceComparator();
//public final static Pattern HOUSENUMBERPATTERN = Pattern.compile("((((?:\\b\\d{1,4}[\\-\\–\\一]\\d{1,4}))\\b(?:[\\s,;]+)(?!(?:st\\b|th\\b|rd\\b|nd\\b))(?=\\w+)+?))");
/**
 * Month names (German, French, Spanish, Italian, English) that must not follow
 * a number for that number to be considered as a house number : "8 mai",
 * "2 giugno" or "11 november" are dates that are part of a street name.
 * Shared by the three negative lookaheads of {@link #HOUSENUMBERPATTERN}.
 */
private static final String HOUSENUMBER_EXCLUDED_MONTHS =
    "Januar\\b|janvier\\b|enero\\b|Gennaio\\b"
    + "|Februar\\b|Febbraio\\b|f[ée]vrier\\b|febrero\\b"
    + "|M[aä]rz\\b|mars\\b|marzo\\b"
    + "|A[pvb]ril[e]?\\b"
    + "|Mai\\b|mayo\\b|maggio\\b"
    // 'Giugno' used to end with an escaped sharp s instead of \b, so it never matched
    + "|juni[o]?\\b|juin\\b|Giugno\\b"
    + "|juli[o]?\\b|juillet\\b|Luglio\\b"
    + "|august\\b|ao[uû]t\\b|agosto\\b"
    + "|September\\b|sept[i]?embre\\b|Settembre\\b"
    + "|o[ckt]tober\\b|o[tc]t[ou]bre\\b"
    + "|november\\b|nov[i]?embre\\b"
    // the former 'd[ie]ec[i]embre' matched neither 'décembre' nor 'diciembre'
    + "|de[cz]ember\\b|d[eé]cembre\\b|diciembre\\b|dicembre\\b";
/**
 * Detects a house number in a raw address (case insensitive). Four
 * alternatives are tried, in this order :
 * <ol>
 * <li>a range of numbers ("12-14") followed by a separator and a word;</li>
 * <li>a number at the very beginning of the text, with an optional a-d letter
 * and an optional "bis"/"ter";</li>
 * <li>a number anywhere in the text, with an optional a-d letter, followed by a
 * separator and a word;</li>
 * <li>a number, with an optional a-d letter, at the very end of the text.</li>
 * </ol>
 * For the first three alternatives the number is rejected when it is followed
 * by an ordinal suffix (st, th, rd, nd), "street", "avenue", "de" or a month
 * name. Capturing groups are the same as before.
 */
public final static Pattern HOUSENUMBERPATTERN = Pattern.compile("((("
    + "(?:\\b\\d{1,4}[\\-\\–\\一]\\d{1,4}))\\b(?:[\\s,\\.;]+)(?!(?:st\\b|th\\b|rd\\b|nd\\b|street\\b|avenue\\b|de\\b|" + HOUSENUMBER_EXCLUDED_MONTHS + "))(?=\\w+)+?)"
    + "|(?:^\\b\\d{1,4}(?:\\s?(?:[a-d]\\b\\s)?)\\b)(?:[\\s,\\.;]?(?:bis|ter)?)(?:\\s|,)(?!(?:st\\b|th\\b|rd\\b|nd\\b|street\\b$|avenue\\b$|de\\b|" + HOUSENUMBER_EXCLUDED_MONTHS + "))"
    + "|(((?:\\b\\d{1,4}(?:\\s?(?:[a-d]\\b)?)))\\b(?:[\\s,\\.;]+)(?!(?:st\\b|th\\b|rd\\b|nd\\b|street\\b|avenue\\b|de\\b|" + HOUSENUMBER_EXCLUDED_MONTHS + "))(?=\\w+)+?)"
    + "|\\s?(?:\\b\\d{1,4}\\s?(?:[a-d])?\\b$)"
    + ")",
    Pattern.CASE_INSENSITIVE);
// Captures (group 1) the run of ASCII digits found at the very beginning of a
// text; it does not match when the text does not start with a digit.
public final static Pattern FIRST_NUMBER_EXTRACTION_PATTERN = Pattern.compile("^([0-9]+)");
/**
 * Two-letter codes of the countries whose zip code is made of 4 digits.
 * <p>
 * Built as a plain mutable {@link ArrayList} (same content, same order as
 * before) instead of an anonymous subclass created by a double-brace
 * initializer.
 */
public final static List<String> countryWithZipIs4Number = new ArrayList<String>(java.util.Arrays.asList(
    "GE", "AS", "AU", "BD", "CH", "CK", "CR", "CY", "HU", "HM",
    "LR", "SJ", "MK", "MZ", "NE", "NZ", "PH", "VE", "CV", "CX",
    "ET", "GW", "ZA", "LI", "LU", "PY"));
/**
 * Two-letter codes of the countries whose zip code is made of 3 digits.
 * <p>
 * Built as a plain mutable {@link ArrayList} (same content, same order as
 * before) instead of an anonymous subclass created by a double-brace
 * initializer.
 */
public final static List<String> countryWithZipIs3Number = new ArrayList<String>(java.util.Arrays.asList(
    "GN", "IS", "LS", "OM", "PG"));
// Used to tell whether an address contains a compound (e.g. German) street
// name that needs its street type to be expanded before searching.
Decompounder decompounder = new Decompounder();
/**
* The logger
*/
protected static final Logger logger = LoggerFactory.getLogger(GeocodingService.class);
/**
 * Geocodes the query and writes the serialized results into the given stream,
 * in the format requested by the query.
 *
 * @param query the query to geocode, must not be null
 * @param outputStream the stream to write the serialized results to, must not be null
 * @throws GeocodingException if the query or the stream is null, or if the geocoding fails
 * @see com.gisgraphy.geocoding.IGeocodingService#geocodeAndSerialize(com.gisgraphy.addressparser.AddressQuery, java.io.OutputStream)
 */
public void geocodeAndSerialize(AddressQuery query, OutputStream outputStream) throws GeocodingException {
    if (query == null) {
        throw new GeocodingException("Can not geocode a null query");
    }
    if (outputStream == null) {
        throw new GeocodingException("Can not serialize into a null outputStream");
    }
    final AddressResultsDto geocodedAddresses = geocode(query);
    // the callback name is passed to the serializer as an extra parameter
    final Map<String, Object> serializerParameters = new HashMap<String, Object>();
    serializerParameters.put(UniversalSerializerConstant.CALLBACK_METHOD_NAME, query.getCallback());
    UniversalSerializer.getInstance().write(
            outputStream, geocodedAddresses, false, serializerParameters, query.getFormat());
}
/**
 * Geocodes the query and returns the serialized results as a String, decoded
 * with {@link Constants#CHARSET}.
 *
 * @param query the query to geocode, must not be null
 * @return the serialized geocoding results
 * @throws GeocodingException if the query is null or if the geocoding fails
 * @throws RuntimeException if {@link Constants#CHARSET} is not a supported
 *         encoding; the original exception is kept as the cause
 * @see com.gisgraphy.geocoding.IGeocodingService#geocodeToString(com.gisgraphy.addressparser.AddressQuery)
 */
public String geocodeToString(AddressQuery query) throws GeocodingException {
    if (query == null) {
        throw new GeocodingException("Can not geocode a null query");
    }
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    geocodeAndSerialize(query, outputStream);
    try {
        return outputStream.toString(Constants.CHARSET);
    } catch (UnsupportedEncodingException e) {
        // keep the cause so that the stack trace is not lost
        throw new RuntimeException("unknow encoding " + Constants.CHARSET, e);
    }
}
/**
 * Geocodes a structured or a free text address query.
 * <p>
 * Steps, in order :
 * <ol>
 * <li>a {@link StructuredAddressQuery} is delegated to
 * {@link #geocode(Address, String)};</li>
 * <li>the country is detected and removed from the raw address; when nothing
 * but the country remains, only countries are searched;</li>
 * <li>when the address parser is enabled (by configuration or because the
 * query is postal) and the address has several tokens, the address is parsed
 * and, if the first parsed address is geocodable, it is geocoded as a
 * structured address;</li>
 * <li>otherwise the house number is extracted and a fulltext search is done,
 * and retried in fuzzy mode when the query allows it and nothing was found.</li>
 * </ol>
 *
 * @param query the query to geocode, must not be null
 * @return the geocoding results
 * @throws GeocodingException if the query is null, if the address is null or
 *         empty, or if the country code is not two characters long
 * @see com.gisgraphy.geocoding.IGeocodingService#geocode(java.lang.String)
 */
public AddressResultsDto geocode(AddressQuery query) throws GeocodingException {
    if (query == null) {
        throw new GeocodingException("Can not geocode a null query");
    }
    logger.info(query.toString());
    String countryCode = query.getCountry();
    if (countryCode != null && countryCode.trim().length() != 2) {
        throw new GeocodingException("countrycode should have two letters : " + countryCode);
    }
    if (query instanceof StructuredAddressQuery) {
        Address address = ((StructuredAddressQuery) query).getStructuredAddress();
        if (logger.isDebugEnabled()) {
            logger.debug("structured address to geocode : '" + address + "' for country code : " + countryCode);
        }
        AddressResultsDto addressResultsDto = geocode(address, countryCode);
        if (shouldSetParseAddress(query)) {
            addressResultsDto.setParsedAddress(address);
        }
        return addressResultsDto;
    }
    String rawAddress = query.getAddress();
    long startTime = System.currentTimeMillis();
    if (isEmptyString(rawAddress)) {
        throw new GeocodingException("Can not geocode a null or empty address");
    }
    // always search for the country, to remove it from the address
    CountryDetectorDto detectorDto = countryDetector.detectAndRemoveCountry(rawAddress);
    if (detectorDto != null && detectorDto.getCountryCode() != null) {
        if (detectorDto.getAddress() != null && !detectorDto.getAddress().trim().equals("")) {
            rawAddress = detectorDto.getAddress();
            // a country code given in the query wins over the detected one
            if (countryCode == null) {
                countryCode = detectorDto.getCountryCode();
            }
        } else {
            // the address is only a country : search the raw address, without country code
            List<SolrResponseDto> countries = findInText(rawAddress, null, null, com.gisgraphy.fulltext.Constants.ONLY_COUNTRY_PLACETYPE, false, null);
            return buildAddressResultDtoFromSolrResponseDtoCountry(countries);
        }
    }
    // checked again because the country code may now come from the detector
    if (countryCode != null && countryCode.trim().length() != 2) {
        throw new GeocodingException("countrycode should have two letters : " + countryCode);
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Raw address to geocode : '" + rawAddress + "' for country code : " + countryCode);
    }
    AddressQuery addressQuery = new AddressQuery(rawAddress, countryCode);
    AddressResultsDto addressResultDto = null;
    logger.debug("is postal address : " + query.isPostal());
    boolean needParsing = needParsing(query.getAddress());
    if ((gisgraphyConfig.useAddressParserWhenGeocoding || query.isPostal()) && needParsing) {
        try {
            logger.debug("address parser is enabled");
            addressResultDto = addressParser.execute(addressQuery);
        } catch (AddressParserException e) {
            // best effort : a parsing failure falls back to the fulltext search
            logger.error("An error occurs during parsing of address" + e.getMessage(), e);
        }
    } else {
        logger.debug("won't parse " + rawAddress);
    }
    if (addressResultDto != null && addressResultDto.getResult().size() >= 1 && isGeocodable(addressResultDto.getResult().get(0))) {
        if (logger.isDebugEnabled()) {
            logger.debug("successfully parsed address : " + rawAddress + " : " + addressResultDto.getResult().get(0));
        }
        Address address = addressResultDto.getResult().get(0);
        AddressResultsDto addressesDto = geocode(address, countryCode);
        if (shouldSetParseAddress(query)) {
            addressesDto.setParsedAddress(address);
        }
        return addressesDto;
    }
    // no usable parsed address : the usage is counted here because the structured
    // geocode(Address, String), that counts it too, is not called on this path
    statsUsageService.increaseUsage(StatsUsageType.GEOCODING);
    rawAddress = StringHelper.prepareQuery(rawAddress);
    HouseNumberAddressDto houseNumberAddressDto = findHouseNumber(rawAddress, countryCode);
    String newAddress = rawAddress;
    String houseNumber = null;
    if (houseNumberAddressDto != null) {
        houseNumber = houseNumberAddressDto.getHouseNumber();
        newAddress = houseNumberAddressDto.getAddressWithoutHouseNumber();
    }
    List<String> streettypes = smartStreetDetection.getStreetTypes(newAddress);
    // the list is null-checked below, so it must be checked before iterating too
    if (streettypes != null) {
        for (String streettype : streettypes) {
            logger.info("found street type : " + streettype);
        }
    }
    boolean smartstreetdetection = false;
    // never set for now : the alternative german format is disabled
    String alternativeGermanAddress = null;
    if (streettypes != null && streettypes.size() == 1) {
        smartstreetdetection = true;
        // NOTE(review): the two traces below are logged at error level although
        // they are not errors; level kept as is.
        if (Decompounder.isDecompoudCountryCode(countryCode) || decompounder.isDecompoudName(newAddress)) {
            newAddress = StringHelper.expandStreetType(newAddress, null);
            logger.error("new rawAddress with synonyms =" + newAddress);
        } else {
            logger.error("don't proces specific german address");
        }
    }
    AddressResultsDto results = doSearch(newAddress, alternativeGermanAddress, countryCode,
            needParsing, houseNumber, false, query.getPoint(), query.getRadius(), smartstreetdetection);
    // nothing found in strict mode : retry in fuzzy mode if the query allows it
    if (results.getNumFound() == 0 && query.isFuzzy()) {
        results = doSearch(newAddress, alternativeGermanAddress, countryCode,
                needParsing, houseNumber, true, query.getPoint(), query.getRadius(), smartstreetdetection);
    }
    long qTime = System.currentTimeMillis() - startTime;
    results.setQTime(qTime);
    logger.info("geocoding of " + query + " and country=" + countryCode + " took " + (qTime) + " ms and returns "
            + results.getNumFound() + " results");
    return results;
}
/*private String prepareQueryConcatenate(String rawAddress) {
String newAddress = rawAddress;
newAddress = newAddress.toLowerCase();
newAddress = newAddress.replaceAll(" (?:straße|str|strasse)\\b", "straße").replaceAll("[\\s]+", " ");
logger.error("newAddress concatenate ="+newAddress);
return newAddress;
}*/
/**
 * Runs the fulltext searches for a raw address and builds the results.
 * <p>
 * Unless smart street detection is active, an exact match search is done
 * first. When the address is a single token ({@code needParsing} is false) and
 * exact matches exist, they are returned as is. Otherwise streets are searched
 * (also with the alternative address when there is one, keeping the list whose
 * first result has the best score) and appended to the exact matches.
 *
 * @param rawaddress the address to search, without house number
 * @param alternativeStreetAddress an alternative writing of the address, or null
 * @param countryCode the country code to restrict the search to, or null
 * @param needParsing whether the address is made of several tokens
 * @param houseNumber the house number extracted from the address, or null
 * @param fuzzy whether to search in fuzzy mode
 * @param point the location to search around, or null
 * @param radius the radius to search within, or null
 * @param smartStreetDetection whether a street type was detected; when true the exact search is bypassed
 * @return the results; their message is {@value #FUZZY_ACTIVE} when fuzzy is true
 */
protected AddressResultsDto doSearch( String rawaddress,String alternativeStreetAddress,
        String countryCode, boolean needParsing,String houseNumber, boolean fuzzy, Point point, Double radius, boolean smartStreetDetection) {
    AddressResultsDto results;
    List<SolrResponseDto> exactMatches;
    if (!smartStreetDetection) {
        exactMatches = doSearchExact(rawaddress, countryCode, fuzzy, point, radius, null);
    } else {
        // bypass exact search
        exactMatches = new ArrayList<SolrResponseDto>();
    }
    if (!needParsing && exactMatches != null && exactMatches.size() >= 1) {
        // only one word and exact match ok
        results = buildAddressResultDtoFromSolrResponseDto(exactMatches, houseNumber);
    } else {
        // not found by exact match, so it is probably an address : search for a street
        List<SolrResponseDto> fulltextResultsDto = doSearchStreet(
                rawaddress, countryCode, fuzzy, point, radius);
        if (alternativeStreetAddress != null) {
            logger.error("will search for altenative german Address : " + alternativeStreetAddress);
            List<SolrResponseDto> alternativeResults = doSearchStreet(
                    alternativeStreetAddress, countryCode, fuzzy, point, radius);
            // a null list is handled like an empty one instead of failing on size()
            boolean noStreetFound = fulltextResultsDto == null || fulltextResultsDto.size() == 0;
            if (noStreetFound || (alternativeResults != null && alternativeResults.size() > 0
                    && alternativeResults.get(0) != null && fulltextResultsDto.get(0) != null
                    && alternativeResults.get(0).getScore() > fulltextResultsDto.get(0).getScore())) {
                logger.error("alternative results score is higher");
                fulltextResultsDto = alternativeResults;
            }
        }
        if (fulltextResultsDto != null) {
            // the exact search may have returned null : nothing to append to yet
            if (exactMatches == null) {
                exactMatches = new ArrayList<SolrResponseDto>();
            }
            exactMatches.addAll(fulltextResultsDto);
        }
        // NOTE(review): traces logged at error level; level kept as is.
        logger.error("-------------------merged--------------------------");
        if (exactMatches != null) {
            for (SolrResponseDto result : exactMatches) {
                logger.error(result.getScore() + " : " + (result.getOpenstreetmap_id() == null ? result.getFeature_id() : result.getOpenstreetmap_id()) + "-" + result.getFully_qualified_name());
            }
        }
        results = buildAddressResultDtoFromSolrResponseDto(exactMatches, houseNumber);
    }
    if (fuzzy) {
        results.setMessage(FUZZY_ACTIVE);
    }
    return results;
}
/**
 * Searches the streets that match the given address.
 *
 * @param rawaddress the address to search
 * @param countryCode the country code to restrict the search to, or null
 * @param fuzzy whether to search in fuzzy mode
 * @param point the location to search around, or null
 * @param radius the radius to search within, or null
 * @return what {@code findStreetInText} returns for those parameters
 */
protected List<SolrResponseDto> doSearchStreet(String rawaddress,
        String countryCode, boolean fuzzy, Point point, Double radius) {
    String searchMode = fuzzy ? "in fuzzy mode" : " in strict mode";
    logger.debug("will search for street" + searchMode);
    // a street is searched because the text is neither a city nor an adm
    return findStreetInText(rawaddress, countryCode, point, fuzzy, radius);
}
/**
 * Searches the places that exactly match the given text, then keeps only the
 * results whose name, or one of whose alternate names, is the same as the text
 * (according to {@code StringHelper.isSameName}). When no result passes that
 * filter, the unfiltered results are returned.
 * <p>
 * Null results and null alternate names collections are skipped.
 *
 * @param rawaddress the text to search
 * @param countryCode the country code to restrict the search to, or null
 * @param fuzzy whether to search in fuzzy mode
 * @param point the location to search around, or null
 * @param radius the radius to search within, or null
 * @param placetype the place types to restrict the search to, or null
 * @return the filtered (or, if the filter removes everything, unfiltered)
 *         results; null if the search returned null
 */
protected List<SolrResponseDto> doSearchExact(String rawaddress,
        String countryCode, boolean fuzzy, Point point, Double radius, Class[] placetype) {
    logger.debug("will search for exact match "+(fuzzy?"in fuzzy mode":" in strict mode"));
    List<SolrResponseDto> exactMatches = findExactMatches(rawaddress, countryCode, fuzzy, point, radius, placetype);
    // NOTE(review): the traces of this method are logged at error level; level kept as is.
    // filter result where name is not the same
    if (exactMatches != null) {
        List<SolrResponseDto> filterResults = new ArrayList<SolrResponseDto>();
        for (SolrResponseDto result : exactMatches) {
            if (result == null) {
                continue;
            }
            boolean added = false;
            if (result.getName() != null && StringHelper.isSameName(rawaddress, result.getName(), 1)) {
                filterResults.add(result);
                added = true;
                // log the openstreetmap id, or the feature id when there is none
                logger.error("filter same name, adding " + (result.getOpenstreetmap_id() == null ? result.getFeature_id() : result.getOpenstreetmap_id()) + "-" + result.getName() + " / " + result.getFully_qualified_name());
            } else if (result.getName_alternates() != null) {
                for (String nameAlternate : result.getName_alternates()) {
                    if (nameAlternate != null && StringHelper.isSameName(rawaddress, nameAlternate, 1)) {
                        logger.error("filter same name, adding alternate " + (result.getOpenstreetmap_id() == null ? result.getFeature_id() : result.getOpenstreetmap_id()) + " : " + nameAlternate + " / " + result.getFully_qualified_name());
                        filterResults.add(result);
                        added = true;
                        break;
                    }
                }
            }
            if (!added) {
                logger.error("filter same name, ignoring :" + result.getFully_qualified_name());
            }
        }
        // keep the unfiltered results rather than returning nothing
        if (!filterResults.isEmpty()) {
            exactMatches = filterResults;
        }
    }
    logger.error("-------------------exact--------------------------");
    if (exactMatches != null) {
        for (SolrResponseDto result : exactMatches) {
            if (result != null) {
                logger.error(result.getScore() + " : " + (result.getOpenstreetmap_id() == null ? result.getFeature_id() : result.getOpenstreetmap_id()) + "-" + result.getFully_qualified_name());
            }
        }
    }
    return exactMatches;
}
/**
 * Tells whether a query is made of several tokens, that is to say whether,
 * once trimmed, it contains a space, a comma or a semicolon.
 *
 * @param query the raw query, may be null
 * @return true if the trimmed query contains at least one of those separators;
 *         false for a null, blank or single token query
 */
protected boolean needParsing(String query) {
    if (query == null) {
        return false;
    }
    final String trimmed = query.trim();
    if (trimmed.length() == 0) {
        return false;
    }
    return trimmed.contains(" ") || trimmed.contains(",") || trimmed.contains(";");
}
/**
 * Tells whether the parsed address should be set on the results : the query
 * has to carry a non-zero unlock key that is equal to the non-zero key of the
 * importer configuration.
 *
 * @param query the query, may be null
 * @return true if both keys are different from 0 and equal to each other
 */
protected boolean shouldSetParseAddress(AddressQuery query){
    if (query == null || query.getParsedAddressUnlockKey() == 0) {
        return false;
    }
    if (importerConfig.getParsedAddressUnlockKey() == 0) {
        return false;
    }
    return query.getParsedAddressUnlockKey() == importerConfig.getParsedAddressUnlockKey();
}
/**
 * Tells whether an address holds enough information to be geocoded : at least
 * one of street name, state, city, zip code, post town or city subdivision
 * has to be set. An info message is logged when none is.
 *
 * @param address the address to check
 * @return false if all those fields are empty, true otherwise
 */
protected boolean isGeocodable(Address address) {
    final boolean hasSearchableField = !isEmptyString(address.getStreetName())
            || !isEmptyString(address.getState())
            || !isEmptyString(address.getCity())
            || !isEmptyString(address.getZipCode())
            || !isEmptyString(address.getPostTown())
            || !isEmptyString(address.getCitySubdivision());
    if (!hasSearchableField) {
        logger.info(address+" is no geocodable");
    }
    return hasSearchableField;
}
/**
 * Geocodes a structured address.
 * <p>
 * The house number is removed from the address before the search text is
 * built, and is only used to build the results. When a street name is set,
 * streets are searched; otherwise cities, city subdivisions or administrative
 * divisions are searched, depending on the fields that are set. Both searches
 * are retried in fuzzy mode when the strict one finds nothing.
 * <p>
 * Side effects on the given address : its house number and house number info
 * are set to null, and its country code is set to {@code countryCode} when it
 * has none.
 *
 * @param address the address to geocode, must not be null
 * @param countryCode the two letters country code, or null
 * @return the geocoding results; their message is {@value #FUZZY_ACTIVE} when
 *         the fuzzy search has been used
 * @throws GeocodingException if the address is null, is an intersection, or
 *         is not geocodable, or if the country code is not two characters long
 * @see com.gisgraphy.geocoding.IGeocodingService#geocode(com.gisgraphy.addressparser.Address)
 */
public AddressResultsDto geocode(Address address, String countryCode) throws GeocodingException {
    if (address == null) {
        throw new GeocodingException("Can not geocode a null address");
    }
    if (countryCode != null && countryCode.trim().length() != 2) {
        throw new GeocodingException("wrong countrycode : " + countryCode);
    }
    if (isIntersection(address)) {
        throw new GeocodingException("street intersection is not managed yet");
    }
    if (!isGeocodable(address)) {
        throw new GeocodingException("City, street name, posttown and zip is not set, we got too less informations to geocode ");
    }
    if (address.getCountryCode() == null && countryCode != null) {
        // useful for the formater
        address.setCountryCode(countryCode);
    }
    statsUsageService.increaseUsage(StatsUsageType.GEOCODING);
    long startTime = System.currentTimeMillis();
    List<SolrResponseDto> streets = new ArrayList<SolrResponseDto>();
    // the house number must not be part of the text sent to the fulltext engine
    String houseNumber = address.getHouseNumber();
    address.setHouseNumber(null);
    address.setHouseNumberInfo(null);
    String rawAddress = addressFormater.getEnvelopeAddress(address, DisplayMode.COMMA);
    boolean fuzzy = false;
    if (rawAddress != null) {
        if (!isEmptyString(address.getStreetName())) {
            // search for street
            streets = doSearchStreet(rawAddress, countryCode, false, null, null);
            if (streets == null || streets.size() == 0) {
                // retry in fuzzy
                streets = doSearchStreet(rawAddress, countryCode, true, null, null);
                fuzzy = true;
            }
        } else {
            // not a street, search for city, Adm, subdivision
            Class[] placetype = com.gisgraphy.fulltext.Constants.CITY_CITYSUB_ADM_PLACETYPE;
            boolean hasCityLevelField = address.getCity() != null || address.getZipCode() != null || address.getCitySubdivision() != null;
            if (hasCityLevelField) {
                placetype = com.gisgraphy.fulltext.Constants.CITY_AND_CITYSUBDIVISION_PLACETYPE;
            } else if (address.getState() != null) {
                // Only a state is given : search administrative divisions only.
                // The former condition required city, zip AND subdivision to be
                // set, which restricted fully specified addresses to adms.
                placetype = com.gisgraphy.fulltext.Constants.ONLY_ADM_PLACETYPE;
            }
            streets = doSearchExact(rawAddress, countryCode, false, null, null, placetype);
            if (streets == null || streets.size() == 0) {
                fuzzy = true;
                streets = doSearchExact(rawAddress, countryCode, true, null, null, placetype);
            }
        }
    }
    AddressResultsDto results = buildAddressResultDtoFromSolrResponseDto(streets, houseNumber);
    if (fuzzy) {
        results.setMessage(FUZZY_ACTIVE);
    }
    long qTime = System.currentTimeMillis() - startTime;
    results.setQTime(qTime);
    logger.info("geocoding of " + address + " and country=" + countryCode + " took " + (qTime) + " ms and returns "
            + results.getNumFound() + " results");
    return results;
}
/**
 * Builds the text used to search the city of an address : the city (or, if
 * there is none, the post town), then the zip code, the state and the
 * dependent locality, each one only when it is not empty, separated by a
 * space.
 *
 * @param address the address to build the sentence from
 * @return the trimmed sentence, empty if none of those fields is set
 */
protected String getBestCitySearchSentence(Address address) {
    StringBuilder words = new StringBuilder();
    // the post town is only a fallback for a missing city
    if (isNotEmptyString(address.getCity())) {
        words.append(" ").append(address.getCity());
    } else if (isNotEmptyString(address.getPostTown())) {
        words.append(" ").append(address.getPostTown());
    }
    if (isNotEmptyString(address.getZipCode())) {
        words.append(" ").append(address.getZipCode());
    }
    String state = address.getState();
    if (isNotEmptyString(state)) {
        words.append(" ").append(state);
    }
    String dependentLocality = address.getDependentLocality();
    if (isNotEmptyString(dependentLocality)) {
        words.append(" ").append(dependentLocality);
    }
    return words.toString().trim();
}
/**
 * Merges an approximate house number search result into the best
 * approximation known so far.
 * <ul>
 * <li>only a lower (resp. higher) neighbour is known: it becomes the best
 * approximation when there is none yet or when it is numerically closer to
 * the searched number than the current best one;</li>
 * <li>both neighbours are known: when there is no previous gap, or the gap
 * between the two neighbours is smaller than the previous one, the location
 * is interpolated between them and the searched number becomes the best
 * number.</li>
 * </ul>
 * The returned object carries the gap computed for the processed dto, which
 * is {@code null} unless both neighbours were present.
 *
 * @param houseNumberToFind the searched house number as text (not used here)
 * @param houseNumberToFindAsInt the searched house number
 * @param bestApprox the best approximation found so far, may be {@code null}
 * @param countryCode the country code (not used here)
 * @param houseNumberDtoToProcess the approximate result to take into account
 * @return a new dto holding the best location and number after the merge
 */
protected HouseNumberDtoInterpolation processApproximativeHouseNumber(String houseNumberToFind, Integer houseNumberToFindAsInt,
        HouseNumberDtoInterpolation bestApprox, String countryCode,
        HouseNumberDtoInterpolation houseNumberDtoToProcess) {
    Point closestLocation = null;
    Integer closestNumber = null;
    Integer previousGap = null;
    if (bestApprox != null) {
        closestLocation = bestApprox.getExactLocation();
        closestNumber = bestApprox.getExactNumber();
        previousGap = bestApprox.getHouseNumberDif();
    }

    Integer lower = houseNumberDtoToProcess.getLowerNumber();
    Integer higher = houseNumberDtoToProcess.getHigherNumber();
    Integer gap = null;

    if (lower != null) {
        if (higher == null) {
            logger.info("approx : there is only a lower " + lower);
            if (isNearerThanBest(lower, closestNumber, houseNumberToFindAsInt)) {
                logger.info("approx : lower " + lower + " closer than " + closestNumber);
                closestLocation = houseNumberDtoToProcess.getLowerLocation();
                closestNumber = lower;
            } else {
                logger.info("approx : lower " + lower + " not closer than " + closestNumber);
            }
        } else {
            logger.info("approx : there is lower " + lower + " and higher " + higher + ",bestDif=" + previousGap + " and best hn is actually " + closestNumber);
            gap = Math.abs(lower - higher);
            logger.info("currentdif=" + gap);
            if (previousGap == null || gap < previousGap) {
                logger.debug("approx : curentDif " + gap + " < bestDif " + previousGap);
                closestLocation = GeolocHelper.interpolatedPoint(houseNumberDtoToProcess.getLowerLocation(), houseNumberDtoToProcess.getHigherLocation(), lower, higher, houseNumberToFindAsInt);
                closestNumber = houseNumberToFindAsInt;
            }
        }
    } else if (higher != null) {
        logger.info("approx : there is only a higher " + higher);
        if (isNearerThanBest(higher, closestNumber, houseNumberToFindAsInt)) {
            logger.info("approx : higher " + higher + " closer than " + closestNumber);
            closestLocation = houseNumberDtoToProcess.getHigherLocation();
            closestNumber = higher;
        } else {
            logger.info("approx : higher " + higher + " not closer than " + closestNumber);
        }
    }

    logger.debug("approx : best house approx found " + closestNumber);
    HouseNumberDtoInterpolation merged = new HouseNumberDtoInterpolation(closestLocation, closestNumber);
    merged.setHouseNumberDif(gap);
    return merged;
}

/**
 * Tells whether a candidate number should replace the current best one:
 * true when there is no best number yet, or when the candidate is strictly
 * closer to the searched number.
 */
private boolean isNearerThanBest(Integer candidate, Integer currentBest, Integer searched) {
    return currentBest == null || Math.abs(candidate - searched) < Math.abs(currentBest - searched);
}
/**
 * Searches a house number in the house numbers of a street.
 * <p>
 * Every candidate is normalized to an int (with the SK/CZ specific
 * normalization when the country code is SK or CZ) and compared to the
 * searched number. Candidates that are {@code null}, have no number or
 * cannot be normalized are ignored.
 *
 * @param houseNumberToFindAsInt the searched house number
 * @param houseNumbersList the house numbers known for the street
 * @param countryCode the country code of the street, may be {@code null}
 * @param doInterpolation whether a location may be interpolated when the
 *        searched number lies between two known numbers
 * @return
 *         <ul>
 *         <li>{@code null} when there is nothing to search, when no usable
 *         candidate exists, or when interpolation was requested between two
 *         neighbours but no point could be computed;</li>
 *         <li>a dto built from the candidate location and the searched
 *         number when a candidate matches exactly, or from the interpolated
 *         location when interpolation succeeds;</li>
 *         <li>otherwise a dto carrying the nearest lower and/or higher
 *         number with their locations.</li>
 *         </ul>
 */
protected HouseNumberDtoInterpolation searchHouseNumber(Integer houseNumberToFindAsInt, List<HouseNumberDto> houseNumbersList,String countryCode, boolean doInterpolation) { //TODO pass the house as int directly
    if (houseNumberToFindAsInt == null || houseNumbersList == null || houseNumbersList.isEmpty()) {
        logger.info("no house number to search : ");
        return null;
    }
    // Unbox once: comparing two Integer objects with == compares references,
    // which silently fails for values outside the small-integer cache.
    final int searched = houseNumberToFindAsInt;

    //for debug purpose, need to be removed
    if (logger.isInfoEnabled()) {
        StringBuilder sb = new StringBuilder();
        for (HouseNumberDto candidate : houseNumbersList) {
            if (candidate != null) {
                sb.append(candidate.getNumber()).append(",");
            }
        }
        logger.info("will analyze HN : " + sb.toString());
    }

    // equalsIgnoreCase(null) is false, so no explicit null check is needed
    final boolean skOrCz = "SK".equalsIgnoreCase(countryCode) || "CZ".equalsIgnoreCase(countryCode);

    Integer nearestLower = null;
    Integer nearestUpper = null;
    HouseNumberDto nearestHouseLower = null;
    HouseNumberDto nearestHouseUpper = null;

    for (HouseNumberDto candidate : houseNumbersList) {
        if (candidate == null || candidate.getNumber() == null) {
            continue;
        }
        Integer candidateNormalized;
        if (skOrCz) {
            candidateNormalized = HouseNumberUtil.normalizeSkCzNumberToInt(candidate.getNumber());
        } else {
            candidateNormalized = HouseNumberUtil.normalizeNumberToInt(candidate.getNumber());
        }
        if (candidateNormalized == null) {
            // the number could not be normalized: it cannot be compared, skip it
            continue;
        }
        int candidateNumber = candidateNormalized;
        if (candidateNumber == searched) {
            logger.info("house number candidate found : " + candidate.getNumber());
            return new HouseNumberDtoInterpolation(candidate.getLocation(), houseNumberToFindAsInt);
        }
        if (candidateNumber < searched) {
            // keep the greatest number below the searched one
            if (nearestLower == null || candidateNumber > nearestLower) {
                nearestLower = candidateNormalized;
                nearestHouseLower = candidate;
            }
        } else {
            // keep the smallest number above the searched one
            if (nearestUpper == null || candidateNumber < nearestUpper) {
                nearestUpper = candidateNormalized;
                nearestHouseUpper = candidate;
            }
        }
    }
    logger.info("no exact house number candidate found for " + houseNumberToFindAsInt);

    if (nearestHouseLower == null && nearestHouseUpper == null) {
        logger.info(" no lower, nor upper house number found");
        return null;
    }

    HouseNumberDtoInterpolation result = new HouseNumberDtoInterpolation();
    if (nearestHouseUpper != null) {
        logger.info(" higher : " + nearestUpper);
        result.setHigherLocation(nearestHouseUpper.getLocation());
        result.setHigherNumber(nearestUpper);
    }
    if (nearestHouseLower != null) {
        logger.info(" lower : " + nearestLower);
        result.setLowerLocation(nearestHouseLower.getLocation());
        result.setLowerNumber(nearestLower);
    }

    //this do interpolation, but if the street is not a line or is curve the point will be out
    if (doInterpolation && nearestHouseLower != null && nearestHouseUpper != null) {
        // NOTE(review): the numbers are passed as (upper, lower) while the locations are
        // passed as (lower, upper); processApproximativeHouseNumber passes (lower, higher).
        // Order kept as is - confirm against GeolocHelper.interpolatedPoint.
        Point location = GeolocHelper.interpolatedPoint(nearestHouseLower.getLocation(), nearestHouseUpper.getLocation(), nearestUpper, nearestLower, houseNumberToFindAsInt);
        if (location == null) {
            return null;
        }
        return new HouseNumberDtoInterpolation(location, houseNumberToFindAsInt);
    }
    return result;
}
/**
 * Builds a country-level result from a list of fulltext results.
 * <p>
 * Only the first result is used: its name is used both as country and as
 * name of the address, and the geocoding level is set to
 * {@link GeocodingLevels#COUNTRY}.
 *
 * @param solResponseDtos the fulltext results, may be {@code null} or empty
 * @return a result holding at most one address
 */
protected AddressResultsDto buildAddressResultDtoFromSolrResponseDtoCountry(List<SolrResponseDto> solResponseDtos){
    List<Address> countryAddresses = new ArrayList<Address>();
    if (solResponseDtos == null || solResponseDtos.isEmpty()) {
        return new AddressResultsDto(countryAddresses, 0L);
    }
    if (logger.isDebugEnabled()) {
        logger.debug("found " + solResponseDtos.size() + " results");
    }

    SolrResponseDto firstResult = solResponseDtos.get(0);
    String countryName = firstResult.getName();

    Address countryAddress = new Address();
    countryAddress.setCountry(countryName);
    countryAddress.setName(countryName);
    countryAddress.setLat(firstResult.getLat());
    countryAddress.setLng(firstResult.getLng());
    countryAddress.setId(firstResult.getFeature_id());
    countryAddress.setGeocodingLevel(GeocodingLevels.COUNTRY);
    countryAddresses.add(countryAddress);

    return new AddressResultsDto(countryAddresses, 0L);
}
/**
 * Converts fulltext results into {@link Address}es and, for streets, tries
 * to locate the given house number.
 * <p>
 * For every non null result an address is filled with the score, the
 * coordinates (admin centre when both of its coordinates are set, the
 * feature coordinates otherwise), the id (openstreetmap id when set, feature
 * id otherwise), the country code, the administrative names and the zip
 * code. Depending on the placetype, street, city or quarter specific fields
 * are then filled.
 * <p>
 * Consecutive street results that have the same name (case sensitive) and
 * the same fully qualified name (case insensitive) as the previous street
 * are handled as parts of the same street: the entries collected for that
 * street are pruned when the house number is found exactly, when another
 * street follows, or when the end of the list is reached.
 *
 * @param solResponseDtos the fulltext results, may be {@code null} or empty
 * @param houseNumberToFind the house number to search, as text
 * @return the addresses wrapped in an {@link AddressResultsDto} created
 *         with {@code 0L} as second argument
 */
protected AddressResultsDto buildAddressResultDtoFromSolrResponseDto(List<SolrResponseDto> solResponseDtos, String houseNumberToFind) {
List<Address> addresses = new ArrayList<Address>();
if (solResponseDtos != null && solResponseDtos.size() > 0) {
if (logger.isDebugEnabled()) {
logger.debug("found " + solResponseDtos.size() + " results");
}
// name and fully qualified name of the previous street, used to detect the same street
String lastName=null;
String lastIsin=null;
// true while the current result is considered the same street as the previous one
boolean sameStreet = false;
// true once the house number has been found exactly
boolean housenumberFound =false;
// 1-based position in the list; null entries are counted too
int count=0;
// number of same-street results seen without an exact house number; drives the pruning loops
int numberOfStreetThatHaveTheSameName = 0;
for (SolrResponseDto solrResponseDto : solResponseDtos) {
count++;
Address address = new Address();
if (solrResponseDto == null) {
continue;
}
address.setScore(solrResponseDto.getScore());
// the name is only copied for results that are not streets
if (!solrResponseDto.getPlacetype().equalsIgnoreCase(Street.class.getSimpleName())) {
address.setName(solrResponseDto.getName());
}
// the admin centre is preferred when both of its coordinates are set
if (solrResponseDto.getLat_admin_centre()!=null && solrResponseDto.getLng_admin_centre()!=null){
address.setLat(solrResponseDto.getLat_admin_centre());
address.setLng(solrResponseDto.getLng_admin_centre());
} else {
address.setLat(solrResponseDto.getLat());
address.setLng(solrResponseDto.getLng());
}
if (solrResponseDto.getOpenstreetmap_id()!=null){
address.setId(solrResponseDto.getOpenstreetmap_id());
} else {
address.setId(solrResponseDto.getFeature_id());
}
String countryCode = solrResponseDto.getCountry_code();
address.setCountryCode(countryCode);
// state : the name of the result when it is an Adm, else adm1 name, else adm2 name
if (solrResponseDto.getPlacetype().equalsIgnoreCase(Adm.class.getSimpleName())) {
address.setState(solrResponseDto.getName());
}else if (solrResponseDto.getAdm1_name() != null) {
address.setState(solrResponseDto.getAdm1_name());
} else if (solrResponseDto.getAdm2_name() != null) {
address.setState(solrResponseDto.getAdm2_name());
}
address.setAdm1Name(solrResponseDto.getAdm1_name());
address.setAdm2Name(solrResponseDto.getAdm2_name());
address.setAdm3Name(solrResponseDto.getAdm3_name());
address.setAdm4Name(solrResponseDto.getAdm4_name());
address.setAdm5Name(solrResponseDto.getAdm5_name());
// zip code : own zip codes first, then the ones of the containing place
if (solrResponseDto.getZipcodes() != null && solrResponseDto.getZipcodes().size() > 0) {
address.setZipCode(labelGenerator.getBestZipString(solrResponseDto.getZipcodes()));
} else if (solrResponseDto.getIs_in_zip()!=null && solrResponseDto.getIs_in_zip().size()>=1){
address.setZipCode(labelGenerator.getBestZipString(solrResponseDto.getIs_in_zip()));
}
// the searched number is normalized with the rules of the country of the result
Integer houseNumberToFindAsInt;
if (countryCode!=null && ("SK".equalsIgnoreCase(countryCode) || "CZ".equalsIgnoreCase(countryCode))){
houseNumberToFindAsInt = HouseNumberUtil.normalizeSkCzNumberToInt(houseNumberToFind);
} else {
houseNumberToFindAsInt = HouseNumberUtil.normalizeNumberToInt(houseNumberToFind);
}
if (solrResponseDto.getPlacetype().equalsIgnoreCase(Street.class.getSimpleName())) {
String streetName = solrResponseDto.getName();
String isIn = solrResponseDto.getFully_qualified_name();
if (!isEmptyString(streetName)){
if(streetName.equals(lastName) && isIn!=null && isIn.equalsIgnoreCase(lastIsin)){
sameStreet=true;//probably the same street
if (housenumberFound){
// note : this skips the rest of the loop body, so this result is not added
// and lastName, lastIsin and sameStreet are not updated
continue;
//do nothing it has already been found in the street
}else {
numberOfStreetThatHaveTheSameName++;
address.setStreetName(solrResponseDto.getName());
address.setStreetRef(solrResponseDto.getStreet_ref());
address.setCity(solrResponseDto.getIs_in());
address.setState(solrResponseDto.getIs_in_adm());
if (solrResponseDto.getIs_in_zip()!=null && solrResponseDto.getIs_in_zip().size()>=1){
address.setZipCode(solrResponseDto.getIs_in_zip().iterator().next());
}
address.setDependentLocality(solrResponseDto.getIs_in_place());
//now search for houseNumber
List<HouseNumberDto> houseNumbersList = solrResponseDto.getHouse_numbers();
//if(houseNumberToFind!=null && houseNumbersList!=null && houseNumbersList.size()>0){ //don't verify if it is null or not because if the first streets have no house number, we won't
//count them as street that has same streetname
boolean doInterpolation = false;
if (allowInterpolation(solrResponseDto) ){
doInterpolation=true;
}
HouseNumberDtoInterpolation houseNumber = searchHouseNumber(houseNumberToFindAsInt,houseNumbersList,countryCode, doInterpolation);
if (houseNumber !=null){
if (houseNumber.isApproximative()){
// approximate result : nothing is done with it here
} else {
housenumberFound=true;
address.setHouseNumber(houseNumber.getExactNumerAsString());
address.setLat(houseNumber.getExactLocation().getY());
address.setLng(houseNumber.getExactLocation().getX());
//remove the last results added
// drops as many trailing entries of 'addresses' as the counter value and leaves the counter at -1
for (numberOfStreetThatHaveTheSameName--;numberOfStreetThatHaveTheSameName>=0;numberOfStreetThatHaveTheSameName--){
addresses.remove(addresses.size()-1-numberOfStreetThatHaveTheSameName);
}
}
} else{
housenumberFound=false;
}
//}
}
} else { //the streetName is different,
sameStreet=false;
//remove the last results added
// drops as many trailing entries of 'addresses' as same-street results were counted
for (numberOfStreetThatHaveTheSameName--;numberOfStreetThatHaveTheSameName>=0;numberOfStreetThatHaveTheSameName--){
addresses.remove(addresses.size()-1-numberOfStreetThatHaveTheSameName);
}
numberOfStreetThatHaveTheSameName=0;
//populate fields
address.setStreetName(solrResponseDto.getName());
address.setCity(solrResponseDto.getIs_in());
address.setState(solrResponseDto.getIs_in_adm());
if (solrResponseDto.getIs_in_zip()!=null && solrResponseDto.getIs_in_zip().size()>=1){
address.setZipCode(solrResponseDto.getIs_in_zip().iterator().next());
}
address.setDependentLocality(solrResponseDto.getIs_in_place());
//search for housenumber
List<HouseNumberDto> houseNumbersList = solrResponseDto.getHouse_numbers();
if(houseNumberToFind!=null && houseNumbersList!=null && houseNumbersList.size()>0){
boolean doInterpolation = false;
if (allowInterpolation(solrResponseDto) ){
doInterpolation=true;
}
HouseNumberDtoInterpolation houseNumber = searchHouseNumber(houseNumberToFindAsInt,houseNumbersList,countryCode, doInterpolation);
if (houseNumber !=null){
if (houseNumber.isApproximative()){
// approximate result : nothing is done with it here
} else {
housenumberFound=true;
address.setHouseNumber(houseNumber.getExactNumerAsString());
address.setLat(houseNumber.getExactLocation().getY());
address.setLng(houseNumber.getExactLocation().getX());
}
} else {
housenumberFound=false;
}
}
}
} else {//streetname is null, we search for housenumber anyway
address.setCity(solrResponseDto.getIs_in());
address.setState(solrResponseDto.getIs_in_adm());
if (solrResponseDto.getIs_in_zip()!=null && solrResponseDto.getIs_in_zip().size()>=1){
address.setZipCode(solrResponseDto.getIs_in_zip().iterator().next());
}
address.setDependentLocality(solrResponseDto.getIs_in_place());
List<HouseNumberDto> houseNumbersList = solrResponseDto.getHouse_numbers();
if(houseNumberToFind!=null && houseNumbersList!=null && houseNumbersList.size()>0){
boolean doInterpolation = false;
if (allowInterpolation(solrResponseDto) ){
doInterpolation=true;
}
HouseNumberDtoInterpolation houseNumber = searchHouseNumber(houseNumberToFindAsInt,houseNumbersList,countryCode, doInterpolation);
if (houseNumber !=null){
if (houseNumber.isApproximative()){
// approximate result : nothing is done with it here
} else {
housenumberFound=true;
address.setHouseNumber(houseNumber.getExactNumerAsString());
address.setLat(houseNumber.getExactLocation().getY());
address.setLng(houseNumber.getExactLocation().getX());
}
} else {
housenumberFound=false;
}
}
}
// remembered to compare with the next street result
lastName=streetName;
lastIsin = isIn;
} else if (solrResponseDto.getPlacetype().equalsIgnoreCase(City.class.getSimpleName())){
address.setCity(solrResponseDto.getName());
//populateAddressFromCity(solrResponseDto, address);
} else if (solrResponseDto.getPlacetype().equalsIgnoreCase(CitySubdivision.class.getSimpleName())) {
address.setQuarter(solrResponseDto.getName());
}
if (logger.isDebugEnabled() && solrResponseDto != null) {
logger.debug("=>place (" + (solrResponseDto.getOpenstreetmap_id()==null?solrResponseDto.getFeature_id():solrResponseDto.getOpenstreetmap_id())+") : "+solrResponseDto.getName() +" in "+solrResponseDto.getIs_in());
}
address.getGeocodingLevel();//force calculation of geocodingLevel
// the full label is generated for a street with a house number, or when the result has no
// fully qualified name; otherwise the fully qualified name of the result is reused
if ((solrResponseDto.getPlacetype().equalsIgnoreCase(Street.class.getSimpleName()) && address.getHouseNumber()!=null) || solrResponseDto.getFully_qualified_name()==null){
//we need to update the labels
address.setFormatedFull(labelGenerator.getFullyQualifiedName(address));
} else {
address.setFormatedFull(solrResponseDto.getFully_qualified_name());
}
address.setFormatedPostal(addressFormater.getEnvelopeAddress(address, DisplayMode.COMMA));
//set the street type after postal because street type is something like RESIDENTIAL and
//has not the same meaning than with address parsing
address.setStreetType(solrResponseDto.getStreet_type());
//if (!sameStreet){
addresses.add(address);
//}
// last result and still on the same street : prune the trailing entries (the one just added included)
if (sameStreet && solResponseDtos.size()==count){
//System.out.println("need remove");
//remove the last results added
for (numberOfStreetThatHaveTheSameName--;numberOfStreetThatHaveTheSameName>=0;numberOfStreetThatHaveTheSameName--){
addresses.remove(addresses.size()-1-numberOfStreetThatHaveTheSameName);
}
}
sameStreet=false;
}
}
return new AddressResultsDto(addresses, 0L);
}
/**
 * Tells whether a house number location may be interpolated on a street.
 *
 * @param solrResponseDto the street to check
 * @return {@code true} when both azimuths of the street are known and their
 *         absolute difference is strictly lower than
 *         {@code INTERPOLATION_CURVE_TOLERANCE}
 */
protected boolean allowInterpolation(SolrResponseDto solrResponseDto) {
    if (solrResponseDto.getAzimuth_start() == null) {
        return false;
    }
    if (solrResponseDto.getAzimuth_end() == null) {
        return false;
    }
    return Math.abs(solrResponseDto.getAzimuth_start() - solrResponseDto.getAzimuth_end()) < INTERPOLATION_CURVE_TOLERANCE;
}
/**
 * @param address the address to check
 * @return {@code true} when the address has a street name intersection set
 */
private boolean isIntersection(Address address) {
    if (address.getStreetNameIntersection() == null) {
        return false;
    }
    return true;
}
/*protected List<SolrResponseDto> findCitiesInText(String text, String countryCode) {
return findInText(text, countryCode, null, com.gisgraphy.fulltext.Constants.CITY_AND_CITYSUBDIVISION_PLACETYPE,false, null);
}*/
/*protected List<SolrResponseDto> processAddress(Address address, boolean fuzzy) {
if (address==null) {
return new ArrayList<SolrResponseDto>();
}
Output output;
if (address.getStreetName()!=null) {
output = MEDIUM_OUTPUT;
} else {
output = LONG_OUTPUT;
}
FulltextResultsDto results = fullTextSearchEngine.executeAddressQuery(address, fuzzy);
if (results.getResultsSize() >= 1) {
return results.getResults();
} else {
return new ArrayList<SolrResponseDto>();
}
}*/
/**
 * Searches streets matching a text; delegates to
 * {@code findInText} with the street placetype.
 *
 * @param text the text to search
 * @param countryCode the country code to restrict the search to
 * @param point the location to search around, may be {@code null}
 * @param fuzzy whether the search is fuzzy
 * @param radius the radius to use around the point, may be {@code null}
 * @return the streets found, in the order returned by {@code findInText}
 */
protected List<SolrResponseDto> findStreetInText(String text, String countryCode, Point point, boolean fuzzy, Double radius) {
    // The results used to be re-sorted here by distance to the point; that
    // sort has been disabled (the original note relates it to the use of a
    // bounding box), so the results are returned as is.
    return findInText(text, countryCode, point, com.gisgraphy.fulltext.Constants.STREET_PLACETYPE, fuzzy, radius);
}
/**
 * Runs a fulltext search that does not require all the words and does not
 * use spell checking.
 *
 * @param text the text to search; an empty text gives an empty list without
 *        querying the search engine
 * @param countryCode the country code to restrict the search to
 * @param point the location to search around, ignored when {@code null}
 * @param placetypes the placetypes to restrict the search to
 * @param fuzzy whether the search is fuzzy
 * @param radius the radius to use around the point; only applied when both
 *        the point and the radius are not {@code null}
 * @return the results of the search engine, or a new empty list when there
 *         is no result
 */
protected List<SolrResponseDto> findInText(String text, String countryCode, Point point, Class<?>[] placetypes,boolean fuzzy, Double radius) {
    if (isEmptyString(text)) {
        return new ArrayList<SolrResponseDto>();
    }
    FulltextQuery query = new FulltextQuery(text, Pagination.paginate().from(0).to(FulltextQuerySolrHelper.NUMBER_OF_STREET_TO_RETRIEVE), DEFAULT_OUTPUT, placetypes, countryCode);
    // the fuzzy flag is set once here; the former second call under if (fuzzy) was redundant
    query.withAllWordsRequired(false).withoutSpellChecking().withFuzzy(fuzzy);
    if (point != null) {
        query.around(point);
        if (radius != null) {
            query.withRadius(radius);
        }
    }
    FulltextResultsDto results = fullTextSearchEngine.executeQuery(query);
    if (results.getResultsSize() >= 1) {
        return results.getResults();
    }
    return new ArrayList<SolrResponseDto>();
}
/*
protected AddressResultsDto buildAddressResultDtoFromStreetsAndCities(List<SolrResponseDto> streets, List<SolrResponseDto> cities, String houseNumberToFind) {
List<Address> addresses = new ArrayList<Address>();
if (streets != null && streets.size() > 0) {
if (logger.isDebugEnabled()) {
logger.debug("found " + streets.size() + " streets");
}
Double cityLat=0D;
Double cityLng=0D;
SolrResponseDto city = null;
if (cities != null && cities.size() > 0) {
city = cities.get(0);
cityLat =city.getLat();
cityLng = city.getLng();
}
String lastName=null;
String lastIsIn=null;
boolean housenumberFound =false;
HouseNumberDtoInterpolation bestApproxDto = null;
//Integer bestApproxHN = null;
int numberOfStreetThatHaveTheSameName = 0;
int count = 0;
for (SolrResponseDto street : streets) {
count++;
Address address = new Address();
address.setLat(street.getLat());
address.setLng(street.getLng());
if (street.getOpenstreetmap_id()!=null){
address.setId(street.getOpenstreetmap_id());
} else {
address.setId(street.getFeature_id());
}
address.setCountryCode(street.getCountry_code());
String streetName = street.getName();
if (logger.isDebugEnabled() && streets != null) {
logger.debug("=>street : " + streetName +" ("+street.getScore()+") in "+street.getIs_in()+"/id="+street.getFeature_id());
}
address.setStreetName(streetName);
String is_in = street.getIs_in();
if (!isEmptyString(is_in)) {
address.setCity(is_in);
address.setState(street.getIs_in_adm());
address.setAdm1Name(street.getAdm1_name());
address.setAdm2Name(street.getAdm2_name());
address.setAdm3Name(street.getAdm3_name());
address.setAdm4Name(street.getAdm4_name());
address.setAdm5Name(street.getAdm5_name());
if (street.getZipcodes()!=null && street.getZipcodes().size()==1){
address.setZipCode(street.getZipcodes().iterator().next());
} else if (street.getIs_in_zip()!=null && street.getIs_in_zip().size()>=1){
address.setZipCode(labelGenerator.getBestZipString(street.getZipcodes()));
}
address.setDependentLocality(street.getIs_in_place());
} else if (city!=null){
populateAddressFromCity(city, address);
}
//now search for house number!
List<HouseNumberDto> houseNumbersList = street.getHouse_numbers();
//if(houseNumberToFind!=null && houseNumbersList!=null && houseNumbersList.size()>0){ //don't verify if it is null or not because if the first streets have no house number, we won't
//count them as street that has same streetname
if (!isEmptyString(streetName) && streetName.equalsIgnoreCase(lastName)
//&& city!=null
&& (!isEmptyString(is_in) && is_in.equalsIgnoreCase(lastIsIn))){//probably the same street
logger.info(streetName+"("+numberOfStreetThatHaveTheSameName+") is the same in "+is_in);
if (housenumberFound){
continue;
//do nothing it has already been found in the street that have the same name
}else {
numberOfStreetThatHaveTheSameName++;
Integer houseNumberToFindAsInt;
String countryCode = street.getCountry_code();
if (countryCode!=null && ("SK".equalsIgnoreCase(countryCode) || "CZ".equalsIgnoreCase(countryCode))){
houseNumberToFindAsInt = HouseNumberUtil.normalizeSkCzNumberToInt(houseNumberToFind);
} else {
houseNumberToFindAsInt = HouseNumberUtil.normalizeNumberToInt(houseNumberToFind);
}
HouseNumberDtoInterpolation houseNumberDto = searchHouseNumber(houseNumberToFindAsInt,houseNumbersList,countryCode);
if (houseNumberDto !=null){
if (houseNumberDto.isApproximative()){
bestApproxDto = processApproximativeHouseNumber(houseNumberToFind,houseNumberToFindAsInt, bestApproxDto,street.getCountry_code(), houseNumberDto);
} else {
bestApproxDto = null;
housenumberFound=true;
address.setHouseNumber(houseNumberDto.getExactNumerAsString());
address.setLat(houseNumberDto.getExactLocation().getY());
address.setLng(houseNumberDto.getExactLocation().getX());
//remove the last results added
for (numberOfStreetThatHaveTheSameName--;numberOfStreetThatHaveTheSameName>=0;numberOfStreetThatHaveTheSameName--){
addresses.remove(addresses.size()-1-numberOfStreetThatHaveTheSameName);
}
}
}// else { //same street but no house number
//housenumberFound=false;
// bestApproxLocation = null;
// bestApproxHN = null;
//}
}
} else { //the streetName is different or null,
//we overide the lastname with the approx one
logger.info(streetName+"/"+lastName+" ("+numberOfStreetThatHaveTheSameName+") is different in "+is_in+"/"+lastIsIn);
//remove the last results added
logger.info("removing "+numberOfStreetThatHaveTheSameName+" streets");
for (numberOfStreetThatHaveTheSameName--;numberOfStreetThatHaveTheSameName>=0;numberOfStreetThatHaveTheSameName--){
addresses.remove(addresses.size()-1-numberOfStreetThatHaveTheSameName);
}
if (bestApproxDto!=null){
Address lastAddressWTheName= addresses.get(addresses.size()-1);
//we set the number of the last address
//lastAddressWTheName.setHouseNumber(bestApproxDto.getExactNumerAsString());
logger.info("the nearest hn is "+bestApproxDto.getExactNumber()+" at "+bestApproxDto.getExactLocation());
lastAddressWTheName.setLat(bestApproxDto.getExactLocation().getY());
lastAddressWTheName.setLng(bestApproxDto.getExactLocation().getX());
}
//reinit parameter for a new loop if it is not the last, if so we need to know how many segment we have to removre
if (streets.size()!= count){
numberOfStreetThatHaveTheSameName=0;
bestApproxDto = null;
}
//we process the new street
Integer houseNumberToFindAsInt;
String countryCode = street.getCountry_code();
if (countryCode!=null && ("SK".equalsIgnoreCase(countryCode) || "CZ".equalsIgnoreCase(countryCode))){
houseNumberToFindAsInt = HouseNumberUtil.normalizeSkCzNumberToInt(houseNumberToFind);
} else {
houseNumberToFindAsInt = HouseNumberUtil.normalizeNumberToInt(houseNumberToFind);
}
HouseNumberDtoInterpolation houseNumberDto = searchHouseNumber(houseNumberToFindAsInt,houseNumbersList,countryCode);
if (houseNumberDto !=null){
if (houseNumberDto.isApproximative()){
//bestapproxdto is null because the streetname is different and we don't want to handle old information
bestApproxDto = processApproximativeHouseNumber(houseNumberToFind, houseNumberToFindAsInt,null,street.getCountry_code(), houseNumberDto);
} else {
housenumberFound=true;
bestApproxDto = null;
logger.info("the nearest hn is "+houseNumberDto.getExactNumerAsString()+" at "+houseNumberDto.getExactLocation());
address.setHouseNumber(houseNumberDto.getExactNumerAsString());
address.setLat(houseNumberDto.getExactLocation().getY());
address.setLng(houseNumberDto.getExactLocation().getX());
}
} else {
housenumberFound=false;
bestApproxDto = null;
}
}
// }
lastName=streetName;
lastIsIn = is_in;
address.getGeocodingLevel();//force calculation of geocodingLevel
if (bestApproxDto!=null && streets.size()== count){ //we are at the end of the loop and we have found an approx number
//remove the last results added
for (numberOfStreetThatHaveTheSameName--;numberOfStreetThatHaveTheSameName>=0;numberOfStreetThatHaveTheSameName--){
addresses.remove(addresses.size()-1-numberOfStreetThatHaveTheSameName);
}
//we set the number of the last address
//lastAddressWTheName.setHouseNumber(bestApproxDto.getExactNumerAsString());
address.setLat(bestApproxDto.getExactLocation().getY());
address.setLng(bestApproxDto.getExactLocation().getX());
}
if (city!=null){
address.setDistance(GeolocHelper.distance(GeolocHelper.createPoint(address.getLng(), address.getLat()), GeolocHelper.createPoint(cityLng, cityLat)));
}
address.setFormatedFull(labelGenerator.getFullyQualifiedName(address));
address.setFormatedPostal(labelGenerator.generatePostal(address));
addresses.add(address);
}
} else {
if (cities != null && cities.size() > 0) {
if (logger.isDebugEnabled()) {
logger.debug("No street found, only cities");
}
for (SolrResponseDto city : cities) {
// the best we can do is city
Address address = buildAddressFromCity(city);
address.getGeocodingLevel();//force calculation of geocodingLevel
addresses.add(address);
}
} else if (logger.isDebugEnabled()) {
logger.debug("No street and no city found");
}
}
return new AddressResultsDto(addresses, 0L);
}*/
/*private Address buildAddressFromCity(SolrResponseDto city) {
Address address = new Address();
address.setLat(city.getLat());
address.setLng(city.getLng());
populateAddressFromCity(city, address);
if (city.getOpenstreetmap_id()!=null){
address.setId(city.getOpenstreetmap_id());
} else {
address.setId(city.getFeature_id());
}
return address;
}*/
/* protected void populateAddressFromCity(SolrResponseDto city, Address address) {
if (city != null) {
address.setCity(city.getName());
if (city.getAdm2_name() != null) {
address.setState(city.getAdm2_name());
} else if (city.getAdm1_name() != null) {
address.setState(city.getAdm1_name());
} else if (city.getIs_in_adm()!=null){
address.setState(city.getIs_in_adm());
}
address.setAdm1Name(city.getAdm1_name());
address.setAdm2Name(city.getAdm2_name());
address.setAdm3Name(city.getAdm3_name());
address.setAdm4Name(city.getAdm4_name());
address.setAdm5Name(city.getAdm5_name());
if (city.getZipcodes() != null && city.getZipcodes().size() > 0) {
address.setZipCode(labelGenerator.getBestZipString(city.getZipcodes()));
} else if (city.getIs_in_zip()!=null && city.getIs_in_zip().size()>=1){
address.setZipCode(labelGenerator.getBestZipString(city.getIs_in_zip()));
}
address.setCountryCode(city.getCountry_code());
address.setDependentLocality(city.getIs_in_place());
address.setFormatedFull(labelGenerator.getFullyQualifiedName(address));
address.setFormatedPostal(addressFormater.getEnvelopeAddress(address, DisplayMode.COMMA));
}
}*/
/* /**
* @param exactMatches
* @param aproximativeMatches
* @return a list of {@link SolrResponseDto} with
* list1[0],list2[0],list1[1],list2[1],... it remove duplicates and
* null
*
protected List<SolrResponseDto> mergeSolrResponseDto(List<SolrResponseDto> exactMatches, List<SolrResponseDto> aproximativeMatches) {
//find common id and put them first
//retirer duplicate de exact (si street)
//retirer duplicate de approximate (si street)
//merger*
if (exactMatches == null || exactMatches.size() == 0) {
if (aproximativeMatches == null) {
return new ArrayList<SolrResponseDto>();
} else {
return aproximativeMatches;
}
} else if (aproximativeMatches == null || aproximativeMatches.size() == 0) {
return exactMatches;
} else {
List<SolrResponseDto> merged = new ArrayList<SolrResponseDto>();
int maxSize = Math.max(exactMatches.size(), aproximativeMatches.size());
for (int i = 0; i < maxSize; i++) {
if (i < exactMatches.size() && !merged.contains(exactMatches.get(i)) && exactMatches.get(i) != null) {
merged.add(exactMatches.get(i));
}
if (i < aproximativeMatches.size() && !merged.contains(aproximativeMatches.get(i)) && aproximativeMatches.get(i) != null) {
merged.add(aproximativeMatches.get(i));
}
}
return merged;
}
}*/
/**
 * Runs a fulltext search where all the words are required, without spell
 * checking, limited to {@code TEN_RESULT_PAGINATION}.
 *
 * @param text the text to search; an empty text gives an empty list without
 *        querying the search engine
 * @param countryCode the country code to restrict the search to
 * @param fuzzy whether the search is fuzzy
 * @param point the location to search around, ignored when {@code null}
 * @param radius the radius to use around the point; only applied when both
 *        the point and the radius are not {@code null}
 * @param placetypes the placetypes to search; when {@code null} the
 *        {@code CITY_CITYSUB_ADM_PLACETYPE} constant is used
 * @return the results of the search engine, or a new empty list when there
 *         is no result
 */
protected List<SolrResponseDto> findExactMatches(String text, String countryCode,boolean fuzzy, Point point, Double radius, Class[] placetypes) {
    if (isEmptyString(text)) {
        return new ArrayList<SolrResponseDto>();
    }
    if (placetypes == null) {
        placetypes = com.gisgraphy.fulltext.Constants.CITY_CITYSUB_ADM_PLACETYPE;
    }
    FulltextQuery query = new FulltextQuery(text, TEN_RESULT_PAGINATION, DEFAULT_OUTPUT, placetypes, countryCode);
    query.withAllWordsRequired(true).withoutSpellChecking().withFuzzy(fuzzy);
    if (point != null) {
        query.around(point);
        // same guard as in findInText : a null radius is never handed to the query
        if (radius != null) {
            query.withRadius(radius);
        }
    }
    FulltextResultsDto results = fullTextSearchEngine.executeQuery(query);
    if (results.getResultsSize() >= 1) {
        return results.getResults();
    }
    return new ArrayList<SolrResponseDto>();
}
/**
 * Extracts the house number of a raw address, using
 * {@code HOUSENUMBERPATTERN}.
 * <p>
 * The number is removed from the address, except when it may be a zip code
 * : a 4 characters number when the country is unknown or listed in
 * {@code countryWithZipIs4Number}, or a 3 characters number when the
 * country is listed in {@code countryWithZipIs3Number}.
 *
 * @param address the raw address, may be {@code null}
 * @param countryCode the country code, may be {@code null}; it is upper
 *        cased independently of the default locale before being looked up
 * @return the house number with the original and the cleaned address, or
 *         {@code null} when the address is {@code null}, when no number is
 *         found, or when the number found is at least 4 characters long and
 *         ends within the last 3 characters of the address (probably a zip
 *         code)
 */
protected HouseNumberAddressDto findHouseNumber(String address,
        String countryCode) {
    if (address == null) {
        return null;
    }
    Matcher m = HOUSENUMBERPATTERN.matcher(address);
    if (!m.find()) {
        logger.info("no house number found in " + address);
        return null;
    }
    String houseNumber = m.group().trim();
    if (houseNumber.length() >= 4 && (address.trim().indexOf(houseNumber) + houseNumber.length()) >= address.length() - 3) {
        //it is probably a zip code
        return null;
    }
    // only keep the first number of what has been matched
    Matcher m2 = FIRST_NUMBER_EXTRACTION_PATTERN.matcher(houseNumber);
    if (m2.find()) {
        houseNumber = m2.group();
    }
    if (countryCode != null) {
        // Locale.ROOT : with the default locale, 'i' may be upper cased to a dotted
        // capital I (e.g. Turkish), and the country lookups below would then fail
        countryCode = countryCode.toUpperCase(java.util.Locale.ROOT);
    }
    boolean fourDigitsZipPossible = houseNumber.length() == 4
            && (countryCode == null || countryWithZipIs4Number.contains(countryCode));
    boolean threeDigitsZipPossible = houseNumber.length() == 3
            && countryCode != null && countryWithZipIs3Number.contains(countryCode);
    String newAddress;
    if (fourDigitsZipPossible || threeDigitsZipPossible) {
        logger.info("found house number " + houseNumber + " in '" + address
                + "' for country '"+countryCode+"' but we don't remove it since it can be a zipcode");
        newAddress = address;
    } else {
        newAddress = m.replaceFirst("").trim();
        // drop the separators left at the beginning once the number is removed
        newAddress = newAddress.replaceFirst("^[,\\s]+", "");
    }
    HouseNumberAddressDto houseNumberAddressDto = new HouseNumberAddressDto(
            newAddress, address, houseNumber);
    logger.info("found house number " + houseNumber + " in '" + address
            + "' for countrycode = '"+countryCode+"', new address wo housenumber = " + newAddress);
    return houseNumberAddressDto;
}
/**
 * Sets the address parser service of this instance (autowired setter).
 *
 * @param addressParser the address parser service to use
 */
@Autowired
public void setAddressParser(IAddressParserService addressParser) {
this.addressParser = addressParser;
}
/**
 * Sets the fulltext search engine of this instance (autowired setter).
 *
 * @param fullTextSearchEngine the fulltext search engine to use
 */
@Autowired
public void setFullTextSearchEngine(FullTextSearchEngine fullTextSearchEngine) {
this.fullTextSearchEngine = fullTextSearchEngine;
}
/**
 * Sets the usage statistics service of this instance (autowired setter).
 *
 * @param statsUsageService the usage statistics service to use
 */
@Autowired
public void setStatsUsageService(IStatsUsageService statsUsageService) {
this.statsUsageService = statsUsageService;
}
/**
 * Sets the importer configuration of this instance (autowired setter).
 *
 * @param importerConfig the importer configuration to use
 */
@Autowired
public void setImporterConfig(ImporterConfig importerConfig) {
this.importerConfig = importerConfig;
}
/**
 * Sets the Gisgraphy configuration of this instance (autowired setter).
 *
 * @param gisgraphyConfig the Gisgraphy configuration to use
 */
@Autowired
public void setGisgraphyConfig(GisgraphyConfig gisgraphyConfig) {
this.gisgraphyConfig = gisgraphyConfig;
}
}