/*******************************************************************************
* Gisgraphy Project
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
*
* Copyright 2008 Gisgraphy project
* David Masclet <davidmasclet@gisgraphy.com>
*
*
*******************************************************************************/
package com.gisgraphy.importer;
import static com.gisgraphy.domain.geoloc.entity.GisFeature.NAME_MAX_LENGTH;
import static com.gisgraphy.fulltext.Constants.ONLY_ADM_PLACETYPE;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.hibernate.FlushMode;
import org.hibernate.exception.ConstraintViolationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;
import com.gisgraphy.domain.geoloc.entity.Adm;
import com.gisgraphy.domain.geoloc.entity.AlternateName;
import com.gisgraphy.domain.geoloc.entity.City;
import com.gisgraphy.domain.geoloc.entity.CitySubdivision;
import com.gisgraphy.domain.geoloc.entity.GisFeature;
import com.gisgraphy.domain.geoloc.entity.ZipCode;
import com.gisgraphy.domain.repository.IAdmDao;
import com.gisgraphy.domain.repository.ICityDao;
import com.gisgraphy.domain.repository.ICitySubdivisionDao;
import com.gisgraphy.domain.repository.IGisFeatureDao;
import com.gisgraphy.domain.repository.IIdGenerator;
import com.gisgraphy.domain.repository.ISolRSynchroniser;
import com.gisgraphy.domain.valueobject.AlternateNameSource;
import com.gisgraphy.domain.valueobject.GISSource;
import com.gisgraphy.domain.valueobject.NameValueDTO;
import com.gisgraphy.domain.valueobject.Output;
import com.gisgraphy.domain.valueobject.Output.OutputStyle;
import com.gisgraphy.domain.valueobject.Pagination;
import com.gisgraphy.fulltext.Constants;
import com.gisgraphy.fulltext.FullTextSearchEngine;
import com.gisgraphy.fulltext.FulltextQuery;
import com.gisgraphy.fulltext.FulltextResultsDto;
import com.gisgraphy.fulltext.IFullTextSearchEngine;
import com.gisgraphy.fulltext.SolrResponseDto;
import com.gisgraphy.helper.AdmStateLevelInfo;
import com.gisgraphy.helper.GeolocHelper;
import com.gisgraphy.util.StringUtil;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.Point;
/**
* Import the administrative from an (pre-processed) openStreet map data file.
* @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
*/
public class OpenStreetMapAdmSimpleImporter extends AbstractSimpleImporterProcessor {
public static final int SCORE_LIMIT = 1;
public final static int BATCH_UPDATE_SIZE = 100;
protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapAdmSimpleImporter.class);
public static final Output MINIMUM_OUTPUT_STYLE = Output.withDefaultFormat().withStyle(OutputStyle.SHORT);
protected IIdGenerator idGenerator;
protected ICityDao cityDao;
protected ICitySubdivisionDao citySubdivisionDao;
protected IAdmDao admDao;
protected ISolRSynchroniser solRSynchroniser;
protected IFullTextSearchEngine fullTextSearchEngine;
LabelGenerator generator = LabelGenerator.getInstance();
protected int currentOsmlevel =0;
protected int calculatedLevel =1;
protected String currentCountryCode = null;
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
*/
@Override
protected void flushAndClear() {
admDao.flushAndClear();
}
@Override
protected void setup() {
super.setup();
//temporary disable logging when importing
FullTextSearchEngine.disableLogging=true;
logger.info("sync idgenerator");
idGenerator.sync();
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
*/
@Override
protected File[] getFiles() {
return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenStreetMapAdmDir());
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
*/
@Override
protected int getNumberOfColumns() {
return 9;
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
*/
@Override
protected void processData(String line) throws ImporterException {
String[] fields = line.split("\t");
String name=null;
int adminLevelOsm=1;
Point location=null;
Adm place=null;
String countrycode=null;
// new Line table has the following fields :
// ---------------------------------------------------
//0: id; 1 : name; 2: shape; 3: location; 4: countrycode;5: the administrative level
//6: type;7: alternatenames;8 is_in_adm
checkNumberOfColumn(fields);
// name
if (!isEmptyField(fields, 1, false)) {
name=fields[1].trim();
if (name.length() > NAME_MAX_LENGTH){
logger.warn(name + "is too long");
name= name.substring(0, NAME_MAX_LENGTH-1);
}
}
if (name==null){
return;
}
//countrycode
if (!isEmptyField(fields, 4, true)) {
countrycode=fields[4].trim().toUpperCase();
}
//admin level
if (!isEmptyField(fields, 5, false)) {
String adminLevelAsString =fields[5].trim();
try {
adminLevelOsm = Integer.parseInt(adminLevelAsString);
int level = calculateAdmLevel(countrycode,adminLevelOsm);
if (!shouldBeImported(countrycode,adminLevelOsm)){
return;
}
place = new Adm(level);
place.setCountryCode(countrycode);
place.setName(name);
} catch (NumberFormatException e) {
logger.error("can not parse admin level id "+adminLevelAsString);
return;
}
}
//osmId
if (!isEmptyField(fields, 0, true)) {
String osmIdAsString =fields[0].trim();
try {
Long osmId = Long.parseLong(osmIdAsString);
place.setOpenstreetmapId(osmId);
} catch (NumberFormatException e) {
logger.error("can not parse openstreetmap id "+ osmIdAsString);
}
}
//shape
if(!isEmptyField(fields, 2, false)){
try {
Geometry shape = (Geometry) GeolocHelper.convertFromHEXEWKBToGeometry(fields[2]);
place.setShape(shape);
} catch (RuntimeException e) {
logger.warn("can not parse shape for id "+fields[1]+" : "+e);
}
}
//location
if (!isEmptyField(fields, 3, false)) {
try {
location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[3]);
place.setLocation(location);
} catch (RuntimeException e) {
logger.warn("can not parse location for "+fields[3]+" : "+e);
return;
}
}
if(!isEmptyField(fields, 6, false)){
place.setAmenity(fields[6]);
}
//populate alternatenames
if (!isEmptyField(fields, 7, false)) {
String alternateNamesAsString=fields[7].trim();
populateAlternateNames(place,alternateNamesAsString);
}
//isinadm
if(!isEmptyField(fields, 8, false)){
List<AdmDTO> admDTOs = ImporterHelper.parseIsInAdm(fields[8]);
//current level is the osm one!
populateAdmNames(place,adminLevelOsm,admDTOs);
setParent(place,admDTOs);
}
place.setAlternateLabels(generator.generateLabels(place));
place.setLabel(generator.generateLabel(place));
place.setFullyQualifiedName(generator.getFullyQualifiedName(place));
//postal is not set because it is only for street
place.setFeatureId(idGenerator.getNextFeatureId());
place.setSource(GISSource.OSM);
try {
save(place);
} catch (ConstraintViolationException e) {
logger.error("Can not save "+dumpFields(fields)+"(ConstraintViolationException) we continue anyway but you should consider this",e);
}catch (Exception e) {
logger.error("Can not save "+dumpFields(fields)+" we continue anyway but you should consider this",e);
}
}
protected boolean shouldBeImported(String countryCode,int osmLevel) {
return AdmStateLevelInfo.shouldBeImportedAsAdm(countryCode, osmLevel);
}
@Override
protected int getMaxInsertsBeforeFlush() {
return 1;
}
protected int calculateAdmLevel(String countryCode,int adminLevel) {
boolean countryHasChanged =false;
if (currentOsmlevel==0){
currentOsmlevel=adminLevel;
calculatedLevel = 1;
}
if (currentCountryCode == null){
currentCountryCode = countryCode;
calculatedLevel = 1;
}
if (!countryCode.equals(currentCountryCode)){
currentCountryCode=countryCode;
calculatedLevel = 1;
countryHasChanged=true;
}
if (adminLevel>currentOsmlevel && !countryHasChanged){
calculatedLevel++;
currentOsmlevel = adminLevel;
}
if (calculatedLevel>5){
return 5;
}
return calculatedLevel;
}
protected GisFeature populateAdmNames(Adm adm, int currentLevel, List<AdmDTO> admdtos) {
return ImporterHelper.populateAdmNames(adm, currentLevel, admdtos);
}
protected Adm setParent(Adm adm, List<AdmDTO> adms){
if (adms!=null && adms!=null && adms.size()>=1){
AdmDTO last = adms.get(adms.size()-1);
if (last!=null && last.getAdmOpenstreetMapId()!=0){
Adm parent = admDao.getByOpenStreetMapId(last.getAdmOpenstreetMapId());
if (parent!=null && parent.getLevel()< adm.getLevel()){
adm.setParent(parent);
}
}
}
return adm;
}
/**
* @param fields
* The array to process
* @return a string which represent a human readable string of the Array but without shape because it is useless in logs
*/
protected static String dumpFields(String[] fields) {
String result = "[";
for (int i=0;i<fields.length;i++) {
if (i==2){
result= result+"THE_SHAPE;";
}else {
result = result + fields[i] + ";";
}
}
return result + "]";
}
void save(GisFeature feature) {
if (feature!=null){
if (feature instanceof Adm){
admDao.save((Adm)feature);
} else if (feature instanceof CitySubdivision){
citySubdivisionDao.save((CitySubdivision)feature);
}
}
}
GisFeature populateAlternateNames(GisFeature feature,
String alternateNamesAsString) {
return ImporterHelper.populateAlternateNames(feature,alternateNamesAsString);
}
protected GisFeature populateAdmNames(GisFeature gisFeature, int currentLevel, List<AdmDTO> admdtos){
return ImporterHelper.populateAdmNames(gisFeature, currentLevel, admdtos);
}
protected SolrResponseDto getAdm(String name, String countryCode) {
if (name==null){
return null;
}
FulltextQuery query;
try {
query = (FulltextQuery)new FulltextQuery(name).withAllWordsRequired(false).withoutSpellChecking().
withPlaceTypes(ONLY_ADM_PLACETYPE).withOutput(MINIMUM_OUTPUT_STYLE).withPagination(Pagination.ONE_RESULT);
} catch (IllegalArgumentException e) {
logger.error("can not create a fulltext query for "+name);
return null;
}
if (countryCode != null){
query.limitToCountryCode(countryCode);
}
FulltextResultsDto results = fullTextSearchEngine.executeQuery(query);
if (results != null){
for (SolrResponseDto solrResponseDto : results.getResults()) {
return solrResponseDto;
}
}
return null;
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped()
*/
@Override
public boolean shouldBeSkipped() {
return !importerConfig.isOpenstreetmapImporterEnabled();
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
*/
@Override
protected void setCommitFlushMode() {
this.cityDao.setFlushMode(FlushMode.COMMIT);
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
*/
@Override
protected boolean shouldIgnoreComments() {
return true;
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
*/
@Override
protected boolean shouldIgnoreFirstLine() {
return false;
}
/* (non-Javadoc)
* @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
*/
public List<NameValueDTO<Integer>> rollback() {
List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
logger.info("reseting openstreetmap cities...");
//TODO only cities that have source openstreetmap
deletedObjectInfo
.add(new NameValueDTO<Integer>(City.class.getSimpleName(), 0));
resetStatus();
return deletedObjectInfo;
}
@Override
//TODO test
protected void tearDown() {
super.tearDown();
String savedMessage = this.statusMessage;
FullTextSearchEngine.disableLogging=false;
/*try {
this.statusMessage = internationalisationService.getString("import.fulltext.optimize");
solRSynchroniser.optimize();
logger.warn("fulltext engine has been optimized");
} catch (Exception e){
logger.error("error durin fulltext optimization",e);
}finally {
// we restore message in case of error
this.statusMessage = savedMessage;
}*/
}
@Required
public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
this.solRSynchroniser = solRSynchroniser;
}
@Required
public void setIdGenerator(IIdGenerator idGenerator) {
this.idGenerator = idGenerator;
}
@Required
public void setCityDao(ICityDao cityDao) {
this.cityDao = cityDao;
}
@Required
public void setFullTextSearchEngine(IFullTextSearchEngine fullTextSearchEngine) {
this.fullTextSearchEngine = fullTextSearchEngine;
}
@Required
public void setAdmDao(IAdmDao admDao) {
this.admDao = admDao;
}
@Required
public void setCitySubdivisionDao(ICitySubdivisionDao citySubdivisionDao) {
this.citySubdivisionDao = citySubdivisionDao;
}
}