/*******************************************************************************
* Gisgraphy Project
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
*
* Copyright 2008 Gisgraphy project
* David Masclet <davidmasclet@gisgraphy.com>
*
*
*******************************************************************************/
package com.gisgraphy.importer;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;
import com.gisgraphy.domain.repository.IAdmDao;
import com.gisgraphy.domain.repository.ICountryDao;
import com.gisgraphy.domain.valueobject.Constants;
import com.gisgraphy.domain.valueobject.NameValueDTO;
/**
 * Splits the Geonames alternate names file into four separate files: one for
 * the countries, one for the adm1, one for the adm2 and one for all the other
 * features.
 * <p>
 * Each input line is routed according to its geonameid (second column): the id
 * is looked up among the country ids, then the adm1 ids, then the adm2 ids that
 * were loaded from the DAOs in {@link #setup()}. Lines whose id matches none of
 * them are written to the features file. Lines are copied verbatim.
 *
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
public class GeonamesAlternateNamesExtracter extends AbstractSimpleImporterProcessor {

    protected static final Logger logger = LoggerFactory.getLogger(GeonamesAlternateNamesExtracter.class);

    /** Output file receiving the alternate names of adm1. */
    protected File adm1file;

    /** Output file receiving the alternate names of adm2. */
    protected File adm2file;

    /** Output file receiving the alternate names of countries. */
    protected File countryFile;

    /** Output file receiving the alternate names of every other feature. */
    protected File featuresFile;

    protected OutputStreamWriter adm1fileOutputStreamWriter;

    protected OutputStreamWriter adm2fileOutputStreamWriter;

    protected OutputStreamWriter countryfileOutputStreamWriter;

    protected OutputStreamWriter featuresfileOutputStreamWriter;

    @Autowired
    private IAdmDao admDao;

    @Autowired
    private ICountryDao countryDao;

    /*
     * The three maps below are used as sets: only the presence of a key
     * matters, the value is always the empty string (see
     * populateMapFromList).
     */
    protected Map<Long, String> countryMap;

    protected Map<Long, String> adm1Map;

    protected Map<Long, String> adm2Map;

    /**
     * Default Constructor
     */
    public GeonamesAlternateNamesExtracter() {
        super();
    }

    /**
     * Does nothing on purpose.
     *
     * @param file
     *            the file that has just been processed
     */
    @Override
    protected void onFileProcessed(File file) {
        // we override because we don't want to rename files
    }

    /**
     * Routes one line of the alternate names file to the matching output
     * file.
     * <p>
     * The line has the following tab separated fields :
     * <ul>
     * <li>0 : alternateNameId</li>
     * <li>1 : geonameid</li>
     * <li>2 : isolanguage : iso 639-2 or 3 or 'post'</li>
     * <li>3 : alternate name</li>
     * <li>4 : isPreferredName</li>
     * <li>5 : isShortName</li>
     * </ul>
     * Lines with an empty geonameid are logged and skipped, lines with a non
     * numeric geonameid are logged as warning and skipped.
     *
     * @param line
     *            the raw line to process
     */
    @Override
    protected void processData(String line) {
        String[] fields = line.split("\t");

        if (isEmptyField(fields, 1, false)) {
            // String.split drops trailing empty strings, so a line made only
            // of tabs yields a zero-length array : do not read fields[0]
            // blindly.
            String alternateNameId = fields.length > 0 ? fields[0] : "";
            logger.info("geonameid is null for geonames alternateNameId" + alternateNameId);
            return;
        }

        Long featureId;
        try {
            featureId = Long.valueOf(fields[1]);
        } catch (NumberFormatException e) {
            logger.warn("geonamesid "+fields[1]+" is not a number for line "+line);
            return;
        }

        // The lookup order matters : country first, then adm1, then adm2.
        OutputStreamWriter target;
        if (lineIsAnAlternateNameForCountry(featureId)) {
            target = countryfileOutputStreamWriter;
        } else if (lineIsAnAlternateNameForAdm1(featureId)) {
            target = adm1fileOutputStreamWriter;
        } else if (lineIsAnAlternatNameForAdm2(featureId)) {
            target = adm2fileOutputStreamWriter;
        } else {
            target = featuresfileOutputStreamWriter;
        }
        writeAlternateName(target, line);
    }

    /**
     * @param featureId
     *            the geonameid to test
     * @return true if the id is one of the adm2 ids loaded in
     *         {@link #setup()}
     */
    protected boolean lineIsAnAlternatNameForAdm2(Long featureId) {
        return adm2Map.get(featureId) != null;
    }

    /**
     * @param featureId
     *            the geonameid to test
     * @return true if the id is one of the adm1 ids loaded in
     *         {@link #setup()}
     */
    protected boolean lineIsAnAlternateNameForAdm1(Long featureId) {
        return adm1Map.get(featureId) != null;
    }

    /**
     * @param featureId
     *            the geonameid to test
     * @return true if the id is one of the country ids loaded in
     *         {@link #setup()}
     */
    protected boolean lineIsAnAlternateNameForCountry(Long featureId) {
        return countryMap.get(featureId) != null;
    }

    /**
     * @return true when the alternate names are imported embedded with the
     *         features, or when the Geonames importer is disabled
     */
    @Override
    public boolean shouldBeSkipped() {
        return importerConfig.isImportGisFeatureEmbededAlternateNames()
                || !importerConfig.isGeonamesImporterEnabled();
    }

    /**
     * Runs the parent tear down and closes the four output writers. The
     * writers are closed even if the parent tear down fails.
     */
    @Override
    protected void tearDown() {
        try {
            super.tearDown();
        } finally {
            closeOutputStreams();
        }
    }

    /**
     * Appends the line, followed by "\r\n", to the given writer and flushes.
     * Does nothing if the writer is null (it is the case when the writer
     * could not be created because of an unsupported charset).
     *
     * @param outputStreamWriter
     *            the writer to append to, may be null
     * @param line
     *            the line to write
     * @throws RuntimeException
     *             if the line can not be written
     */
    private void writeAlternateName(OutputStreamWriter outputStreamWriter, String line) {
        if (outputStreamWriter == null) {
            return;
        }
        try {
            outputStreamWriter.write(line);
            outputStreamWriter.write("\r\n");
            // NOTE(review): flushing after every single line defeats the
            // BufferedOutputStream; kept as is because the flush schedule of
            // the parent class is not visible from here - confirm before
            // removing.
            flushAndClear();
        } catch (IOException e) {
            throw new RuntimeException(
                    "an error has occurred when writing an alternate name line",
                    e);
        }
    }

    /**
     * Closes the four output writers. Every writer is given a chance to be
     * closed, even if closing a previous one fails; the first failure is
     * thrown once all of them have been processed, the following ones are
     * only logged.
     *
     * @throws RuntimeException
     *             if at least one writer can not be closed
     */
    private void closeOutputStreams() {
        RuntimeException firstFailure = null;
        firstFailure = closeWriter(adm1fileOutputStreamWriter, "adm1", firstFailure);
        firstFailure = closeWriter(adm2fileOutputStreamWriter, "adm2", firstFailure);
        firstFailure = closeWriter(countryfileOutputStreamWriter, "country", firstFailure);
        firstFailure = closeWriter(featuresfileOutputStreamWriter, "features", firstFailure);
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Closes one writer and keeps track of the first failure.
     *
     * @param writer
     *            the writer to close, may be null
     * @param label
     *            the name used in the error message
     * @param firstFailure
     *            the failure already recorded, or null
     * @return the failure to report : the given one if it is not null,
     *         otherwise the one raised by this call, if any
     */
    private RuntimeException closeWriter(OutputStreamWriter writer, String label,
            RuntimeException firstFailure) {
        if (writer == null) {
            return firstFailure;
        }
        try {
            writer.close();
        } catch (IOException e) {
            RuntimeException failure = new RuntimeException("can not close " + label + " outputStream", e);
            if (firstFailure == null) {
                return failure;
            }
            logger.warn(failure.getMessage() + " : " + e.getMessage());
        }
        return firstFailure;
    }

    /**
     * Closes the output writers on an error path : a failure to close is
     * only logged, so that it does not hide the error being reported.
     */
    private void closeOutputStreamsQuietly() {
        try {
            closeOutputStreams();
        } catch (RuntimeException e) {
            logger.warn("error while closing output streams : " + e.getMessage());
        }
    }

    /**
     * Creates a writer on the given file. If the file already exists, it is
     * deleted first.
     *
     * @param file
     *            the file to write to
     * @return the writer, or null if the charset is not supported
     * @throws FileNotFoundException
     *             if the file can not be opened for writing
     * @throws RuntimeException
     *             if the file exists and is not writable or can not be
     *             deleted
     */
    private OutputStreamWriter getWriter(File file)
            throws FileNotFoundException {
        if (file.exists()) {
            checkWriteRights(file);
            if (!file.delete()) {
                throw new RuntimeException("The file "+file.getAbsolutePath()+" exists but we can not delete it, to recreate it");
            }
        }
        OutputStream out = new BufferedOutputStream(new FileOutputStream(file));
        try {
            return new OutputStreamWriter(out, Constants.CHARSET);
        } catch (UnsupportedEncodingException e) {
            logger.warn("UnsupportedEncodingException for " + Constants.CHARSET
                    + " : Can not extract Alternate names");
            // the underlying stream is already opened : release it
            try {
                out.close();
            } catch (IOException closeFailure) {
                logger.warn("can not close the stream of " + file.getAbsolutePath()
                        + " : " + closeFailure.getMessage());
            }
            return null;
        }
    }

    /**
     * Resolves the four output files from the importer configuration and
     * opens a writer on each of them. If one writer can not be created, the
     * writers that are already opened are closed.
     *
     * @throws RuntimeException
     *             if a writer can not be created
     */
    protected void initFiles() {
        adm1file = geonamesFile(importerConfig.getAlternateNameAdm1FileName());
        adm2file = geonamesFile(importerConfig.getAlternateNameAdm2FileName());
        countryFile = geonamesFile(importerConfig.getAlternateNameCountryFileName());
        featuresFile = geonamesFile(importerConfig.getAlternateNameFeaturesFileName());
        try {
            adm1fileOutputStreamWriter = getWriter(adm1file);
            adm2fileOutputStreamWriter = getWriter(adm2file);
            countryfileOutputStreamWriter = getWriter(countryFile);
            featuresfileOutputStreamWriter = getWriter(featuresFile);
        } catch (FileNotFoundException e) {
            closeOutputStreamsQuietly();
            throw new RuntimeException(
                    "An error has occurred during creation of outpuStream : "
                            + e.getMessage(), e);
        } catch (RuntimeException e) {
            // getWriter also fails with a RuntimeException when an existing
            // file is not writable or can not be deleted
            closeOutputStreamsQuietly();
            throw e;
        }
    }

    /**
     * @param fileName
     *            a file name, relative to the Geonames directory
     * @return the file whose path is the configured Geonames directory
     *         directly followed by the file name
     */
    private File geonamesFile(String fileName) {
        return new File(importerConfig.getGeonamesDir() + fileName);
    }

    /**
     * Checks that the given file is writable.
     *
     * @param file
     *            the file to check
     * @throws RuntimeException
     *             if the file is not writable
     */
    private void checkWriteRights(File file) {
        if (!file.canWrite()) {
            throw new RuntimeException(
                    "you must have write rights in order to export adm in file "
                            + file.getAbsolutePath());
        }
    }

    /**
     * Runs the parent setup, loads the ids of the countries, adm1 and adm2
     * from the DAOs and opens the output files.
     */
    @Override
    public void setup() {
        super.setup();
        List<Long> countriesIDs = countryDao.listFeatureIds();
        List<Long> adm1IDs = admDao.listFeatureIdByLevel(1);
        List<Long> adm2IDs = admDao.listFeatureIdByLevel(2);
        adm1Map = populateMapFromList(adm1IDs);
        adm2Map = populateMapFromList(adm2IDs);
        countryMap = populateMapFromList(countriesIDs);
        initFiles();
    }

    /**
     * Builds a lookup map from a list of ids.
     *
     * @param list
     *            the ids to index, a null list is handled as an empty one
     * @return a map with one entry per id, the value is always the empty
     *         string
     */
    protected Map<Long, String> populateMapFromList(List<Long> list) {
        if (list == null) {
            return new HashMap<Long, String>();
        }
        Map<Long, String> map = new HashMap<Long, String>(list.size() + 1);
        for (Long id : list) {
            map.put(id, "");
        }
        return map;
    }

    /**
     * @return false : the first line of the file is processed like the
     *         others
     */
    @Override
    protected boolean shouldIgnoreFirstLine() {
        return false;
    }

    /**
     * @return true
     */
    @Override
    protected boolean shouldIgnoreComments() {
        return true;
    }

    /**
     * Does nothing.
     */
    @Override
    protected void setCommitFlushMode() {
        return;
    }

    /**
     * Flushes the four output writers (the ones that are null are ignored).
     * If a flush fails, all the writers are closed before the error is
     * reported.
     *
     * @throws RuntimeException
     *             if a writer can not be flushed
     */
    @Override
    protected void flushAndClear() {
        flushWriter(adm1fileOutputStreamWriter, "adm1file");
        flushWriter(adm2fileOutputStreamWriter, "adm2file");
        flushWriter(countryfileOutputStreamWriter, "countryfile");
        flushWriter(featuresfileOutputStreamWriter, "featuresfile");
    }

    /**
     * Flushes one writer.
     *
     * @param writer
     *            the writer to flush, may be null
     * @param label
     *            the name used in the error message
     */
    private void flushWriter(OutputStreamWriter writer, String label) {
        if (writer == null) {
            return;
        }
        try {
            writer.flush();
        } catch (IOException e) {
            closeOutputStreamsQuietly();
            throw new RuntimeException("can not flush " + label + " : "
                    + e.getMessage(), e);
        }
    }

    /**
     * @return 6, the number of fields listed in
     *         {@link #processData(String)}
     */
    @Override
    protected int getNumberOfColumns() {
        return 6;
    }

    /**
     * @return an array with the alternate names file of the Geonames
     *         directory, or an empty array when the alternate names are
     *         imported embedded with the features
     */
    @Override
    protected File[] getFiles() {
        if (importerConfig.isImportGisFeatureEmbededAlternateNames()) {
            logger
                    .info("ImportGisFeatureEmbededAlternateNames = true, we do not need to extract alternatenames from "
                            + importerConfig.getAlternateNamesFileName());
            return new File[0];
        }
        return new File[] { geonamesFile(importerConfig.getAlternateNamesFileName()) };
    }

    @Override
    Integer getImportKey() {
        return 0;
    }

    /**
     * Deletes the four extracted files and resets the status of this
     * importer.
     *
     * @return one entry per file : the name of the file and 1 if it has been
     *         deleted, 0 otherwise
     */
    public List<NameValueDTO<Integer>> rollback() {
        List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
        adm1file = geonamesFile(importerConfig.getAlternateNameAdm1FileName());
        deleteFile(adm1file, deletedObjectInfo);
        adm2file = geonamesFile(importerConfig.getAlternateNameAdm2FileName());
        deleteFile(adm2file, deletedObjectInfo);
        countryFile = geonamesFile(importerConfig.getAlternateNameCountryFileName());
        deleteFile(countryFile, deletedObjectInfo);
        featuresFile = geonamesFile(importerConfig.getAlternateNameFeaturesFileName());
        deleteFile(featuresFile, deletedObjectInfo);
        resetStatus();
        return deletedObjectInfo;
    }

    /**
     * Deletes the file and records the outcome.
     *
     * @param file
     *            the file to delete
     * @param deletedObjectInfo
     *            the list that receives the name of the file with 1 if it
     *            has been deleted, 0 otherwise
     */
    private void deleteFile(File file,
            List<NameValueDTO<Integer>> deletedObjectInfo) {
        boolean deleted = file.delete();
        deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), deleted ? 1 : 0));
        if (deleted) {
            logger.info("File " + file.getName() + " has been deleted");
        } else {
            logger.info("File " + file.getName() + " has not been deleted");
        }
    }

    @Required
    public void setAdmDao(IAdmDao admDao) {
        this.admDao = admDao;
    }

    @Required
    public void setCountryDao(ICountryDao countryDao) {
        this.countryDao = countryDao;
    }
}