/*******************************************************************************
* Gisgraphy Project
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
*
* Copyright 2008 Gisgraphy project
* David Masclet <davidmasclet@gisgraphy.com>
*
*
*******************************************************************************/
/**
*
*/
package com.gisgraphy.importer;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gisgraphy.compound.Decompounder;
import com.gisgraphy.compound.Decompounder.state;
import com.gisgraphy.domain.geoloc.entity.Adm;
import com.gisgraphy.domain.geoloc.entity.AlternateName;
import com.gisgraphy.domain.geoloc.entity.AlternateOsmName;
import com.gisgraphy.domain.geoloc.entity.GisFeature;
import com.gisgraphy.domain.geoloc.entity.OpenStreetMap;
import com.gisgraphy.domain.valueobject.AlternateNameSource;
import com.gisgraphy.helper.FeatureClassCodeHelper;
/**
* Useful methods for importer
* @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
*/
public class ImporterHelper {
/**
* Name of the readme file. countryFileFilter and splitedFileFilter reject any
* file with exactly this name so that it is never processed as data.
*/
public static final String EXCLUDED_README_FILENAME = "readme.txt";
/**
* Name of the dump file that contains all the countries. When this file is
* found by {@link #listCountryFilesToImport(String)}, it is the only file returned.
*/
public static final String ALLCOUTRY_FILENAME = "allCountries.txt";
/**
* The regexp that every Geonames country dump file name must match :
* two upper case letters followed by four characters, the last three being "txt".
* NOTE(review): the dot is not escaped, so it matches any character, not only '.'.
*/
public static final String GEONAMES_COUNTRY_FILE_ACCEPT_REGEX_STRING = "[A-Z][A-Z](.txt)";
/**
* Same as the Geonames regexp but also accepts a "dat" suffix.
* Used by countryFileFilter (dot is unescaped here too).
*/
public static final String OPENSTREETMAP_FILE_ACCEPT_REGEX_STRING = "[A-Z][A-Z](.dat|.txt)";
/**
* Regexp accepted by countryFileFilter for the Quattroshapes localities file.
*/
public static final String QUATTROSHAPES_FILE_ACCEPT_REGEX_STRING = "(localities.txt)";
/**
* Regexp for splited files : two upper case letters, any single character,
* one or more digits and a "txt" or "dat" suffix. Used by splitedFileFilter.
*/
public static final String SPLITED_FILE_ACCEPT_REGEX_STRING = "[A-Z][A-Z](.)[0-9]+(.txt|.dat)";
// Two upper case letters that are not "US" (negative lookahead), any single character, digits, "txt" suffix.
// NOTE(review): the original comment refers to SPLITED_OPENSTREETMAP_US_FILE_ACCEPT_REGEX_STRING,
// which is not declared in this class; this constant is not referenced in this class either.
//2 letter but not us, it is managed by SPLITED_OPENSTREETMAP_US_FILE_ACCEPT_REGEX_STRING
public static final String SPLITED_OPENSTREETMAP_FILE_ACCEPT_REGEX_STRING = "((?!(?:US))[A-Z][A-Z])(.)[0-9]+(.txt)";
// "US" + any character, digits, "txt" suffix. Not referenced in this class.
public static final String SPLITED_GEONAMES_ALTERNATENAMES_FILE_ACCEPT_REGEX_STRING = "(US.)[0-9]+(.txt)";
// "allCountries", any single character, digits, "txt" suffix. Used by splitedFileFilter.
public static final String SPLITED_ALLCOUNTRIES_FILE_ACCEPT_REGEX_STRING = "(allCountries)(.)[0-9]+(.txt)";
// Matches either any text containing "CEDEX", or five digits, one whitespace, "SP", one whitespace and digits.
public static final String UNWANTED_ZIPCODE_REGEXP = "(.*(?:CEDEX).*|(?:\\d{5}\\sSP\\s\\d+))";
// Case insensitive compiled form of UNWANTED_ZIPCODE_REGEXP, applied to the whole value by isUnwantedZipCode.
public static final Pattern UNWANTED_ZIPCODE_PATTERN = Pattern.compile(UNWANTED_ZIPCODE_REGEXP,Pattern.CASE_INSENSITIVE);
// Shared decompounder, used when populating the alternate names of streets
// to also add the "other format" of names with the "de" language or in country "DE".
private static Decompounder decompounder = new Decompounder();
/**
 * Markers (lower case) that flag an alternate name key as metadata rather
 * than as a real name.
 */
private static final String[] UNWANTED_ALTERNATE_NAME_MARKERS = {
    "source", "fixme", "prefix", "suffix", "postfix", "remove",
    "erroneous", "pronunciation", "systemname", "wikidata", "note"
};

/**
 * Tells whether an alternate name key should be ignored.
 *
 * @param alternateName
 *            the key to check (can be null)
 * @return true if the key is null, empty once trimmed, or contains
 *         (case insensitively) one of the unwanted markers
 */
public static boolean isUnwantedAlternateName(String alternateName){
    if (alternateName == null) {
        return true;
    }
    // Locale.ROOT keeps the lower casing independent of the JVM default
    // locale: with a Turkish locale "FIXME" would become "f\u0131xme"
    // (dotless i) and would no longer be detected.
    String normalized = alternateName.toLowerCase(java.util.Locale.ROOT).trim();
    if (normalized.length() == 0) {
        return true;
    }
    for (String marker : UNWANTED_ALTERNATE_NAME_MARKERS) {
        if (normalized.contains(marker)) {
            return true;
        }
    }
    return false;
}
// Previous version of ALTERNATENAMES_EXTRACTION_REGEXP, kept commented out
// (the key part was not captured and a single double quote ended the name).
/* public static final String ALTERNATENAMES_EXTRACTION_REGEXP = "(?:\"\\{\"\")?"//beginning of string
+ "(?:[_]{0,3})"//not the underscore (optionaly)
+ "[,]?(?:(?!(?:(?:name|(?:___)))).)*" //something not name or ___ =>alt for instance
+ "(?:(?:(?:[a-z_]{0,12})?name)[:]?)"//name:
+ "((?:(?:(?!===).)*)"//lang: something not ===
+ ")"
+ "(?:===)"// don't take the 3 equals sign ===
+ "((?:(?!___|\"|}|(?:,\\w+(?=(?:_name)))|(?:,(?=(?:name)))).)+)[,]?[}]?"// the name
*/
/**
* Regexp used to extract alternate names from a serialized list of
* "key===value" items. It declares three capturing groups, read by the
* populateAlternateNames methods :
* group 1 is the whole key (what precedes "name", "name" itself, an
* optional colon and the language), and is checked with isUnwantedAlternateName;
* group 2 is the language part only (may be empty);
* group 3 is the name value, which ends before "___", two double quotes,
* a closing brace or a comma that starts a new "...name" key.
*/
public static final String ALTERNATENAMES_EXTRACTION_REGEXP = "(?:\"\\{\"\")?"// optional opening : quote, brace, two quotes
+ "(?:[_]{0,3})"// up to three leading underscores, not captured
+ "([,]?(?:(?!(?:(?:name|(?:___)))).)*" // group 1 starts : anything that is not "name" or "___" (e.g. "alt_")
+ "(?:(?:(?:[a-z_]{0,12})?name)[:]?)"// "name", optionally prefixed by up to 12 letters/underscores, and an optional colon
+ "((?:(?:(?!===).)*)"// group 2 : the language, everything up to "==="
+ "))"
+ "(?:===)"// the three equals signs separator, not captured
+ "((?:(?!___|\"\"|}|(?:,\\w+(?=(?:_name)))|(?:,(?=(?:name)))).)+)[,]?[}]?"// group 3 : the name value
;
// Case insensitive compiled form of ALTERNATENAMES_EXTRACTION_REGEXP.
public static final Pattern ALTERNATENAMES_EXTRACTION_PATTERN = Pattern.compile(ALTERNATENAMES_EXTRACTION_REGEXP,Pattern.CASE_INSENSITIVE);
/**
* Regexp used by parseIsInAdm. Fields are separated by "___" :
* group 1 is a non empty text read as the adm name,
* group 2 is a (possibly empty) run of digits read as the level,
* group 3 is a run of digits read as the openstreetmap id.
*/
public static final String ISINADM_EXTRACTION_REGEXP = "((?:(?!___).)+)(?=(?:___|$))(?:___|$)"
+ "((?:(?!___)\\d)*)(?=(?:___|$))(?:___|$)"
+ "(\\d+)(?:___|$)?";
// Compiled form of ISINADM_EXTRACTION_REGEXP (case sensitive).
public static final Pattern ISINADM_EXTRACTION_PATTERN = Pattern.compile(ISINADM_EXTRACTION_REGEXP);
/**
* The regexp that every zipped country file dump matches
* (any name ending with one arbitrary character followed by "zip").
*/
public static final String ZIP_FILE_ACCEPT_REGEX_STRING = ".*(.zip)";
// Regexp for the files accepted by gisFileFilter : names ending with "tar.bz2" or "gis"
// preceded by one arbitrary character (the dots are not escaped).
public static final String GIS_FILE_ACCEPT_REGEX_STRING = ".*(.tar.bz2)|.*(.gis)";
// Logger shared by all the static helpers of this class.
protected static final Logger logger = LoggerFactory.getLogger(ImporterHelper.class);
// HTTP client parameters, configured through an anonymous subclass with an
// instance initializer : both timeouts are set to 2000
// (presumably milliseconds, per commons-httpclient conventions - TODO confirm).
private static HttpClientParams params = new HttpClientParams(){{
setConnectionManagerTimeout(2000);
setSoTimeout(2000);
}
};
// Connection manager handed to the shared client below.
private static MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();
// Shared HTTP client (anonymous subclass that applies the params above);
// it is used by getHttpFileSize to run HEAD requests.
private static HttpClient client = new HttpClient(connectionManager){{
setParams(params);
}};
/**
 * Accepts the regular files that are importable country dumps : the all
 * countries file, the Geonames / OpenStreetMap country files and the
 * Quattroshapes localities file. The readme file is always rejected.
 */
public static FileFilter countryFileFilter = new FileFilter() {
    // Patterns are immutable : compile them once instead of on every accept() call.
    private final Pattern geonamesPattern = Pattern.compile(GEONAMES_COUNTRY_FILE_ACCEPT_REGEX_STRING);
    private final Pattern openStreetMapPattern = Pattern.compile(OPENSTREETMAP_FILE_ACCEPT_REGEX_STRING);
    private final Pattern quattroshapesPattern = Pattern.compile(QUATTROSHAPES_FILE_ACCEPT_REGEX_STRING);

    public boolean accept(File file) {
        // isFile() is only true for an existing regular file, no need to call exists()
        if (!file.isFile()) {
            return false;
        }
        String fileName = file.getName();
        if (EXCLUDED_README_FILENAME.equals(fileName)) {
            return false;
        }
        return geonamesPattern.matcher(fileName).matches()
                || ALLCOUTRY_FILENAME.equals(fileName)
                || openStreetMapPattern.matcher(fileName).matches()
                || quattroshapesPattern.matcher(fileName).matches();
    }
};
/**
 * Accepts the regular files whose name matches the splited country file
 * pattern or the splited all countries pattern. The readme file is always
 * rejected.
 */
public static FileFilter splitedFileFilter = new FileFilter() {
    // Patterns are immutable : compile them once instead of on every accept() call.
    private final Pattern splitPattern = Pattern.compile(SPLITED_FILE_ACCEPT_REGEX_STRING);
    private final Pattern allCountriesSplitPattern = Pattern.compile(SPLITED_ALLCOUNTRIES_FILE_ACCEPT_REGEX_STRING);

    public boolean accept(File file) {
        // isFile() is only true for an existing regular file, no need to call exists()
        if (!file.isFile()) {
            return false;
        }
        String fileName = file.getName();
        if (EXCLUDED_README_FILENAME.equals(fileName)) {
            return false;
        }
        return allCountriesSplitPattern.matcher(fileName).matches()
                || splitPattern.matcher(fileName).matches();
    }
};
/**
 * Accepts the regular files whose name matches
 * {@link #ZIP_FILE_ACCEPT_REGEX_STRING}.
 */
private static FileFilter ZipFileFilter = new FileFilter() {
    // Compiled once instead of on every accept() call.
    private final Pattern zipPattern = Pattern.compile(ZIP_FILE_ACCEPT_REGEX_STRING);

    public boolean accept(File file) {
        // isFile() is only true for an existing regular file, no need to call exists()
        return file.isFile() && zipPattern.matcher(file.getName()).matches();
    }
};
/**
 * Accepts the regular files whose name matches
 * {@link #GIS_FILE_ACCEPT_REGEX_STRING}.
 */
private static FileFilter gisFileFilter = new FileFilter() {
    // Compiled once instead of on every accept() call.
    private final Pattern gisPattern = Pattern.compile(GIS_FILE_ACCEPT_REGEX_STRING);

    public boolean accept(File file) {
        // isFile() is only true for an existing regular file, no need to call exists()
        return file.isFile() && gisPattern.matcher(file.getName()).matches();
    }
};
/**
 * Lists the country files of a directory that can be imported.
 *
 * @param directoryPath
 *            The directory where files are
 * @see #GEONAMES_COUNTRY_FILE_ACCEPT_REGEX_STRING
 * @return an array with only the all countries file (see
 *         {@link #ALLCOUTRY_FILENAME}) when it is present, otherwise all
 *         the files accepted by the country file filter, or an empty
 *         array if there is no file or if the directory can not be listed
 */
public static File[] listCountryFilesToImport(String directoryPath) {
    File[] candidates = new File(directoryPath).listFiles(countryFileFilter);
    if (candidates == null) {
        return new File[0];
    }
    File[] importables = candidates;
    for (int index = 0; index < candidates.length; index++) {
        File candidate = candidates[index];
        if (ALLCOUTRY_FILENAME.equals(candidate.getName())) {
            // the all countries dump supersedes every single country dump
            importables = new File[] { candidate };
            logger.info(ALLCOUTRY_FILENAME + " is present. Only this file will be imported. all other country files will be ignore");
            break;
        }
    }
    if (importables.length == 0) {
        logger.warn("there is no file to import in "+directoryPath);
    }
    // log only
    for (File importable : importables) {
        logger.info(importable.getName() + " is an importable File");
    }
    logger.info(importables.length +" files are importable files");
    return importables;
}
/**
 * Lists the splited files of a directory that can be imported.
 *
 * @param directoryPath
 *            The directory where splited files are
 * @return the files accepted by the splited file filter, or an empty array
 *         if there is no file or if the directory can not be listed
 */
public static File[] listSplitedFilesToImport(String directoryPath) {
    File[] splitedFiles = new File(directoryPath).listFiles(splitedFileFilter);
    if (splitedFiles == null) {
        return new File[0];
    }
    if (splitedFiles.length == 0) {
        logger.warn("there is no file to import in "+directoryPath);
    }
    // log only
    for (File splitedFile : splitedFiles) {
        logger.info(splitedFile.getName() + " is a Geonames splited importable File");
    }
    logger.info(splitedFiles.length +" files are Geonames importable files");
    return splitedFiles;
}
/**
 * Lists the zip files of a directory.
 *
 * @param directoryPath
 *            The directory to look into
 * @see #ZIP_FILE_ACCEPT_REGEX_STRING
 * @return all the zip files present in the specified directory or an empty
 *         array if there is no file or if the directory can not be listed
 */
public static File[] listZipFiles(String directoryPath) {
    File[] zipFiles = new File(directoryPath).listFiles(ZipFileFilter);
    if (zipFiles != null) {
        return zipFiles;
    }
    return new File[0];
}
/**
 * Lists the gis files of a directory.
 *
 * @param directoryPath
 *            The directory to look into
 * @see #GIS_FILE_ACCEPT_REGEX_STRING
 * @return all the files accepted by the gis file filter in the specified
 *         directory or an empty array if there is no file or if the
 *         directory can not be listed
 */
public static File[] listGisFiles(String directoryPath) {
    File[] gisFiles = new File(directoryPath).listFiles(gisFileFilter);
    if (gisFiles != null) {
        return gisFiles;
    }
    return new File[0];
}
/**
 * Determines the size of a remote file with an HTTP HEAD request.
 *
 * @param URL the HTTP URL
 * @return The size of the HTTP file, read from the Content-Length header,
 *         or -1 if the server answers with a 3XX, 4XX or 5XX status, if
 *         the header is missing, duplicated or not a number, or if the
 *         request fails
 */
public static long getHttpFileSize(String URL){
    HeadMethod headMethod = new HeadMethod(URL);
    headMethod.setRequestHeader(new Header("User-Agent", "gisgraphy_"));
    // Redirects must NOT be followed : Geonames sends a 302 found HTTP status
    // code when a file does not exist, and following it would hide the
    // missing file (download() refuses redirects for the same reason).
    headMethod.setFollowRedirects(false);
    try {
        int code = client.executeMethod(headMethod);
        switch (code / 100) {
        case 3:
            // a 3XX is how Geonames reports a missing file
        case 4:
            logger.error("Can not determine HTTP file size of "+URL+" because it does not exists ("+code+")");
            return -1;
        case 5:
            logger.error("Can not determine HTTP file size of "+URL+" because the server send an error "+code);
            return -1;
        default:
            break;
        }
        Header[] contentLengthHeaders = headMethod.getResponseHeaders("Content-Length");
        if (contentLengthHeaders.length == 1) {
            String contentLength = contentLengthHeaders[0].getValue();
            logger.info("HTTP file size of "+URL+" = "+contentLength);
            if (contentLength == null) {
                return -1;
            }
            try {
                return Long.parseLong(contentLength.trim());
            } catch (NumberFormatException e) {
                // a malformed header is reported as an unknown size instead of crashing the caller
                logger.error("can not parse Content-Length '"+contentLength+"' for "+URL);
                return -1;
            }
        }
        // no header, or several conflicting ones : the size is unknown
    } catch (HttpException e) {
        logger.error("can not execute head method for "+URL+" : "+e.getMessage(),e);
    } catch (IOException e) {
        logger.error("can not execute head method for "+URL+" : "+e.getMessage(),e);
    } finally {
        headMethod.releaseConnection();
    }
    return -1;
}
/**
 * Checks a list of URLs with {@link #checkUrl(String)}, stopping at the
 * first one that fails.
 *
 * @param urlsAsString
 *            the URLs to check
 * @return true if every URL of the list passes {@link #checkUrl(String)}
 *         (also true for an empty list), false if the list is null or if
 *         at least one URL fails
 */
public static boolean checkUrls(List<String> urlsAsString){
    if (urlsAsString == null) {
        return false;
    }
    for (String candidate : urlsAsString) {
        boolean reachable = checkUrl(candidate);
        if (!reachable) {
            return false;
        }
    }
    return true;
}
/**
 * Checks that an URL answers a HEAD request with a 200 or a 3XX status code.
 *
 * @param urlAsString the url to check
 * @return true if the url is a valid HTTP url and the server answers with
 *         200 or 3XX, false otherwise (null, malformed or non HTTP url,
 *         I/O error, or any other status code)
 */
public static boolean checkUrl(String urlAsString){
    if (urlAsString == null) {
        logger.error("can not check null URL");
        return false;
    }
    URL url;
    try {
        url = new URL(urlAsString);
    } catch (MalformedURLException e) {
        logger.error(urlAsString+" is not a valid url, can not check.");
        return false;
    }
    int responseCode;
    String responseMessage;
    HttpURLConnection huc = null;
    try {
        java.net.URLConnection connection = url.openConnection();
        if (!(connection instanceof HttpURLConnection)) {
            // e.g. file: or ftp: urls, a blind cast would throw a ClassCastException
            logger.error(urlAsString+" is not an HTTP url, can not check.");
            return false;
        }
        huc = (HttpURLConnection) connection;
        huc.setRequestMethod("HEAD");
        // The body is deliberately not read : a HEAD response has none, and
        // reading it throws for a status >= 400, which would hide the status
        // code from the error message below.
        responseCode = huc.getResponseCode();
        responseMessage = huc.getResponseMessage();
    } catch (IOException e) {
        // also covers ProtocolException, which is an IOException
        logger.error("can not check url "+e.getMessage(),e);
        return false;
    } finally {
        if (huc != null) {
            huc.disconnect();
        }
    }
    // >= 300 so that every 3XX code, 300 included, is accepted as documented
    if (responseCode == 200 || (responseCode >= 300 && responseCode < 400)) {
        logger.info("URL "+urlAsString+ " exists");
        return true;
    }
    logger.error(urlAsString+" return a "+responseCode+" : "+responseMessage);
    return false;
}
/**
 * Downloads a file over HTTP, without following redirects.
 *
 * @param address
 *            the address of the file to be downloaded
 * @param localFileName
 *            the local file name (with absolute path)
 * @throws FileNotFoundException
 *             if the server answers with a 404 or 302 status code, or if
 *             the remote or local file can not be opened
 * @throws ImporterException
 *             for any other failure (unknown host, 500 / 509 status code,
 *             I/O error while copying, ...)
 */
public static void download(String address, String localFileName) throws FileNotFoundException{
    logger.info("download file " + address + " to " + localFileName);
    OutputStream out = null;
    HttpURLConnection conn = null;
    InputStream in = null;
    try {
        URL url = new URL(address);
        // a non HTTP url fails the cast and is reported as an ImporterException below
        conn = (HttpURLConnection) url.openConnection();
        conn.setRequestProperty("User-Agent", "gisgraphy_");
        conn.setInstanceFollowRedirects(false);
        int responseCode = conn.getResponseCode();
        //manage most frequent error code and Gisgraphy specific one
        switch (responseCode) {
        case 509:
            throw new RuntimeException("Sorry, there is too many users connected for "+address+", this site has limmited resources, please try again later");
        case 500:
            throw new RuntimeException("Sorry, the server return an 500 status code for "+address+", an internal error has occured");
        case 404:
            throw new FileNotFoundException("Sorry, the server return an 404 status code for "+address+", the file probably not exists or the URL is not correct");
        case 302:
            throw new FileNotFoundException("Sorry, the server return an 302 status code for "+address+", the file is not at the correct URL");
        default:
            break;
        }
        in = conn.getInputStream();
        out = new BufferedOutputStream(new FileOutputStream(localFileName));
        byte[] buffer = new byte[1024];
        int numRead;
        long numWritten = 0;
        while ((numRead = in.read(buffer)) != -1) {
            out.write(buffer, 0, numRead);
            numWritten += numRead;
        }
        // flush here so that a write failure is reported as a failed download
        // instead of being swallowed when the stream is closed
        out.flush();
        logger.info(localFileName + "\t" + numWritten);
    } catch (UnknownHostException e) {
        String errorMessage = "can not download " + address + " to " + localFileName + " : " + e.getMessage() + ". if the host exists and is reachable," + " maybe this links can help : http://www.gisgraphy.com/forum/viewtopic.php?f=3&t=64 ";
        logger.warn(errorMessage);
        throw new ImporterException(errorMessage, e);
    } catch (FileNotFoundException e) {
        throw e;
    } catch (Exception e) {
        logger.warn("can not download " + address + " to " + localFileName + " : " + e.getMessage());
        throw new ImporterException(e);
    } finally {
        // each resource is released independently so that a failure on one
        // does not leak the others
        if (in != null) {
            try {
                in.close();
            } catch (IOException ioe) {
                logger.error("cannot close streams");
            }
        }
        if (out != null) {
            try {
                out.close();
            } catch (IOException ioe) {
                logger.error("cannot close streams");
            }
        }
        if (conn != null) {
            conn.disconnect();
        }
    }
}
/**
 * unzip a file in the same directory as the zipped file
 *
 * @param file
 *            The file to unzip
 * @throws ImporterException
 *             if the archive can not be read, if an entry can not be
 *             written, or if an entry would be extracted outside of the
 *             directory of the archive
 */
public static void unzipFile(File file) {
    logger.info("will Extracting file: " + file.getName());
    ZipFile zipFile = null;
    try {
        // the absolute form always has a parent, even for a bare "archive.zip"
        File targetDirectory = file.getAbsoluteFile().getParentFile();
        if (targetDirectory == null) {
            throw new IOException("can not determine the directory of " + file);
        }
        String targetPath = targetDirectory.getCanonicalPath();
        String targetPrefix = targetPath.endsWith(File.separator) ? targetPath : targetPath + File.separator;
        zipFile = new ZipFile(file);
        Enumeration<? extends ZipEntry> entries = zipFile.entries();
        while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            File destination = new File(targetDirectory, entry.getName());
            // refuse entries such as "../../x" that would escape the target directory
            String destinationPath = destination.getCanonicalPath();
            if (!destinationPath.equals(targetPath) && !destinationPath.startsWith(targetPrefix)) {
                throw new IOException("entry " + entry.getName() + " is outside of " + targetPath);
            }
            if (entry.isDirectory()) {
                // directories are created next to the archive, not in the working directory
                if (!destination.isDirectory() && !destination.mkdirs()) {
                    throw new IOException("can not create directory " + destination);
                }
                continue;
            }
            // an archive may list a file before (or without) its directory entry
            File parent = destination.getParentFile();
            if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
                throw new IOException("can not create directory " + parent);
            }
            logger.info("Extracting file: " + entry.getName() + " to " + destination.getPath());
            copyInputStream(zipFile.getInputStream(entry), new BufferedOutputStream(new FileOutputStream(destination)));
        }
    } catch (IOException e) {
        logger.error("can not unzip " + file.getName() + " : " + e.getMessage(),e);
        throw new ImporterException(e);
    } finally {
        // closing the archive also releases the entry streams it handed out
        if (zipFile != null) {
            try {
                zipFile.close();
            } catch (IOException e) {
                logger.warn("can not close " + file.getName() + " : " + e.getMessage());
            }
        }
    }
}

/**
 * Copies a stream into another one and closes both of them, even when the
 * copy fails.
 */
private static final void copyInputStream(InputStream in, OutputStream out) throws IOException {
    try {
        byte[] buffer = new byte[1024];
        int len;
        while ((len = in.read(buffer)) >= 0) {
            out.write(buffer, 0, len);
        }
        out.flush();
    } finally {
        try {
            in.close();
        } finally {
            out.close();
        }
    }
}
/**
 * Replaces the ADMD feature code by the ADM level deduced from the adm
 * codes.
 *
 * @param fields
 *            the fields corresponding to a split line of the csv geonames
 *            file; the feature class is read at index 6, the feature code
 *            at index 7 and the four adm codes at indexes 10 to 13
 * @return the same array. When the class is "A", the code is "ADMD" and
 *         the level processed from the adm codes is not 0, the feature
 *         code has been changed to "ADM" followed by that level.
 */
public static String[] virtualizeADMD(String[] fields) {
    boolean isAdmd = "ADMD".equals(fields[7]) && "A".equals(fields[6]);
    if (!isAdmd) {
        return fields;
    }
    // it is an ADMD, try to detect its level from the adm codes
    int detectedLevel = Adm.getProcessedLevelFromCodes(fields[10], fields[11], fields[12], fields[13]);
    if (detectedLevel != 0) {
        fields[7] = "ADM" + detectedLevel;
    }
    return fields;
}
/**
 * Calls an HTTP URL (without following redirects) and ignores the answer.
 * Errors are logged and never propagated.
 *
 * @param address
 *            the URL to call; nothing is done if it is null or blank
 */
public static void callURL(String address){
    if (address == null || address.trim().equals("")) {
        logger.error("can not call a null URL");
        return;
    }
    HttpURLConnection conn = null;
    try {
        URL url = new URL(address);
        // a non HTTP url fails the cast and is logged by the catch below
        conn = (HttpURLConnection) url.openConnection();
        conn.setRequestProperty("User-Agent", "gisgraphy_");
        conn.setInstanceFollowRedirects(false);
        // asking for the response code is what actually sends the request
        conn.getResponseCode();
    } catch (Exception e) {
        logger.error("error when calling "+address+" "+e.getMessage(),e);
    } finally {
        // the response is never read, so release the connection explicitly
        if (conn != null) {
            conn.disconnect();
        }
    }
}
/**
 * Fills the adm code of an Adm with its own feature id when that code is
 * the only one missing.
 *
 * @param fields
 *            the fields of a line; the feature id is read at index 0, the
 *            feature class and code at indexes 6 and 7, and the adm codes
 *            1 to 4 at indexes 10 to 13
 * @return the same array. For an Adm of level N (1 to 4) with a non empty
 *         feature id, when the codes of the levels below N are all set and
 *         the code of level N is empty, that code has been set to the
 *         feature id.
 */
public static String[] correctLastAdmCodeIfPossible(String[] fields) {
    boolean isAdmWithFeatureId = FeatureClassCodeHelper.is_Adm(fields[6], fields[7])
            && !AbstractSimpleImporterProcessor.isEmptyField(fields, 0, false);
    if (!isAdmWithFeatureId) {
        return fields;
    }
    int level = Adm.getProcessedLevelFromFeatureClassCode(fields[6], fields[7]);
    if (level < 1 || level > 4) {
        return fields;
    }
    // adm1code is at index 10, so the code of the current level is at 9 + level
    int ownCodeIndex = 9 + level;
    for (int parentCodeIndex = 10; parentCodeIndex < ownCodeIndex; parentCodeIndex++) {
        if (AbstractSimpleImporterProcessor.isEmptyField(fields, parentCodeIndex, false)) {
            // a parent code is missing : nothing can be deduced
            return fields;
        }
    }
    if (AbstractSimpleImporterProcessor.isEmptyField(fields, ownCodeIndex, false)) {
        fields[ownCodeIndex] = fields[0];// assign the code of the level with the feature id
    }
    return fields;
}
/**
 * Compiles a regexp in a case insensitive way, without ever throwing.
 *
 * @param regexp
 *            a regexp
 * @return A case insensitive {@link Pattern}, or null if the regexp is
 *         null, blank or can not be compiled
 */
public static Pattern compileRegex(String regexp) {
    if (regexp == null || regexp.trim().length() == 0) {
        return null;
    }
    try {
        return Pattern.compile(regexp, Pattern.CASE_INSENSITIVE);
    } catch (RuntimeException invalidRegexp) {
        // by contract an invalid regexp is reported with null
        return null;
    }
}
/**
 * Formats a duration given in seconds.
 *
 * @param secsIn
 *            the number of seconds
 * @return a human readable string made of the non zero hours, minutes and
 *         seconds parts, each one followed by a space. Example : 4000
 *         gives "1 hour 6 minuts 40 seconds ". 0 gives an empty string.
 */
public static String formatSeconds(long secsIn) {
    long hours = secsIn / 3600;
    long leftover = secsIn % 3600;
    long minutes = leftover / 60;
    long seconds = leftover % 60;
    StringBuilder text = new StringBuilder();
    if (hours != 0) {
        text.append(hours).append(" hour").append(getPlural(hours));
    }
    if (minutes != 0) {
        text.append(minutes).append(" minut").append(getPlural(minutes));
    }
    if (seconds != 0) {
        text.append(seconds).append(" second").append(getPlural(seconds));
    }
    return text.toString();
}

/**
 * @return the suffix to append to a unit : "s " when count is greater
 *         than 1, a single space otherwise
 */
private static String getPlural(long count) {
    if (count > 1) {
        return "s ";
    }
    return " ";
}
/**
 * Extracts the alternate names of a serialized list (see
 * ALTERNATENAMES_EXTRACTION_PATTERN) and adds them to a feature.
 * A value can hold several names separated by ';', '|', ',' or ':'. Keys
 * flagged by isUnwantedAlternateName are skipped, as well as blank names,
 * names that are too long, and names already present on the feature (or
 * already extracted from the same item) for the same language.
 *
 * @param feature
 *            the feature to populate
 * @param alternateNamesAsString
 *            the serialized alternate names
 * @return the given feature (untouched if one of the parameters is null)
 */
public final static GisFeature populateAlternateNames(GisFeature feature,
        String alternateNamesAsString) {
    if (feature == null || alternateNamesAsString == null) {
        return feature;
    }
    if (alternateNamesAsString.startsWith("\"") && alternateNamesAsString.endsWith("\"")) {
        // NOTE(review): String.replace is literal, the first argument is not
        // interpreted as a regexp, so this only replaces the exact text
        // (?<!{)"" . Kept unchanged because the extraction pattern itself
        // copes with doubled double quotes.
        alternateNamesAsString = alternateNamesAsString.replace("(?<!{)\"\"", "\'");
    }
    Matcher matcher = ALTERNATENAMES_EXTRACTION_PATTERN.matcher(alternateNamesAsString);
    int i = 0;
    while (matcher.find()) {
        i++;// 1-based position of the current item, for the logs
        if (matcher.groupCount() != 3) {
            logger.warn("wrong number of fields for alternatename no " + i + " for line " + alternateNamesAsString);
            continue;
        }
        if (matcher.group(1) != null && isUnwantedAlternateName(matcher.group(1))) {
            logger.info(matcher.group(1)+" is not an alternate name we want for line " + alternateNamesAsString);
            continue;
        }
        String lang = matcher.group(2);
        String alternateName = matcher.group(3);
        if (alternateName == null || "".equals(alternateName.trim())) {
            continue;
        }
        boolean hasLang = lang != null && !"".equals(lang.trim()) && lang.length() < 29;
        String normalizedLang = hasLang ? lang.trim().toLowerCase() : null;
        List<AlternateName> toBeAdded = new ArrayList<AlternateName>();
        for (String rawName : alternateName.split(";|\\||,|:")) {
            if (rawName == null || rawName.length() >= GisFeature.MAX_ALTERNATENAME_SIZE) {
                continue;
            }
            String name = rawName.trim();
            if (name.length() == 0) {
                continue;
            }
            // A name is a duplicate as soon as ONE existing alternate name is
            // equal to it; it is added only once all of them have been checked.
            boolean duplicate = false;
            if (feature.getAlternateNames() != null) {
                for (AlternateName existing : feature.getAlternateNames()) {
                    if (isSameAlternateName(existing, name, normalizedLang)) {
                        duplicate = true;
                        break;
                    }
                }
            }
            for (int pending = 0; !duplicate && pending < toBeAdded.size(); pending++) {
                duplicate = isSameAlternateName(toBeAdded.get(pending), name, normalizedLang);
            }
            if (duplicate) {
                continue;
            }
            AlternateName alternateNameToAdd = hasLang
                    ? new AlternateName(name, normalizedLang, AlternateNameSource.OPENSTREETMAP)
                    : new AlternateName(name, AlternateNameSource.OPENSTREETMAP);
            alternateNameToAdd.setGisFeature(feature);
            toBeAdded.add(alternateNameToAdd);
        }
        feature.addAlternateNames(toBeAdded);
    }
    return feature;
}

/**
 * @return true if the alternate name has the given name and the given
 *         language (a null language only matches a null language)
 */
private static boolean isSameAlternateName(AlternateName candidate, String name, String lang) {
    if (candidate == null || !name.equals(candidate.getName())) {
        return false;
    }
    return lang == null ? candidate.getLanguage() == null : lang.equals(candidate.getLanguage());
}
/**
 * Extracts the alternate names of a serialized list (see
 * ALTERNATENAMES_EXTRACTION_PATTERN) and adds them to a street.
 * A value can hold several names separated by ';', '|', ',' or ':'. When
 * the street has no name yet, the first usable name becomes the name of
 * the street instead of an alternate name. For names in the "de" language
 * that the decompounder recognizes, and for the name of a street of the
 * "DE" country, the other format given by the decompounder is added as an
 * alternate name too. An alternate name is only added when the street does
 * not already contain it.
 *
 * @param street
 *            the street to populate
 * @param alternateNamesAsString
 *            the serialized alternate names
 * @return the given street (untouched if one of the parameters is null)
 */
public final static OpenStreetMap populateAlternateNames(OpenStreetMap street,
        String alternateNamesAsString) {
    if (street == null || alternateNamesAsString == null) {
        return street;
    }
    if (alternateNamesAsString.startsWith("\"") && alternateNamesAsString.endsWith("\"")) {
        alternateNamesAsString = alternateNamesAsString.replace("\"\"", "\"");
    }
    Matcher matcher = ALTERNATENAMES_EXTRACTION_PATTERN.matcher(alternateNamesAsString);
    int i = 0;
    while (matcher.find()) {
        i++;// 1-based position of the current item, for the logs
        if (matcher.groupCount() != 3) {
            logger.warn("wrong number of fields for alternatename no " + i + " for line " + alternateNamesAsString);
            continue;
        }
        if (matcher.group(1) != null && isUnwantedAlternateName(matcher.group(1))) {
            logger.warn(matcher.group(1)+" is not an alternate name we want for line " + alternateNamesAsString);
            continue;
        }
        String lang = matcher.group(2);
        String alternateName = matcher.group(3);
        if (alternateName == null || "".equals(alternateName.trim())) {
            continue;
        }
        // null safe, unlike lang.equals("de")
        boolean german = "de".equals(lang);
        boolean hasLang = lang != null && !"".equals(lang.trim()) && lang.length() < 29;
        for (String name : alternateName.split(";|\\||,|:")) {
            if (name == null || "".equals(name) || name.length() >= OpenStreetMap.MAX_ALTERNATENAME_SIZE) {
                continue;
            }
            if (street.getName() == null) {
                // no name yet : the first usable alternate name becomes the name
                street.setName(name);
                if (german && decompounder.isDecompoudName(name)) {
                    String otherFormat = decompounder.getOtherFormat(name);
                    addAlternateNameIfAbsent(street, new AlternateOsmName(otherFormat, lang.trim().toLowerCase(), AlternateNameSource.OPENSTREETMAP));
                }
                continue;
            }
            if (hasLang) {
                String normalizedLang = lang.trim().toLowerCase();
                addAlternateNameIfAbsent(street, new AlternateOsmName(name.trim(), normalizedLang, AlternateNameSource.OPENSTREETMAP));
                if (german && decompounder.isDecompoudName(name)) {
                    String otherFormat = decompounder.getOtherFormat(name);
                    addAlternateNameIfAbsent(street, new AlternateOsmName(otherFormat, normalizedLang, AlternateNameSource.OPENSTREETMAP));
                }
            } else {
                addAlternateNameIfAbsent(street, new AlternateOsmName(name.trim(), AlternateNameSource.OPENSTREETMAP));
            }
        }
    }
    if (street.getName() != null && street.getCountryCode() != null && street.getCountryCode().equals("DE") && decompounder.isDecompoudName(street.getName())) {
        AlternateOsmName alternateNameOtherFormat = new AlternateOsmName(decompounder.getOtherFormat(street.getName()), "DE", AlternateNameSource.OPENSTREETMAP);
        addAlternateNameIfAbsent(street, alternateNameOtherFormat);
    }
    return street;
}

/**
 * Adds the alternate name to the street unless the street already
 * contains it.
 */
private static void addAlternateNameIfAbsent(OpenStreetMap street, AlternateOsmName candidate) {
    if (street.getAlternateNames() == null || !street.getAlternateNames().contains(candidate)) {
        street.addAlternateName(candidate);
    }
}
/**
 * Parses a serialized list of administrative divisions (see
 * ISINADM_EXTRACTION_PATTERN) : each item holds a name, a level and an
 * openstreetmap id. Items whose level is not a valid integer are skipped;
 * an openstreetmap id that is not a valid integer is replaced by 0.
 *
 * @param isInAdm
 *            the serialized list (can be null)
 * @return the sorted list of the parsed items, never null
 */
public final static List<AdmDTO> parseIsInAdm(String isInAdm){
    List<AdmDTO> adms = new ArrayList<AdmDTO>();
    if (isInAdm == null) {
        return adms;
    }
    Matcher matcher = ISINADM_EXTRACTION_PATTERN.matcher(isInAdm);
    int i = 0;
    while (matcher.find()) {
        i++;// 1-based position of the current item, for the logs
        if (matcher.groupCount() != 3) {
            logger.warn("wrong number of fields for isInAdm no " + i + " for line " + isInAdm);
            continue;
        }
        String admName = matcher.group(1);
        int level;
        try {
            // the level group may be empty, which is reported like any other invalid number
            level = Integer.parseInt(matcher.group(2));
        } catch (NumberFormatException e) {
            logger.warn("wrong adm level for isInAdm no " + i + " for line " + isInAdm);
            continue;
        }
        int openstreetmapId = 0;
        try {
            openstreetmapId = Integer.parseInt(matcher.group(3));
        } catch (NumberFormatException e) {
            // e.g. an id that does not fit in an int : keep the item with id 0
            logger.warn("wrong openstreetmapId for isInAdm no " + i + " for line " + isInAdm);
        }
        adms.add(new AdmDTO(admName, level, openstreetmapId));
    }
    Collections.sort(adms);
    return adms;
}
/**
 * Sets the adm names of a feature from a list of administrative divisions.
 * The divisions are read in the order of the list and are numbered from 1.
 * A division is ignored when its level is not lower than currentOsmLevel
 * (unless currentOsmLevel is 0) or when its name is equal, ignoring case,
 * to the last name that has been set.
 *
 * @param gisFeature
 *            the feature to populate
 * @param currentOsmLevel
 *            the level of the feature, 0 if it is not set
 * @param admdtos
 *            the administrative divisions
 * @return the given feature (untouched if it is null or if the list is
 *         null or empty)
 */
public static GisFeature populateAdmNames(GisFeature gisFeature, int currentOsmLevel, List<AdmDTO> admdtos){
    if (gisFeature == null || admdtos == null || admdtos.size() == 0) {
        return gisFeature;
    }
    int nextLevel = 1;
    String previousName = "";
    for (AdmDTO adm : admdtos) {
        if (adm.getLevel() >= currentOsmLevel && currentOsmLevel != 0) {
            // not above the current feature
            continue;
        }
        String admName = adm.getAdmName();
        if (previousName.equalsIgnoreCase(admName)) {
            // same name as the previous level
            continue;
        }
        gisFeature.setAdmName(nextLevel++, admName);
        previousName = admName;
    }
    return gisFeature;
}
/**
 * Tells whether a zip code should be ignored.
 *
 * @param zipcode
 *            the zip code to check (can be null)
 * @return true if the zip code is null, blank, or entirely matches
 *         UNWANTED_ZIPCODE_PATTERN
 */
public static boolean isUnwantedZipCode(String zipcode){
    return zipcode == null
            || zipcode.trim().length() == 0
            || UNWANTED_ZIPCODE_PATTERN.matcher(zipcode).matches();
}
}