/******************************************************************************* * Gisgraphy Project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA * * Copyright 2008 Gisgraphy project * * David Masclet <davidmasclet@gisgraphy.com> ******************************************************************************/ package com.gisgraphy.geoloc; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class ZipcodeNormalizer { private static Logger logger = LoggerFactory.getLogger(ZipcodeNormalizer.class); private final static int REGEXP_CASEINSENSITIVE_FLAG = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; private final static String CA_PATTERN_EXPRESSION = "(?<=[a-z]\\d[a-z])[\\s–\\-]?\\d[a-z]\\d"; private final static Pattern CA_PATTERN = Pattern.compile(CA_PATTERN_EXPRESSION, REGEXP_CASEINSENSITIVE_FLAG); private final static String GB_PATTERN_EXPRESSION = "(?<=[A-Z]{2}\\d[A-Z])\\s?\\d[A-Z]{2}|(?<=[A-Z]{2}\\d{2})\\s?\\d[A-Z]{2}|(?<=[A-Z]\\d)\\s?\\d[A-Z]{2}|(?<=[A-Z]{2}\\d)\\s?\\d[A-Z]{2}|(?<=[A-Z]\\d[A-Z])\\s?\\d[A-Z]{2}|(?<=[A-Z]\\d{2})\\s?\\d[A-Z]{2}|(?<=GIR)\\s?0AA|(?<=[A-Z]{4})\\s?1ZZ"; //LLNL NLL|LLNN NLL|LN NLL|LLN NLL|LNL NLL|LNN NLL| private final static Pattern GB_PATTERN = Pattern.compile(GB_PATTERN_EXPRESSION, REGEXP_CASEINSENSITIVE_FLAG); public static String normalize_ca(String string) { return normalize_country(string, CA_PATTERN); } public static String normalize_gb(String string) { return normalize_country(string, GB_PATTERN); } /** * @return a string that prepare zipcode to be search * because for canada we only got first char and so does for GB */ public static String normalize(String string,String countryCode){ if (string==null){ return null; } if (countryCode == null || "".equals(countryCode.trim())){ return normalize_ca(normalize_gb(string)); } else if("GB".equalsIgnoreCase(countryCode)){ return normalize_gb(string); } else if ("CA".equalsIgnoreCase(countryCode)){ return normalize_ca(string); } else { return string; } } private static String normalize_country(String string, Pattern pattern) { if (string==null){ return null; } Matcher matcher = pattern.matcher(string); if (logger.isInfoEnabled()) { if (matcher.find()) { logger.info("found one or more zipcode to normalize"); String[] splitedString = new String[matcher.groupCount()]; for (int j = 1; j <= matcher.groupCount(); j++) { String group = matcher.group(j); if (group != null) { group = group.trim(); } splitedString[j - 1] = group; if (logger.isInfoEnabled()) { logger.info("[" + (j - 1) + "]=" + group); } } } } return pattern.matcher(string).replaceAll("").trim(); } }