/*******************************************************************************
* Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser Public License v3
* which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt
******************************************************************************/
package com.opendoorlogistics.core.gis.postcodes;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.opendoorlogistics.core.utils.strings.Strings;
/**
* TO DO - Tidy this up...
*
* Standard postcode strings don't include the extra digit used in some London
* districts - e.g. district E1 contains 'district' E1W. All postcode
* strings include this.
* @author Phil
*
*/
final public class UKPostcodes {
public static final String area = "[a-z][a-z]?";
public static final String stdDistrict = area + "\\d\\d?";
public static final String allDistrict = area + "\\d\\d?[a-z]?";
public static final String std2ndPart = "\\d[a-z][a-z]";
// public static final String stdSector = stdDistrict + "\\s+" + "\\d";
public static final String allSector = allDistrict + "\\s+" + "\\d";
// public static final String stdUnit= stdSector + "[a-z][a-z]";
public static final String allUnit= allSector + "[a-z][a-z]";
public static final Pattern sectorFromUnit = Pattern.compile("^(" + allSector + ")[a-z][a-z]$",Pattern.CASE_INSENSITIVE);
public static final Pattern districtFromAnyLevelPC = Pattern.compile("^(" + allDistrict + ").*",Pattern.CASE_INSENSITIVE);
public static final Pattern areaFromAnyLevelPC = Pattern.compile("^(" + area + ").*",Pattern.CASE_INSENSITIVE);
public static final Pattern LondonExtraDigitFormatSectorLevel = Pattern.compile("^" + "("+ stdDistrict+ ")"+ "[a-z]\\s" + "(" + std2ndPart +")"+ "$",Pattern.CASE_INSENSITIVE);
public static final Pattern isArea = Pattern.compile("^(" + area + ")$",Pattern.CASE_INSENSITIVE);
public static final Pattern isDistrict = Pattern.compile("^(" + allDistrict + ")$",Pattern.CASE_INSENSITIVE);
public static final Pattern isSector = Pattern.compile("^(" + allSector + ")$",Pattern.CASE_INSENSITIVE);
public static final Pattern isUnit = Pattern.compile("^(" + allDistrict + ")\\s*(" + std2ndPart + ")$",Pattern.CASE_INSENSITIVE);
/**
* Match a UK unit postcode only, with or without a space, with groups that give the before space and after space parts separately.
*/
public static final Pattern unitWithWithoutSpaceGroupedForSpace = Pattern.compile("([a-z][a-z]?\\d\\d?[a-z]?)\\s*(\\d[a-z][a-z])", Pattern.CASE_INSENSITIVE);
public static String standardisePostcode(String s, boolean removeExtraDistrictLetter){
// run basic standardise
s = Strings.std(s);
// if its a UK unit postcode then ensure its got the space
Matcher ukUnit = unitWithWithoutSpaceGroupedForSpace.matcher(s);
if(ukUnit.matches()){
s = ukUnit.group(1) + " " + ukUnit.group(2);
}
// remove the extra postcode digits used for some areas in London
if(removeExtraDistrictLetter){
Matcher special = UKPostcodes.LondonExtraDigitFormatSectorLevel.matcher(s);
if(special.find()){
s = special.group(1) + " " + special.group(2);
}
}
// ensure one space between the last 3 characters and the first, if its a unit pc
Matcher unitMatcher= UKPostcodes.isUnit.matcher(s);
if(unitMatcher.matches()){
s = unitMatcher.group(1) + " " + unitMatcher.group(2);
}
// should be upper case
s = s.toUpperCase();
return s;
}
public enum UKPostcodeLevel{
Area,
District,
Sector,
Unit
}
public static void main(String[]args){
for(String s : new String[]{"LE119FZ" , "LE11 9FZ" , "B28BQ" , "B2A8TG"}){
Matcher matcher = UKPostcodes.unitWithWithoutSpaceGroupedForSpace.matcher(s);
if(matcher.matches()){
System.out.println(s + " -> " + matcher.group(1) + " " + matcher.group(2));
}else{
System.out.println(s + " -> no match");
}
}
}
}