package se.kodapan.osm.sweden.ext.se.posten.postnummer;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.cyberneko.html.parsers.DOMParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.xpath.*;
import java.io.*;
import java.net.URLEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Screen-scraping client for the postal code ("postnummer") search pages at posten.se.
 *
 * <p>Queries are sent as HTTP GET requests, the returned HTML is parsed with NekoHTML and the
 * result table is turned into {@link PostenPostnummerServiceRecord} instances using XPath.
 *
 * <p>A custom User-Agent must be set with {@link #setUserAgent(String)} before any query is
 * executed; queries fail while the default value is still in place.
 *
 * <p>NOTE(review): a single {@code HttpClient} and {@code DOMParser} instance is shared by all
 * calls, so instances are presumably not safe for concurrent use - confirm before sharing.
 *
 * @author kalle
 * @since 2013-01-02 00:07
 */
public class PostenPostnummerService {

  private static final Logger log = LoggerFactory.getLogger(PostenPostnummerService.class);

  /** Base URL of the search result page, including the trailing {@code ?}. */
  private static final String QUERY_URL = "http://www.posten.se/soktjanst/postnummersok/resultat.jspv?";

  /** Character set used both for URL encoding the parameters and for decoding the response. */
  private static final String CHARSET = "iso-8859-1";

  /** Text searched for in the response; when present the response is flagged as limited. */
  private static final String LIMITED_RESULTS_MARKER = "Sökningen gav ett stort antal träffar";

  private static final String defaultUserAgent = "Unnamed instance of " + PostenPostnummerService.class.getName() + ", https://github.com/karlwettin/osm-common/";

  /** Matches a house number range such as {@code 1 - 15}; group 1 is the start, group 2 the end. */
  private static final Pattern husnummerseriePattern = Pattern.compile("([0-9]+)\\W*-\\W*([0-9]+)");

  /** Characters appended to a street name prefix when a limited result has to be narrowed down. */
  private static final char[] resolverAlphabet = new char[]{
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'å', 'ä', 'ö',
  };

  private String userAgent = defaultUserAgent;

  private final HttpClient client = new DefaultHttpClient();
  private final DOMParser parser = new DOMParser();
  private final XPath xpath = XPathFactory.newInstance().newXPath();

  // The postnummer expression selects the third cell of every result row; the remaining
  // expressions are evaluated relative to that cell to reach its sibling cells.
  private final XPathExpression gatunamnExpression = xpath.compile("parent::TR/TD[1]");
  private final XPathExpression husnummerserieExpression = xpath.compile("parent::TR/TD[2]");
  private final XPathExpression postnummerExpression = xpath.compile("//TBODY/TR/TD[3]");
  private final XPathExpression postortExpression = xpath.compile("parent::TR/TD[4]");

  /**
   * Creates a new service instance.
   *
   * @throws Exception if one of the XPath expressions used for parsing cannot be compiled
   */
  public PostenPostnummerService() throws Exception {
  }

  /**
   * Lifecycle hook; currently does nothing.
   *
   * @throws Exception never thrown by the current implementation
   */
  public void open() throws Exception {
  }

  /**
   * Lifecycle hook; currently does nothing. In particular the underlying HTTP client is not
   * shut down here.
   *
   * @throws Exception never thrown by the current implementation
   */
  public void close() throws Exception {
  }

  /**
   * Searches by street address and/or city. Parameters that are {@code null} are left out of
   * the query.
   *
   * @param streetAddress value of the {@code gatunamn} request parameter, or {@code null}
   * @param city value of the {@code po} request parameter, or {@code null}
   * @return the parsed response
   * @throws Exception if the request fails or the response cannot be parsed
   */
  public PostenPostnummerServiceResponse queryStreetAddress(String streetAddress, String city) throws Exception {
    StringBuilder url = new StringBuilder(QUERY_URL);
    if (streetAddress != null) {
      url.append("gatunamn=").append(encodeWithPercentSpaces(streetAddress)).append("&");
    }
    if (city != null) {
      url.append("po=").append(encodeWithPercentSpaces(city)).append("&");
    }
    HttpGet get = new HttpGet(url.toString());
    log.info("Executing street address query: {}", url);
    return parseResponse(get);
  }

  /**
   * Searches by postal code.
   *
   * @param postnummer value of the {@code pnr} request parameter, must not be {@code null}
   * @return the parsed response
   * @throws NullPointerException if {@code postnummer} is {@code null}
   * @throws Exception if the request fails or the response cannot be parsed
   */
  public PostenPostnummerServiceResponse queryPostnummer(String postnummer) throws Exception {
    if (postnummer == null) {
      throw new NullPointerException("postnummer must not be null");
    }
    StringBuilder url = new StringBuilder(QUERY_URL);
    url.append("pnr=").append(URLEncoder.encode(postnummer, CHARSET)).append("&");
    HttpGet get = new HttpGet(url.toString());
    log.info("Executing postnummer query: {}", url);
    return parseResponse(get);
  }

  /** URL-encodes a value and represents spaces as {@code %20} rather than {@code +}. */
  private static String encodeWithPercentSpaces(String value) throws UnsupportedEncodingException {
    return URLEncoder.encode(value, CHARSET).replace("+", "%20");
  }

  /**
   * Executes the request and converts the HTML result table to records.
   *
   * @param get the prepared request; the User-Agent header is set here
   * @return response holding one record per result row, flagged as limited when the page
   *         contains the "large number of hits" notice
   * @throws NullPointerException if no custom User-Agent has been set
   * @throws IOException if the request fails or the response has no body
   */
  private PostenPostnummerServiceResponse parseResponse(HttpGet get) throws IOException, SAXException, XPathExpressionException {
    // A null user agent is treated the same as one that was never set.
    if (userAgent == null || defaultUserAgent.equals(userAgent)) {
      throw new NullPointerException("PostenPostnummerService HTTP header User-Agent not set!");
    }
    get.setHeader("User-Agent", userAgent);

    String html = readBody(client.execute(get));
    log.debug("Postnummer response received");

    List<PostenPostnummerServiceRecord> records = new ArrayList<PostenPostnummerServiceRecord>();
    PostenPostnummerServiceResponse response = new PostenPostnummerServiceResponse(records);
    if (html.contains(LIMITED_RESULTS_MARKER)) {
      response.setLimitedResults(true);
    }

    parser.setFeature("http://xml.org/sax/features/namespaces", false);
    parser.parse(new InputSource(new StringReader(html)));

    NodeList results = (NodeList) postnummerExpression.evaluate(parser.getDocument(), XPathConstants.NODESET);
    for (int i = 0; i < results.getLength(); i++) {
      org.w3c.dom.Node postnummerCell = results.item(i);

      PostenPostnummerServiceRecord record = new PostenPostnummerServiceRecord();
      records.add(record);

      // Keep digits only, dropping whitespace and any other formatting in the cell.
      record.setPostnummer(postnummerCell.getTextContent().replaceAll("[^0-9]+", ""));

      // House numbers are only set when the cell contains a "start - end" range.
      Matcher matcher = husnummerseriePattern.matcher(husnummerserieExpression.evaluate(postnummerCell));
      if (matcher.find()) {
        record.setStarthusnummer(Integer.valueOf(matcher.group(1)));
        record.setSluthusnummer(Integer.valueOf(matcher.group(2)));
      }

      record.setGatunamn(gatunamnExpression.evaluate(postnummerCell));
      record.setPostort(postortExpression.evaluate(postnummerCell));
    }

    log.debug("Postnummer response resolved");
    return response;
  }

  /**
   * Reads the complete response body as text and closes the content stream.
   *
   * @throws IOException if the response carries no entity or reading fails
   */
  private static String readBody(HttpResponse httpResponse) throws IOException {
    int status = httpResponse.getStatusLine().getStatusCode();
    if (status != 200) {
      // Best effort: the body is still parsed, it will simply yield no records.
      log.warn("Unexpected HTTP status {} in postnummer response", status);
    }
    if (httpResponse.getEntity() == null) {
      throw new IOException("Postnummer response contained no entity, HTTP status " + status);
    }

    StringWriter buffer = new StringWriter();
    InputStream httpResponseStream = httpResponse.getEntity().getContent();
    try {
      IOUtils.copy(new InputStreamReader(httpResponseStream, CHARSET), buffer);
    } finally {
      // Close the stream that was actually read rather than requesting the content again.
      httpResponseStream.close();
    }
    return buffer.toString();
  }

  /**
   * Creates a response that contains all records (house number segments of streets, PO boxes
   * etc.) for a given postnummer.
   *
   * <p>If the plain postnummer query is flagged as limited, the postort of its first record is
   * searched once per letter of the resolver alphabet (narrowing further where needed) and only
   * records with the requested postnummer are kept.
   *
   * @param postnummer the postal code to resolve
   * @return the response of the plain query, or a new response holding the gathered records
   * @throws Exception if any of the underlying queries fails
   */
  public PostenPostnummerServiceResponse resolveAllServiceRecordsInPostnummer(String postnummer) throws Exception {
    PostenPostnummerServiceResponse response = queryPostnummer(postnummer);
    if (!response.getRecords().isEmpty() && response.isLimitedResults()) {
      PostenPostnummerServiceRecord first = response.getRecords().get(0);
      String postort = first.getPostort();
      String resolvedPostnummer = first.getPostnummer();

      Set<PostenPostnummerServiceRecord> records = new LinkedHashSet<PostenPostnummerServiceRecord>(1000);
      for (char character : resolverAlphabet) {
        resolvePostnummer(String.valueOf(character), records, postort, resolvedPostnummer);
      }
      response = new PostenPostnummerServiceResponse();
      response.setRecords(new ArrayList<PostenPostnummerServiceRecord>(records));
    }
    return response;
  }

  /**
   * Queries the postort for streets starting with the given prefix, collects the records that
   * belong to the postnummer and recurses with longer prefixes while the result is limited.
   *
   * @param streetNamePrefix street name prefix to search for; trimmed before it is sent
   * @param records receives the matching records
   * @param postort city to search in
   * @param postnummer only records with this postnummer are collected
   */
  private void resolvePostnummer(String streetNamePrefix, Set<PostenPostnummerServiceRecord> records, String postort, String postnummer) throws Exception {
    PostenPostnummerServiceResponse partsResponse = queryStreetAddress(streetNamePrefix.trim(), postort);
    for (PostenPostnummerServiceRecord record : partsResponse.getRecords()) {
      if (postnummer.equals(record.getPostnummer())) {
        records.add(record);
      }
    }
    if (!partsResponse.isLimitedResults()) {
      return;
    }

    for (char character : resolverAlphabet) {
      resolvePostnummer(streetNamePrefix + character, records, postort, postnummer);
    }
    if (!streetNamePrefix.endsWith(" ")) {
      // Continue into a following word. A prefix ending in a space is trimmed back to this very
      // prefix before it is sent, so querying it would only repeat the request made above;
      // go straight to "prefix + space + letter" instead.
      String nextWordPrefix = streetNamePrefix + " ";
      for (char character : resolverAlphabet) {
        resolvePostnummer(nextWordPrefix + character, records, postort, postnummer);
      }
    }
  }

  /**
   * Gathers the names of all postorter by querying every postnummer from 10000 to 99999.
   *
   * @return the postort names found
   * @throws Exception if any of the underlying queries fails
   */
  public Set<String> gatherAllPostorter() throws Exception {
    return gatherAllPostnummerByPostorter().keySet();
  }

  /**
   * Gathers all postnummer grouped by postort by querying every postnummer from 10000 to 99999.
   *
   * @return map from postort to the postnummer found for it
   * @throws Exception if any of the underlying queries fails
   */
  public Map<String, Set<String>> gatherAllPostnummerByPostorter() throws Exception {
    return gatherAllPostnummerByPostorter(10000, 99999);
  }

  /**
   * Gathers all postnummer grouped by postort by issuing one query per number in the range.
   *
   * @param start first postnummer to query, inclusive
   * @param end last postnummer to query, inclusive
   * @return map from postort to the postnummer found for it
   * @throws Exception if any of the underlying queries fails
   */
  public Map<String, Set<String>> gatherAllPostnummerByPostorter(int start, int end) throws Exception {
    log.info("Gathering all postnummer by postorter...");
    Map<String, Set<String>> postnummerByPostorter = new HashMap<String, Set<String>>();
    for (int i = start; i <= end; i++) {
      PostenPostnummerServiceResponse response = queryPostnummer(String.valueOf(i));
      for (PostenPostnummerServiceRecord record : response.getRecords()) {
        Set<String> postnummer = postnummerByPostorter.get(record.getPostort());
        if (postnummer == null) {
          postnummer = new HashSet<String>();
          postnummerByPostorter.put(record.getPostort(), postnummer);
        }
        postnummer.add(record.getPostnummer());
      }
    }
    return postnummerByPostorter;
  }

  /**
   * @return the value sent in the User-Agent header
   */
  public String getUserAgent() {
    return userAgent;
  }

  /**
   * @param userAgent the value to send in the User-Agent header; queries are rejected while
   *                  this is {@code null} or still the default value
   */
  public void setUserAgent(String userAgent) {
    this.userAgent = userAgent;
  }
}