package me.osm.gazetter.addresses.impl;
import static me.osm.gazetter.addresses.AddressesLevelsMatcher.ADDR_LVL;
import static me.osm.gazetter.addresses.AddressesLevelsMatcher.ADDR_LVL_SIZE;
import static me.osm.gazetter.addresses.AddressesLevelsMatcher.ADDR_NAME;
import static me.osm.gazetter.addresses.AddressesLevelsMatcher.ADDR_NAMES;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import me.osm.gazetter.addresses.AddrLevelsComparator;
import me.osm.gazetter.addresses.AddrLevelsSorting;
import me.osm.gazetter.addresses.AddrTextFormatter;
import me.osm.gazetter.addresses.AddressesLevelsMatcher;
import me.osm.gazetter.addresses.AddressesParser;
import me.osm.gazetter.addresses.AddressesSchemesParser;
import me.osm.gazetter.addresses.AddressesUtils;
import me.osm.gazetter.addresses.Constants;
import me.osm.gazetter.addresses.sorters.CityStreetHNComparator;
import me.osm.gazetter.addresses.sorters.HNStreetCityComparator;
import me.osm.gazetter.addresses.sorters.StreetHNCityComparator;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONObject;
/**
 * Default implementation for {@link AddressesParser}.
 *
 * <p>For every address scheme found on a point it collects the address levels
 * (postcode, house number, street, quarter, city and the surrounding named
 * boundaries), sorts them with the configured {@link AddrLevelsComparator} and
 * renders short and long text representations through the configured
 * {@link AddrTextFormatter}.
 */
public class AddressesParserImpl implements AddressesParser {

    // Collaborators; either injected through the constructor or created by the default one.
    protected AddressesSchemesParser schemesParser;
    protected AddressesLevelsMatcher levelsMatcher;
    protected AddrTextFormatter textFormatter;
    protected AddrLevelsComparator addrLevelComparator;

    /** Address level keys which are dropped from the short text (see {@link #filterForFullText}). */
    protected Set<String> skipInFullText;

    /** Address level keys treated as a city; handed to the levels matcher by the default constructor. */
    protected List<String> cityBoundaries;

    /** When {@code false}, {@link #getLangs} always returns an empty set. */
    protected boolean findLangs;

    /** ISO language codes as reported by {@link Locale#getISOLanguages()}. */
    protected static final Set<String> langCodes = new HashSet<>(Arrays.asList(Locale.getISOLanguages()));

    /** Address levels whose boundary ids take part in {@link #hashBoundaries}. */
    protected static final Set<String> hashedBoundariesLelvels = new HashSet<String>();
    static {
        hashedBoundariesLelvels.addAll(Arrays.asList(
                "place:hamlet",
                "place:village",
                "place:town",
                "place:city",
                "boundary:8",
                "boundary:7",
                "boundary:6",
                "boundary:5",
                "boundary:4",
                "boundary:3",
                "boundary:2"
        ));
    }

    protected static final String ADDR_PARTS = "parts";
    protected static final String ADDR_TEXT = "text";
    protected static final String ADDR_TEXT_LONG = "longText";
    protected static final String ADDR_FULL = "addr:full";

    /**
     * Create parser with parameters
     *
     * @param schemesParser splits the point properties into address scheme rows
     * @param levelsMatcher builds the JSON for the individual address levels
     * @param textFormatter joins address levels into text
     * @param sorting selects the levels comparator: {@code HN_STREET_CITY} and
     *        {@code CITY_STREET_HN} map to their comparators, anything else
     *        (including {@code null}) falls back to street - house number - city
     * @param skipInFullText address level keys to leave out of the short text
     * @param findLangs whether languages common to all levels should be detected
     */
    public AddressesParserImpl(AddressesSchemesParser schemesParser,
            AddressesLevelsMatcher levelsMatcher, AddrTextFormatter textFormatter,
            AddrLevelsSorting sorting, Set<String> skipInFullText, boolean findLangs) {

        this.schemesParser = schemesParser;
        this.levelsMatcher = levelsMatcher;
        this.textFormatter = textFormatter;

        if (AddrLevelsSorting.HN_STREET_CITY == sorting) {
            addrLevelComparator = new HNStreetCityComparator();
        }
        else if (AddrLevelsSorting.CITY_STREET_HN == sorting) {
            addrLevelComparator = new CityStreetHNComparator();
        }
        else {
            addrLevelComparator = new StreetHNCityComparator();
        }

        this.skipInFullText = skipInFullText;
        this.cityBoundaries = defaultCityBoundaries();
        this.findLangs = findLangs;
    }

    /**
     * Default implementation: house number - street - city ordering, default
     * collaborators, nothing skipped in the short text, no language detection.
     */
    public AddressesParserImpl() {
        this.cityBoundaries = defaultCityBoundaries();
        addrLevelComparator = new HNStreetCityComparator();
        schemesParser = new AddressesSchemesParserImpl();
        levelsMatcher = new AddressesLevelsMatcherImpl(
                addrLevelComparator,
                new NamesMatcherImpl(),
                this.cityBoundaries);
        textFormatter = new AddrTextFormatterImpl();
        skipInFullText = new HashSet<>();
        this.findLangs = false;
    }

    /** Builds a fresh list of the level keys treated as city boundaries (one list per parser instance). */
    private static List<String> defaultCityBoundaries() {
        return Arrays.asList("place:hamlet", "place:village", "place:town", "place:city", "boundary:8");
    }

    /**
     * Builds one address row per address scheme found in the point properties.
     *
     * @param addrPoint feature with a {@code properties} object holding the address tags
     * @param boundaries boundaries containing the point; {@code null} is treated as empty
     * @param nearbyStreets candidate streets, passed through to the levels matcher
     * @param nearestPlace nearest place, passed through to the levels matcher
     * @param nearestNeighbour nearest neighbourhood, passed through to the levels matcher
     * @param associatedStreet associated street, passed through to the levels matcher
     * @return array of address rows as produced by {@link #createAddressRow}
     */
    @Override
    public JSONArray parse(JSONObject addrPoint, List<JSONObject> boundaries,
            List<JSONObject> nearbyStreets, JSONObject nearestPlace,
            JSONObject nearestNeighbour, JSONObject associatedStreet) {

        // hashBoundaries() already accepts null; be equally tolerant here.
        List<JSONObject> bndrs = boundaries == null
                ? Collections.<JSONObject>emptyList() : boundaries;

        // When several boundaries share a level the last one wins.
        Map<String, JSONObject> level2Boundary = new HashMap<String, JSONObject>();
        for (JSONObject b : bndrs) {
            String addrLevel = getAddrLevel(b);
            if (addrLevel != null) {
                level2Boundary.put(addrLevel, b);
            }
        }

        JSONArray result = new JSONArray();

        JSONObject properties = addrPoint.getJSONObject("properties");
        List<JSONObject> addresses = schemesParser.parseSchemes(properties);
        if (addresses == null || addresses.isEmpty()) {
            return result;
        }

        // Identical for every scheme row, so compute it once instead of per row.
        int boundariesHash = hashBoundaries(bndrs);

        for (JSONObject addrRow : addresses) {

            List<JSONObject> addrJsonRow = new ArrayList<>();
            Set<String> matchedBoundaries = new HashSet<>();

            JSONObject postCodeJSON = levelsMatcher.postCodeAsJSON(addrPoint, addrRow);
            if (postCodeJSON != null) {
                addrJsonRow.add(postCodeJSON);
            }

            // Guarded like every other level: a null element would break sorting later on.
            JSONObject hnJSON = levelsMatcher.hnAsJSON(addrPoint, addrRow);
            if (hnJSON != null) {
                addrJsonRow.add(hnJSON);
            }

            JSONObject streetAsJSON = levelsMatcher.streetAsJSON(
                    addrPoint, addrRow, associatedStreet, nearbyStreets, boundariesHash);
            if (streetAsJSON != null) {
                addrJsonRow.add(streetAsJSON);
            }

            JSONObject quarterJSON = levelsMatcher.quarterAsJSON(
                    addrPoint, addrRow, level2Boundary, nearestNeighbour);
            if (quarterJSON != null) {
                addrJsonRow.add(quarterJSON);
                rememberLink(quarterJSON, matchedBoundaries);
            }

            JSONObject cityJSON = levelsMatcher.cityAsJSON(addrPoint, addrRow, level2Boundary,
                    nearestPlace, getAddrLevel(nearestPlace));
            if (cityJSON != null) {
                addrJsonRow.add(cityJSON);
                rememberLink(cityJSON, matchedBoundaries);
            }

            // Add the remaining named boundaries which were not used as quarter or city.
            for (JSONObject bndry : bndrs) {
                JSONObject addrLVL = namedAddrLevel(bndry);
                if (addrLVL != null && !matchedBoundaries.contains(addrLVL.getString("lnk"))) {
                    addrJsonRow.add(addrLVL);
                }
            }

            result.put(createAddressRow(properties, addrRow, addrJsonRow));
        }

        return result;
    }

    /** Records the non-empty {@code lnk} of a matched level so the same boundary is not added twice. */
    private static void rememberLink(JSONObject level, Set<String> matchedBoundaries) {
        String bndryLNK = level.optString("lnk");
        if (!StringUtils.isEmpty(bndryLNK)) {
            matchedBoundaries.add(bndryLNK);
        }
    }

    /**
     * Converts a feature into an address level object (link, level, level size,
     * name and name translations).
     *
     * @return the level, or {@code null} when the feature is {@code null}, has
     *         no supported address level or has no name
     */
    private JSONObject namedAddrLevel(JSONObject feature) {
        String addrLevel = getAddrLevel(feature);
        if (addrLevel == null) {
            return null;
        }

        // Unnamed features are skipped before the id is read.
        Map<String, String> nTags = AddressesUtils.filterNameTags(feature);
        if (!nTags.containsKey(ADDR_NAME)) {
            return null;
        }

        JSONObject addrLVL = new JSONObject();
        addrLVL.put("lnk", feature.getString("id"));
        addrLVL.put(ADDR_LVL, addrLevel);
        addrLVL.put(ADDR_LVL_SIZE, addrLevelComparator.getLVLSize(addrLevel));
        addrLVL.put(ADDR_NAME, nTags.get(ADDR_NAME));
        addrLVL.put(ADDR_NAMES, new JSONObject(nTags));
        return addrLVL;
    }

    /**
     * Assembles the JSON for one address: short text, long text, per-language
     * short texts, the sorted parts, the scheme and, when present, {@code addr:full}.
     *
     * @param properties properties of the address point
     * @param addrRow address scheme row this address was built from
     * @param addrJsonRow address levels; sorted in place by this method
     * @return the address row
     */
    protected JSONObject createAddressRow(JSONObject properties,
            JSONObject addrRow, List<JSONObject> addrJsonRow) {

        List<JSONObject> filtered = filterForFullText(addrJsonRow);

        // Must run before re-sorting: getLangs() walks the levels in the
        // ascending level size order produced by filterForFullText().
        Set<String> langs = getLangs(filtered);

        Collections.sort(filtered, addrLevelComparator);
        Collections.sort(addrJsonRow, addrLevelComparator);

        JSONObject fullAddressRow = new JSONObject();

        fullAddressRow.put(ADDR_TEXT, textFormatter.joinNames(filtered, properties, null));
        fullAddressRow.put(ADDR_TEXT_LONG, textFormatter.joinNames(addrJsonRow, properties, null));

        fullAddressRow.put("langs", langs);
        for (String lang : langs) {
            fullAddressRow.put(ADDR_TEXT + ":" + lang,
                    textFormatter.joinNames(filtered, properties, lang));
        }

        fullAddressRow.put(ADDR_PARTS, new JSONArray(addrJsonRow));
        fullAddressRow.put(AddressesSchemesParser.ADDR_SCHEME,
                addrRow.optString(AddressesSchemesParser.ADDR_SCHEME));

        if (StringUtils.isNotBlank(properties.optString(ADDR_FULL))) {
            fullAddressRow.put(ADDR_FULL, properties.optString(ADDR_FULL));
        }

        return fullAddressRow;
    }

    /**
     * Finds the languages shared by the street level and every larger level.
     *
     * <p>Levels up to house number size, the postcode level and levels smaller
     * than a street are ignored. A street level seeds the result with its
     * languages, every larger level narrows it down. A street-or-larger level
     * without a {@code names} object yields an empty result.
     *
     * <p>NOTE(review): the result is only ever seeded by a street-sized level,
     * so rows without one (e.g. boundary-only rows) always end up with no
     * languages - confirm this is intended.
     *
     * @param filtered levels ordered by ascending level size
     * @return common language codes; empty when language detection is disabled
     */
    @SuppressWarnings("unchecked")
    protected Set<String> getLangs(List<JSONObject> filtered) {

        if (!this.findLangs) {
            return Collections.emptySet();
        }

        Set<String> result = new HashSet<>();

        for (JSONObject lvl : filtered) {
            int lvlSize = lvl.optInt(AddressesLevelsMatcher.ADDR_LVL_SIZE);

            boolean ignored = lvlSize <= Constants.HN_LVL_SIZE
                    || lvlSize == Constants.POSTCODE_LVL_SIZE
                    || lvlSize < Constants.STREET_LVL_SIZE;
            if (ignored) {
                continue;
            }

            JSONObject names = lvl.optJSONObject("names");
            if (names == null) {
                // no translations for this level - nothing can be common
                return Collections.emptySet();
            }

            Set<String> langs = getLangsFromTags(names.keySet());

            if (lvlSize == Constants.STREET_LVL_SIZE) {
                // streets lvl, init
                result.addAll(langs);
            }
            else {
                result.retainAll(langs);
                if (result.isEmpty()) {
                    return result;
                }
            }
        }

        return result;
    }

    /**
     * Extracts language codes from tag keys of the form
     * {@code <prefix>:<lang>[-_<variant>]}.
     *
     * @param keySet tag keys, e.g. {@code name:en}
     * @return the codes which are known ISO languages
     */
    protected Set<String> getLangsFromTags(Set<String> keySet) {

        Set<String> result = new HashSet<>();

        for (String key : keySet) {
            String[] split = StringUtils.split(key, ':');
            if (split == null || split.length < 2) {
                continue;
            }

            // A suffix made only of separators (e.g. "name:-") splits into
            // nothing; indexing [0] unconditionally would throw.
            String[] langParts = StringUtils.split(split[1], "-_");
            if (langParts.length > 0 && langCodes.contains(langParts[0])) {
                result.add(langParts[0]);
            }
        }

        return result;
    }

    /**
     * Returns a copy of the levels, ordered by ascending level size, with the
     * levels not wanted in the short text removed. Among levels with a size
     * above 55, a level is dropped when its key is listed in
     * {@link #skipInFullText} or when it repeats the name of the previously
     * kept level (see {@link #addrLlvlsMatch}).
     *
     * @param addrJsonRow all address levels; left untouched
     * @return filtered copy
     */
    protected List<JSONObject> filterForFullText(List<JSONObject> addrJsonRow) {

        List<JSONObject> list = new ArrayList<>(addrJsonRow);

        Collections.sort(list, new Comparator<JSONObject>() {
            @Override
            public int compare(JSONObject o1, JSONObject o2) {
                int s1 = o1.optInt(AddressesLevelsMatcher.ADDR_LVL_SIZE);
                int s2 = o2.optInt(AddressesLevelsMatcher.ADDR_LVL_SIZE);
                return Integer.compare(s1, s2);
            }
        });

        JSONObject prevAddrLvl = null;
        Iterator<JSONObject> iterator = list.iterator();
        while (iterator.hasNext()) {
            JSONObject lvl = iterator.next();

            // NOTE(review): 55 is presumably a size just above the street level - confirm.
            // optInt keeps this consistent with the comparator above.
            if (lvl.optInt(AddressesLevelsMatcher.ADDR_LVL_SIZE) <= 55) {
                continue;
            }

            boolean skip = skipInFullText != null
                    && skipInFullText.contains(lvl.optString(AddressesLevelsMatcher.ADDR_LVL));

            if (skip || addrLlvlsMatch(prevAddrLvl, lvl)) {
                iterator.remove();
            }
            else {
                prevAddrLvl = lvl;
            }
        }

        return list;
    }

    /**
     * Tells whether {@code lvl} repeats the name of {@code prevAddrLvl}: the
     * previous name is contained, ignoring case, in the name of {@code lvl} or
     * in any of its translated names.
     *
     * @param prevAddrLvl previously kept level, may be {@code null}
     * @param lvl level under test, may be {@code null}
     * @return {@code true} when the names match; {@code false} when either
     *         level or either name is missing
     */
    @SuppressWarnings("unchecked")
    protected boolean addrLlvlsMatch(JSONObject prevAddrLvl, JSONObject lvl) {

        if (prevAddrLvl == null || lvl == null) {
            return false;
        }

        String name = prevAddrLvl.optString("name", null);
        String name2 = lvl.optString("name", null);
        if (name == null || name2 == null) {
            return false;
        }

        if (StringUtils.containsIgnoreCase(name2, name)) {
            return true;
        }

        JSONObject optNames = lvl.optJSONObject("names");
        if (optNames != null) {
            for (String key : (Set<String>) optNames.keySet()) {
                // optString: a non-string translation must not abort the comparison
                String optName = optNames.optString(key, null);
                if (StringUtils.containsIgnoreCase(optName, name)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Determines the address level key of a feature. Tried in order:
     * {@code place:<place>}, {@code boundary:<admin_level>} (both only when the
     * comparator supports the key), then {@code street} for anything tagged
     * with {@code highway}.
     *
     * @param obj feature or bare properties object, may be {@code null}
     * @return level key or {@code null} when none applies
     */
    @Override
    public String getAddrLevel(JSONObject obj) {

        if (obj == null) {
            return null;
        }

        // Accept both a feature with nested properties and a bare tags object.
        JSONObject properties = obj.optJSONObject("properties");
        if (properties == null) {
            properties = obj;
        }

        String pk = "place:" + properties.optString("place");
        if (addrLevelComparator.supports(pk)) {
            return pk;
        }

        String bk = "boundary:" + properties.optString("admin_level").trim();
        if (addrLevelComparator.supports(bk)) {
            return bk;
        }

        if (properties.has("highway")) {
            return "street";
        }

        return null;
    }

    /**
     * Builds the address row of a boundary from its named parent boundaries
     * and the boundary itself, and stores the hash of the parents under
     * {@code boundariesHash}.
     *
     * @param subj the boundary being described
     * @param input boundaries containing it; {@code null} is treated as empty
     * @return address row, see {@link #createBoundaryAddrRow}
     */
    @Override
    public JSONObject boundariesAsArray(JSONObject subj, List<JSONObject> input) {

        List<JSONObject> result = new ArrayList<>();

        if (input != null) {
            for (JSONObject bndry : input) {
                JSONObject addrLVL = namedAddrLevel(bndry);
                if (addrLVL != null) {
                    result.add(addrLVL);
                }
            }
        }

        JSONObject own = namedAddrLevel(subj);
        if (own != null) {
            result.add(own);
        }

        JSONObject fullAddressRow = createBoundaryAddrRow(result, subj);
        fullAddressRow.put("boundariesHash", hashBoundaries(input));

        return fullAddressRow;
    }

    /**
     * Calculate a number identifying a set of boundaries: the string hash of
     * the concatenated ids of all boundaries whose level is listed in
     * {@link #hashedBoundariesLelvels}.
     *
     * <p>The ids are concatenated in list order without a delimiter, so the
     * value depends on the order of {@code input}.
     *
     * @param input boundaries, may be {@code null}
     * @return hash, or {@code 0} when there is nothing to hash
     * */
    public int hashBoundaries(List<JSONObject> input) {

        if (input == null) {
            return 0;
        }

        StringBuilder hashString = new StringBuilder();
        for (JSONObject bndry : input) {
            String addrLevel = getAddrLevel(bndry);
            if (addrLevel != null && hashedBoundariesLelvels.contains(addrLevel)) {
                hashString.append(bndry.getString("id"));
            }
        }

        return hashString.length() > 0 ? hashString.toString().hashCode() : 0;
    }

    /**
     * Assembles the JSON for a boundary address: short text, long text,
     * per-language short texts and the sorted parts.
     *
     * @param result address levels; sorted in place by this method
     * @param subj the boundary being described (not used by this implementation)
     * @return the address row
     */
    protected JSONObject createBoundaryAddrRow(List<JSONObject> result, JSONObject subj) {

        List<JSONObject> filtered = filterForFullText(result);

        JSONObject fullAddressRow = new JSONObject();

        Collections.sort(result, addrLevelComparator);

        // Must run before re-sorting: getLangs() walks the levels in the
        // ascending level size order produced by filterForFullText().
        Set<String> langs = getLangs(filtered);

        Collections.sort(filtered, addrLevelComparator);

        fullAddressRow.put(ADDR_TEXT, textFormatter.joinBoundariesNames(filtered, null));
        fullAddressRow.put(ADDR_TEXT_LONG, textFormatter.joinBoundariesNames(result, null));
        fullAddressRow.put(ADDR_PARTS, new JSONArray(result));

        fullAddressRow.put("langs", langs);
        for (String lang : langs) {
            fullAddressRow.put(ADDR_TEXT + ":" + lang,
                    textFormatter.joinBoundariesNames(filtered, lang));
        }

        return fullAddressRow;
    }
}