package me.osm.gazetteer.web.api;
import static me.osm.gazetteer.web.api.utils.RequestUtils.getDoubleHeader;
import static me.osm.gazetteer.web.api.utils.RequestUtils.getList;
import static me.osm.gazetteer.web.api.utils.RequestUtils.getSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import me.osm.gazetteer.web.ESNodeHolder;
import me.osm.gazetteer.web.GazetteerWeb;
import me.osm.gazetteer.web.api.meta.Endpoint;
import me.osm.gazetteer.web.api.meta.Parameter;
import me.osm.gazetteer.web.api.query.Query;
import me.osm.gazetteer.web.api.query.QueryAnalyzer;
import me.osm.gazetteer.web.api.search.SearchBuilder;
import me.osm.gazetteer.web.api.utils.APIUtils;
import me.osm.gazetteer.web.api.utils.BuildSearchQContext;
import me.osm.gazetteer.web.api.utils.Paginator;
import me.osm.gazetteer.web.api.utils.RequestUtils;
import me.osm.gazetteer.web.imp.IndexHolder;
import me.osm.gazetteer.web.utils.OSMDocSinglton;
import me.osm.osmdoc.model.Feature;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.lucene.search.function.CombineFunction;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.OrFilterBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.sort.SortBuilders;
import org.json.JSONArray;
import org.json.JSONObject;
import org.restexpress.Request;
import org.restexpress.Response;
import org.restexpress.domain.metadata.UriMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.inject.Inject;
public class SearchAPI implements DocumentedApi {
/**
* OSM Doc hierarchy name.
* */
public static final String HIERARCHY_CODE_HEADER = "hierarchy";
/**
* Create strict query.
* Default value is false.
* */
public static final String STRICT_SEARCH_HEADER = "strict";
/**
* Explain search results or not
* (<code>true</code> for explain),
* default is false.
* */
public static final String EXPLAIN_HEADER = "explain";
/**
* Query string
* */
public static final String Q_HEADER = "q";
/**
* Type of feature. [adrpnt, poipnt etc]
* */
public static final String TYPE_HEADER = "type";
/**
* Include or not object full geometry
* (<code>true</code> to include),
* default is false.
* */
public static final String FULL_GEOMETRY_HEADER = "full_geometry";
/**
 * Search inside given BBOX:
 * west, south, east, north.
 * */
public static final String BBOX_HEADER = "bbox";
/**
* Look for poi of exact types
* */
public static final String POI_CLASS_HEADER = "poiclass";
/**
* Look for poi of exact types (from hierarchy branch)
* */
public static final String POI_GROUP_HEADER = "poigroup";
/**
* Poi tag filters
* */
public static final String POI_TAGS_FILTER_HEADER = "tag_filters";
/**
* Latitude of map center
* */
public static final String LAT_HEADER = "lat";
/**
* Longitude of map center
* */
public static final String LON_HEADER = "lon";
/**
* Features id's of higher objects to filter results.
* Array members will be added using OR
* */
public static final String REFERENCES_HEADER = "filter";
/**
* Use it, if you have separate addresses parts texts, to search over.
* Not Supported yet.
* */
public static final String PARTS_HEADER = "parts";
/**
* Don't search for POIs
* */
public static final String ADDRESSES_ONLY_HEADER = "only_address";
/**
* Level of detail of the answer.
* */
public static final String ANSWER_DETALIZATION_HEADER = "detalization";
private static final Logger log = LoggerFactory.getLogger(SearchAPI.class);
/**
 * Creates the API handler and asks the injector exposed by
 * {@code GazetteerWeb.injector()} to populate the {@code @Inject}-annotated
 * members of this instance.
 */
public SearchAPI() {
GazetteerWeb.injector().injectMembers(this);
}
/** Turns the raw query string into an analyzed {@link Query}. Injected. */
@Inject
protected QueryAnalyzer queryAnalyzer;
/**
 * Patches outgoing search requests and encoded answers when a REST request
 * is available (presumably with paging data - confirm against Paginator).
 * Injected.
 */
@Inject
private Paginator paginator;
/** Fills the main search query, see {@code getSearchQuerry}. Injected. */
@Inject
private SearchBuilder searchBuilder;
/**
 * Default OSM Doc hierarchy name. Re-read from
 * {@code GazetteerWeb.osmdocProperties()} on every three-argument
 * {@code read(...)} call and used there as the fallback when the request
 * has no hierarchy header.
 */
private String apiDefaultHierarchy;
/**
 * REST Express entry point for read requests.
 *
 * Hands the call over to {@link #read(Request, Response, boolean)},
 * flagging it as a first attempt rather than a re-send after a failed
 * strict search.
 *
 * @param request REST Express request
 * @param response REST Express response
 *
 * @return search results encoded as {@link JSONObject}
 * */
public JSONObject read(Request request, Response response) throws IOException {
    final boolean resendedAfterFail = false;
    return read(request, response, resendedAfterFail);
}
/**
 * Parse request, create and execute query, encode and return results.
 *
 * Any exception raised while searching is logged, attached to the response
 * together with status code 500, and {@code null} is returned.
 *
 * @param request RESTExpress request
 * @param response RestExpress response
 * @param resendedAfterFail shows that it is a second request, sent after strict request failed
 *
 * @return Search results encoded with {@link JSONObject}, or {@code null}
 *         when there is nothing to search for or the search failed
 * */
public JSONObject read(Request request, Response response, boolean resendedAfterFail)
        throws IOException {
    boolean explain = "true".equals(request.getHeader(EXPLAIN_HEADER));
    String querryString = StringUtils.stripToNull(request.getHeader(Q_HEADER));
    Set<String> types = getSet(request, TYPE_HEADER);

    // Fall back to the configured hierarchy when the request names none.
    apiDefaultHierarchy = GazetteerWeb.osmdocProperties().getApiDefaultHierarchy();
    String hname = request.getHeader(HIERARCHY_CODE_HEADER);
    if (hname == null) {
        hname = apiDefaultHierarchy;
    }

    // Explicit poi classes plus every class of the requested poi groups.
    Set<String> poiClass = getSet(request, POI_CLASS_HEADER);
    addPOIGroups(request, poiClass, hname);

    Double lat = getDoubleHeader(LAT_HEADER, request);
    Double lon = getDoubleHeader(LON_HEADER, request);
    Set<String> refs = getSet(request, REFERENCES_HEADER);
    boolean strictRequested = RequestUtils.getBooleanHeader(request, STRICT_SEARCH_HEADER, false);
    boolean fullGeometry = RequestUtils.getBooleanHeader(request, FULL_GEOMETRY_HEADER, false);
    boolean addressesOnly = RequestUtils.getBooleanHeader(request, ADDRESSES_ONLY_HEADER, false);
    AnswerDetalization detalization = RequestUtils.getEnumHeader(request,
            ANSWER_DETALIZATION_HEADER, AnswerDetalization.class, AnswerDetalization.FULL);

    JSONObject poiTagFilters = null;
    String poiTagFiltersString = request.getHeader(POI_TAGS_FILTER_HEADER);
    if (StringUtils.isNotEmpty(poiTagFiltersString)) {
        poiTagFilters = new JSONObject(poiTagFiltersString);
    }

    List<String> bbox = getList(request, BBOX_HEADER);

    try {
        return internalSearch(request, response,
                resendedAfterFail, explain, querryString, types, poiClass,
                lat, lon, refs, strictRequested, fullGeometry,
                addressesOnly, detalization, bbox, poiTagFilters);
    }
    catch (Exception e) {
        // Report through the class logger instead of stderr so the failure
        // ends up in the application log together with its query.
        log.error("Search failed for query: {}", querryString, e);
        response.setException(e);
        response.setResponseCode(500);
        return null;
    }
}
/**
 * Programmatic search entry point that works without a REST request.
 *
 * Missing ({@code null}) collections are replaced with empty ones and a
 * missing detalization with {@link AnswerDetalization#FULL}. Because no
 * request or response is passed on, the paginator is not applied and an
 * empty result is never retried.
 *
 * @param explain add query results explanations
 * @param querryString raw query text, may be {@code null}
 * @param types restrict query with types, may be {@code null}
 * @param poiClass restrict query with poi classes, may be {@code null}
 * @param lat latitude of map center, may be {@code null}
 * @param lon longitude of map center, may be {@code null}
 * @param refs restrict query with references, may be {@code null}
 * @param strict create a strict request
 * @param fullGeometry include full geometry into the answer
 * @param addressesOnly don't search for POIs
 * @param detalization answer detalization, may be {@code null}
 * @param bbox bounding box coordinates, may be {@code null}
 * @param poiTagFilters poi tag filters, may be {@code null}
 *
 * @return encoded search results, or {@code null} if there is nothing to search for
 * */
public JSONObject internalSearch(
        boolean explain, String querryString,
        Set<String> types, Set<String> poiClass, Double lat, Double lon,
        Set<String> refs, boolean strict, boolean fullGeometry,
        boolean addressesOnly, AnswerDetalization detalization,
        List<String> bbox, JSONObject poiTagFilters) throws IOException {

    Set<String> effectiveTypes = (types != null) ? types : new HashSet<String>();
    Set<String> effectivePoiClass = (poiClass != null) ? poiClass : new HashSet<String>();
    Set<String> effectiveRefs = (refs != null) ? refs : new HashSet<String>();
    List<String> effectiveBbox = (bbox != null) ? bbox : new ArrayList<String>();
    AnswerDetalization effectiveDetalization =
            (detalization != null) ? detalization : AnswerDetalization.FULL;

    // A non-strict call is presented as "already resent", which makes the
    // delegate build a non-strict query straight away.
    boolean resendedAfterFail = !strict;

    return internalSearch(
            null, null, resendedAfterFail, explain, querryString, effectiveTypes,
            effectivePoiClass, lat, lon, effectiveRefs, strict, fullGeometry,
            addressesOnly, effectiveDetalization, effectiveBbox, poiTagFilters);
}
/**
 * Build, execute and encode the search.
 *
 * When both {@code request} and {@code response} are given, the search
 * returned no hits, re-restriction is enabled in the configuration and the
 * query was neither requested as strict nor already resent, the whole
 * request is processed once more via {@link #read(Request, Response, boolean)}
 * with {@code resendedAfterFail = true}.
 *
 * @param request REST Express request, may be {@code null}
 * @param response REST Express response, may be {@code null}
 * @param resendedAfterFail this is a second attempt after a failed strict one
 * @param explain add query results explanations
 * @param querryString raw query text, may be {@code null}
 * @param types restrict query with types
 * @param poiClass restrict query with poi classes
 * @param lat latitude of map center, may be {@code null}
 * @param lon longitude of map center, may be {@code null}
 * @param refs restrict query with references
 * @param strictRequested strict search was explicitly requested
 * @param fullGeometry include full geometry into the answer
 * @param addressesOnly don't search for POIs
 * @param detalization answer detalization
 * @param bbox bounding box coordinates
 * @param poiTagFilters poi tag filters, may be {@code null}
 *
 * @return encoded search results, or {@code null} if there is no query text
 *         and no type, poi class or reference restriction
 *
 * @throws SearchPhaseExecutionException rethrown after being logged
 * */
private JSONObject internalSearch(Request request, Response response,
        boolean resendedAfterFail, boolean explain, String querryString,
        Set<String> types, Set<String> poiClass, Double lat, Double lon,
        Set<String> refs, boolean strictRequested, boolean fullGeometry,
        boolean addressesOnly, AnswerDetalization detalization,
        List<String> bbox, JSONObject poiTagFilters) throws IOException {

    // Nothing to search by.
    if (querryString == null && poiClass.isEmpty() && types.isEmpty() && refs.isEmpty()) {
        return null;
    }

    Query query = queryAnalyzer.getQuery(querryString);

    // Don't look for poi type if we search only for addresses.
    List<JSONObject> poiType = null;
    if (query != null && !addressesOnly) {
        poiType = findPoiClass(query);
    }

    // Strict if strict is requested or this query hasn't yet been resent after fail.
    boolean strict = strictRequested || !resendedAfterFail;

    SearchRequestBuilder searchRequest = buildSearchRequest(bbox, strict,
            explain, types, poiClass, poiTagFilters, addressesOnly,
            lat, lon, refs, query);

    log.trace("Search request: {}", searchRequest);

    if (request != null) {
        paginator.patchSearchQ(request, searchRequest);
    }

    try {
        SearchResponse searchResponse = searchRequest.execute().actionGet();
        SearchHit[] hits = searchResponse.getHits().getHits();

        boolean retryNonStrict = request != null && response != null
                && hits.length == 0
                && GazetteerWeb.config().isReRestrict()
                && !strictRequested && !resendedAfterFail;
        if (retryNonStrict) {
            return read(request, response, true);
        }

        if (explain) {
            for (SearchHit hit : hits) {
                // Arguments are handed to the logger as objects: query may be
                // null here (search by type/refs only), and the logger prints
                // "null" instead of failing on an explicit toString() call.
                log.debug("{} strict={} result={} \nexplanations={}",
                        query, strict, hit.getSource().get("search"), hit.explanation());
            }
        }

        JSONObject answer = APIUtils.encodeSearchResult(
                searchResponse, fullGeometry, explain, detalization);

        answer.put("request", StringEscapeUtils.escapeHtml4(querryString));
        if (poiType != null && !poiType.isEmpty()) {
            answer.put("matched_type", new JSONArray(poiType));
        }
        answer.put("strict", strict);

        if (request != null) {
            paginator.patchAnswer(request, answer);
        }

        return answer;
    }
    catch (SearchPhaseExecutionException spe) {
        log.error("Failed to execute query: {}", searchRequest, spe);
        throw spe;
    }
}
/**
 * Create search request
 *
 * @param bbox bounding box as [west, south, east, north]; applied only
 *             when it has exactly four elements
 * @param strict create a strict request
 * @param explain add query results explanations
 * @param types restrict query with types (poipnt, adrpnt and so on)
 * @param poiClass restrict query with poi classes
 * @param poiTagFilters poi tag filters, may be {@code null}
 * @param addressesOnly don't search for POIs
 * @param lat latitude of user's viewport center, may be {@code null}
 * @param lon longitude of user's viewport center, may be {@code null}
 * @param refs restrict request with refs
 * @param query analyzed query, may be {@code null}
 *
 * @return ElasticSearch SearchRequest
 * */
public SearchRequestBuilder buildSearchRequest(List<String> bbox, boolean strict,
        boolean explain, Set<String> types, Set<String> poiClass,
        JSONObject poiTagFilters, boolean addressesOnly, Double lat, Double lon,
        Set<String> refs, Query query) {

    BuildSearchQContext buildSearchQContext = new BuildSearchQContext();

    BoolQueryBuilder q;
    if (query != null) {
        q = getSearchQuerry(query, strict, buildSearchQContext);
    }
    else {
        q = QueryBuilders.boolQuery();
    }

    if (!types.isEmpty()) {
        q.must(QueryBuilders.termsQuery("type", types));
    }

    if (!poiClass.isEmpty()) {
        q.must(QueryBuilders.termsQuery("poi_class", poiClass));
    }

    if (poiTagFilters != null) {
        q.must(buildPoiTagsFilter(poiTagFilters));
    }

    if (addressesOnly) {
        q.mustNot(QueryBuilders.termQuery("type", "poipnt"));
    }

    // If poiClass is empty, try to search over object names; otherwise q
    // already contains filters over poi types. The name filter is built from
    // the query text, so it is skipped when there is no analyzed query
    // (previously a null query was dereferenced here).
    QueryBuilder qb = q;
    if (query != null && poiClass.isEmpty() && !addressesOnly) {
        qb = QueryBuilders.filteredQuery(q, createPoiFilter(query));
    }

    boolean sortByHNVariants = false;
    if (buildSearchQContext.getHousenumberVariants() != null && strict) {
        sortByHNVariants = buildSearchQContext.getHousenumberVariants().size() == 1;
    }

    // Do not rescore with distance if we are in debug mode,
    // because rescore will erase match query scoring.
    // NOTE(review): the former guard also tested
    // (lat != null || lon != null || poiClass != null); poiClass is
    // dereferenced above, so that clause was always true and rescoring has
    // always run whenever explain is off. Behaviour is kept as is.
    if (!explain) {
        qb = rescore(qb, lat, lon, poiClass, sortByHNVariants);
    }

    if (bbox.size() == 4) {
        qb = addBBOXRestriction(qb, bbox);
    }

    if (!refs.isEmpty()) {
        qb = addRefsRestriction(qb, refs);
    }

    Client client = ESNodeHolder.getClient();
    SearchRequestBuilder searchRequest = client
            .prepareSearch("gazetteer").setTypes(IndexHolder.LOCATION)
            .setQuery(qb)
            .setExplain(explain);

    searchRequest.addSort(SortBuilders.scoreSort());
    searchRequest.setFetchSource(true);

    return searchRequest;
}
/**
 * Translate poi tag filters into a bool query over {@code more_tags.*} fields.
 *
 * For every key of the given object:
 * <ul>
 * <li>{@code opening_hours}: only an array whose first element is
 *     {@code "24_7"} is honoured; anything else is ignored;</li>
 * <li>array value: the tag must match one of the listed options;</li>
 * <li>any other value: the tag must be {@code true}.</li>
 * </ul>
 *
 * @param poiTagFilters tag name to filter value mapping
 *
 * @return bool query with one {@code must} clause per applied filter
 * */
@SuppressWarnings("unchecked")
private QueryBuilder buildPoiTagsFilter(JSONObject poiTagFilters) {
    BoolQueryBuilder res = QueryBuilders.boolQuery();

    for (String key : (Set<String>) poiTagFilters.keySet()) {
        if ("opening_hours".equals(key)) {
            // optJSONArray yields null when the value is not an array.
            JSONArray val = poiTagFilters.optJSONArray(key);
            if (val != null && val.length() > 0 && "24_7".equals(val.getString(0))) {
                res.must(QueryBuilders.termQuery("more_tags.opening_hours.24_7", true));
            }
        }
        else {
            Object val = poiTagFilters.get(key);
            if (val instanceof JSONArray) {
                JSONArray valArray = (JSONArray) val;
                List<String> options = new ArrayList<>(valArray.length());
                for (int i = 0; i < valArray.length(); i++) {
                    options.add(valArray.getString(i));
                }
                res.must(QueryBuilders.termsQuery("more_tags." + key, options));
            }
            else {
                res.must(QueryBuilders.termQuery("more_tags." + key, true));
            }
        }
    }

    return res;
}
/**
 * Look up the poi class that best matches the query text.
 *
 * The required part of the query, with numbers removed, is matched against
 * the {@code translated_title} and {@code keywords} fields of the poi class
 * index; only the first hit is kept.
 *
 * @param query analyzed query
 *
 * @return list holding the first matched poi class, or an empty list
 * */
protected List<JSONObject> findPoiClass(Query query) {
    Client esClient = ESNodeHolder.getClient();

    String text = query.required().woNumbers().toString();
    QueryBuilder classMatch =
            QueryBuilders.multiMatchQuery(text, "translated_title", "keywords");

    SearchRequestBuilder classSearch = esClient
            .prepareSearch("gazetteer")
            .setTypes(IndexHolder.POI_CLASS)
            .setQuery(classMatch);

    SearchHit[] found = classSearch.get().getHits().getHits();

    List<JSONObject> matched = new ArrayList<JSONObject>(found.length);
    if (found.length != 0) {
        matched.add(new JSONObject(found[0].sourceAsString()));
    }
    return matched;
}
/**
 * Limit the query to objects that reference at least one of the given ids.
 *
 * Wraps the query into a filtered query with a terms filter over the
 * {@code refs} field, so the references are effectively combined with OR.
 *
 * @param qb parent query builder
 * @param refs set of references
 *
 * @return query wrapped with the references filter
 * */
private QueryBuilder addRefsRestriction(QueryBuilder qb, Set<String> refs) {
    FilterBuilder refsFilter = FilterBuilders.termsFilter("refs", refs);
    return QueryBuilders.filteredQuery(qb, refsFilter);
}
/**
 * Build the filter that decides which POIs may appear in results.
 *
 * A document passes if it is not a {@code poipnt}, or if it is a
 * {@code poipnt} whose {@code name.text}, {@code poi_class} or
 * {@code poi_class_trans} matches the query text.
 *
 * @param querry analyzed query
 *
 * @return filter to search over pois
 * */
private FilterBuilder createPoiFilter(Query querry) {
    // Only those POIs are wanted whose name and/or type matched.
    QueryBuilder isPoi = QueryBuilders.termQuery("type", "poipnt");
    QueryBuilder textMatches = QueryBuilders.multiMatchQuery(
            querry.toString(), "name.text", "poi_class", "poi_class_trans");
    BoolQueryBuilder matchedPoi = QueryBuilders.boolQuery()
            .must(isPoi)
            .must(textMatches);

    FilterBuilder matchedPoiFilter = FilterBuilders.queryFilter(matchedPoi);
    FilterBuilder notPoiFilter =
            FilterBuilders.notFilter(FilterBuilders.termsFilter("type", "poipnt"));

    return FilterBuilders.orFilter(matchedPoiFilter, notPoiFilter);
}
/**
 * Restrict the main query to documents whose {@code center_point} lies
 * inside the bounding box.
 *
 * The list is read as [west, south, east, north]. Fails with
 * {@link NumberFormatException} if a coordinate can't be parsed as double.
 *
 * @param qb main query builder
 * @param bbox list of bbox coordinates
 *
 * @return restricted query
 * */
private QueryBuilder addBBOXRestriction(QueryBuilder qb, List<String> bbox) {
    // Parsed in the order the corners consume them: latitude, then longitude.
    double south = Double.parseDouble(bbox.get(1));
    double west = Double.parseDouble(bbox.get(0));
    double north = Double.parseDouble(bbox.get(3));
    double east = Double.parseDouble(bbox.get(2));

    FilterBuilder insideBox = FilterBuilders.geoBoundingBoxFilter("center_point")
            .bottomLeft(south, west)
            .topRight(north, east);

    return QueryBuilders.filteredQuery(qb, insideBox);
}
/**
 * Set up scoring.
 *
 * Wraps the query into a function score query (score mode {@code avg},
 * boost mode REPLACE) built from: a linear distance decay around the given
 * center (only when both coordinates are present), the {@code weight}
 * field, the original score and, optionally, a script that favours
 * documents with fewer {@code housenumber} values.
 *
 * @param q original query
 * @param lat latitude of center for geo-distance scoring, may be {@code null}
 * @param lon longitude of center for geo-distance scoring, may be {@code null}
 * @param poiClass poi classes; a non-empty set raises the distance weight
 * @param shortHNFirst add the housenumber-count script function
 *
 * @return Builder with rescored query
 * */
private static QueryBuilder rescore(QueryBuilder q, Double lat,
        Double lon, Set<String> poiClass, boolean shortHNFirst) {

    FunctionScoreQueryBuilder scored =
            QueryBuilders.functionScoreQuery(q)
                .scoreMode("avg")
                .boostMode(CombineFunction.REPLACE);

    boolean hasCenter = lat != null && lon != null;
    if (hasCenter) {
        float distanceWeight = poiClass.isEmpty() ? 5 : 25;
        GeoPoint center = new GeoPoint(lat, lon);
        scored.add(ScoreFunctionBuilders
                .linearDecayFunction("center_point", center, "5km")
                .setWeight(distanceWeight));
    }

    scored.add(ScoreFunctionBuilders.fieldValueFactorFunction("weight").setWeight(0.005f));
    scored.add(ScoreFunctionBuilders.scriptFunction("score", "expression").setWeight(10));

    if (shortHNFirst) {
        String housenumberScript =
                "def s = doc['housenumber'].values.size(); \n return (s == 0 ? 1 : 100/s)";
        scored.add(ScoreFunctionBuilders.scriptFunction(housenumberScript, "groovy").setWeight(0.1f));
    }

    return scored;
}
/**
 * Expand requested poi groups into their poi classes.
 *
 * Every group code from the {@code poigroup} header is resolved to its
 * hierarchy branch; the names of all features of that branch are added to
 * {@code poiClass}. Groups without a branch are skipped.
 *
 * @param request REST Express request
 * @param poiClass set of strings where parsed poi classes will be added
 * @param hname name of osm-doc hierarchy which contains group and poi classes
 * */
public static void addPOIGroups(Request request, Set<String> poiClass, String hname) {
    for (String groupCode : getSet(request, POI_GROUP_HEADER)) {
        Collection<? extends Feature> branch =
                OSMDocSinglton.get().getReader().getHierarcyBranch(hname, groupCode);
        if (branch == null) {
            continue;
        }
        for (Feature feature : branch) {
            poiClass.add(feature.getName());
        }
    }
}
/**
 * Build the main text search query.
 *
 * Creates an empty bool query and lets the search builder fill it.
 * Kept as a separate method so subclasses can override it.
 *
 * @param query analyzed user query
 * @param strict create strict request
 * @param buildSearchQContext context (will be filled with some additional info)
 *
 * @return query builder
 * */
public BoolQueryBuilder getSearchQuerry(Query query, boolean strict, BuildSearchQContext buildSearchQContext) {
    BoolQueryBuilder mainQuery = QueryBuilders.boolQuery();
    searchBuilder.mainSearchQ(query, mainQuery, strict, buildSearchQContext);
    return mainQuery;
}
/**
 * Describe this endpoint and the url parameters it understands.
 *
 * @param uriMetadata route metadata, used for the url pattern
 *
 * @return endpoint description
 * */
@Override
public Endpoint getMeta(UriMetadata uriMetadata) {
    Endpoint meta = new Endpoint(uriMetadata.getPattern(), "locations search",
            "Searches for objects.");

    addUrlParameter(meta, Q_HEADER, "Querry text");
    addUrlParameter(meta, STRICT_SEARCH_HEADER,
            "Create strict query. Default value is false.");
    addUrlParameter(meta, EXPLAIN_HEADER,
            "Explain search results score. Default value is false.");
    addUrlParameter(meta, TYPE_HEADER,
            "Type of feature. [adrpnt, poipnt, hghnet, plcpnt, admbnd]"
            + " Multiple values are combined via OR.");
    addUrlParameter(meta, FULL_GEOMETRY_HEADER,
            "Include or not full geometry of object. Default is not include.");
    addUrlParameter(meta, BBOX_HEADER,
            "Search inside given BBOX only. [west, south, east, north]");
    addUrlParameter(meta, POI_CLASS_HEADER,
            "Look for pois of exact types. May contains multiple values. "
            + "Codes are searched among poi hierarchy provided via '" + HIERARCHY_CODE_HEADER + "'");
    addUrlParameter(meta, POI_GROUP_HEADER,
            "Look for pois of exact types "
            + "(Groups will be expanded and merged with '" + POI_CLASS_HEADER + "' header). "
            + "May contains multiple values. "
            + "Codes are searched among poi hierarchy provided via '" + HIERARCHY_CODE_HEADER + "'");
    // The parameter is read by the search but was missing from the description.
    addUrlParameter(meta, POI_TAGS_FILTER_HEADER,
            "JSON object with poi tag filters. Keys are tag names. "
            + "Array value lists accepted tag values, "
            + "any other value requires the tag to be true. "
            + "For opening_hours only [\"24_7\"] is supported.");
    addUrlParameter(meta, HIERARCHY_CODE_HEADER,
            "Code of OSM Doc hierarchy. Used for POI types search.");
    addUrlParameter(meta, LAT_HEADER,
            "Latitude of map center, used for distance scoring. "
            + "Switch off distance score if absent.");
    addUrlParameter(meta, LON_HEADER,
            "Longitude of map center, used for distance scoring. "
            + "Switch off distance score if absent.");
    // The trailing distance-score sentence was copied from lat/lon and does
    // not apply to reference filtering, so it is dropped here.
    addUrlParameter(meta, REFERENCES_HEADER,
            "Features id's of higher objects to filter results. "
            + "In other words will search over those object, "
            + "which have provided boundaries or street as part of address. "
            + "Array members will be added using OR.");
    addUrlParameter(meta, ADDRESSES_ONLY_HEADER,
            "Search only for addresses, don't search for POIs.");
    addUrlParameter(meta, ANSWER_DETALIZATION_HEADER,
            "How many details should contains answer. full/short");

    return meta;
}

/** Append one url parameter description to the endpoint metadata. */
private static void addUrlParameter(Endpoint meta, String name, String description) {
    meta.getUrlParameters().add(new Parameter(name, description));
}
}