/* * Copyright 2011 Global Biodiversity Information Facility (GBIF) * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.gbif.checklistbank.index.service; import org.gbif.api.model.checklistbank.search.NameUsageSearchParameter; import org.gbif.api.model.checklistbank.search.NameUsageSearchRequest; import org.gbif.api.model.checklistbank.search.NameUsageSuggestRequest; import org.gbif.api.model.common.paging.Pageable; import org.gbif.api.model.common.search.FacetedSearchRequest; import org.gbif.api.model.common.search.SearchRequest; import org.gbif.api.util.VocabularyUtils; import org.gbif.api.vocabulary.Rank; import org.gbif.common.search.solr.QueryUtils; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import com.google.common.base.MoreObjects; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.FacetParams; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.gbif.checklistbank.index.service.SolrMapping.FACET_MAPPING; import static org.gbif.common.search.solr.QueryUtils.DEFAULT_FACET_COUNT; import static org.gbif.common.search.solr.QueryUtils.DEFAULT_FACET_SORT; import static org.gbif.common.search.solr.QueryUtils.NOT_OP; import static org.gbif.common.search.solr.QueryUtils.PARAMS_JOINER; import static org.gbif.common.search.solr.QueryUtils.PARAMS_OR_JOINER; import static org.gbif.common.search.solr.QueryUtils.perFieldParamName; import static org.gbif.common.search.solr.QueryUtils.toParenthesesQuery; import static org.gbif.common.search.solr.QueryUtils.toPhraseQuery; import static org.gbif.common.search.solr.SolrConstants.BLANK; import static org.gbif.common.search.solr.SolrConstants.DEFAULT_QUERY; import static org.gbif.common.search.solr.SolrConstants.NUM_HL_SNIPPETS; import static org.gbif.ws.util.WebserviceParameter.DEFAULT_SEARCH_PARAM_VALUE; /** * Builder class to generate solr queries based on the dismax query parser. */ public class SolrQueryBuilder { private static final Logger LOG = LoggerFactory.getLogger(SolrQueryBuilder.class); private static final String QUERY_PARSER = "dismax"; private static final Map<NameUsageSearchRequest.QueryField, String> QUERY_FIELDS = ImmutableMap.of( NameUsageSearchRequest.QueryField.DESCRIPTION, "description^0.1", NameUsageSearchRequest.QueryField.VERNACULAR, "vernacular_name^3", NameUsageSearchRequest.QueryField.SCIENTIFIC, "canonical_name^5 scientific_name^2 species subgenus family" ); private static final Map<NameUsageSearchRequest.QueryField, String> PHRASE_FIELDS = ImmutableMap.of( NameUsageSearchRequest.QueryField.DESCRIPTION, "description^2", NameUsageSearchRequest.QueryField.VERNACULAR, "vernacular_name^20", NameUsageSearchRequest.QueryField.SCIENTIFIC, "scientific_name^100 canonical_name^50" ); // boost accepted taxa and scientific names private static final String BOOST_QUERY = "taxonomic_status_key:0^1.5 name_type:0^2"; private static final String BOOST_FUNCTION = "sub(" + Rank.values().length + ",rank_key)"; private static final String SUGGEST_QUERY_FIELDS = "canonical_name_ngram canonical_name_ngram_tokenized^0.8 canonical_name^10 scientific_name^2"; private static final String SUGGEST_PHRASE_FIELDS = "canonical_name^50"; private static final Integer FRAGMENT_SIZE = 100; private static String prepareQ(String q) { if (Strings.isNullOrEmpty(q)) return null; q = q.trim(); // the common-ws utils replaces empty queries with * as the default - this does not work for dismax, remove it if (q.equals(DEFAULT_SEARCH_PARAM_VALUE)) return null; return q.contains(BLANK) ? toPhraseQuery(q) : q; } private static String buildFields(Map<NameUsageSearchRequest.QueryField, String> config, Set<NameUsageSearchRequest.QueryField> fields) { StringBuilder sb = new StringBuilder(); for (NameUsageSearchRequest.QueryField f : fields) { sb.append(config.get(f)); sb.append(" "); } return sb.toString().trim(); } public SolrQuery build(NameUsageSearchRequest request) { SolrQuery query = buildBase(request); // dismax fields query.set(DisMaxParams.QF, buildFields(QUERY_FIELDS, request.getQueryFields())); query.set(DisMaxParams.PF, buildFields(PHRASE_FIELDS, request.getQueryFields())); // request facets requestFacets(request, query); // highlight setHighLightParams(request, query); LOG.debug("Solr search query build: {}", query); return query; } public SolrQuery build(NameUsageSuggestRequest request) { SolrQuery query = buildBase(request); // dismax fields query.set(DisMaxParams.QF, SUGGEST_QUERY_FIELDS); query.set(DisMaxParams.PF, SUGGEST_PHRASE_FIELDS); LOG.debug("Solr suggest query build: {}", query); return query; } private SolrQuery buildBase(SearchRequest<NameUsageSearchParameter> request) { SolrQuery query = new SolrQuery(); // q param String q = prepareQ(request.getQ()); if (!Strings.isNullOrEmpty(q)) { query.setQuery(q); } // use dismax query parser query.set("defType", QUERY_PARSER); // sets the default catch all, alternative query if q above is empty query.set(DisMaxParams.ALTQ, DEFAULT_QUERY); // facet based filter query setFacetFilterQuery(request, query); // boost accepted status query.set(DisMaxParams.BQ, BOOST_QUERY); // boost higher ranks query.set(DisMaxParams.BF, BOOST_FUNCTION); // paging QueryUtils.setQueryPaging(request, query); return query; } /** * Helper method that sets the highlighting parameters. * * @param searchRequest the searchRequest used to extract the parameters * @param solrQuery this object is modified by adding the facets parameters */ private void setHighLightParams(NameUsageSearchRequest searchRequest, SolrQuery solrQuery) { if (searchRequest.isHighlight()) { solrQuery.setHighlight(searchRequest.isHighlight()); solrQuery.setHighlightSnippets(NUM_HL_SNIPPETS); solrQuery.setHighlightFragsize(FRAGMENT_SIZE); for (NameUsageSearchRequest.QueryField hlField : searchRequest.getHighlightFields()) { solrQuery.addHighlightField(SolrMapping.HIGHLIGHT_FIELDS.get(hlField)); } } } /** * Adds the filter query to SolrQuery object. * Creates a conjunction of disjunctions: disjunctions(ORs) are created for the filter applied to the same field; * those disjunctions are joint in a big conjunction. */ private static void setFacetFilterQuery(SearchRequest<NameUsageSearchParameter> request, SolrQuery solrQuery) { Multimap<NameUsageSearchParameter, String> params = request.getParameters(); if (params != null) { for (NameUsageSearchParameter param : params.keySet()) { String solrField = FACET_MAPPING.get(param); if (solrField != null) { List<String> predicates = Lists.newArrayList(); Boolean negated = null; for (String value : params.get(param)) { if (Strings.isNullOrEmpty(value)) { throw new IllegalArgumentException("Null value not allowed for filter parameter " + param); } // treat negation if (negated == null) { negated = QueryUtils.isNegated(value); } else { // make sure we do not mix negated and unnegated filters for the same parameter - this is too complex and not supported if (QueryUtils.isNegated(value) != negated) { throw new IllegalArgumentException("Mixing of negated and not negated filters for the same parameter " + param.name() + " is not allowed"); } } // strip off negation symbol before we parse the value if (negated) { value = QueryUtils.removeNegation(value); } // parse value into typed instance String filterVal; if (Enum.class.isAssignableFrom(param.type())) { Enum<?> e = VocabularyUtils.lookupEnum(value, (Class<? extends Enum<?>>) param.type()); filterVal = String.valueOf(e.ordinal()); } else if (UUID.class.isAssignableFrom(param.type())) { filterVal = UUID.fromString(value).toString(); } else if (Double.class.isAssignableFrom(param.type())) { filterVal = String.valueOf(Double.parseDouble(value)); } else if (Integer.class.isAssignableFrom(param.type())) { filterVal = String.valueOf(Integer.parseInt(value)); } else if (Boolean.class.isAssignableFrom(param.type())) { filterVal = String.valueOf(Boolean.parseBoolean(value)); } else { filterVal = toPhraseQuery(value); } final String predicate = PARAMS_JOINER.join(solrField, filterVal); predicates.add(predicate); } // combine all parameter predicates with OR if (!predicates.isEmpty()) { String parenthesis = toParenthesesQuery(PARAMS_OR_JOINER.join(predicates)); // tag filter queries so we can exclude them later for multi value faceting // http://yonik.com/multi-select-faceting/ solrQuery.addFilterQuery(tag(solrField, negated ? NOT_OP + parenthesis : parenthesis)); } } } } } /** * Helper method that sets the parameter for a faceted query. * * @param searchRequest the searchRequest used to extract the parameters * @param solrQuery this object is modified by adding the facets parameters */ private void requestFacets(FacetedSearchRequest<NameUsageSearchParameter> searchRequest, SolrQuery solrQuery) { if (!searchRequest.getFacets().isEmpty()) { // Only show facets that contains at least 1 record solrQuery.setFacet(true); // defaults if not overridden on per field basis solrQuery.setFacetMinCount(MoreObjects.firstNonNull(searchRequest.getFacetMinCount(), DEFAULT_FACET_COUNT)); solrQuery.setFacetMissing(false); solrQuery.setFacetSort(DEFAULT_FACET_SORT.toString().toLowerCase()); if(searchRequest.getFacetLimit() != null) { solrQuery.setFacetLimit(searchRequest.getFacetLimit()); } if(searchRequest.getFacetOffset() != null) { solrQuery.setParam(FacetParams.FACET_OFFSET, searchRequest.getFacetOffset().toString()); } for (final NameUsageSearchParameter facet : searchRequest.getFacets()) { if (!FACET_MAPPING.containsKey(facet)) { LOG.warn("{} is no valid facet. Ignore", facet); continue; } final String field = FACET_MAPPING.get(facet); if (searchRequest.isMultiSelectFacets()) { // use exclusion filter with same name as used in filter query // http://wiki.apache.org/solr/SimpleFacetParameters#Tagging_and_excluding_Filters // http://yonik.com/multi-select-faceting/ solrQuery.addFacetField(ex(field, field)); } else { solrQuery.addFacetField(field); } Pageable facetPage = searchRequest.getFacetPage(facet); if (facetPage != null) { solrQuery.setParam(perFieldParamName(field, FacetParams.FACET_OFFSET), Long.toString(facetPage.getOffset())); solrQuery.setParam(perFieldParamName(field, FacetParams.FACET_LIMIT), Integer.toString(facetPage.getLimit())); } } } } private static String ex(String tag, String filter) { return "{!ex=" + tag + "}" + filter; } private static String tag(String tag, String filter) { return "{!tag=" + tag + "}" + filter; } }