/* * Copyright 2015 herd contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.finra.herd.dao.impl; import static org.elasticsearch.index.query.MultiMatchQueryBuilder.Type.BEST_FIELDS; import static org.elasticsearch.index.query.MultiMatchQueryBuilder.Type.PHRASE_PREFIX; import static org.elasticsearch.index.query.QueryBuilders.disMaxQuery; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang.BooleanUtils; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.text.Text; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.sort.SortBuilders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; import org.finra.herd.core.HerdStringUtils; import org.finra.herd.core.helper.ConfigurationHelper; import org.finra.herd.dao.IndexSearchDao; import org.finra.herd.dao.TransportClientFactory; import org.finra.herd.dao.helper.ElasticsearchHelper; import org.finra.herd.dao.helper.JsonHelper; import org.finra.herd.model.api.xml.BusinessObjectDefinitionKey; import org.finra.herd.model.api.xml.Facet; import org.finra.herd.model.api.xml.Field; import org.finra.herd.model.api.xml.Highlight; import org.finra.herd.model.api.xml.IndexSearchRequest; import org.finra.herd.model.api.xml.IndexSearchResponse; import org.finra.herd.model.api.xml.IndexSearchResult; import org.finra.herd.model.api.xml.IndexSearchResultKey; import org.finra.herd.model.api.xml.TagKey; import org.finra.herd.model.dto.ConfigurationValue; import org.finra.herd.model.dto.ElasticsearchResponseDto; import org.finra.herd.model.dto.IndexSearchHighlightFields; /** * IndexSearchDaoImpl */ @Repository public class IndexSearchDaoImpl implements IndexSearchDao { private static final Logger LOGGER = LoggerFactory.getLogger(IndexSearchDaoImpl.class); /** * Best fields query boost */ private static final float BEST_FIELDS_QUERY_BOOST = 1f; /** * The business object definition index */ private static final String BUSINESS_OBJECT_DEFINITION_INDEX = "bdef"; /** * The boost amount for the business object definition index */ private static final float BUSINESS_OBJECT_DEFINITION_INDEX_BOOST = 1f; /** * String to select the tag type code and namespace code */ private static final String CODE = "code"; /** * Source string for the description */ private static final String DESCRIPTION_SOURCE = "description"; /** * Constant to hold the display name option for the business object definition search */ private static final String DISPLAY_NAME_FIELD = "displayname"; /** * Source string for the display name */ private static final String DISPLAY_NAME_SOURCE = "displayName"; /** * String to select the namespace */ private static final String NAMESPACE = "namespace"; /** * Source string for the namespace code */ private static final String NAMESPACE_CODE_SOURCE = "namespace.code"; /** * Source string for the name */ private static final String NAME_SOURCE = "name"; /** * N-Grams field type */ private static final String FIELD_TYPE_NGRAMS = "ngrams"; /** * Phrase prefix query boost */ private static final float PHRASE_PREFIX_QUERY_BOOST = 10f; /** * The number of the indexSearch results to return */ private static final int SEARCH_RESULT_SIZE = 200; /** * Constant to hold the short description option for the business object definition search */ private static final String SHORT_DESCRIPTION_FIELD = "shortdescription"; /** * Stemmed field type */ private static final String FIELD_TYPE_STEMMED = "stemmed"; /** * Source string for the tagCode */ private static final String TAG_CODE_SOURCE = "tagCode"; /** * The tag index */ private static final String TAG_INDEX = "tag"; /** * The boost amount for the tag index */ private static final float TAG_INDEX_BOOST = 1000f; /** * String to select the tag type */ private static final String TAG_TYPE = "tagType"; /** * Source string for the tagType.code */ private static final String TAG_TYPE_CODE_SOURCE = "tagType.code"; /** * The configuration helper used to retrieve configuration values */ @Autowired private ConfigurationHelper configurationHelper; /** * Helper to deserialize JSON values */ @Autowired private JsonHelper jsonHelper; /** * The transport client factory will create a transport client which is a connection to the elasticsearch index */ @Autowired private TransportClientFactory transportClientFactory; @Autowired private ElasticsearchHelper elasticsearchHelper; @Override public IndexSearchResponse indexSearch(final IndexSearchRequest request, final Set<String> fields) { final Integer tagShortDescMaxLength = configurationHelper.getProperty(ConfigurationValue.TAG_SHORT_DESCRIPTION_LENGTH, Integer.class); final Integer businessObjectDefinitionShortDescMaxLength = configurationHelper.getProperty(ConfigurationValue.BUSINESS_OBJECT_DEFINITION_SHORT_DESCRIPTION_LENGTH, Integer.class); // Build two multi match queries, one with phrase prefix, and one with best fields, but boost the phrase prefix final MultiMatchQueryBuilder phrasePrefixMultiMatchQueryBuilder = buildMultiMatchQuery(request.getSearchTerm(), PHRASE_PREFIX, PHRASE_PREFIX_QUERY_BOOST, FIELD_TYPE_STEMMED); final MultiMatchQueryBuilder bestFieldsMultiMatchQueryBuilder = buildMultiMatchQuery(request.getSearchTerm(), BEST_FIELDS, BEST_FIELDS_QUERY_BOOST, FIELD_TYPE_NGRAMS); QueryBuilder queryBuilder; // Add filter clauses if index search filters are specified in the request if (CollectionUtils.isNotEmpty(request.getIndexSearchFilters())) { BoolQueryBuilder indexSearchQueryBuilder = elasticsearchHelper.addIndexSearchFilterBooleanClause(request.getIndexSearchFilters()); // Add the multi match queries to a dis max query and wrap within a bool query, then apply filters to it queryBuilder = QueryBuilders.boolQuery().must(disMaxQuery().add(phrasePrefixMultiMatchQueryBuilder).add(bestFieldsMultiMatchQueryBuilder)) .filter(indexSearchQueryBuilder); } else { // Add only the multi match queries to a dis max query if no filters are specified queryBuilder = disMaxQuery().add(phrasePrefixMultiMatchQueryBuilder).add(bestFieldsMultiMatchQueryBuilder); } // The fields in the search indexes to return final String[] searchSources = {NAME_SOURCE, NAMESPACE_CODE_SOURCE, TAG_CODE_SOURCE, TAG_TYPE_CODE_SOURCE, DISPLAY_NAME_SOURCE, DESCRIPTION_SOURCE}; // Create a new indexSearch source builder final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); // Fetch only the required fields searchSourceBuilder.fetchSource(searchSources, null); searchSourceBuilder.query(queryBuilder); // Create a indexSearch request builder final TransportClient transportClient = transportClientFactory.getTransportClient(); SearchRequestBuilder searchRequestBuilder = transportClient.prepareSearch(BUSINESS_OBJECT_DEFINITION_INDEX, TAG_INDEX); searchRequestBuilder.setSource(searchSourceBuilder).setSize(SEARCH_RESULT_SIZE) .addIndexBoost(BUSINESS_OBJECT_DEFINITION_INDEX, BUSINESS_OBJECT_DEFINITION_INDEX_BOOST).addIndexBoost(TAG_INDEX, TAG_INDEX_BOOST) .addSort(SortBuilders.scoreSort()); String preTag = null; String postTag = null; // Add highlighting if specified in the request if (BooleanUtils.isTrue(request.isEnableHitHighlighting())) { // Fetch configured 'tag' values for highlighting preTag = configurationHelper.getProperty(ConfigurationValue.ELASTICSEARCH_HIGHLIGHT_PRETAGS); postTag = configurationHelper.getProperty(ConfigurationValue.ELASTICSEARCH_HIGHLIGHT_POSTTAGS); searchRequestBuilder.highlighter(buildHighlightQuery(preTag, postTag)); } // Add facet aggregations if specified in the request if (CollectionUtils.isNotEmpty(request.getFacetFields())) { searchRequestBuilder = elasticsearchHelper.addFacetFieldAggregations(new HashSet<>(request.getFacetFields()), searchRequestBuilder); } // Log the actual elasticsearch query when debug is enabled LOGGER.debug("indexSearchRequest={}", searchRequestBuilder.toString()); // Retrieve the indexSearch response final SearchResponse searchResponse = searchRequestBuilder.execute().actionGet(); final SearchHits searchHits = searchResponse.getHits(); final SearchHit[] searchHitArray = searchHits.hits(); final List<IndexSearchResult> indexSearchResults = new ArrayList<>(); // For each indexSearch hit for (final SearchHit searchHit : searchHitArray) { // Get the source map from the indexSearch hit final Map<String, Object> sourceMap = searchHit.sourceAsMap(); // Get the index from which this result is from final String index = searchHit.getShard().getIndex(); // Create a new document to populate with the indexSearch results final IndexSearchResult indexSearchResult = new IndexSearchResult(); // Populate the results indexSearchResult.setIndexSearchResultType(index); if (fields.contains(DISPLAY_NAME_FIELD)) { indexSearchResult.setDisplayName((String) sourceMap.get(DISPLAY_NAME_SOURCE)); } // Populate tag index specific key if (index.equals(TAG_INDEX)) { if (fields.contains(SHORT_DESCRIPTION_FIELD)) { indexSearchResult .setShortDescription(HerdStringUtils.getShortDescription((String) sourceMap.get(DESCRIPTION_SOURCE), tagShortDescMaxLength)); } final TagKey tagKey = new TagKey(); tagKey.setTagCode((String) sourceMap.get(TAG_CODE_SOURCE)); tagKey.setTagTypeCode((String) ((Map) sourceMap.get(TAG_TYPE)).get(CODE)); indexSearchResult.setIndexSearchResultKey(new IndexSearchResultKey(tagKey, null)); } // Populate business object definition key else if (index.equals(BUSINESS_OBJECT_DEFINITION_INDEX)) { if (fields.contains(SHORT_DESCRIPTION_FIELD)) { indexSearchResult.setShortDescription( HerdStringUtils.getShortDescription((String) sourceMap.get(DESCRIPTION_SOURCE), businessObjectDefinitionShortDescMaxLength)); } final BusinessObjectDefinitionKey businessObjectDefinitionKey = new BusinessObjectDefinitionKey(); businessObjectDefinitionKey.setNamespace((String) ((Map) sourceMap.get(NAMESPACE)).get(CODE)); businessObjectDefinitionKey.setBusinessObjectDefinitionName((String) sourceMap.get(NAME_SOURCE)); indexSearchResult.setIndexSearchResultKey(new IndexSearchResultKey(null, businessObjectDefinitionKey)); } if (BooleanUtils.isTrue(request.isEnableHitHighlighting())) { // Extract highlighted content from the search hit and clean html tags except the pre/post-tags as configured Highlight highlightedContent = extractHighlightedContent(searchHit, preTag, postTag); // Set highlighted content in the response element indexSearchResult.setHighlight(highlightedContent); } indexSearchResults.add(indexSearchResult); } List<Facet> facets = null; if (CollectionUtils.isNotEmpty(request.getFacetFields())) { // Extract facets from the search response facets = new ArrayList<>(extractFacets(request, searchResponse)); } return new IndexSearchResponse(searchHits.getTotalHits(), indexSearchResults, facets); } /** * Extracts highlighted content from a given {@link SearchHit} * * @param searchHit a given {@link SearchHit} from the elasticsearch results * @param preTag the specified pre-tag for highlighting * @param postTag the specified post-tag for highlighting * * @return {@link Highlight} a cleaned highlighted content */ private Highlight extractHighlightedContent(SearchHit searchHit, String preTag, String postTag) { Highlight highlightedContent = new Highlight(); List<Field> highlightFields = new ArrayList<>(); // make sure there is highlighted content in the search hit if (searchHit.getHighlightFields() != null) { for (Map.Entry<String, HighlightField> entry : searchHit.getHighlightFields().entrySet()) { Field field = new Field(); // Extract the field-name field.setFieldName(entry.getKey()); List<String> cleanFragments = new ArrayList<>(); // Extract fragments which have the highlighted content Text[] fragments = entry.getValue().getFragments(); for (Text fragment : fragments) { cleanFragments.add(HerdStringUtils.stripHtml(fragment.toString(), preTag, postTag)); } field.setFragments(cleanFragments); highlightFields.add(field); } } highlightedContent.setFields(highlightFields); return highlightedContent; } /** * Extracts facet information from a {@link SearchResponse} object * * @param request The specified {@link IndexSearchRequest} * @param searchResponse A given {@link SearchResponse} to extract the facet information from * * @return A list of {@link Facet} objects */ private List<Facet> extractFacets(IndexSearchRequest request, SearchResponse searchResponse) { ElasticsearchResponseDto elasticsearchResponseDto = new ElasticsearchResponseDto(); if (request.getFacetFields().contains(ElasticsearchHelper.TAG_FACET)) { elasticsearchResponseDto.setNestTagTypeIndexSearchResponseDtos(elasticsearchHelper.getNestedTagTagIndexSearchResponseDto(searchResponse)); elasticsearchResponseDto.setTagTypeIndexSearchResponseDtos(elasticsearchHelper.getTagTagIndexSearchResponseDto(searchResponse)); } if (request.getFacetFields().contains(ElasticsearchHelper.RESULT_TYPE_FACET)) { elasticsearchResponseDto.setResultTypeIndexSearchResponseDtos(elasticsearchHelper.getResultTypeIndexSearchResponseDto(searchResponse)); } return elasticsearchHelper.getFacetsResponse(elasticsearchResponseDto, true); } /** * Private method to build a multi match query. * * @param searchTerm the term on which to search * * @return the multi match query */ private MultiMatchQueryBuilder buildMultiMatchQuery(final String searchTerm, final MultiMatchQueryBuilder.Type queryType, final float queryBoost, final String fieldType) { final MultiMatchQueryBuilder multiMatchQueryBuilder = QueryBuilders.multiMatchQuery(searchTerm).type(queryType); multiMatchQueryBuilder.boost(queryBoost); if (fieldType.equals(FIELD_TYPE_STEMMED)) { // Get the configured value for 'stemmed' fields and their respective boosts if any String stemmedFieldsValue = configurationHelper.getProperty(ConfigurationValue.ELASTICSEARCH_SEARCHABLE_FIELDS_STEMMED); try { @SuppressWarnings("unchecked") final Map<String, String> stemmedFieldsWithBoost = jsonHelper.unmarshallJsonToObject(Map.class, stemmedFieldsValue); final Map<String, Float> fieldsBoosts = new HashMap<>(); // This additional step is needed because trying to cast an unmarshalled json to a Map of anything other than String key-value pairs won't work stemmedFieldsWithBoost.entrySet().forEach(entry -> fieldsBoosts.put(entry.getKey(), Float.parseFloat(entry.getValue()))); // Set the fields and their respective boosts to the multi-match query multiMatchQueryBuilder.fields(fieldsBoosts); } catch (IOException e) { LOGGER.warn("Could not parse the configured JSON value for stemmed fields: {}", ConfigurationValue.ELASTICSEARCH_SEARCHABLE_FIELDS_STEMMED, e); } } if (fieldType.equals(FIELD_TYPE_NGRAMS)) { // Get the configured value for 'ngrams' fields and their respective boosts if any String ngramsFieldsValue = configurationHelper.getProperty(ConfigurationValue.ELASTICSEARCH_SEARCHABLE_FIELDS_NGRAMS); try { @SuppressWarnings("unchecked") final Map<String, String> ngramsFieldsWithBoost = jsonHelper.unmarshallJsonToObject(Map.class, ngramsFieldsValue); final Map<String, Float> fieldsBoosts = new HashMap<>(); // This additional step is needed because trying to cast an unmarshalled json to a Map of anything other than String key-value pairs won't work ngramsFieldsWithBoost.entrySet().forEach(entry -> fieldsBoosts.put(entry.getKey(), Float.parseFloat(entry.getValue()))); // Set the fields and their respective boosts to the multi-match query multiMatchQueryBuilder.fields(fieldsBoosts); } catch (IOException e) { LOGGER.warn("Could not parse the configured JSON value for ngrams fields: {}", ConfigurationValue.ELASTICSEARCH_SEARCHABLE_FIELDS_NGRAMS, e); } } return multiMatchQueryBuilder; } /** * Builds a {@link HighlightBuilder} based on (pre/post)tags and fields fetched from the DB config which is added to the main {@link SearchRequestBuilder} * * @param preTag The specified pre-tag to be used for highlighting * @param postTag The specified post-tag to be used for highlighting * * @return A configured {@link HighlightBuilder} object */ private HighlightBuilder buildHighlightQuery(String preTag, String postTag) { HighlightBuilder highlightBuilder = new HighlightBuilder(); // Field matching is not needed since we are matching on multiple 'type' fields like stemmed and ngrams and enabling highlighting on all those fields // will yield duplicates highlightBuilder.requireFieldMatch(false); // Set the configured value for pre-tags for highlighting highlightBuilder.preTags(preTag); // Set the configured value for post-tags for highlighting highlightBuilder.postTags(postTag); // Get highlight fields value from configuration String highlightFieldsValue = configurationHelper.getProperty(ConfigurationValue.ELASTICSEARCH_HIGHLIGHT_FIELDS); try { @SuppressWarnings("unchecked") IndexSearchHighlightFields highlightFieldsConfig = jsonHelper.unmarshallJsonToObject(IndexSearchHighlightFields.class, highlightFieldsValue); highlightFieldsConfig.getHighlightFields().forEach( highlightFieldConfig -> { // set the field name to the configured value HighlightBuilder.Field highlightField = new HighlightBuilder.Field(highlightFieldConfig.getFieldName()); // set matched_fields to the configured list of fields, this accounts for 'multifields' that analyze the same string in different ways if (CollectionUtils.isNotEmpty(highlightFieldConfig.getMatchedFields())) { highlightField.matchedFields(highlightFieldConfig.getMatchedFields().toArray(new String[0])); } // set fragment size to the configured value if (highlightFieldConfig.getFragmentSize() != null) { highlightField.fragmentSize(highlightFieldConfig.getFragmentSize()); } // set the number of desired fragments to the configured value if (highlightFieldConfig.getNumOfFragments() != null) { highlightField.numOfFragments(highlightFieldConfig.getNumOfFragments()); } highlightBuilder.field(highlightField); } ); } catch (IOException e) { LOGGER.warn("Could not parse the configured value for highlight fields: {}", ConfigurationValue.ELASTICSEARCH_HIGHLIGHT_FIELDS, e); } return highlightBuilder; } }