/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.Term; import org.apache.lucene.queries.BlendedTermQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; public class MultiMatchQuery extends MatchQuery { private Float groupTieBreaker = null; public void setTieBreaker(float tieBreaker) { this.groupTieBreaker = tieBreaker; } public MultiMatchQuery(QueryShardContext context) { super(context); } private Query parseAndApply(Type type, String fieldName, Object value, String minimumShouldMatch, Float boostValue) throws IOException { Query query = parse(type, fieldName, value); query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); if (query != null && boostValue != null && boostValue != AbstractQueryBuilder.DEFAULT_BOOST) { query = new BoostQuery(query, boostValue); } return query; } public Query parse(MultiMatchQueryBuilder.Type type, Map<String, Float> fieldNames, Object value, String minimumShouldMatch) throws IOException { Query result; if (fieldNames.size() == 1) { Map.Entry<String, Float> fieldBoost = fieldNames.entrySet().iterator().next(); Float boostValue = fieldBoost.getValue(); result = parseAndApply(type.matchQueryType(), fieldBoost.getKey(), value, minimumShouldMatch, boostValue); } else { final float tieBreaker = groupTieBreaker == null ? type.tieBreaker() : groupTieBreaker; switch (type) { case PHRASE: case PHRASE_PREFIX: case BEST_FIELDS: case MOST_FIELDS: queryBuilder = new QueryBuilder(tieBreaker); break; case CROSS_FIELDS: queryBuilder = new CrossFieldsQueryBuilder(tieBreaker); break; default: throw new IllegalStateException("No such type: " + type); } final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch); result = queryBuilder.combineGrouped(queries); } assert result != null; return result; } private QueryBuilder queryBuilder; public class QueryBuilder { protected final boolean groupDismax; protected final float tieBreaker; public QueryBuilder(float tieBreaker) { this(tieBreaker != 1.0f, tieBreaker); } public QueryBuilder(boolean groupDismax, float tieBreaker) { this.groupDismax = groupDismax; this.tieBreaker = tieBreaker; } public List<Query> buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map<String, Float> fieldNames, Object value, String minimumShouldMatch) throws IOException{ List<Query> queries = new ArrayList<>(); for (String fieldName : fieldNames.keySet()) { Float boostValue = fieldNames.get(fieldName); Query query = parseGroup(type.matchQueryType(), fieldName, boostValue, value, minimumShouldMatch); if (query != null) { queries.add(query); } } return queries; } public Query parseGroup(Type type, String field, Float boostValue, Object value, String minimumShouldMatch) throws IOException { return parseAndApply(type, field, value, minimumShouldMatch, boostValue); } private Query combineGrouped(List<? extends Query> groupQuery) { if (groupQuery == null || groupQuery.isEmpty()) { return new MatchNoDocsQuery("[multi_match] list of group queries was empty"); } if (groupQuery.size() == 1) { return groupQuery.get(0); } if (groupDismax) { List<Query> queries = new ArrayList<>(); for (Query query : groupQuery) { queries.add(query); } return new DisjunctionMaxQuery(queries, tieBreaker); } else { final BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); for (Query query : groupQuery) { booleanQuery.add(query, BooleanClause.Occur.SHOULD); } return booleanQuery.build(); } } public Query blendTerm(Term term, MappedFieldType fieldType) { return MultiMatchQuery.super.blendTermQuery(term, fieldType); } public Query blendTerms(Term[] terms, MappedFieldType fieldType) { return MultiMatchQuery.super.blendTermsQuery(terms, fieldType); } public Query termQuery(MappedFieldType fieldType, Object value) { return MultiMatchQuery.this.termQuery(fieldType, value, lenient); } } final class CrossFieldsQueryBuilder extends QueryBuilder { private FieldAndFieldType[] blendedFields; CrossFieldsQueryBuilder(float tieBreaker) { super(false, tieBreaker); } @Override public List<Query> buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map<String, Float> fieldNames, Object value, String minimumShouldMatch) throws IOException { Map<Analyzer, List<FieldAndFieldType>> groups = new HashMap<>(); List<Tuple<String, Float>> missing = new ArrayList<>(); for (Map.Entry<String, Float> entry : fieldNames.entrySet()) { String name = entry.getKey(); MappedFieldType fieldType = context.fieldMapper(name); if (fieldType != null) { Analyzer actualAnalyzer = getAnalyzer(fieldType); name = fieldType.name(); if (!groups.containsKey(actualAnalyzer)) { groups.put(actualAnalyzer, new ArrayList<>()); } Float boost = entry.getValue(); boost = boost == null ? Float.valueOf(1.0f) : boost; groups.get(actualAnalyzer).add(new FieldAndFieldType(fieldType, boost)); } else { missing.add(new Tuple<>(name, entry.getValue())); } } List<Query> queries = new ArrayList<>(); for (Tuple<String, Float> tuple : missing) { Query q = parseGroup(type.matchQueryType(), tuple.v1(), tuple.v2(), value, minimumShouldMatch); if (q != null) { queries.add(q); } } for (List<FieldAndFieldType> group : groups.values()) { if (group.size() > 1) { blendedFields = new FieldAndFieldType[group.size()]; int i = 0; for (FieldAndFieldType fieldAndFieldType : group) { blendedFields[i++] = fieldAndFieldType; } } else { blendedFields = null; } /* * We have to pick some field to pass through the superclass so * we just pick the first field. It shouldn't matter because * fields are already grouped by their analyzers/types. */ String representativeField = group.get(0).fieldType.name(); Query q = parseGroup(type.matchQueryType(), representativeField, 1f, value, minimumShouldMatch); if (q != null) { queries.add(q); } } return queries.isEmpty() ? null : queries; } @Override public Query blendTerms(Term[] terms, MappedFieldType fieldType) { if (blendedFields == null || blendedFields.length == 1) { return super.blendTerms(terms, fieldType); } BytesRef[] values = new BytesRef[terms.length]; for (int i = 0; i < terms.length; i++) { values[i] = terms[i].bytes(); } return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, blendedFields); } @Override public Query blendTerm(Term term, MappedFieldType fieldType) { if (blendedFields == null) { return super.blendTerm(term, fieldType); } return MultiMatchQuery.blendTerm(context, term.bytes(), commonTermsCutoff, tieBreaker, blendedFields); } @Override public Query termQuery(MappedFieldType fieldType, Object value) { /* * Use the string value of the term because we're reusing the * portion of the query is usually after the analyzer has run on * each term. We just skip that analyzer phase. */ return blendTerm(new Term(fieldType.name(), value.toString()), fieldType); } } static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... blendedFields) { return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, blendedFields); } static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... blendedFields) { List<Query> queries = new ArrayList<>(); Term[] terms = new Term[blendedFields.length * values.length]; float[] blendedBoost = new float[blendedFields.length * values.length]; int i = 0; for (FieldAndFieldType ft : blendedFields) { for (BytesRef term : values) { Query query; try { query = ft.fieldType.termQuery(term, context); } catch (IllegalArgumentException e) { // the query expects a certain class of values such as numbers // of ip addresses and the value can't be parsed, so ignore this // field continue; } catch (ElasticsearchParseException parseException) { // date fields throw an ElasticsearchParseException with the // underlying IAE as the cause, ignore this field if that is // the case if (parseException.getCause() instanceof IllegalArgumentException) { continue; } throw parseException; } float boost = ft.boost; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; query = bq.getQuery(); boost *= bq.getBoost(); } if (query.getClass() == TermQuery.class) { terms[i] = ((TermQuery) query).getTerm(); blendedBoost[i] = boost; i++; } else { if (boost != 1f) { query = new BoostQuery(query, boost); } queries.add(query); } } } if (i > 0) { terms = Arrays.copyOf(terms, i); blendedBoost = Arrays.copyOf(blendedBoost, i); if (commonTermsCutoff != null) { queries.add(BlendedTermQuery.commonTermsBlendedQuery(terms, blendedBoost, commonTermsCutoff)); } else if (tieBreaker == 1.0f) { queries.add(BlendedTermQuery.booleanBlendedQuery(terms, blendedBoost)); } else { queries.add(BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker)); } } if (queries.size() == 1) { return queries.get(0); } else { // best effort: add clauses that are not term queries so that they have an opportunity to match // however their score contribution will be different // TODO: can we improve this? BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (Query query : queries) { bq.add(query, Occur.SHOULD); } return bq.build(); } } @Override protected Query blendTermQuery(Term term, MappedFieldType fieldType) { if (queryBuilder == null) { return super.blendTermQuery(term, fieldType); } return queryBuilder.blendTerm(term, fieldType); } @Override protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { if (queryBuilder == null) { return super.blendTermsQuery(terms, fieldType); } return queryBuilder.blendTerms(terms, fieldType); } static final class FieldAndFieldType { final MappedFieldType fieldType; final float boost; FieldAndFieldType(MappedFieldType fieldType, float boost) { this.fieldType = Objects.requireNonNull(fieldType); this.boost = boost; } } }