/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.search.aggregations.*; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import java.io.IOException; import java.util.List; import java.util.Map; /** * */ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> { public enum ExecutionMode { MAP(new ParseField("map")) { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(); return new StringTermsAggregator(name, factories, valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); } @Override boolean needsGlobalOrdinals() { return false; } }, GLOBAL_ORDINALS(new ParseField("global_ordinals")) { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); } @Override boolean needsGlobalOrdinals() { return true; } }, GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); } @Override boolean needsGlobalOrdinals() { return true; } }, GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { if (includeExclude != null || factories.count() > 0 // we need the FieldData impl to be able to extract the // segment to global ord mapping || valuesSource.getClass() != ValuesSource.Bytes.FieldData.class) { return GLOBAL_ORDINALS.create(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); } return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, bucketCountThresholds, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); } @Override boolean needsGlobalOrdinals() { return true; } }; public static ExecutionMode fromString(String value, ParseFieldMatcher parseFieldMatcher) { for (ExecutionMode mode : values()) { if (parseFieldMatcher.match(value, mode.parseField)) { return mode; } } throw new IllegalArgumentException("Unknown `execution_hint`: [" + value + "], expected any of " + values()); } private final ParseField parseField; ExecutionMode(ParseField parseField) { this.parseField = parseField; } abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException; abstract boolean needsGlobalOrdinals(); @Override public String toString() { return parseField.getPreferredName(); } } private final Terms.Order order; private final IncludeExclude includeExclude; private final String executionHint; private final SubAggCollectionMode collectMode; private final TermsAggregator.BucketCountThresholds bucketCountThresholds; private final boolean showTermDocCountError; public TermsAggregatorFactory(String name, ValuesSourceConfig config, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, String executionHint, SubAggCollectionMode executionMode, boolean showTermDocCountError) { super(name, StringTerms.TYPE.name(), config); this.order = order; this.includeExclude = includeExclude; this.executionHint = executionHint; this.bucketCountThresholds = bucketCountThresholds; this.collectMode = executionMode; this.showTermDocCountError = showTermDocCountError; } @Override protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { final InternalAggregation aggregation = new UnmappedTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators, metaData); return new NonCollectingAggregator(name, aggregationContext, parent, factories, pipelineAggregators, metaData) { { // even in the case of an unmapped aggregator, validate the order InternalOrder.validate(order, this); } @Override public InternalAggregation buildEmptyAggregation() { return aggregation; } }; } @Override protected Aggregator doCreateInternal(ValuesSource valuesSource, AggregationContext aggregationContext, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { if (collectsFromSingleBucket == false) { return asMultiBucketAggregator(this, aggregationContext, parent); } if (valuesSource instanceof ValuesSource.Bytes) { ExecutionMode execution = null; if (executionHint != null) { execution = ExecutionMode.fromString(executionHint, aggregationContext.searchContext().parseFieldMatcher()); } // In some cases, using ordinals is just not supported: override it if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) { execution = ExecutionMode.MAP; } final long maxOrd; final double ratio; if (execution == null || execution.needsGlobalOrdinals()) { ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource; IndexSearcher indexSearcher = aggregationContext.searchContext().searcher(); maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher); ratio = maxOrd / ((double) indexSearcher.getIndexReader().numDocs()); } else { maxOrd = -1; ratio = -1; } // Let's try to use a good default if (execution == null) { // if there is a parent bucket aggregator the number of instances of this aggregator is going // to be unbounded and most instances may only aggregate few documents, so use hashed based // global ordinals to keep the bucket ords dense. if (Aggregator.descendsFromBucketAggregator(parent)) { execution = ExecutionMode.GLOBAL_ORDINALS_HASH; } else { if (factories == AggregatorFactories.EMPTY) { if (ratio <= 0.5 && maxOrd <= 2048) { // 0.5: At least we need reduce the number of global ordinals look-ups by half // 2048: GLOBAL_ORDINALS_LOW_CARDINALITY has additional memory usage, which directly linked to maxOrd, so we need to limit. execution = ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY; } else { execution = ExecutionMode.GLOBAL_ORDINALS; } } else { execution = ExecutionMode.GLOBAL_ORDINALS; } } } return execution.create(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectMode, showTermDocCountError, pipelineAggregators, metaData); } if ((includeExclude != null) && (includeExclude.isRegexBased())) { throw new AggregationExecutionException( "Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields"); } if (valuesSource instanceof ValuesSource.Numeric) { IncludeExclude.LongFilter longFilter = null; if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { if (includeExclude != null) { longFilter = includeExclude.convertToDoubleFilter(); } return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order, bucketCountThresholds, aggregationContext, parent, collectMode, showTermDocCountError, longFilter, pipelineAggregators, metaData); } if (includeExclude != null) { longFilter = includeExclude.convertToLongFilter(); } return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order, bucketCountThresholds, aggregationContext, parent, collectMode, showTermDocCountError, longFilter, pipelineAggregators, metaData); } throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields."); } }