/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.aggregations.bucket.terms; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import org.elasticsearch.common.io.stream.Streamable; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregations; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.format.ValueFormatter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Map; /** * */ public abstract class InternalTerms<A extends InternalTerms, B extends InternalTerms.Bucket> extends InternalMultiBucketAggregation<A, B> implements Terms, ToXContent, Streamable { protected static final String DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME = "doc_count_error_upper_bound"; protected static final String SUM_OF_OTHER_DOC_COUNTS = "sum_other_doc_count"; public static abstract class Bucket extends Terms.Bucket { long bucketOrd; protected long docCount; protected long docCountError; protected InternalAggregations aggregations; protected boolean showDocCountError; transient final ValueFormatter formatter; protected Bucket(ValueFormatter formatter, boolean showDocCountError) { // for serialization this.showDocCountError = showDocCountError; this.formatter = formatter; } protected Bucket(long docCount, InternalAggregations aggregations, boolean showDocCountError, long docCountError, ValueFormatter formatter) { this(formatter, showDocCountError); this.docCount = docCount; this.aggregations = aggregations; this.docCountError = docCountError; } @Override public long getDocCount() { return docCount; } @Override public long getDocCountError() { if (!showDocCountError) { throw new IllegalStateException("show_terms_doc_count_error is false"); } return docCountError; } @Override public Aggregations getAggregations() { return aggregations; } abstract Bucket newBucket(long docCount, InternalAggregations aggs, long docCountError); public Bucket reduce(List<? extends Bucket> buckets, ReduceContext context) { long docCount = 0; long docCountError = 0; List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size()); for (Bucket bucket : buckets) { docCount += bucket.docCount; if (docCountError != -1) { if (bucket.docCountError == -1) { docCountError = -1; } else { docCountError += bucket.docCountError; } } aggregationsList.add(bucket.aggregations); } InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context); return newBucket(docCount, aggs, docCountError); } } protected Terms.Order order; protected int requiredSize; protected int shardSize; protected long minDocCount; protected List<? extends Bucket> buckets; protected Map<String, Bucket> bucketMap; protected long docCountError; protected boolean showTermDocCountError; protected long otherDocCount; protected InternalTerms() {} // for serialization protected InternalTerms(String name, Terms.Order order, int requiredSize, int shardSize, long minDocCount, List<? extends Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) { super(name, pipelineAggregators, metaData); this.order = order; this.requiredSize = requiredSize; this.shardSize = shardSize; this.minDocCount = minDocCount; this.buckets = buckets; this.showTermDocCountError = showTermDocCountError; this.docCountError = docCountError; this.otherDocCount = otherDocCount; } @Override public List<Terms.Bucket> getBuckets() { Object o = buckets; return (List<Terms.Bucket>) o; } @Override public Terms.Bucket getBucketByKey(String term) { if (bucketMap == null) { bucketMap = Maps.newHashMapWithExpectedSize(buckets.size()); for (Bucket bucket : buckets) { bucketMap.put(bucket.getKeyAsString(), bucket); } } return bucketMap.get(term); } @Override public long getDocCountError() { return docCountError; } @Override public long getSumOfOtherDocCounts() { return otherDocCount; } @Override public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) { Multimap<Object, InternalTerms.Bucket> buckets = ArrayListMultimap.create(); long sumDocCountError = 0; long otherDocCount = 0; InternalTerms<A, B> referenceTerms = null; for (InternalAggregation aggregation : aggregations) { InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation; if (referenceTerms == null && !terms.getClass().equals(UnmappedTerms.class)) { referenceTerms = (InternalTerms<A, B>) aggregation; } if (referenceTerms != null && !referenceTerms.getClass().equals(terms.getClass()) && !terms.getClass().equals(UnmappedTerms.class)) { // control gets into this loop when the same field name against which the query is executed // is of different types in different indices. throw new AggregationExecutionException("Merging/Reducing the aggregations failed " + "when computing the aggregation [ Name: " + referenceTerms.getName() + ", Type: " + referenceTerms.type() + " ]" + " because: " + "the field you gave in the aggregation query " + "existed as two different types " + "in two different indices"); } otherDocCount += terms.getSumOfOtherDocCounts(); final long thisAggDocCountError; if (terms.buckets.size() < this.shardSize || this.order == InternalOrder.TERM_ASC || this.order == InternalOrder.TERM_DESC) { thisAggDocCountError = 0; } else if (InternalOrder.isCountDesc(this.order)) { thisAggDocCountError = terms.buckets.get(terms.buckets.size() - 1).docCount; } else { thisAggDocCountError = -1; } if (sumDocCountError != -1) { if (thisAggDocCountError == -1) { sumDocCountError = -1; } else { sumDocCountError += thisAggDocCountError; } } terms.docCountError = thisAggDocCountError; for (Bucket bucket : terms.buckets) { bucket.docCountError = thisAggDocCountError; buckets.put(bucket.getKey(), bucket); } } final int size = Math.min(requiredSize, buckets.size()); BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(null)); for (Collection<Bucket> l : buckets.asMap().values()) { List<Bucket> sameTermBuckets = (List<Bucket>) l; // cast is ok according to javadocs final Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext); if (b.docCountError != -1) { if (sumDocCountError == -1) { b.docCountError = -1; } else { b.docCountError = sumDocCountError - b.docCountError; } } if (b.docCount >= minDocCount) { Terms.Bucket removed = ordered.insertWithOverflow(b); if (removed != null) { otherDocCount += removed.getDocCount(); } } } Bucket[] list = new Bucket[ordered.size()]; for (int i = ordered.size() - 1; i >= 0; i--) { list[i] = (Bucket) ordered.pop(); } long docCountError; if (sumDocCountError == -1) { docCountError = -1; } else { docCountError = aggregations.size() == 1 ? 0 : sumDocCountError; } return create(name, Arrays.asList(list), docCountError, otherDocCount, this); } protected abstract A create(String name, List<InternalTerms.Bucket> buckets, long docCountError, long otherDocCount, InternalTerms prototype); }