/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.InternalOrder;
import org.elasticsearch.search.aggregations.KeyComparable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * Base class for the results of a {@code terms} aggregation. Holds the state shared by the
 * concrete keyed implementations (ordering, requested size, minimum doc count) and implements
 * the cross-shard reduce, including the doc-count-error bookkeeping.
 *
 * Doc-count-error convention used throughout this class: {@code -1} means the error is
 * unbounded/unknown, {@code 0} means the counts are exact, and a positive value is an upper
 * bound on how far off a reported {@code doc_count} may be.
 */
public abstract class InternalTerms<A extends InternalTerms<A, B>, B extends InternalTerms.Bucket<B>>
        extends InternalMultiBucketAggregation<A, B> implements Terms, ToXContent {

    // Response field names shared by all terms-aggregation implementations.
    protected static final ParseField DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME = new ParseField("doc_count_error_upper_bound");
    protected static final ParseField SUM_OF_OTHER_DOC_COUNTS = new ParseField("sum_other_doc_count");

    /**
     * A single bucket of a terms aggregation: a doc count, an optional per-bucket
     * doc-count error, and the sub-aggregation results. The key itself is stored by the
     * concrete subclass (see {@link #writeTermTo} / {@link #keyToXContent}).
     */
    public abstract static class Bucket<B extends Bucket<B>> extends InternalMultiBucketAggregation.InternalBucket
            implements Terms.Bucket, KeyComparable<B> {
        /**
         * Reads a bucket. Should be a constructor reference.
         */
        @FunctionalInterface
        public interface Reader<B extends Bucket<B>> {
            B read(StreamInput in, DocValueFormat format, boolean showDocCountError) throws IOException;
        }

        // Ordinal used by the collector to tie this bucket back to its per-segment state;
        // never serialized.
        long bucketOrd;

        protected long docCount;
        protected long docCountError;
        protected InternalAggregations aggregations;
        // Both fields below belong to the parent aggregation; they are duplicated on each
        // bucket purely so (de)serialization and rendering can be done bucket-locally.
        protected final boolean showDocCountError;
        protected final DocValueFormat format;

        /**
         * @param docCount          number of documents that fell into this bucket
         * @param aggregations      sub-aggregation results for this bucket
         * @param showDocCountError whether the per-bucket error is tracked (and serialized)
         * @param docCountError     upper bound on the doc count error, or -1 if unbounded
         * @param formatter         format used to render the bucket key
         */
        protected Bucket(long docCount, InternalAggregations aggregations, boolean showDocCountError, long docCountError,
                DocValueFormat formatter) {
            this.showDocCountError = showDocCountError;
            this.format = formatter;
            this.docCount = docCount;
            this.aggregations = aggregations;
            this.docCountError = docCountError;
        }

        /**
         * Read from a stream. Mirrors {@link #writeTo}: the error is only present on the
         * wire when {@code showDocCountError} is set; otherwise it defaults to -1 (unknown).
         */
        protected Bucket(StreamInput in, DocValueFormat formatter, boolean showDocCountError) throws IOException {
            this.showDocCountError = showDocCountError;
            this.format = formatter;
            docCount = in.readVLong();
            docCountError = -1;
            if (showDocCountError) {
                docCountError = in.readLong();
            }
            aggregations = InternalAggregations.readAggregations(in);
        }

        @Override
        public final void writeTo(StreamOutput out) throws IOException {
            out.writeVLong(getDocCount());
            if (showDocCountError) {
                // Plain writeLong (not VLong) because the error may be the negative sentinel -1.
                out.writeLong(docCountError);
            }
            aggregations.writeTo(out);
            writeTermTo(out);
        }

        /** Serializes the subclass-specific bucket key. Counterpart of {@link Reader}. */
        protected abstract void writeTermTo(StreamOutput out) throws IOException;

        @Override
        public long getDocCount() {
            return docCount;
        }

        @Override
        public long getDocCountError() {
            // Guard: the error is only meaningful when the request asked for it.
            if (!showDocCountError) {
                throw new IllegalStateException("show_terms_doc_count_error is false");
            }
            return docCountError;
        }

        @Override
        public Aggregations getAggregations() {
            return aggregations;
        }

        /** Creates a bucket of the concrete type carrying this bucket's key. */
        abstract B newBucket(long docCount, InternalAggregations aggs, long docCountError);

        /**
         * Merges same-key buckets coming from different shards into a single bucket:
         * doc counts and per-bucket errors are summed (the error collapses to -1 as soon
         * as any input is unbounded) and sub-aggregations are reduced recursively.
         */
        public B reduce(List<B> buckets, ReduceContext context) {
            long docCount = 0;
            // For the per term doc count error we add up the errors from the
            // shards that did not respond with the term. To do this we add up
            // the errors from the shards that did respond with the terms and
            // subtract that from the sum of the error from all shards
            long docCountError = 0;
            List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size());
            for (B bucket : buckets) {
                docCount += bucket.docCount;
                if (docCountError != -1) {
                    if (bucket.docCountError == -1) {
                        // One unbounded input makes the merged error unbounded.
                        docCountError = -1;
                    } else {
                        docCountError += bucket.docCountError;
                    }
                }
                aggregationsList.add(bucket.aggregations);
            }
            InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context);
            return newBucket(docCount, aggs, docCountError);
        }

        @Override
        public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject();
            keyToXContent(builder);
            builder.field(CommonFields.DOC_COUNT.getPreferredName(), getDocCount());
            if (showDocCountError) {
                builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME.getPreferredName(), getDocCountError());
            }
            aggregations.toXContentInternal(builder, params);
            builder.endObject();
            return builder;
        }

        /** Renders the subclass-specific bucket key field(s) into the response. */
        protected abstract XContentBuilder keyToXContent(XContentBuilder builder) throws IOException;

        @Override
        public boolean equals(Object obj) {
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Bucket<?> that = (Bucket<?>) obj;
            // No need to take format and showDocCountError, they are attributes
            // of the parent terms aggregation object that are only copied here
            // for serialization purposes
            return Objects.equals(docCount, that.docCount)
                    && Objects.equals(docCountError, that.docCountError)
                    && Objects.equals(aggregations, that.aggregations);
        }

        @Override
        public int hashCode() {
            // getClass() is included so buckets of different concrete types never collide-equal.
            return Objects.hash(getClass(), docCount, docCountError, aggregations);
        }
    }

    protected final BucketOrder order;
    // Number of buckets the final (coordinating) reduce should return.
    protected final int requiredSize;
    // Buckets with fewer docs than this are dropped during the final reduce.
    protected final long minDocCount;

    protected InternalTerms(String name, BucketOrder order, int requiredSize, long minDocCount,
            List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
        super(name, pipelineAggregators, metaData);
        this.order = order;
        this.requiredSize = requiredSize;
        this.minDocCount = minDocCount;
    }

    /**
     * Read from a stream.
     */
    protected InternalTerms(StreamInput in) throws IOException {
        super(in);
        order = InternalOrder.Streams.readOrder(in);
        requiredSize = readSize(in);
        minDocCount = in.readVLong();
    }

    @Override
    protected final void doWriteTo(StreamOutput out) throws IOException {
        // Keep field order in sync with the stream constructor above.
        order.writeTo(out);
        writeSize(requiredSize, out);
        out.writeVLong(minDocCount);
        writeTermTypeInfoTo(out);
    }

    /** Serializes subclass-specific state (buckets, format, error flags, ...). */
    protected abstract void writeTermTypeInfoTo(StreamOutput out) throws IOException;

    @Override
    public abstract List<B> getBuckets();

    @Override
    public abstract B getBucketByKey(String term);

    /**
     * Merges the per-shard terms results: groups buckets by key, reduces each group,
     * keeps the top {@code size} buckets by {@code order}, and computes the aggregate
     * doc-count error and the "other" doc count for everything that was trimmed away.
     */
    @Override
    public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
        Map<Object, List<B>> buckets = new HashMap<>();
        long sumDocCountError = 0;
        long otherDocCount = 0;
        InternalTerms<A, B> referenceTerms = null;
        for (InternalAggregation aggregation : aggregations) {
            @SuppressWarnings("unchecked")
            InternalTerms<A, B> terms = (InternalTerms<A, B>) aggregation;
            // Use the first mapped (non-UnmappedTerms) result as the reference type; any
            // later result of a different concrete type means the field had mixed types.
            if (referenceTerms == null && !aggregation.getClass().equals(UnmappedTerms.class)) {
                referenceTerms = terms;
            }
            if (referenceTerms != null &&
                    !referenceTerms.getClass().equals(terms.getClass()) &&
                    !terms.getClass().equals(UnmappedTerms.class)) {
                // control gets into this loop when the same field name against which the query is executed
                // is of different types in different indices.
                throw new AggregationExecutionException("Merging/Reducing the aggregations failed when computing the aggregation ["
                        + referenceTerms.getName() + "] because the field you gave in the aggregation query existed as two different "
                        + "types in two different indices");
            }
            otherDocCount += terms.getSumOfOtherDocCounts();
            final long thisAggDocCountError;
            if (terms.getBuckets().size() < getShardSize() || InternalOrder.isKeyOrder(order)) {
                // The shard returned every term it saw (or terms are key-ordered), so
                // nothing relevant can have been cut off: the error is exactly 0.
                thisAggDocCountError = 0;
            } else if (InternalOrder.isCountDesc(order)) {
                if (terms.getDocCountError() > 0) {
                    // If there is an existing docCountError for this agg then
                    // use this as the error for this aggregation
                    thisAggDocCountError = terms.getDocCountError();
                } else {
                    // otherwise use the doc count of the last term in the
                    // aggregation
                    thisAggDocCountError = terms.getBuckets().get(terms.getBuckets().size() - 1).docCount;
                }
            } else {
                // Ordered by something other than key or count: no bound can be derived.
                thisAggDocCountError = -1;
            }
            if (sumDocCountError != -1) {
                if (thisAggDocCountError == -1) {
                    sumDocCountError = -1;
                } else {
                    sumDocCountError += thisAggDocCountError;
                }
            }
            setDocCountError(thisAggDocCountError);
            for (B bucket : terms.getBuckets()) {
                // If there is already a doc count error for this bucket
                // subtract this aggs doc count error from it to make the
                // new value for the bucket. This then means that when the
                // final error for the bucket is calculated below we account
                // for the existing error calculated in a previous reduce.
                // Note that if the error is unbounded (-1) this will be fixed
                // later in this method.
                bucket.docCountError -= thisAggDocCountError;
                List<B> bucketList = buckets.get(bucket.getKey());
                if (bucketList == null) {
                    bucketList = new ArrayList<>();
                    buckets.put(bucket.getKey(), bucketList);
                }
                bucketList.add(bucket);
            }
        }
        // Only trim to requiredSize on the final reduce; partial reduces keep every key so
        // no candidate is lost before the coordinating node sees it.
        final int size = reduceContext.isFinalReduce() == false ? buckets.size() : Math.min(requiredSize, buckets.size());
        final BucketPriorityQueue<B> ordered = new BucketPriorityQueue<>(size, order.comparator(null));
        for (List<B> sameTermBuckets : buckets.values()) {
            final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
            if (sumDocCountError == -1) {
                b.docCountError = -1;
            } else {
                // Add the total error back in; combined with the per-shard subtraction above
                // this charges each bucket only for the shards that did NOT report it.
                b.docCountError += sumDocCountError;
            }
            // minDocCount filtering is also deferred to the final reduce.
            if (b.docCount >= minDocCount || reduceContext.isFinalReduce() == false) {
                B removed = ordered.insertWithOverflow(b);
                if (removed != null) {
                    // A bucket pushed out of the top-N still contributes to "other".
                    otherDocCount += removed.getDocCount();
                }
            }
        }
        // Drain the priority queue (min-first) into an array in descending rank order.
        B[] list = createBucketsArray(ordered.size());
        for (int i = ordered.size() - 1; i >= 0; i--) {
            list[i] = ordered.pop();
        }
        long docCountError;
        if (sumDocCountError == -1) {
            docCountError = -1;
        } else {
            // A single source needs no cross-shard error estimate.
            docCountError = aggregations.size() == 1 ? 0 : sumDocCountError;
        }
        return create(name, Arrays.asList(list), docCountError, otherDocCount);
    }

    /** Records the doc-count error computed for one input aggregation during reduce. */
    protected abstract void setDocCountError(long docCountError);

    /** Number of buckets each shard was asked to return (used to detect truncation). */
    protected abstract int getShardSize();

    /** Creates the reduced aggregation of the concrete type from the merged buckets. */
    protected abstract A create(String name, List<B> buckets, long docCountError, long otherDocCount);

    /**
     * Create an array to hold some buckets. Used in collecting the results.
     */
    protected abstract B[] createBucketsArray(int size);

    @Override
    protected boolean doEquals(Object obj) {
        // Superclass doEquals has already checked name/metadata and class identity.
        InternalTerms<?,?> that = (InternalTerms<?,?>) obj;
        return Objects.equals(minDocCount, that.minDocCount)
                && Objects.equals(order, that.order)
                && Objects.equals(requiredSize, that.requiredSize);
    }

    @Override
    protected int doHashCode() {
        return Objects.hash(minDocCount, order, requiredSize);
    }

    /**
     * Renders the response fields common to all terms implementations: the aggregate
     * doc-count error, the "other" doc count, and the bucket array.
     */
    protected static XContentBuilder doXContentCommon(XContentBuilder builder, Params params,
            long docCountError, long otherDocCount, List<? extends Bucket> buckets) throws IOException {
        builder.field(DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME.getPreferredName(), docCountError);
        builder.field(SUM_OF_OTHER_DOC_COUNTS.getPreferredName(), otherDocCount);
        builder.startArray(CommonFields.BUCKETS.getPreferredName());
        for (Bucket bucket : buckets) {
            bucket.toXContent(builder, params);
        }
        builder.endArray();
        return builder;
    }
}