/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.range;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import static java.util.Collections.emptyList;
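/**
 * A range aggregator for binary values. Values are matched through SORTED_SET
 * ordinals when the values source exposes them, and by comparing the raw bytes
 * of sorted binary doc values otherwise.
 */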
public final class BinaryRangeAggregator extends BucketsAggregator {
/**
 * A single named range of binary values.
 * <p>
 * A {@code null} {@code from} sorts before every value and a {@code null}
 * {@code to} sorts after every value, so either bound may be left open.
 */
public static class Range {

    /** Key reported on the bucket built for this range. */
    final String key;
    /** Lower bound, or {@code null} when the range has no lower bound. */
    final BytesRef from;
    /** Upper bound, or {@code null} when the range has no upper bound. */
    final BytesRef to;

    /**
     * @param key  key of the resulting bucket
     * @param from lower bound, may be {@code null}
     * @param to   upper bound, may be {@code null}
     */
    public Range(String key, BytesRef from, BytesRef to) {
        this.to = to;
        this.from = from;
        this.key = key;
    }
}
/**
 * Orders ranges by lower bound first ({@code null} lower bounds come first) and,
 * for equal lower bounds, by upper bound ({@code null} upper bounds come last).
 */
static final Comparator<Range> RANGE_COMPARATOR = (left, right) -> {
    final int byFrom = compare(left.from, right.from, 1);
    // Only fall back to the upper bounds when the lower bounds tie.
    return byFrom != 0 ? byFrom : compare(left.to, right.to, -1);
};
/**
 * Null-aware comparison of two byte sequences.
 *
 * @param a first operand, may be {@code null}
 * @param b second operand, may be {@code null}
 * @param m result returned when only {@code b} is {@code null}; its negation is
 *          returned when only {@code a} is {@code null}. Passing {@code 1} thus makes
 *          {@code null} the smallest value and {@code -1} makes it the greatest.
 * @return {@code 0} when both are {@code null}, otherwise as described above, or
 *         {@code a.compareTo(b)} when neither is {@code null}
 */
private static int compare(BytesRef a, BytesRef b, int m) {
    if (a == null) {
        return b == null ? 0 : -m;
    }
    if (b == null) {
        return m;
    }
    return a.compareTo(b);
}
/** Source of the values to bucket; a {@code null} source makes {@code getLeafCollector} return the no-op collector. */
final ValuesSource.Bytes valuesSource;
/** Format passed on to every bucket and to the resulting {@code InternalBinaryRange}. */
final DocValueFormat format;
/** Flag passed on unchanged to every bucket and to the resulting {@code InternalBinaryRange}. */
final boolean keyed;
/** The configured ranges, sorted with {@code RANGE_COMPARATOR} by the constructor. */
final Range[] ranges;
public BinaryRangeAggregator(String name, AggregatorFactories factories,
ValuesSource.Bytes valuesSource, DocValueFormat format,
List<Range> ranges, boolean keyed, SearchContext context,
Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, factories, context, parent, pipelineAggregators, metaData);
this.valuesSource = valuesSource;
this.format = format;
this.keyed = keyed;
this.ranges = ranges.toArray(new Range[0]);
Arrays.sort(this.ranges, RANGE_COMPARATOR);
}
/**
 * @return {@code true} when the values source is present and needs scores,
 *         otherwise whatever the super implementation reports
 */
@Override
public boolean needsScores() {
    if (valuesSource != null && valuesSource.needsScores()) {
        return true;
    }
    return super.needsScores();
}
/**
 * Builds the per-segment collector.
 * <p>
 * Returns the no-op collector when there is no values source. Otherwise an
 * ordinal-based collector is used when the source exposes ordinals, and a
 * byte-comparing collector is used for any other source. Both forward matches
 * to {@code collectBucket}.
 */
@Override
protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
    if (valuesSource == null) {
        return LeafBucketCollector.NO_OP_COLLECTOR;
    }

    if ((valuesSource instanceof ValuesSource.Bytes.WithOrdinals) == false) {
        // No ordinals available: compare the raw bytes of each value.
        final SortedBinaryDocValues binaryValues = valuesSource.bytesValues(ctx);
        return new SortedBinaryRangeLeafCollector(binaryValues, ranges, sub) {
            @Override
            protected void doCollect(LeafBucketCollector sub, int doc, long bucket)
                    throws IOException {
                collectBucket(sub, doc, bucket);
            }
        };
    }

    // Ordinals available: range bounds are resolved to ordinals once per segment.
    final ValuesSource.Bytes.WithOrdinals withOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
    final SortedSetDocValues ordinalValues = withOrdinals.ordinalsValues(ctx);
    return new SortedSetRangeLeafCollector(ordinalValues, ranges, sub) {
        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            collectBucket(sub, doc, bucket);
        }
    };
}
/**
 * Leaf collector that works on ordinals: every range bound is translated once,
 * at construction time, into an ordinal of the segment's term dictionary, and
 * documents are then bucketed by comparing ordinals only.
 * <p>
 * An empty {@code ranges} array is accepted and yields a collector that never
 * calls {@link #doCollect}.
 */
abstract static class SortedSetRangeLeafCollector extends LeafBucketCollectorBase {

    /**
     * Per-range ordinal bounds, indexed like the sorted {@code ranges} array:
     * {@code froms[i]} is the first ordinal inside range {@code i}, {@code tos[i]}
     * the last ordinal inside it (inclusive), and {@code maxTos[i]} the maximum of
     * {@code tos[0..i]}.
     */
    final long[] froms, tos, maxTos;
    final SortedSetDocValues values;
    final LeafBucketCollector sub;

    /**
     * @param values per-segment ordinal doc values
     * @param ranges ranges sorted according to {@code RANGE_COMPARATOR}; may be empty
     * @param sub    sub collector handed to {@link #doCollect}
     * @throws IllegalArgumentException if {@code ranges} is not sorted
     * @throws IOException              propagated from the term lookups
     */
    SortedSetRangeLeafCollector(SortedSetDocValues values,
            Range[] ranges, LeafBucketCollector sub) throws IOException {
        super(sub, values);
        for (int i = 1; i < ranges.length; ++i) {
            if (RANGE_COMPARATOR.compare(ranges[i-1], ranges[i]) > 0) {
                throw new IllegalArgumentException("Ranges must be sorted");
            }
        }
        this.values = values;
        this.sub = sub;
        froms = new long[ranges.length];
        tos = new long[ranges.length]; // inclusive
        maxTos = new long[ranges.length];
        for (int i = 0; i < ranges.length; ++i) {
            if (ranges[i].from == null) {
                // Open lower bound: start at the very first ordinal.
                froms[i] = 0;
            } else {
                froms[i] = values.lookupTerm(ranges[i].from);
                if (froms[i] < 0) {
                    // Term absent: a negative result encodes (-insertionPoint - 1),
                    // so this recovers the first ordinal greater than the bound.
                    froms[i] = -1 - froms[i];
                }
            }
            if (ranges[i].to == null) {
                // Open upper bound: end at the very last ordinal.
                tos[i] = values.getValueCount() - 1;
            } else {
                long ord = values.lookupTerm(ranges[i].to);
                if (ord < 0) {
                    // Term absent: (insertionPoint - 1) is the last ordinal below the bound.
                    tos[i] = -2 - ord;
                } else {
                    // Term present: the upper bound is exclusive, so stop just before it.
                    tos[i] = ord - 1;
                }
            }
        }
        // Same guard as SortedBinaryRangeLeafCollector: without it an empty ranges
        // array fails with ArrayIndexOutOfBoundsException on maxTos[0].
        if (ranges.length > 0) {
            maxTos[0] = tos[0];
        }
        for (int i = 1; i < tos.length; ++i) {
            maxTos[i] = Math.max(maxTos[i-1], tos[i]);
        }
    }

    /**
     * Collects {@code doc} into every range matched by one of its ordinals.
     * The lower bound returned for one ordinal is reused for the next one, so
     * ranges already ruled out are not examined again. This presumably relies on
     * {@code nextOrd()} returning ordinals in increasing order.
     */
    @Override
    public void collect(int doc, long bucket) throws IOException {
        if (values.advanceExact(doc)) {
            int lo = 0;
            for (long ord = values
                    .nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                    .nextOrd()) {
                lo = collect(doc, ord, bucket, lo);
            }
        }
    }

    /**
     * Collects {@code doc} into every range at index {@code lowBound} or above
     * that contains {@code ord}.
     *
     * @return the index from which the search for the next ordinal should start
     */
    private int collect(int doc, long ord, long bucket, int lowBound) throws IOException {
        int lo = lowBound, hi = froms.length - 1; // all candidates are between these indexes
        int mid = (lo + hi) >>> 1;
        // Find any index whose [froms, maxTos] window contains ord.
        while (lo <= hi) {
            if (ord < froms[mid]) {
                hi = mid - 1;
            } else if (ord > maxTos[mid]) {
                lo = mid + 1;
            } else {
                break;
            }
            mid = (lo + hi) >>> 1;
        }
        if (lo > hi) return lo; // no potential candidate

        // binary search the lower bound
        int startLo = lo, startHi = mid;
        while (startLo <= startHi) {
            final int startMid = (startLo + startHi) >>> 1;
            if (ord > maxTos[startMid]) {
                startLo = startMid + 1;
            } else {
                startHi = startMid - 1;
            }
        }

        // binary search the upper bound
        int endLo = mid, endHi = hi;
        while (endLo <= endHi) {
            final int endMid = (endLo + endHi) >>> 1;
            if (ord < froms[endMid]) {
                endHi = endMid - 1;
            } else {
                endLo = endMid + 1;
            }
        }

        assert startLo == lowBound || ord > maxTos[startLo - 1];
        assert endHi == froms.length - 1 || ord < froms[endHi + 1];

        // Every range in [startLo, endHi] starts at or before ord; maxTos only bounds
        // the running maximum, so each individual upper bound is still checked.
        for (int i = startLo; i <= endHi; ++i) {
            if (ord <= tos[i]) {
                doCollect(sub, doc, bucket * froms.length + i);
            }
        }

        return endHi + 1;
    }

    /**
     * Called once for every (document, range) match.
     *
     * @param sub    the sub collector given to the constructor
     * @param doc    the matching document
     * @param bucket ordinal of the bucket, computed as
     *               {@code owningBucket * numberOfRanges + rangeIndex}
     */
    protected abstract void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException;
}
/**
 * Leaf collector that buckets documents by comparing the raw bytes of each
 * value against the range bounds. A value matches a range when it is greater
 * than or equal to {@code from} and strictly less than {@code to}; {@code null}
 * bounds are treated as open.
 */
abstract static class SortedBinaryRangeLeafCollector extends LeafBucketCollectorBase {

    /** Ranges, sorted according to {@code RANGE_COMPARATOR}. */
    final Range[] ranges;
    /** {@code maxTos[i]} is the greatest upper bound among {@code ranges[0..i]}. */
    final BytesRef[] maxTos;
    final SortedBinaryDocValues values;
    final LeafBucketCollector sub;

    /**
     * @param values per-segment binary doc values
     * @param ranges ranges sorted according to {@code RANGE_COMPARATOR}; may be empty
     * @param sub    sub collector handed to {@link #doCollect}
     * @throws IllegalArgumentException if {@code ranges} is not sorted
     */
    SortedBinaryRangeLeafCollector(SortedBinaryDocValues values,
            Range[] ranges, LeafBucketCollector sub) {
        super(sub, values);
        int idx = 1;
        while (idx < ranges.length) {
            if (RANGE_COMPARATOR.compare(ranges[idx - 1], ranges[idx]) > 0) {
                throw new IllegalArgumentException("Ranges must be sorted");
            }
            idx++;
        }
        this.values = values;
        this.sub = sub;
        this.ranges = ranges;

        // Running maximum of the upper bounds, seeded with the first range.
        final BytesRef[] runningMax = new BytesRef[ranges.length];
        for (int r = 0; r < ranges.length; r++) {
            final BytesRef upper = ranges[r].to;
            if (r == 0 || compare(upper, runningMax[r - 1], -1) >= 0) {
                runningMax[r] = upper;
            } else {
                runningMax[r] = runningMax[r - 1];
            }
        }
        this.maxTos = runningMax;
    }

    /**
     * Collects {@code doc} into every range matched by one of its values.
     * The lower bound returned for one value seeds the search for the next one;
     * this presumably relies on the values of a document arriving in sorted order.
     */
    @Override
    public void collect(int doc, long bucket) throws IOException {
        if (values.advanceExact(doc) == false) {
            return;
        }
        final int count = values.docValueCount();
        int lowBound = 0;
        for (int n = 0; n < count; n++) {
            lowBound = collect(doc, values.nextValue(), bucket, lowBound);
        }
    }

    /**
     * Collects {@code doc} into every range at index {@code lowBound} or above
     * that contains {@code value}.
     *
     * @return the index from which the search for the next value should start
     */
    private int collect(int doc, BytesRef value, long bucket, int lowBound) throws IOException {
        // All candidates lie between these two indexes.
        int low = lowBound;
        int high = ranges.length - 1;
        int pivot = (low + high) >>> 1;
        // Find any index whose [from, maxTo) window contains the value.
        while (low <= high) {
            if (compare(value, ranges[pivot].from, 1) < 0) {
                high = pivot - 1;
            } else if (compare(value, maxTos[pivot], -1) >= 0) {
                low = pivot + 1;
            } else {
                break;
            }
            pivot = (low + high) >>> 1;
        }
        if (low > high) {
            return low; // no potential candidate
        }

        final int first = firstCandidate(value, low, pivot);
        final int last = lastCandidate(value, pivot, high);

        assert first == lowBound || compare(value, maxTos[first - 1], -1) >= 0;
        assert last == ranges.length - 1 || compare(value, ranges[last + 1].from, 1) < 0;

        // maxTos only bounds the running maximum, so each upper bound is still checked.
        for (int r = first; r <= last; r++) {
            if (compare(value, ranges[r].to, -1) < 0) {
                doCollect(sub, doc, bucket * ranges.length + r);
            }
        }
        return last + 1;
    }

    /** Binary search within {@code [low, high]} for the first index whose running maximum upper bound exceeds {@code value}. */
    private int firstCandidate(BytesRef value, int low, int high) {
        while (low <= high) {
            final int probe = (low + high) >>> 1;
            if (compare(value, maxTos[probe], -1) >= 0) {
                low = probe + 1;
            } else {
                high = probe - 1;
            }
        }
        return low;
    }

    /** Binary search within {@code [low, high]} for the last index whose lower bound does not exceed {@code value}. */
    private int lastCandidate(BytesRef value, int low, int high) {
        while (low <= high) {
            final int probe = (low + high) >>> 1;
            if (compare(value, ranges[probe].from, 1) < 0) {
                high = probe - 1;
            } else {
                low = probe + 1;
            }
        }
        return high;
    }

    /**
     * Called once for every (document, range) match.
     *
     * @param sub    the sub collector given to the constructor
     * @param doc    the matching document
     * @param bucket ordinal of the bucket, computed as
     *               {@code owningBucket * numberOfRanges + rangeIndex}
     */
    protected abstract void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException;
}
/**
 * Builds one bucket per configured range, in sorted range order, for the given
 * owning bucket. The ordinal of the bucket for range {@code i} is
 * {@code bucket * ranges.length + i}.
 */
@Override
public InternalAggregation buildAggregation(long bucket) throws IOException {
    final int rangeCount = ranges.length;
    final long firstOrd = bucket * rangeCount;
    final List<InternalBinaryRange.Bucket> buckets = new ArrayList<>(rangeCount);
    for (int idx = 0; idx < rangeCount; idx++) {
        final Range range = ranges[idx];
        final long bucketOrd = firstOrd + idx;
        final InternalBinaryRange.Bucket built = new InternalBinaryRange.Bucket(format, keyed,
                range.key, range.from, range.to,
                bucketDocCount(bucketOrd), bucketAggregations(bucketOrd));
        buckets.add(built);
    }
    return new InternalBinaryRange(name, format, keyed, buckets, pipelineAggregators(), metaData());
}
/** Builds a result that carries the aggregation's format and keyed flag but no buckets. */
@Override
public InternalAggregation buildEmptyAggregation() {
    final List<InternalBinaryRange.Bucket> noBuckets = emptyList();
    return new InternalBinaryRange(name, format, keyed, noBuckets, pipelineAggregators(), metaData());
}
}