/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.pipeline.movavg;
import org.elasticsearch.common.collect.EvictingQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramFactory;
import org.elasticsearch.search.aggregations.pipeline.BucketHelpers.GapPolicy;
import org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.pipeline.movavg.models.MovAvgModel;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import static org.elasticsearch.search.aggregations.pipeline.BucketHelpers.resolveBucketValue;
/**
 * Pipeline aggregator that walks the buckets of a multi-bucket aggregation (which must also be a
 * {@link HistogramFactory}), feeds the value found at the first buckets path into a
 * {@link MovAvgModel}, and attaches the model's output to each bucket as an
 * {@link InternalSimpleValue} named after this aggregator.
 *
 * When {@code predict} is greater than zero, the model's predictions are written after the last
 * bucket that carried a usable value, overwriting trailing buckets where they exist and appending
 * new buckets otherwise.
 */
public class MovAvgPipelineAggregator extends PipelineAggregator {
    private final DocValueFormat formatter;
    private final GapPolicy gapPolicy;
    private final int window;
    // Not final: reduce() replaces this with the model returned by minimize() when minimize is set.
    private MovAvgModel model;
    private final int predict;
    private final boolean minimize;

    /**
     * @param name         name of this aggregator; also used as the name of the values it emits
     * @param bucketsPaths buckets paths; only the first entry is read when resolving bucket values
     * @param formatter    format attached to every emitted {@link InternalSimpleValue}
     * @param gapPolicy    policy handed to {@code resolveBucketValue} when reading bucket values
     * @param window       capacity of the sliding value queue, and size of the train/test sets used when minimizing
     * @param predict      number of predictions requested from the model; {@code 0} disables prediction
     * @param model        moving average model used to compute values and predictions
     * @param minimize     whether the model should be fitted to the data before use
     * @param metadata     metadata passed to the parent {@link PipelineAggregator}
     */
    public MovAvgPipelineAggregator(String name, String[] bucketsPaths, DocValueFormat formatter, GapPolicy gapPolicy,
                                    int window, int predict, MovAvgModel model, boolean minimize, Map<String, Object> metadata) {
        super(name, bucketsPaths, metadata);
        this.formatter = formatter;
        this.gapPolicy = gapPolicy;
        this.window = window;
        this.model = model;
        this.predict = predict;
        this.minimize = minimize;
    }

    /**
     * Read from a stream. Fields are read in the same order {@link #doWriteTo(StreamOutput)} writes them.
     */
    public MovAvgPipelineAggregator(StreamInput in) throws IOException {
        super(in);
        formatter = in.readNamedWriteable(DocValueFormat.class);
        gapPolicy = GapPolicy.readFrom(in);
        window = in.readVInt();
        predict = in.readVInt();
        model = in.readNamedWriteable(MovAvgModel.class);
        minimize = in.readBoolean();
    }

    /**
     * Writes formatter, gap policy, window, predict, model and the minimize flag, in that order.
     */
    @Override
    public void doWriteTo(StreamOutput out) throws IOException {
        out.writeNamedWriteable(formatter);
        gapPolicy.writeTo(out);
        out.writeVInt(window);
        out.writeVInt(predict);
        out.writeNamedWriteable(model);
        out.writeBoolean(minimize);
    }

    @Override
    public String getWriteableName() {
        return MovAvgPipelineAggregationBuilder.NAME;
    }

    /**
     * Builds a new aggregation whose buckets carry the moving average value, followed by any
     * requested predictions.
     *
     * Buckets whose resolved value is {@code null} or NaN are passed through unchanged and do not
     * contribute to the sliding window.
     *
     * @param aggregation   the aggregation to process; it is cast to both
     *                      {@link InternalMultiBucketAggregation} and {@link HistogramFactory}
     * @param reduceContext not used by this implementation
     * @return the aggregation created by the factory from the rewritten bucket list
     */
    @Override
    public InternalAggregation reduce(InternalAggregation aggregation, ReduceContext reduceContext) {
        InternalMultiBucketAggregation<? extends InternalMultiBucketAggregation, ? extends InternalMultiBucketAggregation.InternalBucket>
                histo = (InternalMultiBucketAggregation<? extends InternalMultiBucketAggregation, ? extends
                InternalMultiBucketAggregation.InternalBucket>) aggregation;
        List<? extends InternalMultiBucketAggregation.InternalBucket> buckets = histo.getBuckets();
        HistogramFactory factory = (HistogramFactory) histo;

        List<Bucket> newBuckets = new ArrayList<>();
        EvictingQueue<Double> values = new EvictingQueue<>(this.window);

        Number lastValidKey = 0;
        int lastValidPosition = 0;
        int counter = 0;

        // Do we need to fit the model parameters to the data?
        if (minimize) {
            assert (model.canBeMinimized());
            model = minimize(buckets, histo, model);
        }

        for (InternalMultiBucketAggregation.InternalBucket bucket : buckets) {
            Double thisBucketValue = resolveBucketValue(histo, bucket, bucketsPaths()[0], gapPolicy);

            // Default is to reuse the existing bucket. Simplifies the rest of the logic,
            // since we only change newBucket if we can add to it
            Bucket newBucket = bucket;

            if (isUsable(thisBucketValue)) {
                // Some models (e.g. HoltWinters) have certain preconditions that must be met
                if (model.hasValue(values.size())) {
                    double movavg = model.next(values);
                    List<InternalAggregation> aggs = copyAggregations(bucket);
                    aggs.add(newSimpleValue(movavg));
                    newBucket = factory.createBucket(factory.getKey(bucket), bucket.getDocCount(), new InternalAggregations(aggs));
                }

                // Remember where the last usable value was so predictions can start right after it
                if (predict > 0) {
                    lastValidKey = factory.getKey(bucket);
                    lastValidPosition = counter;
                }

                // The current value only joins the window after the average for this bucket is computed
                values.offer(thisBucketValue);
            }
            counter += 1;
            newBuckets.add(newBucket);
        }

        if (buckets.size() > 0 && predict > 0) {
            double[] predictions = model.predict(values, predict);
            for (int i = 0; i < predictions.length; i++) {
                Number newKey = factory.nextKey(lastValidKey);
                int position = lastValidPosition + i + 1;

                if (position < newBuckets.size()) {
                    Bucket existing = newBuckets.get(position);

                    // Keep the existing sub-aggregations AND doc count so we don't clobber data
                    List<InternalAggregation> aggs = copyAggregations(existing);
                    aggs.add(newSimpleValue(predictions[i]));

                    // Overwrite the existing bucket with the new version
                    newBuckets.set(position, factory.createBucket(newKey, existing.getDocCount(), new InternalAggregations(aggs)));
                } else {
                    // Not seen before, create a fresh bucket holding only the prediction
                    List<InternalAggregation> aggs = new ArrayList<>();
                    aggs.add(newSimpleValue(predictions[i]));

                    // Since this is a new bucket, simply append it
                    newBuckets.add(factory.createBucket(newKey, 0, new InternalAggregations(aggs)));
                }
                lastValidKey = newKey;
            }
        }

        return factory.createAggregation(newBuckets);
    }

    /**
     * Fits the model using the most recent usable values: the last {@code window} values form the
     * test set and the {@code window} values before them form the training set.
     *
     * @return the model produced by the minimizer, or the given model unchanged when there are not
     *         enough usable values to fill both sets
     */
    private MovAvgModel minimize(List<? extends InternalMultiBucketAggregation.InternalBucket> buckets,
                                 MultiBucketsAggregation histo, MovAvgModel model) {
        ListIterator<? extends InternalMultiBucketAggregation.InternalBucket> iter = buckets.listIterator(buckets.size());

        // We have to walk the iterator backwards because we don't know if/how many buckets are empty.
        double[] test = new double[window];
        if (fillFromEnd(iter, histo, test) == false) {
            // Not enough data to fill the test set; keep the model with its starting coefficients
            return model;
        }

        // And do it again for the train set, continuing from where the test set stopped.
        double[] train = new double[window];
        if (fillFromEnd(iter, histo, train) == false) {
            // Not enough data to fill the train set; keep the model with its starting coefficients
            return model;
        }

        // The array is in chronological order, so the queue can now be filled front to back
        EvictingQueue<Double> values = new EvictingQueue<>(this.window);
        for (double v : train) {
            values.add(v);
        }

        return SimulatedAnealingMinimizer.minimize(model, values, test);
    }

    /**
     * Walks {@code iter} backwards, writing usable bucket values into {@code target} from its last
     * slot towards its first, and stops once the array is full or the iterator is exhausted.
     *
     * @return {@code true} if every slot of {@code target} was filled
     */
    private boolean fillFromEnd(ListIterator<? extends InternalMultiBucketAggregation.InternalBucket> iter,
                                MultiBucketsAggregation histo, double[] target) {
        int filled = 0;
        while (iter.hasPrevious() && filled < target.length) {
            Double value = resolveBucketValue(histo, iter.previous(), bucketsPaths()[0], gapPolicy);
            if (isUsable(value)) {
                target[target.length - filled - 1] = value;
                filled += 1;
            }
        }
        return filled == target.length;
    }

    /** Returns {@code true} when the resolved bucket value is neither {@code null} nor NaN. */
    private static boolean isUsable(Double value) {
        return value != null && value.isNaN() == false;
    }

    /** Copies the bucket's sub-aggregations into a new, mutable list. */
    private static List<InternalAggregation> copyAggregations(Bucket bucket) {
        return StreamSupport.stream(bucket.getAggregations().spliterator(), false)
                .map(p -> (InternalAggregation) p)
                .collect(Collectors.toCollection(ArrayList::new));
    }

    /** Wraps a computed or predicted value in an {@link InternalSimpleValue} named after this aggregator. */
    private InternalSimpleValue newSimpleValue(double value) {
        return new InternalSimpleValue(name(), value, formatter, new ArrayList<PipelineAggregator>(), metaData());
    }
}