// MovAvgPipelineAggregator.java example
//
// Explorer
// elassandra-master
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.aggregations.pipeline.movavg;

import com.google.common.base.Function;
import com.google.common.collect.EvictingQueue;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
import org.elasticsearch.search.aggregations.InternalAggregation.Type;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregator;
import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram;
import org.elasticsearch.search.aggregations.pipeline.BucketHelpers.GapPolicy;
import org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregatorFactory;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregatorStreams;
import org.elasticsearch.search.aggregations.pipeline.movavg.models.MovAvgModel;
import org.elasticsearch.search.aggregations.pipeline.movavg.models.MovAvgModelStreams;
import org.elasticsearch.search.aggregations.support.format.ValueFormatter;
import org.elasticsearch.search.aggregations.support.format.ValueFormatterStreams;
import org.joda.time.DateTime;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

import static org.elasticsearch.common.util.CollectionUtils.eagerTransform;
import static org.elasticsearch.search.aggregations.pipeline.BucketHelpers.resolveBucketValue;

/**
 * Pipeline aggregator registered under the type name {@code moving_avg}.
 *
 * <p>During {@link #reduce} it walks the buckets of a histogram, feeds every usable bucket
 * value into a sliding window of at most {@code window} entries, and asks the configured
 * {@link MovAvgModel} for the next moving-average value. When {@code predict > 0} it
 * additionally asks the model for that many predictions and writes them into the buckets
 * following the last usable one, appending new buckets where none exist yet.
 */
public class MovAvgPipelineAggregator extends PipelineAggregator {

    public final static Type TYPE = new Type("moving_avg");

    /** Stream used to deserialize an aggregator of this type: creates an empty instance and reads its state. */
    public final static PipelineAggregatorStreams.Stream STREAM = new PipelineAggregatorStreams.Stream() {
        @Override
        public MovAvgPipelineAggregator readResult(StreamInput in) throws IOException {
            MovAvgPipelineAggregator result = new MovAvgPipelineAggregator();
            result.readFrom(in);
            return result;
        }
    };

    /** Registers {@link #STREAM} under this aggregator's type. */
    public static void registerStreams() {
        PipelineAggregatorStreams.registerStream(STREAM, TYPE.stream());
    }

    /** Casts each {@link Aggregation} to {@link InternalAggregation}; used when copying a bucket's aggregations. */
    private static final Function<Aggregation, InternalAggregation> FUNCTION = new Function<Aggregation, InternalAggregation>() {
        @Override
        public InternalAggregation apply(Aggregation input) {
            return (InternalAggregation) input;
        }
    };

    // Not final: the no-arg constructor plus doReadFrom() populate these on deserialization.
    private ValueFormatter formatter;
    private GapPolicy gapPolicy;
    private int window;
    private MovAvgModel model;
    private int predict;
    private boolean minimize;

    /** Creates an empty instance whose state is expected to be filled in by {@link #doReadFrom}. */
    public MovAvgPipelineAggregator() {
    }

    /**
     * @param name         name of this aggregation
     * @param bucketsPaths paths to the bucket values; only the first entry is read by this class
     * @param formatter    formatter attached to every moving-average and predicted value
     * @param gapPolicy    policy passed to {@code resolveBucketValue} when reading bucket values
     * @param window       maximum number of values kept in the sliding window
     * @param predict      number of predictions to generate; {@code 0} or less disables prediction
     * @param model        model that computes the moving average and the predictions
     * @param minimize     whether to fit the model parameters to the data before reducing
     * @param metadata     metadata attached to the produced values
     */
    public MovAvgPipelineAggregator(String name, String[] bucketsPaths, ValueFormatter formatter, GapPolicy gapPolicy,
                         int window, int predict, MovAvgModel model, boolean minimize, Map<String, Object> metadata) {
        super(name, bucketsPaths, metadata);
        this.formatter = formatter;
        this.gapPolicy = gapPolicy;
        this.window = window;
        this.model = model;
        this.predict = predict;
        this.minimize = minimize;
    }

    @Override
    public Type type() {
        return TYPE;
    }

    /**
     * Builds a new histogram whose buckets carry the moving-average value (and, if requested,
     * predicted values) in addition to their existing aggregations.
     *
     * @param aggregation   the histogram to process; cast to {@link InternalHistogram}
     * @param reduceContext not used by this implementation
     * @return a histogram created by the input histogram's factory from the rewritten buckets
     * @throws AggregationExecutionException if prediction is enabled and a usable bucket has a key
     *                                       that is neither a {@link Number} nor a {@link DateTime}
     */
    @Override
    public InternalAggregation reduce(InternalAggregation aggregation, ReduceContext reduceContext) {
        InternalHistogram histo = (InternalHistogram) aggregation;
        List<? extends InternalHistogram.Bucket> buckets = histo.getBuckets();
        InternalHistogram.Factory<? extends InternalHistogram.Bucket> factory = histo.getFactory();

        // Deliberately raw: the factory's bucket type is a wildcard capture, so a typed list
        // could not be handed back to factory.create(...) below.
        List newBuckets = new ArrayList<>();
        EvictingQueue<Double> values = EvictingQueue.create(this.window);

        long lastValidKey = 0;
        int lastValidPosition = 0;
        int counter = 0;

        // Do we need to fit the model parameters to the data?
        if (minimize) {
            assert (model.canBeMinimized());
            // NOTE(review): this stores the fitted model in the field, so it is also the starting
            // model for any later reduce() call on this instance — confirm that is intended.
            model = minimize(buckets, histo, model);
        }

        for (InternalHistogram.Bucket bucket : buckets) {
            Double thisBucketValue = resolveBucketValue(histo, bucket, bucketsPaths()[0], gapPolicy);

            // Default is to reuse existing bucket.  Simplifies the rest of the logic,
            // since we only change newBucket if we can add to it
            InternalHistogram.Bucket newBucket = bucket;

            if (isUsable(thisBucketValue)) {

                // Some models (e.g. HoltWinters) have certain preconditions that must be met
                if (model.hasValue(values.size())) {
                    double movavg = model.next(values);

                    List<InternalAggregation> aggs = copyAggregations(bucket);
                    aggs.add(new InternalSimpleValue(name(), movavg, formatter, new ArrayList<PipelineAggregator>(), metaData()));
                    newBucket = factory.createBucket(bucket.getKey(), bucket.getDocCount(), new InternalAggregations(
                            aggs), bucket.getKeyed(), bucket.getFormatter());
                }

                if (predict > 0) {
                    // Remember where the last usable value was; predictions start right after it.
                    if (bucket.getKey() instanceof Number) {
                        lastValidKey = ((Number) bucket.getKey()).longValue();
                    } else if (bucket.getKey() instanceof DateTime) {
                        lastValidKey = ((DateTime) bucket.getKey()).getMillis();
                    } else {
                        // Report the offending key itself, not the previously remembered one.
                        throw new AggregationExecutionException("Expected key of type Number or DateTime but got ["
                                + bucket.getKey() + "]");
                    }
                    lastValidPosition = counter;
                }

                // The current value enters the window only after the average was computed,
                // so each average is based on the values preceding this bucket.
                values.offer(thisBucketValue);
            }
            counter += 1;
            newBuckets.add(newBucket);

        }

        if (buckets.size() > 0 && predict > 0) {

            // Bucket-level settings are taken from the first bucket and reused for every
            // predicted bucket. Named so they do not shadow the aggregator's value formatter.
            boolean keyed = buckets.get(0).getKeyed();
            ValueFormatter bucketFormatter = buckets.get(0).getFormatter();

            double[] predictions = model.predict(values, predict);
            for (int i = 0; i < predictions.length; i++) {

                long newKey = histo.getRounding().nextRoundingValue(lastValidKey);
                int position = lastValidPosition + i + 1;

                // Predicted values use the aggregator's own formatter, exactly like the
                // moving-average values produced in the loop above.
                InternalSimpleValue prediction = new InternalSimpleValue(name(), predictions[i], formatter,
                        new ArrayList<PipelineAggregator>(), metaData());

                if (position < newBuckets.size()) {
                    InternalHistogram.Bucket bucket = (InternalHistogram.Bucket) newBuckets.get(position);

                    // Get the existing aggs in the bucket so we don't clobber data
                    List<InternalAggregation> aggs = copyAggregations(bucket);
                    aggs.add(prediction);

                    // Keep the existing doc count as well, rather than resetting it to zero.
                    InternalHistogram.Bucket newBucket = factory.createBucket(newKey, bucket.getDocCount(),
                            new InternalAggregations(aggs), keyed, bucketFormatter);

                    // Overwrite the existing bucket with the new version
                    newBuckets.set(position, newBucket);

                } else {
                    // Not seen before, create fresh
                    List<InternalAggregation> aggs = new ArrayList<>();
                    aggs.add(prediction);

                    InternalHistogram.Bucket newBucket = factory.createBucket(newKey, 0, new InternalAggregations(
                            aggs), keyed, bucketFormatter);

                    // Since this is a new bucket, simply append it
                    newBuckets.add(newBucket);
                }
                lastValidKey = newKey;
            }
        }

        return factory.create(newBuckets, histo);
    }

    /** Returns {@code true} when a resolved bucket value is present and not NaN. */
    private static boolean isUsable(Double value) {
        return value != null && !value.isNaN();
    }

    /** Returns a new mutable list holding the bucket's aggregations cast to {@link InternalAggregation}. */
    private static List<InternalAggregation> copyAggregations(InternalHistogram.Bucket bucket) {
        return new ArrayList<>(eagerTransform(bucket.getAggregations().asList(), FUNCTION));
    }

    /**
     * Fits the model parameters using the most recent usable bucket values.
     *
     * <p>Walking backwards from the last bucket, the newest {@code window} usable values form the
     * test set and the {@code window} usable values before them form the training set. If either
     * set cannot be filled, the given model is returned unchanged.
     *
     * @param buckets buckets of the histogram being reduced
     * @param histo   the histogram, needed to resolve bucket values
     * @param model   the model to fit
     * @return the model returned by {@code SimulatedAnealingMinimizer.minimize}, or {@code model}
     *         itself when there is not enough data
     */
    private MovAvgModel minimize(List<? extends InternalHistogram.Bucket> buckets, InternalHistogram histo, MovAvgModel model) {

        int counter = 0;
        EvictingQueue<Double> values = EvictingQueue.create(window);

        double[] test = new double[window];
        ListIterator<? extends InternalHistogram.Bucket> iter = buckets.listIterator(buckets.size());

        // We have to walk the iterator backwards because we don't know if/how many buckets are empty.
        while (iter.hasPrevious() && counter < window) {

            Double thisBucketValue = resolveBucketValue(histo, iter.previous(), bucketsPaths()[0], gapPolicy);

            if (isUsable(thisBucketValue)) {
                // Fill from the end so the array ends up in chronological order.
                test[window - counter - 1] = thisBucketValue;
                counter += 1;
            }
        }

        // If we didn't fill the test set, we don't have enough data to minimize.
        // Just return the model with the starting coef
        if (counter < window) {
            return model;
        }

        // And do it again, for the train set.  Unfortunately we have to fill an array and then
        // fill an evicting queue backwards :(

        counter = 0;
        double[] train = new double[window];

        // The iterator continues from where the test-set walk stopped.
        while (iter.hasPrevious() && counter < window) {

            Double thisBucketValue = resolveBucketValue(histo, iter.previous(), bucketsPaths()[0], gapPolicy);

            if (isUsable(thisBucketValue)) {
                train[window - counter - 1] = thisBucketValue;
                counter += 1;
            }
        }

        // If we didn't fill the train set, we don't have enough data to minimize.
        // Just return the model with the starting coef
        if (counter < window) {
            return model;
        }

        for (double v : train) {
            values.add(v);
        }

        return SimulatedAnealingMinimizer.minimize(model, values, test);
    }

    /** Reads the aggregator state; the field order must mirror {@link #doWriteTo}. */
    @Override
    public void doReadFrom(StreamInput in) throws IOException {
        formatter = ValueFormatterStreams.readOptional(in);
        gapPolicy = GapPolicy.readFrom(in);
        window = in.readVInt();
        predict = in.readVInt();
        model = MovAvgModelStreams.read(in);
        minimize = in.readBoolean();

    }

    /** Writes the aggregator state; the field order must mirror {@link #doReadFrom}. */
    @Override
    public void doWriteTo(StreamOutput out) throws IOException {
        ValueFormatterStreams.writeOptional(formatter, out);
        gapPolicy.writeTo(out);
        out.writeVInt(window);
        out.writeVInt(predict);
        model.writeTo(out);
        out.writeBoolean(minimize);

    }

    /** Factory that validates the aggregation's placement and creates {@link MovAvgPipelineAggregator} instances. */
    public static class Factory extends PipelineAggregatorFactory {

        private final ValueFormatter formatter;
        private final GapPolicy gapPolicy;
        private final int window;
        private final MovAvgModel model;
        private final int predict;
        private final boolean minimize;

        public Factory(String name, String[] bucketsPaths, ValueFormatter formatter, GapPolicy gapPolicy,
                       int window, int predict, MovAvgModel model, boolean minimize) {
            super(name, TYPE.name(), bucketsPaths);
            this.formatter = formatter;
            this.gapPolicy = gapPolicy;
            this.window = window;
            this.model = model;
            this.predict = predict;
            this.minimize = minimize;
        }

        @Override
        protected PipelineAggregator createInternal(Map<String, Object> metaData) throws IOException {
            return new MovAvgPipelineAggregator(name, bucketsPaths, formatter, gapPolicy, window, predict, model, minimize, metaData);
        }

        /**
         * Checks that exactly one buckets path is configured and that the parent is a histogram
         * factory whose {@code min_doc_count} is 0.
         *
         * @throws IllegalStateException if any of these conditions is violated
         */
        @Override
        public void doValidate(AggregatorFactory parent, AggregatorFactory[] aggFactories,
                List<PipelineAggregatorFactory> pipelineAggregatorFactories) {
            if (bucketsPaths.length != 1) {
                throw new IllegalStateException(PipelineAggregator.Parser.BUCKETS_PATH.getPreferredName()
                        + " must contain a single entry for aggregation [" + name + "]");
            }
            if (!(parent instanceof HistogramAggregator.Factory)) {
                throw new IllegalStateException("moving average aggregation [" + name
                        + "] must have a histogram or date_histogram as parent");
            } else {
                HistogramAggregator.Factory histoParent = (HistogramAggregator.Factory) parent;
                if (histoParent.minDocCount() != 0) {
                    throw new IllegalStateException("parent histogram of moving average aggregation [" + name
                            + "] must have min_doc_count of 0");
                }
            }
        }

    }
}