/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.aggregations.matrix.stats;

import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.ObjectArray;
import org.elasticsearch.index.fielddata.NumericDoubleValues;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.metrics.MetricsAggregator;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.MultiValuesSource.NumericMultiValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Metric aggregation that accumulates, per bucket, the running statistics used to compute the
 * Pearson product correlation coefficient between multiple numeric fields.
 */
public class MatrixStatsAggregator extends MetricsAggregator {
    /** Wrapper around the configured numeric values sources, addressable by field index and name. */
    final NumericMultiValuesSource valuesSources;

    /**
     * Per-shard array of descriptive stats, indexed by bucket ordinal. Left unassigned (null) when
     * the aggregator was built without any values source.
     */
    ObjectArray<RunningStats> stats;

    /**
     * Creates the aggregator.
     *
     * @param name                name of this aggregation
     * @param valuesSources       numeric values sources keyed by field name; when {@code null} or
     *                            empty the aggregator collects nothing
     * @param context             search context, used to obtain the {@link BigArrays} instance
     * @param parent              parent aggregator, forwarded to the superclass
     * @param multiValueMode      mode handed to the {@link NumericMultiValuesSource}
     * @param pipelineAggregators pipeline aggregators, forwarded to the superclass
     * @param metaData            aggregation metadata, forwarded to the superclass
     * @throws IOException declared by the superclass constructor
     */
    public MatrixStatsAggregator(String name, Map<String, ValuesSource.Numeric> valuesSources, SearchContext context,
                                 Aggregator parent, MultiValueMode multiValueMode, List<PipelineAggregator> pipelineAggregators,
                                 Map<String,Object> metaData) throws IOException {
        super(name, context, parent, pipelineAggregators, metaData);
        if (valuesSources == null || valuesSources.isEmpty()) {
            // nothing to aggregate on: the stats array is deliberately not allocated
            this.valuesSources = null;
        } else {
            this.valuesSources = new NumericMultiValuesSource(valuesSources, multiValueMode);
            stats = context.bigArrays().newObjectArray(1);
        }
    }

    /**
     * @return {@code false} when there is no values source, otherwise whatever the values sources report
     */
    @Override
    public boolean needsScores() {
        return valuesSources != null && valuesSources.needsScores();
    }

    /**
     * Builds the collector for one segment. With no values source a no-op collector is returned;
     * otherwise the returned collector feeds every accepted document into the bucket's
     * {@link RunningStats}.
     */
    @Override
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
                                                final LeafBucketCollector sub) throws IOException {
        if (valuesSources == null) {
            return LeafBucketCollector.NO_OP_COLLECTOR;
        }

        final BigArrays bigArrays = context.bigArrays();

        // one doc-values reader per configured field, in field-index order
        final NumericDoubleValues[] perFieldValues = new NumericDoubleValues[valuesSources.fieldNames().length];
        for (int fieldIdx = 0; fieldIdx < perFieldValues.length; ++fieldIdx) {
            perFieldValues[fieldIdx] = valuesSources.getField(fieldIdx, ctx);
        }

        return new LeafBucketCollectorBase(sub, perFieldValues) {
            final String[] names = valuesSources.fieldNames();
            /** Scratch buffer holding the current document's value for each field. */
            final double[] currentValues = new double[names.length];

            /**
             * Adds the document to the stats of {@code bucket}, provided every field yields a usable value.
             */
            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (!loadDocument(doc)) {
                    return;
                }
                // request room so that index `bucket` can be read and written below
                stats = bigArrays.grow(stats, bucket + 1);
                final RunningStats existing = stats.get(bucket);
                if (existing != null) {
                    existing.add(names, currentValues);
                    return;
                }
                // first accepted document for this bucket seeds its running stats
                stats.set(bucket, new RunningStats(names, currentValues));
            }

            /**
             * Reads the document's value for every field into {@code currentValues}.
             *
             * @return {@code true} if all fields had a value other than negative infinity;
             *         {@code false} as soon as one field has no value or is negative infinity
             */
            private boolean loadDocument(int doc) throws IOException {
                for (int fieldIdx = 0; fieldIdx < currentValues.length; ++fieldIdx) {
                    final NumericDoubleValues reader = perFieldValues[fieldIdx];
                    if (!reader.advanceExact(doc)) {
                        return false;
                    }
                    final double fieldValue = reader.doubleValue();
                    if (fieldValue == Double.NEGATIVE_INFINITY) {
                        // TODO: Fix matrix stats to treat neg inf as any other value
                        return false;
                    }
                    currentValues[fieldIdx] = fieldValue;
                }
                return true;
            }
        };
    }

    /**
     * Builds the result for {@code bucket}, or the empty aggregation when there is no values source
     * or the bucket ordinal lies beyond the stats array.
     */
    @Override
    public InternalAggregation buildAggregation(long bucket) {
        // the null check must come first: stats is unassigned when valuesSources is null
        final boolean nothingCollected = valuesSources == null || bucket >= stats.size();
        if (nothingCollected) {
            return buildEmptyAggregation();
        }
        // NOTE(review): the second argument is the size of the stats array rather than a per-bucket
        // document count -- confirm against InternalMatrixStats that this is what it expects.
        return new InternalMatrixStats(name, stats.size(), stats.get(bucket), null, pipelineAggregators(), metaData());
    }

    /**
     * @return a result carrying a zero count and no stats
     */
    @Override
    public InternalAggregation buildEmptyAggregation() {
        return new InternalMatrixStats(name, 0, null, null, pipelineAggregators(), metaData());
    }

    /** Releases the stats array. */
    @Override
    public void doClose() {
        Releasables.close(stats);
    }
}