/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.function;
import org.diqube.data.column.ColumnType;
import org.diqube.function.aggregate.result.IntermediaryResultValueIterator;
import org.diqube.function.aggregate.result.IntermediaryResultValueSink;
/**
* A function that aggregates values that have been grouped.
*
* <p>
* An aggregate function object is alive during the computation of one group, which means it is intended to hold a
* state. Additionally, it is required to calculate an {@link IntermediaryResult} from time to time (
* {@link #populateIntermediary()}) : The {@link AggregationFunction} is executed on each cluster node, where the
* {@link AggregationFunction} object on each node produces an {@link IntermediaryResult} which is passed on to the
* Query Master node. That node collects the {@link IntermediaryResult}s of all cluster nodes and needs to merge the
* results for equal groups (i.e. a group that had elements not only on one cluster node). This means that the Query
* Master will receive the {@link IntermediaryResult} objects and feed them into another instance of the same
* {@link AggregationFunction} to calculate the final result ({@link #calculate()}).
*
* <p>
* Implementing classes do not need to be thread-safe.
*
* <p>
* An aggregation function can have an optional input column name of a specific {@link ColumnType}. Which input column
* type is supported by a specific class is identified by {@link #getInputType()}. There can be multiple classes with
* the same function name, but different input column types.
*
* <p>
* In addition to that, there might be constant parameters provided to the aggregation function. These are typically the
* first parameters to the function in diql. That constant parameter always has to be of the same type as the input
* col-type to the aggregation function.
*
*
* @param <I>
* Input value type
* @param <M>
* Type of {@link IntermediaryResult} this aggregate calculates.
* @param <O>
* Result type of this function.
*
* @author Bastian Gloeckle
*/
public interface AggregationFunction<I, O> {
/**
* @return Name of the function, lowercase.
*/
public String getNameLowerCase();
/**
* Provide a specific constant parameter value to the function.
*
* @param idx
* Index of the parameter.
* @param value
* The parameter value. The value can be of the following types: {@link Long}, {@link Double}, {@link String}
* .
* @throws FunctionException
* if the value is not supported.
*/
public void provideConstantParameter(int idx, Object value) throws FunctionException;
/**
* Add actual values to the internal state of this function.
*
* <p>
* The values are not provided directly, but by an instance of a {@link ValueProvider}. The implementing class should
* call only that method of the ValueProvider which returns the minimum information needed by the function to proceed.
*
* <p>
* The provided valueProvider carries a flag if the set of values is the "last" set. If that flag is true, the
* {@link AggregationFunction} has to be able to provide its (final) result in both, the {@link #calculate()} and the
* {@link #populateIntermediary(IntermediaryResultValueSink)} functions. This is important for
* {@link AggregationFunction}s that cannot internally handle
* {@link #removeIntermediary(IntermediaryResultValueIterator)} calls nicely - be sure to populate the result data
* when all input data is consumed!
*
* @see #needsActualValues()
*/
public void addValues(ValueProvider<I> valueProvider);
/**
* Add intermediary values to the internal state of this instance.
*
* The values provided by the passed iterator have been created by
* {@link #populateIntermediary(IntermediaryResultValueSink)} of a potentially different instance of this
* {@link AggregationFunction} class before.
*/
public void addIntermediary(IntermediaryResultValueIterator intermediary);
/**
* Remove intermediary values to the internal state of this instance.
*
* <p>
* Note that this function does not have to be supported in an internally meaningful way. If the implementation is not
* capable of removing internal state, it can choose to send updates only after a single instance of the class has
* received all its input data (flag in {@link ValueProvider} at a call to {@link #addValues(ValueProvider)}).
*
* The values provided by the passed iterator have been created by
* {@link #populateIntermediary(IntermediaryResultValueSink)} of a potentially different instance of this
* {@link AggregationFunction} class before.
*/
public void removeIntermediary(IntermediaryResultValueIterator intermediary);
/**
* Populate a given instance of {@link IntermediaryResultValueSink} with the current internal state of this
* {@link AggregationFunction}.
*
* <p>
* Expect a {@link IntermediaryResultValueIterator} with the same value-ordering to be passed to
* {@link #addIntermediary(IntermediaryResultValueSink)} and/or
* {@link #removeIntermediary(IntermediaryResultValueSink)} on different instances of this {@link AggregationFunction}
* later on.
*
* @throws FunctionException
* If the intermediary cannot be calculated.
*/
public void populateIntermediary(IntermediaryResultValueSink res) throws FunctionException;
/**
* Calculate and return the final result.
*
* @throws FunctionException
* If the result cannot be calculated.
*/
public O calculate() throws FunctionException;
/**
* @return data type of the output of this function.
*/
public ColumnType getOutputType();
/**
* If this aggregation function needs an input column, this method returns the type of that input column. If this
* method does not need an input parameter, <code>null</code> must be returned.
*/
public ColumnType getInputType();
/**
* @return true if {@link #addValues(ValueProvider)} will call the {@link ValueProvider#getValues()} method, false if
* not.
*/
public boolean needsActualValues();
/**
* Provides values which the function needs in the {@link AggregationFunction#addValues(ValueProvider)} call.
*/
public static interface ValueProvider<I> {
/**
* Fully resolve the values being provided. This is the most expensive function.
*
* <p>
* This method must only be called by the implementation of {@link AggregationFunction} if
* {@link AggregationFunction#getInputType()} != null, as there needs to be an actual input column to resolve the
* column value IDs of.
*/
public I[] getValues();
/**
* Returns the number of values without resovling the values themselves.
*/
public long size();
/**
* @return <code>true</code> if the provided values are the last ones for this {@link AggregationFunction}, because
* after this set, all data has been processed. {@link AggregationFunction#addValues(ValueProvider)} will
* not be called again.
*/
public boolean isFinalSetOfValues();
}
}