/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf.generic;
import java.util.ArrayDeque;
import java.util.Deque;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationType;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@Description(name = "max", value = "_FUNC_(expr) - Returns the maximum value of expr")
public class GenericUDAFMax extends AbstractGenericUDAFResolver {
static final Logger LOG = LoggerFactory.getLogger(GenericUDAFMax.class.getName());
/**
 * Resolves the evaluator used to compute {@code max} for the given argument types.
 *
 * @param parameters type information of the call arguments; exactly one is accepted
 * @return a new {@link GenericUDAFMaxEvaluator}
 * @throws SemanticException declared by the signature; this method throws
 *         {@link UDFArgumentTypeException} when the argument count is not one or
 *         when comparison is not supported for the argument's type
 */
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
    throws SemanticException {
  // Index reported in both error cases: position of the last supplied argument.
  final int lastArgIdx = parameters.length - 1;
  if (lastArgIdx != 0) {
    throw new UDFArgumentTypeException(lastArgIdx,
        "Exactly one argument is expected.");
  }

  ObjectInspector argInspector =
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
  boolean comparable = ObjectInspectorUtils.compareSupported(argInspector);
  if (!comparable) {
    throw new UDFArgumentTypeException(lastArgIdx,
        "Cannot support comparison of map<> type or complex type containing map<>.");
  }
  return new GenericUDAFMaxEvaluator();
}
@UDFType(distinctLike=true)
public static class GenericUDAFMaxEvaluator extends GenericUDAFEvaluator {
private transient ObjectInspector inputOI;
private transient ObjectInspector outputOI;
/**
 * Initializes the evaluator: remembers the single input inspector and derives
 * the standard Java-object inspector that describes the values this evaluator
 * produces.
 *
 * @param m the evaluation mode, forwarded to the superclass
 * @param parameters the input inspectors; exactly one is expected (checked by assertion only)
 * @return the output object inspector
 * @throws HiveException declared by the overridden method
 */
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters)
    throws HiveException {
  assert (parameters.length == 1);
  super.init(m, parameters);
  this.inputOI = parameters[0];
  // Values are copied into Java objects because that saves object creation
  // time. On average only about log(N) copies are made, so the cost of the
  // copy itself is not very important.
  this.outputOI = ObjectInspectorUtils.getStandardObjectInspector(
      this.inputOI, ObjectInspectorCopyOption.JAVA);
  return this.outputOI;
}
/**
 * Aggregation buffer that stores the current max value.
 * It is annotated as estimable and supplies its size through {@link #estimate()}.
 */
@AggregationType(estimable = true)
static class MaxAgg extends AbstractAggregationBuffer {
// Current maximum, stored as a standard Java object copy; null while no
// non-null value has been merged into this buffer.
Object o;
// Returns the fixed size estimate for this buffer
// (unit is presumably bytes -- TODO confirm against JavaDataModel).
@Override
public int estimate() {
return JavaDataModel.PRIMITIVES2;
}
}
/**
 * Creates a fresh, empty {@link MaxAgg} buffer.
 *
 * @return a new buffer whose current maximum is {@code null}
 * @throws HiveException declared by the overridden method
 */
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
  return new MaxAgg();
}
/**
 * Clears the buffer so that it no longer holds a maximum.
 *
 * @param agg the buffer to clear; must be a {@link MaxAgg}
 * @throws HiveException declared by the overridden method
 */
@Override
public void reset(AggregationBuffer agg) throws HiveException {
  ((MaxAgg) agg).o = null;
}
boolean warned = false;
/**
 * Folds one input row into the buffer. A raw row is handled exactly like a
 * partial result, so this simply forwards the single argument to
 * {@link #merge(AggregationBuffer, Object)}.
 *
 * @param agg the buffer to update
 * @param parameters the row's arguments; exactly one is expected (checked by assertion only)
 * @throws HiveException if merging fails
 */
@Override
public void iterate(AggregationBuffer agg, Object[] parameters)
    throws HiveException {
  assert (parameters.length == 1);
  Object value = parameters[0];
  merge(agg, value);
}
/**
 * Produces the partial aggregation result, which is the same value as the
 * final result.
 *
 * @param agg the buffer to read
 * @return whatever {@link #terminate(AggregationBuffer)} returns for {@code agg}
 * @throws HiveException if {@code terminate} fails
 */
@Override
public Object terminatePartial(AggregationBuffer agg) throws HiveException {
  // A partial max is just the max seen so far.
  return this.terminate(agg);
}
/**
 * Merges a value (a raw input or a partial maximum) into the buffer.
 *
 * <p>{@code null} values are ignored. If the buffer is still empty, the value
 * becomes the current maximum without any comparison; otherwise it replaces
 * the current maximum only when it compares strictly greater.
 *
 * @param agg the buffer to update; must be a {@link MaxAgg}
 * @param partial the candidate value, read through the input inspector; may be {@code null}
 * @throws HiveException declared by the overridden method
 */
@Override
public void merge(AggregationBuffer agg, Object partial)
    throws HiveException {
  if (partial == null) {
    // Nulls never influence the maximum.
    return;
  }
  MaxAgg myagg = (MaxAgg) agg;
  // Check for an empty buffer first: there is nothing to compare against, so
  // the comparator is only invoked when a current maximum actually exists.
  if (myagg.o == null
      || ObjectInspectorUtils.compare(myagg.o, outputOI, partial, inputOI) < 0) {
    // Store a standard Java copy so the buffer does not hold on to the
    // caller's (possibly reused) object.
    myagg.o = ObjectInspectorUtils.copyToStandardObject(partial, inputOI,
        ObjectInspectorCopyOption.JAVA);
  }
}
/**
 * Returns the final aggregation result.
 *
 * @param agg the buffer to read; must be a {@link MaxAgg}
 * @return the maximum held by the buffer, or {@code null} if it holds none
 * @throws HiveException declared by the overridden method
 */
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
  return ((MaxAgg) agg).o;
}
/**
 * Supplies the evaluator used for windowing: a
 * {@link MaxStreamingFixedWindow} wrapping this evaluator.
 *
 * @param wFrmDef the window frame definition handed to the streaming evaluator
 * @return a new streaming evaluator for the given frame
 */
@Override
public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
  GenericUDAFEvaluator streaming = new MaxStreamingFixedWindow(this, wFrmDef);
  return streaming;
}
}
/*
 * Based on the paper by Daniel Lemire: Streaming Max-Min filter using no more
 * than 3 comparisons per element.
 *
 * 1. His algorithm works on fixed-size windows up to the current row. For row
 *    'i' and window 'w' it computes the min/max for the window (i-w, i).
 * 2. The core idea is to keep a queue of (max, idx) tuples. A tuple in the
 *    queue represents the max value in the range (prev tuple.idx, idx). Using
 *    the queue data structure and the following operations it is easy to see
 *    that maxes can be computed:
 *    - on receiving the ith row: drain the queue from the back of any entries
 *      whose value is less than the ith entry; add the ith value as a tuple
 *      in the queue (i-val, i).
 *    - on the ith step, check if the element at the front of the queue has
 *      reached its max range of influence, i.e. frontTuple.idx + w <= i. If
 *      yes, we can remove it from the queue.
 *    - on the ith step, output the front of the queue as the max for the ith
 *      entry.
 *
 * Here we modify the algorithm:
 * 1. to handle windows that are of the form (i-p, i+f), where p is
 *    numPreceding and f is numFollowing:
 *    - we start outputting rows only after receiving f rows.
 *    - the formula for the 'influence range' of an idx accounts for the
 *      following rows.
 * 2. to optimize for the case when numPreceding is Unbounded. In this case
 *    only 1 max needs to be tracked at any given time.
 */
static class MaxStreamingFixedWindow extends
GenericUDAFStreamingEvaluator<Object> {
/**
 * Streaming state for the max evaluator: the wrapped aggregation buffer plus
 * the chain of candidate maxima.
 */
class State extends GenericUDAFStreamingEvaluator<Object>.StreamingState {
  /** Queue of {value, row index} pairs; the front entry is the current maximum. */
  private final Deque<Object[]> maxChain;

  /**
   * Creates the state around the wrapped evaluator's buffer.
   *
   * @param buf the underlying aggregation buffer
   */
  public State(AggregationBuffer buf) {
    super(buf);
    // With an unbounded start only one maximum is ever kept, otherwise the
    // chain is sized to the window.
    int initialCapacity;
    if (wFrameDef.isStartUnbounded()) {
      initialCapacity = 1;
    } else {
      initialCapacity = wFrameDef.getWindowSize();
    }
    maxChain = new ArrayDeque<Object[]>(initialCapacity);
  }

  /**
   * Estimates the size of this state.
   *
   * @return the estimate, or -1 when the wrapped buffer is not an
   *         {@code AbstractAggregationBuffer}, when it reports -1 itself, or
   *         when the window start is unbounded
   */
  @Override
  public int estimate() {
    if (!(wrappedBuf instanceof AbstractAggregationBuffer)) {
      return -1;
    }
    int underlying = ((AbstractAggregationBuffer) wrappedBuf).estimate();
    if (underlying == -1 || wFrameDef.isStartUnbounded()) {
      return -1;
    }
    /*
     * Size estimate = size needed by the underlying buffer
     *               + size of results  (underlying * window size)
     *               + size of maxChain (underlying * window size)
     *               + 3 * JavaDataModel.PRIMITIVES1
     */
    int wdwSz = wFrameDef.getWindowSize();
    int perWindow = underlying * wdwSz;
    return underlying + perWindow + perWindow
        + (3 * JavaDataModel.PRIMITIVES1);
  }

  /** Empties the chain of maxima, then resets the inherited state. */
  @Override
  protected void reset() {
    maxChain.clear();
    super.reset();
  }
}
/**
 * Creates a streaming evaluator around an existing evaluator for a window frame.
 *
 * @param wrappedEval the evaluator being wrapped; passed to the superclass
 *        constructor (inputOI() and outputOI() cast it to GenericUDAFMaxEvaluator)
 * @param wFrmDef the window frame definition; passed to the superclass constructor
 */
public MaxStreamingFixedWindow(GenericUDAFEvaluator wrappedEval,
WindowFrameDef wFrmDef) {
super(wrappedEval, wFrmDef);
}
/**
 * Creates a new streaming {@link State} wrapping a fresh buffer obtained
 * from the wrapped evaluator.
 *
 * @return the new streaming state
 * @throws HiveException if the wrapped evaluator fails to create its buffer
 */
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
  return new State(wrappedEval.getNewAggregationBuffer());
}
/**
 * Gives access to the input inspector of the wrapped max evaluator.
 *
 * @return the wrapped evaluator's {@code inputOI}
 */
protected ObjectInspector inputOI() {
  GenericUDAFMaxEvaluator maxEval = (GenericUDAFMaxEvaluator) wrappedEval;
  return maxEval.inputOI;
}
/**
 * Gives access to the output inspector of the wrapped max evaluator.
 *
 * @return the wrapped evaluator's {@code outputOI}
 */
protected ObjectInspector outputOI() {
  GenericUDAFMaxEvaluator maxEval = (GenericUDAFMaxEvaluator) wrappedEval;
  return maxEval.outputOI;
}
/**
 * Processes one row in streaming mode: updates the chain of candidate maxima,
 * appends a result when one is ready, and drops the front candidate once the
 * window has moved past it.
 *
 * @param agg the streaming {@link State}
 * @param parameters the row's arguments; only the first is read
 * @throws HiveException declared by the overridden method
 */
@Override
public void iterate(AggregationBuffer agg, Object[] parameters)
    throws HiveException {
  State state = (State) agg;
  Object value = parameters[0];

  // Drain from the back every candidate that the incoming value supersedes.
  while (!state.maxChain.isEmpty()
      && removeLast(value, state.maxChain.getLast()[0])) {
    state.maxChain.removeLast();
  }

  // We need to insert 'null' before processing first row for the case: X preceding and y preceding
  if (state.numRows == 0) {
    int pad = wFrameDef.getEnd().getRelativeOffset();
    while (pad < 0) {
      state.results.add(null);
      pad++;
    }
  }

  /*
   * Add the row to the chain, except in the case of UNBOUNDED preceding:
   * - only 1 max needs to be tracked;
   * - the current max never goes out of range, it can only be replaced by a
   *   larger max (in which case the chain was emptied above).
   */
  if (!wFrameDef.isStartUnbounded() || state.maxChain.isEmpty()) {
    Object stored = null;
    if (value != null) {
      stored = ObjectInspectorUtils.copyToStandardObject(value,
          inputOI(), ObjectInspectorCopyOption.JAVA);
    }
    state.maxChain.addLast(new Object[] { stored, state.numRows });
  }

  if (state.hasResultReady()) {
    state.results.add(state.maxChain.getFirst()[0]);
  }
  state.numRows++;

  // Retire the front candidate once the window has slid past its row index.
  int frontIdx = (Integer) state.maxChain.getFirst()[1];
  boolean frontExpired = !wFrameDef.isStartUnbounded()
      && state.numRows >= frontIdx + wFrameDef.getWindowSize();
  if (frontExpired) {
    state.maxChain.removeFirst();
  }
}
/**
 * Decides whether the entry at the back of the chain should be dropped in
 * favour of the incoming value. For max, that is when the incoming value is
 * greater.
 *
 * @param in the incoming value
 * @param last the value of the entry currently at the back of the chain
 * @return {@code true} if {@code last} should be removed
 */
protected boolean removeLast(Object in, Object last) {
  boolean incomingWins = isGreater(in, last);
  return incomingWins;
}
/**
 * Tells whether {@code in} is strictly greater than {@code last}.
 * A {@code null} {@code in} is never greater; any non-null {@code in} is
 * greater than a {@code null} {@code last}.
 *
 * @param in the incoming value, read through the input inspector
 * @param last the stored value, read through the output inspector
 * @return {@code true} if {@code in} is greater than {@code last}
 */
private boolean isGreater(Object in, Object last) {
  if (in == null) {
    return false;
  }
  return last == null
      || ObjectInspectorUtils.compare(in, inputOI(), last, outputOI()) > 0;
}
/**
 * Finishes streaming: appends the results that are still outstanding once all
 * rows have been seen. Results are added to the state's result list; the
 * method itself always returns {@code null}.
 *
 * @param agg the streaming {@link State}
 * @return always {@code null}
 * @throws HiveException declared by the overridden method
 */
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
  State state = (State) agg;
  Object[] front = state.maxChain.isEmpty() ? null : state.maxChain.getFirst();

  // After all the rows are processed, continue to generate results for the rows that results haven't generated.
  // For the case: X following and Y following, process first Y-X results and then insert X nulls.
  // For the case X preceding and Y following, process Y results.
  for (int i = Math.max(0, wFrameDef.getStart().getRelativeOffset());
      i < wFrameDef.getEnd().getRelativeOffset(); i++) {
    if (state.hasResultReady()) {
      state.results.add(front == null ? null : front[0]);
    }
    state.numRows++;
    if (front == null) {
      continue;
    }
    // Advance to the next candidate once the window has slid past the front one.
    int frontIdx = (Integer) front[1];
    boolean frontExpired = !wFrameDef.isStartUnbounded()
        && state.numRows >= frontIdx + wFrameDef.getWindowSize()
        && !state.maxChain.isEmpty();
    if (frontExpired) {
      state.maxChain.removeFirst();
      front = state.maxChain.isEmpty() ? null : state.maxChain.getFirst();
    }
  }

  // Trailing nulls for a frame that starts after the current row.
  for (int i = 0; i < wFrameDef.getStart().getRelativeOffset(); i++) {
    if (state.hasResultReady()) {
      state.results.add(null);
    }
    state.numRows++;
  }
  return null;
}
/**
 * Not supported by this evaluator.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int getRowsRemainingAfterTerminate() throws HiveException {
throw new UnsupportedOperationException();
}
}
}