/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udaf.example;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
/**
* The utility class for UDAFMaxN and UDAFMinN.
*/
public final class UDAFExampleMaxMinNUtil {

  /**
   * This class stores the information during an aggregation.
   *
   * Note that this class has to have a public constructor, so that Hive can
   * serialize/deserialize this class using reflection.
   */
  public static class State {
    ArrayList<Double> a; // The N values kept so far, sorted in the evaluator's order
    int n; // This is the N
  }

  /**
   * The base class of the UDAFEvaluator for UDAFMaxN and UDAFMinN.
   * Subclasses only need to implement {@link #getAscending()} to select
   * which end of the value range is retained.
   */
  public abstract static class Evaluator implements UDAFEvaluator {

    private final State state;

    public Evaluator() {
      state = new State();
      init();
    }

    /**
     * Reset the state: the buffer becomes empty and N becomes zero.
     */
    public void init() {
      state.a = new ArrayList<Double>();
      state.n = 0;
    }

    /**
     * Selects the sort order of the internal buffer.
     *
     * @return true to keep the buffer in ascending order and retain the N
     *         smallest values (a min-N aggregate); false to keep it in
     *         descending order and retain the N largest values (a max-N
     *         aggregate).
     */
    protected abstract boolean getAscending();

    /**
     * Iterate through one row of original data.
     * The value is inserted into the buffer if the buffer holds fewer than
     * n values, or if the value beats the last (worst) value currently kept;
     * in the latter case the worst value is dropped afterwards.
     *
     * @param o the value of the current row; null values are ignored
     * @param n the number of values to keep; if it is not positive nothing
     *          is stored
     * @return always true
     */
    public boolean iterate(Double o, int n) {
      boolean ascending = getAscending();
      state.n = n;
      // Nothing can be kept when n <= 0. Without this guard an empty buffer
      // would be indexed at -1 below.
      if (o == null || n <= 0) {
        return true;
      }
      boolean doInsert = state.a.size() < n;
      if (!doInsert) {
        // The buffer is full: the last element is the worst one kept so far.
        double worstKept = state.a.get(state.a.size() - 1);
        doInsert = ascending ? o < worstKept : o > worstKept;
      }
      if (doInsert) {
        binaryInsert(state.a, o, ascending);
        if (state.a.size() > n) {
          state.a.remove(state.a.size() - 1);
        }
      }
      return true;
    }

    /**
     * Get partial aggregation results.
     *
     * @return the current state, or null if no value has been kept
     */
    public State terminatePartial() {
      // This is SQL standard - max_n of zero items should be null.
      return state.a.isEmpty() ? null : state;
    }

    /**
     * Merge a partial aggregation result into this evaluator.
     * The other state's N is adopted, and the two sorted buffers are merged
     * into a new buffer holding at most N values.
     *
     * @param o the partial result to merge; null is ignored
     * @return always true
     */
    public boolean merge(State o) {
      if (o != null) {
        state.n = o.n;
        state.a = sortedMerge(o.a, state.a, getAscending(), o.n);
      }
      return true;
    }

    /**
     * Terminates the max/min N lookup and returns the final result.
     *
     * @return the sorted buffer, or null if no value has been kept
     */
    public ArrayList<Double> terminate() {
      // This is SQL standard - an aggregate over zero items should be null.
      return state.a.isEmpty() ? null : state.a;
    }
  }

  /**
   * Returns a comparator based on whether the order is ascending or not.
   * Has a dummy parameter to make sure generics can infer the type correctly.
   *
   * @param ascending if true the comparator follows the natural ordering,
   *                  otherwise the reversed natural ordering
   * @param dummy     unused; only present to drive type inference
   */
  static <T extends Comparable<T>> Comparator<T> getComparator(final boolean ascending, T dummy) {
    return new Comparator<T>() {
      @Override
      public int compare(T o1, T o2) {
        return ascending ? o1.compareTo(o2) : o2.compareTo(o1);
      }
    };
  }

  /**
   * Insert an element into an ascending/descending array, and keep the order.
   *
   * @param list      the sorted list to insert into; it is modified in place
   * @param value     the element to insert
   * @param ascending
   *          if true, the array is sorted in ascending order,
   *          otherwise it is in descending order.
   */
  static <T extends Comparable<T>> void binaryInsert(List<T> list, T value, boolean ascending) {
    int position = Collections.binarySearch(list, value, getComparator(ascending, (T) null));
    if (position < 0) {
      // Not found: binarySearch returned -(insertion point) - 1.
      position = (-position) - 1;
    }
    list.add(position, value);
  }

  /**
   * Merge two ascending/descending array and keep the first n elements.
   * When the heads of both lists compare equal, the element from a1 is
   * taken first.
   *
   * @param a1        the first sorted list; not modified
   * @param a2        the second sorted list; not modified
   * @param ascending
   *          if true, the array is sorted in ascending order,
   *          otherwise it is in descending order.
   * @param n         the maximum number of elements to return; if it is not
   *                  positive an empty list is returned
   * @return a new list holding at most n elements in the requested order
   */
  static <T extends Comparable<T>> ArrayList<T> sortedMerge(List<T> a1, List<T> a2,
      boolean ascending, int n) {
    Comparator<T> comparator = getComparator(ascending, (T) null);
    int n1 = a1.size();
    int n2 = a2.size();
    int p1 = 0; // The current element in a1
    int p2 = 0; // The current element in a2
    // Never pre-allocate more than can be produced, and never pass a negative capacity.
    int capacity = Math.max(0, Math.min(n, n1 + n2));
    ArrayList<T> output = new ArrayList<T>(capacity);
    while (output.size() < n && (p1 < n1 || p2 < n2)) {
      // "<= 0" makes ties advance (taking from a1) instead of stalling the loop.
      boolean takeFirst = p2 == n2
          || (p1 < n1 && comparator.compare(a1.get(p1), a2.get(p2)) <= 0);
      if (takeFirst) {
        output.add(a1.get(p1++));
      } else {
        output.add(a2.get(p2++));
      }
    }
    return output;
  }

  // No instantiation.
  private UDAFExampleMaxMinNUtil() {
  }
}