/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hive.udf;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

import java.util.HashMap;

/**
 * Collect (key, value) string pairs from a series of rows into a single map.
 *
 * Rows whose key or value is null are skipped. When the same key is seen
 * more than once, the value stored last replaces the earlier one.
 */
public final class UDAFCollectMap extends UDAF {

  /**
   * Holder for the partial aggregation result.
   *
   * Note that this is only needed if the internal state cannot be represented
   * by a primitive.
   *
   * The internal state can also contain fields with types like
   * ArrayList<String> and HashMap<String,Double> if needed.
   */
  public static class UDAFCollectMapState {
    /** Key/value pairs collected so far. */
    private HashMap<String, String> elements;
  }

  /**
   * The actual class for doing the aggregation. Hive will automatically look
   * for all internal classes of the UDAF that implement UDAFEvaluator.
   */
  public static class UDAFCollectMapEvaluator implements UDAFEvaluator {

    UDAFCollectMapState state;

    /**
     * Create an evaluator that starts with an empty map.
     */
    public UDAFCollectMapEvaluator() {
      super();
      state = new UDAFCollectMapState();
      init();
    }

    /**
     * Reset the state of the aggregation.
     *
     * A fresh map is allocated rather than clearing the old one, so a map
     * previously handed out by {@link #terminate()} is not modified.
     */
    public void init() {
      state.elements = new HashMap<String, String>();
    }

    /**
     * Iterate through one row of original data.
     *
     * The number and type of arguments need to be the same as when we call
     * this UDAF from the Hive command line.
     *
     * This function should always return true.
     *
     * @param key map key for this row; the row is ignored when null
     * @param val map value for this row; the row is ignored when null
     * @return always true
     */
    public boolean iterate(String key, String val) {
      if (key != null && val != null) {
        state.elements.put(key, val);
      }
      return true;
    }

    /**
     * Terminate a partial aggregation and return the state. If the state is a
     * primitive, just return primitive Java classes like Integer or String.
     *
     * @return the current state, or null if no pairs have been collected
     */
    public UDAFCollectMapState terminatePartial() {
      // Return null if we have no data.
      if (state.elements.isEmpty()) {
        return null;
      }
      return state;
    }

    /**
     * Merge with a partial aggregation.
     *
     * This function should always have a single argument which has the same
     * type as the return value of terminatePartial().
     *
     * Entries from the partial state overwrite entries already held under
     * the same key.
     *
     * @param o partial state to fold in; may be null when the partial
     *          aggregation had no data
     * @return always true
     */
    public boolean merge(UDAFCollectMapState o) {
      // Guard the map as well as the holder: putAll(null) would throw.
      if (o != null && o.elements != null) {
        state.elements.putAll(o.elements);
      }
      return true;
    }

    /**
     * Terminate the aggregation and return the final result.
     *
     * @return the collected map; empty (not null) when nothing was collected
     */
    public HashMap<String, String> terminate() {
      return state.elements;
    }
  }

  private UDAFCollectMap() {
    // prevent instantiation
  }
}