/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.contrib.udaf.example; import java.util.ArrayList; import java.util.Collections; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDAF; import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; /** * This is a simple UDAF that concatenates all arguments from different rows * into a single string. * * It should be very easy to follow and can be used as an example for writing * new UDAFs. * * Note that Hive internally uses a different mechanism (called GenericUDAF) to * implement built-in aggregation functions, which are harder to program but * more efficient. */ @Description(name = "example_group_concat", value = "_FUNC_(col) - Example UDAF that concatenates all arguments from different rows into a single string") public class UDAFExampleGroupConcat extends UDAF { /** * The actual class for doing the aggregation. Hive will automatically look * for all internal classes of the UDAF that implements UDAFEvaluator. */ public static class UDAFExampleGroupConcatEvaluator implements UDAFEvaluator { ArrayList<String> data; public UDAFExampleGroupConcatEvaluator() { super(); data = new ArrayList<String>(); } /** * Reset the state of the aggregation. */ public void init() { data.clear(); } /** * Iterate through one row of original data. * * This UDF accepts arbitrary number of String arguments, so we use * String[]. If it only accepts a single String, then we should use a single * String argument. * * This function should always return true. */ public boolean iterate(String[] o) { if (o != null) { StringBuilder sb = new StringBuilder(); for (String element : o) { sb.append(element); } data.add(sb.toString()); } return true; } /** * Terminate a partial aggregation and return the state. */ public ArrayList<String> terminatePartial() { return data; } /** * Merge with a partial aggregation. * * This function should always have a single argument which has the same * type as the return value of terminatePartial(). * * This function should always return true. */ public boolean merge(ArrayList<String> o) { if (o != null) { data.addAll(o); } return true; } /** * Terminates the aggregation and return the final result. */ public String terminate() { Collections.sort(data); StringBuilder sb = new StringBuilder(); for (int i = 0; i < data.size(); i++) { sb.append(data.get(i)); } return sb.toString(); } } }