package brickhouse.udf.sketch;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import brickhouse.analytics.uniques.SketchSet;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import java.util.ArrayList;
import java.util.List;
/**
 * Hive UDF that carries a sketch set across consecutive rows of a grouping.
 *
 * <p>For every row it returns the min-hash items accumulated from the
 * earlier rows of the same grouping, and then folds the current row's
 * sketch set (a list of strings) into the running state, so that queries
 * can compute things like "previously seen actors".
 *
 * <p>The running state is reset whenever the grouping value differs from
 * the one seen on the previous call, so rows belonging to one grouping must
 * arrive consecutively for the result to be meaningful.
 *
 * <p>NOTE(review): the instance keeps mutable state between calls but is not
 * annotated as a stateful / non-deterministic UDF — confirm whether
 * {@code @UDFType(deterministic = false, stateful = true)} is wanted for the
 * Hive versions in use.
 */
@Description(name = "combine_previous_sketch",
        value = "_FUNC_(grouping, sketch_set) - Returns the sketch set combined from the previous rows seen for the same grouping"
)
public class CombinePreviousSketchUDF extends GenericUDF {
    private static final String USAGE =
            "Usage: combine_previous_sketch takes a grouping string, and a sketch_set";

    /** Inspector for the first argument (the grouping key). */
    private StringObjectInspector groupInspector;
    /** Inspector for the second argument (the sketch set, a list). */
    private ListObjectInspector listInspector;
    /** Inspector for the elements of the sketch set; resolved once in {@link #initialize}. */
    private StringObjectInspector elementInspector;

    /** Grouping value seen on the previous call, or {@code null} before the first call. */
    private String lastGrouping = null;
    /** Sketch accumulated from the earlier rows of the current grouping. */
    private SketchSet prevValue = new SketchSet();

    /**
     * Concatenates two string lists into a new list.
     *
     * <p>Not used by the GenericUDF code path in this class; retained so the
     * public signature stays available to any existing caller.
     *
     * @param l1 first list; may be {@code null} or empty
     * @param l2 second list; may be {@code null} or empty
     * @return a new list holding the elements of {@code l1} followed by those
     *         of {@code l2}; never {@code null}
     */
    public List evaluate(List<String> l1, List<String> l2) {
        List<String> combined = new ArrayList<String>();
        if (l1 != null && l1.size() > 0) {
            combined.addAll(l1);
        }
        if (l2 != null && l2.size() > 0) {
            combined.addAll(l2);
        }
        return combined;
    }

    /**
     * Returns the sketch accumulated before this row, then adds this row's
     * items to the running sketch.
     *
     * @param args {@code args[0]} is the grouping string, {@code args[1]} the
     *             sketch set (list of strings) of the current row
     * @return the min-hash items of the sketch as it was before the current
     *         row's items were added
     * @throws HiveException if a deferred argument cannot be resolved
     */
    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        String grouping = this.groupInspector.getPrimitiveJavaObject(args[0].get());
        // A new (or null) grouping starts over with an empty sketch.
        if (lastGrouping == null || !lastGrouping.equals(grouping)) {
            lastGrouping = grouping;
            prevValue = new SketchSet();
        }

        // Taken BEFORE the current row is folded in, so the result reflects
        // only earlier rows. Assumes getMinHashItems() returns a copy rather
        // than a live view of the sketch — TODO confirm against SketchSet.
        List<String> prevHashItems = prevValue.getMinHashItems();

        List<?> currentItems = listInspector.getList(args[1].get());
        if (currentItems != null) {
            for (Object rawItem : currentItems) {
                String item = elementInspector.getPrimitiveJavaObject(rawItem);
                // A null element has no value to sketch; skip it instead of
                // handing null to SketchSet.
                if (item != null) {
                    prevValue.addItem(item);
                }
            }
        }
        return prevHashItems;
    }

    /**
     * Builds the display form {@code combine_previous_sketch( a,b)} from the
     * argument display strings.
     *
     * @param args display strings of the call's arguments; may be empty
     * @return the function-call display string
     */
    @Override
    public String getDisplayString(String[] args) {
        StringBuilder sb = new StringBuilder("combine_previous_sketch( ");
        for (int i = 0; i < args.length; ++i) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append(args[i]);
        }
        sb.append(")");
        return sb.toString();
    }

    /**
     * Validates the argument types and caches their inspectors.
     *
     * @param args inspectors for the call's arguments: a string grouping and
     *             a list of strings
     * @return a standard list-of-java-string inspector describing the result
     * @throws UDFArgumentException if the argument count is not two, the
     *         first argument is not a string, or the second is not a list of
     *         strings
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        if (args.length != 2) {
            throw new UDFArgumentException(USAGE);
        }

        ObjectInspector first = args[0];
        if ((first.getCategory() != Category.PRIMITIVE)
                || ((PrimitiveObjectInspector) first).getPrimitiveCategory() != PrimitiveCategory.STRING) {
            throw new UDFArgumentException(USAGE);
        }
        groupInspector = (StringObjectInspector) first;

        ObjectInspector second = args[1];
        if (second.getCategory() != Category.LIST) {
            throw new UDFArgumentException(" combine_previous_sketch only takes sketch_sets.");
        }
        listInspector = (ListObjectInspector) second;

        // Reject non-string element types here so a bad call fails with a
        // clear argument error rather than a ClassCastException per row.
        ObjectInspector element = listInspector.getListElementObjectInspector();
        if (!(element instanceof StringObjectInspector)) {
            throw new UDFArgumentException(" combine_previous_sketch only takes sketch_sets.");
        }
        elementInspector = (StringObjectInspector) element;

        return ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }
}