package brickhouse.udf.date;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.joda.time.DateTime;
import org.joda.time.DurationFieldType;
import org.joda.time.format.DateTimeFormatter;
/**
* DateRange is a UDTF for generating the days from a start date to an end
* date, inclusive.
* <p/>
* This can be useful in cases where one needs a row for every day in the
* range, for example:
* <p/>
* select t1.id, t1.date, coalesce( t2.val, 0.0 ) as val
* from ( select id, rng.date
* from tab1 lateral view date_range( tab1.start_date, tab1.end_date ) rng as date, index ) t1
* left outer join ( select id, date, val from tab2 ) t2
* on ( t1.id = t2.id and t1.date = t2.date );
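* <p/>
* The optional increment, duration type and date format arguments can also be
* supplied. As an illustration only (the table name "some_table" is
* hypothetical), emitting every 7th day in ISO format:
* <p/>
* select rng.date, rng.index
* from some_table lateral view date_range( '2015-01-01', '2015-03-01', 7, 'days', 'yyyy-MM-dd' ) rng as date, index;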
*/
@Description(name = "date_range", value = "_FUNC_(start_date, end_date, increment, duration_type, date_format) - "
+ "Generates one row per date from start_date to end_date, inclusive, stepping by the optional increment (default 1) "
+ "of the optional duration type (default 'days'), parsed and printed with the optional date format (default 'YYYYMMdd')")
public class DateRangeUDTF extends GenericUDTF {
private StringObjectInspector startInspector = null;
private StringObjectInspector endInspector = null;
private IntObjectInspector incrInspector = null;
private StringObjectInspector durationTypeInspector = null;
private StringObjectInspector dateFormatInspector = null;
// Unfortunately Joda-Time doesn't have a method where we can do
// plus(timeUnit, incr), so we map duration-type names to DurationFieldType.
// In Java 8 we could use ChronoUnit, but we don't want to add that
// dependency yet.
static Map<String, DurationFieldType> durationTypes = new HashMap<String, DurationFieldType>();
static {
durationTypes.put("millis", DurationFieldType.millis());
durationTypes.put("seconds", DurationFieldType.seconds());
durationTypes.put("minutes", DurationFieldType.minutes());
durationTypes.put("hours", DurationFieldType.hours());
durationTypes.put("days", DurationFieldType.days());
durationTypes.put("months", DurationFieldType.months());
durationTypes.put("years", DurationFieldType.years());
}
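/**
* Validates the argument ObjectInspectors: start and end dates are required
* strings; the increment (int), duration type (string) and date format
* (string) are optional. Declares an output struct with a string "date"
* column and an int "index" column.
*/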
@Override
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
String usage = "DateRange takes <startdate>, <enddate>, <optional increment>, <optional durationtype>, <optional dateformat>";
if ((argOIs.length < 2) || (argOIs.length > 5)) {
throw new UDFArgumentException(usage);
}
if (!(argOIs[0] instanceof StringObjectInspector)) {
throw new UDFArgumentException(usage);
} else {
this.startInspector = (StringObjectInspector) argOIs[0];
}
if (!(argOIs[1] instanceof StringObjectInspector)) {
throw new UDFArgumentException(usage);
} else {
this.endInspector = (StringObjectInspector) argOIs[1];
}
if (argOIs.length >= 3) {
if (!(argOIs[2] instanceof IntObjectInspector)) {
throw new UDFArgumentException(usage);
} else {
this.incrInspector = (IntObjectInspector) argOIs[2];
}
}
if (argOIs.length >= 4) {
if (!(argOIs[3] instanceof StringObjectInspector)) {
throw new UDFArgumentException(usage);
} else {
this.durationTypeInspector = (StringObjectInspector) argOIs[3];
}
}
if (argOIs.length >= 5) {
if (!(argOIs[4] instanceof StringObjectInspector)) {
throw new UDFArgumentException(usage);
} else {
this.dateFormatInspector = (StringObjectInspector) argOIs[4];
}
}
ArrayList<String> fieldNames = new ArrayList<String>();
fieldNames.add("date");
fieldNames.add("index");
ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
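// Reusable two-element row ( date, index ) handed to forward() for each emitted date.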
private final Object[] forwardListObj = new Object[2];
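/**
* Parses the start and end dates with the configured format, then steps from
* the start date to the end date (inclusive), adding the increment of the
* chosen duration type at each step and forwarding one ( date, index ) row.
*/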
@Override
public void process(Object[] args) throws HiveException {
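// Defaults used when the optional arguments are omitted: increment by 1 day,
// with dates in "YYYYMMdd" format.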
String start = null;
String end = null;
int incr = 1;
DurationFieldType durationType = DurationFieldType.days();
DateTimeFormatter dateFormatter = org.joda.time.format.DateTimeFormat.forPattern("YYYYMMdd");
if (args.length >= 2) {
start = this.startInspector.getPrimitiveJavaObject(args[0]);
end = this.endInspector.getPrimitiveJavaObject(args[1]);
}
if (args.length >= 3) {
incr = this.incrInspector.get(args[2]);
}
if (args.length >= 4) {
String value = this.durationTypeInspector.getPrimitiveJavaObject(args[3]);
if ((value != null) && durationTypes.containsKey(value.toLowerCase())) {
durationType = durationTypes.get(value.toLowerCase());
}
}
if (args.length >= 5) {
dateFormatter = org.joda.time.format.DateTimeFormat.forPattern(this.dateFormatInspector
.getPrimitiveJavaObject(args[4]));
}
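// Walk from start to end, inclusive. A positive increment is assumed; a zero
// or negative increment would never advance past the end date.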
try {
DateTime startDt = dateFormatter.parseDateTime(start);
DateTime endDt = dateFormatter.parseDateTime(end);
int i = 0;
for (DateTime dt = startDt; dt.isBefore(endDt) || dt.isEqual(endDt); dt = dt.withFieldAdded(
durationType, incr), i++) {
this.forwardListObj[0] = dateFormatter.print(dt);
this.forwardListObj[1] = Integer.valueOf(i);
this.forward(this.forwardListObj);
}
} catch (IllegalArgumentException badFormat) {
throw new HiveException("Unable to parse dates; start = " + start + " ; end = " + end, badFormat);
}
}
@Override
public void close() throws HiveException {
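// No per-query state to clean up.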
}
}