package brickhouse.udf.collect;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
/**
* Creates a session id for an index and a time stamp. Default session length is 30 minute = 1800000 milliseconds
*/
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.UUID;
@Description(
name = "sessionize",
value = "_FUNC_(string, timestamp) - Returns a session id for the given id and ts(long). Optional third parameter to specify interval tolerance in milliseconds",
extended = "SELECT _FUNC_(uid, ts), uid, ts, event_type from foo;")
public class SessionizeUDF extends UDF {
private String lastUid = null;
private long lastTS = 0;
private String lastUUID = null;
public String evaluate(String uid, long ts, int tolerance) {
if (uid.equals(lastUid) && timeStampCompare(lastTS, ts, tolerance)) {
lastTS = ts;
} else if (uid.equals(lastUid)) {
lastTS = ts;
lastUUID = UUID.randomUUID().toString();
} else {
lastUid = uid;
lastTS = ts;
lastUUID = UUID.randomUUID().toString();
}
return lastUUID;
}
public String evaluate(String uid, long ts) {
return evaluate(uid, ts, 1800000);
}
private Boolean timeStampCompare(long lastTS, long ts, int ms) {
try {
long difference = ts - lastTS;
return (Math.abs((int) difference) < ms) ? true : false;
} catch (ArithmeticException e) {
return false;
}
}
}