package org.gbif.occurrence.hive.udf;
import org.gbif.api.model.occurrence.Occurrence;
import org.gbif.api.model.occurrence.VerbatimOccurrence;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.occurrence.processor.guice.ApiClientConfiguration;
import org.gbif.occurrence.processor.interpreting.CoordinateInterpreter;
import org.gbif.occurrence.processor.interpreting.LocationInterpreter;
import java.net.URI;
import java.util.Arrays;
import java.util.List;
import com.beust.jcommander.internal.Lists;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Hive UDF that reinterprets location (latitude, longitude, country) from verbatim fields.
 * It is used to test additions or changes to the location interpretation algorithm.
 *
 * <p>The function takes nine arguments (see the {@code @Description} below) and returns a struct
 * with the fields {@code decimallatitude} (double), {@code decimallongitude} (double) and
 * {@code countrycode} (string, ISO 2-letter code). Any of the three fields may be null.
 *
 * <p>The interpreters are created lazily on the first call and then reused, so the API URL of
 * the first evaluated row is the one used for the lifetime of this instance.
 */
@Description(name = "reinterpretLocation", value = "_FUNC_(apiUrl, decimalLatitude, decimalLongitude, verbatimLatitude, verbatimLongitude, verbatimCoordinates, geodeticDatum, country, countrycode)")
public class ReinterpretLocationUDF extends GenericUDF {

  private static final Logger LOG = LoggerFactory.getLogger(ReinterpretLocationUDF.class);

  private static final String FUNCTION_NAME = "reinterpretLocation";
  private static final int ARG_LENGTH = 9;

  // One converter per argument, built in initialize(); each converts its argument to a writable string.
  private ObjectInspectorConverters.Converter[] converters;

  // volatile: this field is read outside the lock in init(), so the write must be safely published.
  private volatile LocationInterpreter locInterpreter;
  private CoordinateInterpreter coordInterpreter;
  private final Object lock = new Object();

  /**
   * Returns the location interpreter, creating it on first use.
   *
   * @param apiWs base URL of the API; only used when the interpreter has not been created yet
   * @return the interpreter shared by all calls on this instance
   */
  public LocationInterpreter getLocInterpreter(URI apiWs) {
    init(apiWs);
    return locInterpreter;
  }

  /**
   * Lazily creates the coordinate and location interpreters, at most once per instance.
   *
   * @param apiWs base URL of the API used to build the API client
   */
  private void init(URI apiWs) {
    if (locInterpreter == null) {
      synchronized (lock) {
        // while we were waiting for the lock, another thread may have instantiated the object
        if (locInterpreter == null) {
          LOG.info("Create new coordinate & location interpreter using API at {}", apiWs);
          ApiClientConfiguration cfg = new ApiClientConfiguration();
          cfg.url = apiWs;
          coordInterpreter = new CoordinateInterpreter(cfg.newApiClient());
          // Assigned last: the volatile write publishes coordInterpreter as well.
          locInterpreter = new LocationInterpreter(coordInterpreter);
        }
      }
    }
  }

  /**
   * Reads the argument at the given position and converts it to a string.
   *
   * @param idx position of the argument
   * @param arguments the deferred arguments passed to {@link #evaluate(DeferredObject[])}
   * @return the converted value, or null if the argument (or its converted form) is null
   * @throws HiveException if the deferred argument cannot be evaluated
   */
  public String getConvertArguments(int idx, DeferredObject[] arguments) throws HiveException {
    // Evaluate the deferred object only once.
    Object raw = arguments[idx].get();
    if (raw == null) {
      return null;
    }
    Object converted = converters[idx].convert(raw);
    return converted == null ? null : converted.toString();
  }

  /**
   * Builds a verbatim occurrence from the arguments, runs the location interpretation and
   * returns the interpreted latitude, longitude and country code.
   *
   * <p>Interpretation is best effort: if the interpreter throws, the failure is logged and the
   * row is built from whatever the occurrence holds at that point.
   *
   * @param arguments the nine arguments listed in the {@code @Description}
   * @return a 3-element list: decimal latitude, decimal longitude, ISO 2-letter country code
   * @throws HiveException if the number of arguments is wrong, the API URL is null, or an
   *         argument cannot be evaluated
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (arguments.length != ARG_LENGTH) {
      throw new HiveException(FUNCTION_NAME + " takes " + ARG_LENGTH + " arguments but got " + arguments.length);
    }

    String apiUrl = getConvertArguments(0, arguments);
    if (apiUrl == null) {
      throw new HiveException(FUNCTION_NAME + " requires a non-null apiUrl as its first argument");
    }
    URI api = URI.create(apiUrl);

    String latitude = getConvertArguments(1, arguments);
    String longitude = getConvertArguments(2, arguments);
    String verbatimLatitude = getConvertArguments(3, arguments);
    String verbatimLongitude = getConvertArguments(4, arguments);
    String verbatimCoordinates = getConvertArguments(5, arguments);
    String geodeticDatum = getConvertArguments(6, arguments);
    String country = getConvertArguments(7, arguments);
    String countryCode = getConvertArguments(8, arguments);

    VerbatimOccurrence verbatim = new VerbatimOccurrence();
    verbatim.setVerbatimField(DwcTerm.decimalLatitude, latitude);
    verbatim.setVerbatimField(DwcTerm.decimalLongitude, longitude);
    verbatim.setVerbatimField(DwcTerm.verbatimLatitude, verbatimLatitude);
    verbatim.setVerbatimField(DwcTerm.verbatimLongitude, verbatimLongitude);
    verbatim.setVerbatimField(DwcTerm.verbatimCoordinates, verbatimCoordinates);
    verbatim.setVerbatimField(DwcTerm.geodeticDatum, geodeticDatum);
    verbatim.setVerbatimField(DwcTerm.country, country);
    verbatim.setVerbatimField(DwcTerm.countryCode, countryCode);

    Occurrence occ = new Occurrence(verbatim);
    try {
      getLocInterpreter(api).interpretLocation(verbatim, occ);
    } catch (Exception e) {
      // From VerbatimOccurrenceInterpreter: these interpreters throw a variety of runtime exceptions
      // but should throw checked exceptions. Keep going with a best-effort result, but do not hide
      // the failure.
      LOG.warn("Location interpretation failed, returning a best-effort result", e);
    }

    String iso2 = occ.getCountry() == null ? null : occ.getCountry().getIso2LetterCode();
    // Order must match the struct fields declared in initialize().
    return Arrays.<Object>asList(occ.getDecimalLatitude(), occ.getDecimalLongitude(), iso2);
  }

  /**
   * Renders the call as {@code reinterpretLocation(arg0, arg1, ...)}.
   *
   * @param strings display strings of the arguments
   * @return the display string of this function call
   */
  @Override
  public String getDisplayString(String[] strings) {
    StringBuilder display = new StringBuilder(FUNCTION_NAME).append('(');
    for (int i = 0; i < strings.length; i++) {
      if (i > 0) {
        display.append(", ");
      }
      display.append(strings[i]);
    }
    return display.append(')').toString();
  }

  /**
   * Validates the number of arguments, prepares a string converter for each of them and
   * declares the returned struct.
   *
   * @param arguments object inspectors of the call arguments
   * @return a struct inspector with fields decimallatitude, decimallongitude and countrycode
   * @throws UDFArgumentException if the function is not called with exactly nine arguments
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != ARG_LENGTH) {
      throw new UDFArgumentException(FUNCTION_NAME + " takes " + ARG_LENGTH + " arguments");
    }

    converters = new ObjectInspectorConverters.Converter[arguments.length];
    for (int i = 0; i < arguments.length; i++) {
      converters[i] = ObjectInspectorConverters
        .getConverter(arguments[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    List<String> fieldNames = Arrays.asList("decimallatitude", "decimallongitude", "countrycode");
    List<ObjectInspector> fieldInspectors = Arrays.<ObjectInspector>asList(
      PrimitiveObjectInspectorFactory.javaDoubleObjectInspector,
      PrimitiveObjectInspectorFactory.javaDoubleObjectInspector,
      PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
  }
}