/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf.generic;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLength;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
@Description(name = "character_length,char_length",
value = "_FUNC_(str | binary) - Returns the number of characters in str or binary data",
extended = "Example:\n"
+ " > SELECT _FUNC_('안녕하세요') FROM src LIMIT 1;\n" + " 5")
@VectorizedExpressions({StringLength.class})
public class GenericUDFCharacterLength extends GenericUDF {
private final IntWritable result = new IntWritable();
private transient PrimitiveObjectInspector argumentOI;
private transient PrimitiveObjectInspectorConverter.StringConverter stringConverter;
private transient boolean isInputString;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length != 1) {
throw new UDFArgumentLengthException(
"CHARACTER_LENGTH requires 1 argument, got " + arguments.length);
}
if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentException(
"CHARACTER_LENGTH only takes primitive types, got " + argumentOI.getTypeName());
}
argumentOI = (PrimitiveObjectInspector) arguments[0];
stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
PrimitiveObjectInspector.PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
ObjectInspector outputOI = null;
switch (inputType) {
case CHAR:
case VARCHAR:
case STRING:
isInputString = true;
break;
case BINARY:
isInputString = false;
break;
default:
throw new UDFArgumentException(
" CHARACTER_LENGTH() only takes STRING/CHAR/VARCHAR/BINARY types as first argument, got "
+ inputType);
}
outputOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
return outputOI;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
byte[] data = null;
if (isInputString) {
String val = null;
if (arguments[0] != null) {
val = (String) stringConverter.convert(arguments[0].get());
}
if (val == null) {
return null;
}
data = val.getBytes();
} else {
BytesWritable val = null;
if (arguments[0] != null) {
val = (BytesWritable) arguments[0].get();
}
if (val == null) {
return null;
}
data = val.getBytes();
}
int len = 0;
for (int i = 0; i < data.length; i++) {
if (GenericUDFUtils.isUtfStartByte(data[i])) {
len++;
}
}
result.set(len);
return result;
}
@Override
public String getDisplayString(String[] children) {
return getStandardDisplayString("character_length", children);
}
}