/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.udf.generic; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.NUMERIC_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; /** * GenericUDFSubstringIndex. * */ @Description(name = "substring_index", value = "_FUNC_(str, delim, count) - Returns the substring from string str before count occurrences " + "of the delimiter delim.", extended = "If count is positive, everything to the left of the final delimiter (counting from the left) " + "is returned. If count is negative, everything to the right of the final delimiter " + "(counting from the right) is returned. Substring_index performs a case-sensitive match when searching " + "for delim.\n" + "Example:\n > SELECT _FUNC_('www.apache.org', '.', 2);\n 'www.apache'") public class GenericUDFSubstringIndex extends GenericUDF { private transient Converter[] converters = new Converter[3]; private transient PrimitiveCategory[] inputTypes = new PrimitiveCategory[3]; private final Text output = new Text(); private transient String delimConst; private transient boolean isDelimConst; private transient Integer countConst; private transient boolean isCountConst; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { checkArgsSize(arguments, 3, 3); checkArgPrimitive(arguments, 0); checkArgPrimitive(arguments, 1); checkArgPrimitive(arguments, 2); checkArgGroups(arguments, 0, inputTypes, STRING_GROUP); checkArgGroups(arguments, 1, inputTypes, STRING_GROUP); checkArgGroups(arguments, 2, inputTypes, NUMERIC_GROUP); obtainStringConverter(arguments, 0, inputTypes, converters); obtainStringConverter(arguments, 1, inputTypes, converters); obtainIntConverter(arguments, 2, inputTypes, converters); if (arguments[1] instanceof ConstantObjectInspector) { delimConst = getConstantStringValue(arguments, 1); isDelimConst = true; } if (arguments[2] instanceof ConstantObjectInspector) { countConst = getConstantIntValue(arguments, 2); isCountConst = true; } ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; return outputOI; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { // str String str = getStringValue(arguments, 0, converters); if (str == null) { return null; } if (str.length() == 0) { output.set(""); return output; } // delim String delim; if (isDelimConst) { delim = delimConst; } else { delim = getStringValue(arguments, 1, converters); } if (delim == null) { return null; } if (delim.length() == 0) { output.set(""); return output; } // count Integer countV; if (isCountConst) { countV = countConst; } else { countV = getIntValue(arguments, 2, converters); } if (countV == null) { return null; } int count = countV.intValue(); if (count == 0) { output.set(""); return output; } // get substring String res; if (count > 0) { int idx = StringUtils.ordinalIndexOf(str, delim, count); if (idx != -1) { res = str.substring(0, idx); } else { res = str; } } else { int idx = StringUtils.lastOrdinalIndexOf(str, delim, -count); if (idx != -1) { res = str.substring(idx + 1); } else { res = str; } } output.set(res); return output; } @Override public String getDisplayString(String[] children) { return getStandardDisplayString(getFuncName(), children); } @Override protected String getFuncName() { return "substring_index"; } }