/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tajo.engine.function.string; import org.apache.tajo.catalog.Column; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.NullDatum; import org.apache.tajo.engine.function.GeneralFunction; import org.apache.tajo.engine.function.annotation.Description; import org.apache.tajo.engine.function.annotation.ParamTypes; import org.apache.tajo.storage.Tuple; /** * find_in_set(text,str_array) - Returns the first occurrence of str in str_array where str_array * is a comma-delimited string. * * Returns null if either argument is null. * Returns 0 if the first argument has any commas. * * Example: * SELECT find_in_set('cr','crt,c,cr,c,def') FROM src LIMIT 1;\n" * -> result: 3 */ @Description( functionName = "find_in_set", description = "Returns the first occurrence of str in str_array where str_array is a comma-delimited string", detail = "Returns null if either argument is null.\n" + "Returns 0 if the first argument has any commas.", example = "> SELECT find_in_set('cr','crt,c,cr,c,def');\n" + "3", returnType = TajoDataTypes.Type.INT4, paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.TEXT})} ) public class FindInSet extends GeneralFunction { public FindInSet() { super(new Column[]{ new Column("string", TajoDataTypes.Type.TEXT), new Column("str_array", TajoDataTypes.Type.TEXT) }); } @Override public Datum eval(Tuple params) { Datum finding = params.get(0); Datum textArray = params.get(1); if (finding instanceof NullDatum || textArray instanceof NullDatum) { return NullDatum.get(); } byte[] searchBytes = finding.asByteArray(); // Returns 0 if the first argument has any commas. for (int i = 0; i < finding.size(); i++) { if (searchBytes[i] == ',') { return DatumFactory.createInt4(0); } } byte[] arrayData = textArray.asByteArray(); int findingLength = finding.size(); int posInTextArray = 0; int curLengthOfCandidate = 0; boolean matching = true; for (int i = 0; i < textArray.size(); i++) { if (arrayData[i] == ',') { posInTextArray++; if (matching && curLengthOfCandidate == findingLength) { return DatumFactory.createInt4(posInTextArray); } else { matching = true; curLengthOfCandidate = 0; } } else { if (curLengthOfCandidate + 1 <= findingLength) { if (!matching || searchBytes[curLengthOfCandidate] != arrayData[i]) { matching = false; } } else { matching = false; } curLengthOfCandidate++; } } if (matching && curLengthOfCandidate == findingLength) { posInTextArray++; return DatumFactory.createInt4(posInTextArray); } else { return DatumFactory.createInt4(0); } } }