/* * Licensed to CRATE Technology GmbH ("Crate") under one or more contributor * license agreements. See the NOTICE file distributed with this work for * additional information regarding copyright ownership. Crate licenses * this file to you under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * However, if you have executed another commercial license agreement * with Crate these terms will supersede the license and you may use the * software solely pursuant to the terms of the relevant commercial agreement. */ package io.crate.operation.scalar; import com.google.common.annotations.VisibleForTesting; import io.crate.metadata.*; import io.crate.data.Input; import io.crate.types.DataType; import io.crate.types.DataTypes; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.BytesRefs; import javax.annotation.Nonnull; import java.util.List; public class SubstrFunction extends Scalar<BytesRef, Object> { public static final String NAME = "substr"; private static final BytesRef EMPTY_BYTES_REF = new BytesRef(""); private FunctionInfo info; private SubstrFunction(FunctionInfo info) { this.info = info; } public static void register(ScalarFunctionModule module) { module.register(NAME, new Resolver()); } @Override public FunctionInfo info() { return info; } @Override public BytesRef evaluate(Input[] args) { assert args.length == 2 || args.length == 3 : "number of arguments must be 2 or 3"; final Object val = args[0].value(); if (val == null) { return null; } Number beginIdx = (Number) args[1].value(); if (beginIdx == null) { return null; } if (args.length == 3) { Number len = (Number) args[2].value(); if (len == null) { return null; } return evaluate(BytesRefs.toBytesRef(val), (beginIdx).intValue(), len.intValue()); } return evaluate(BytesRefs.toBytesRef(val), (beginIdx).intValue()); } private static BytesRef evaluate(@Nonnull BytesRef inputStr, int beginIdx) { final int startPos = Math.max(0, beginIdx - 1); if (startPos > inputStr.length - 1) { return EMPTY_BYTES_REF; } int endPos = inputStr.length; return substring(inputStr, startPos, endPos); } @VisibleForTesting static BytesRef evaluate(@Nonnull BytesRef inputStr, int beginIdx, int len) { final int startPos = Math.max(0, beginIdx - 1); if (startPos > inputStr.length - 1) { return EMPTY_BYTES_REF; } int endPos = inputStr.length; if (startPos + len < endPos) { endPos = startPos + len; } return substring(inputStr, startPos, endPos); } @VisibleForTesting static BytesRef substring(BytesRef utf8, int begin, int end) { int pos = utf8.offset; final int limit = pos + utf8.length; final byte[] bytes = utf8.bytes; int posBegin = pos; int codePointCount = 0; for (; pos < limit; codePointCount++) { if (codePointCount == begin) { posBegin = pos; } if (codePointCount == end) { break; } int v = bytes[pos] & 0xFF; if (v < /* 0xxx xxxx */ 0x80) { pos += 1; continue; } if (v >= /* 110x xxxx */ 0xc0) { if (v < /* 111x xxxx */ 0xe0) { pos += 2; continue; } if (v < /* 1111 xxxx */ 0xf0) { pos += 3; continue; } if (v < /* 1111 1xxx */ 0xf8) { pos += 4; continue; } // fallthrough, consider 5 and 6 byte sequences invalid. } // Anything not covered above is invalid UTF8. throw new IllegalArgumentException("substr: invalid UTF8 string found."); } // Check if we didn't go over the limit on the last character. if (pos > limit) throw new IllegalArgumentException("begin index must not be > end index"); return new BytesRef(bytes, posBegin, pos - posBegin); } private static class Resolver extends BaseFunctionResolver { protected Resolver() { super(Signature.numArgs(2, 3).and( Signature.withLenientVarArgs(Signature.ArgMatcher.STRING, Signature.ArgMatcher.NUMERIC))); } private static FunctionInfo createInfo(List<DataType> types) { return new FunctionInfo(new FunctionIdent(NAME, types), DataTypes.STRING); } @Override public FunctionImplementation getForTypes(List<DataType> dataTypes) throws IllegalArgumentException { return new SubstrFunction(createInfo(dataTypes)); } } }