/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.engine.function.string;
import com.google.gson.annotations.Expose;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.engine.function.GeneralFunction;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.datum.*;
import org.apache.tajo.engine.function.annotation.Description;
import org.apache.tajo.engine.function.annotation.ParamTypes;
import org.apache.tajo.storage.Tuple;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.tajo.engine.eval.FunctionEval.ParamType;
/**
 * This function is defined as:
 * <pre>
 * regexp_replace(string text, pattern text, replacement text [, flags text])
 * </pre>
 *
 * Every substring of <code>string</code> that matches <code>pattern</code> is replaced
 * with <code>replacement</code>. The pattern is compiled with
 * {@link java.util.regex.Pattern} and the replacement string is interpreted by
 * {@link java.util.regex.Matcher}, so <code>$</code> and <code>\</code> in the
 * replacement have the special meaning described there.
 *
 * The result is NULL if the string, the pattern, or the replacement is NULL.
 *
 * flags is not supported yet.
 */
@Description(
  functionName = "regexp_replace",
  description = " Replace substring(s) matching a POSIX regular expression.",
  example = "> SELECT regexp_replace('Thomas', '.[mN]a.', 'M');\n"
          + "ThM",
  returnType = TajoDataTypes.Type.TEXT,
  paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT,
      TajoDataTypes.Type.TEXT,TajoDataTypes.Type.TEXT})}
)
public class RegexpReplace extends GeneralFunction {
  /** True when {@link #init} was told that the pattern argument is a constant. */
  @Expose protected boolean isPatternConstant;

  // transient variables
  protected String pattern;
  /** True when {@link #init} was told that one of the first three arguments is NULL. */
  private boolean isAlwaysNull = false;
  /** Compiled pattern kept between calls; only populated when the pattern is constant. */
  protected Pattern compiled;

  public RegexpReplace() {
    // All arguments are text, matching the @ParamTypes declaration above and the
    // asChars() reads in eval().
    super(new Column[] {
        new Column("string", TajoDataTypes.Type.TEXT),
        new Column("pattern", TajoDataTypes.Type.TEXT),
        new Column("replacement", TajoDataTypes.Type.TEXT),
        new Column("flags", TajoDataTypes.Type.TEXT), // it is not supported yet.
    });
  }

  /**
   * Records hints about the arguments before evaluation.
   *
   * @param paramTypes kinds of the arguments; the first three entries are inspected.
   *                   If any of them is {@code ParamType.NULL}, every evaluation returns NULL.
   *                   Otherwise, if the pattern (index 1) is {@code ParamType.CONSTANT},
   *                   the compiled pattern is reused across calls to {@link #eval}.
   */
  public void init(ParamType [] paramTypes) {
    if (paramTypes[0] == ParamType.NULL || paramTypes[1] == ParamType.NULL || paramTypes[2] == ParamType.NULL) {
      isAlwaysNull = true;
    } else if (paramTypes[1] == ParamType.CONSTANT) {
      isPatternConstant = true;
    }
  }

  /**
   * Replaces every match of the pattern in the input string with the replacement.
   *
   * @param params tuple holding the string (index 0), the pattern (index 1) and the
   *               replacement (index 2)
   * @return a text datum with all matches replaced, or the NULL datum if any of the
   *         three arguments is NULL
   */
  @Override
  public Datum eval(Tuple params) {
    Datum thisValue = params.get(0);
    Datum thisPattern = params.get(1);
    Datum thisReplacement = params.get(2);

    // NULL in, NULL out.
    if (isAlwaysNull
        || thisValue instanceof NullDatum
        || thisReplacement instanceof NullDatum
        || thisPattern instanceof NullDatum) {
      return NullDatum.get();
    }

    Pattern thisCompiled = compiled;
    if (thisCompiled == null) {
      thisCompiled = Pattern.compile(thisPattern.asChars());
      // if a regular expression pattern is a constant,
      // it will be reused in every call
      if (isPatternConstant) {
        compiled = thisCompiled;
      }
    }

    // On a fresh matcher, replaceAll performs the find/appendReplacement/appendTail
    // loop and returns the resulting string.
    Matcher matcher = thisCompiled.matcher(thisValue.asChars());
    return DatumFactory.createText(matcher.replaceAll(thisReplacement.asChars()));
  }
}