/**
* Copyright 2013-2014 Recruit Technologies Co., Ltd. and contributors
* (see CONTRIBUTORS.md)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. A copy of the
* License is distributed with this work in the LICENSE.md file. You may
* also obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gennai.gungnir.topology.udf;
import static org.gennai.gungnir.GungnirConst.*;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.gennai.gungnir.tuple.Field;
import org.gennai.gungnir.tuple.GungnirTuple;
import com.google.common.collect.Maps;
/**
 * UDF registered under the name {@code regexp_extract}.
 *
 * <p>Takes exactly three arguments: the subject, the regular expression and the capturing-group
 * index. Each argument is either a {@link Field}, whose value is read from the tuple being
 * evaluated, or a literal value used as is.
 *
 * <p>{@link #evaluate(GungnirTuple)} returns the text captured by the requested group of the first
 * match found in the subject, or {@code null} when nothing can be extracted.
 */
@BaseFunction.Description(name = "regexp_extract")
public class RegexpExtract extends BaseFunction<String> {

  private static final long serialVersionUID = SERIAL_VERSION_UID;

  /**
   * Compiled patterns keyed by their source string, so each distinct expression is compiled once.
   * Created in {@link #prepare()}.
   *
   * <p>NOTE(review): entries are never evicted. When the pattern argument is a tuple field, the
   * map gains one entry per distinct pattern value seen.
   */
  private Map<String, Pattern> patternsCache;

  public RegexpExtract() {
  }

  /**
   * Copy constructor used by {@link #clone()}; delegates to the superclass copy constructor. The
   * pattern cache is not copied.
   */
  private RegexpExtract(RegexpExtract c) {
    super(c);
  }

  /**
   * Stores the function arguments.
   *
   * @param parameters subject, pattern and group index, in that order
   * @return this instance
   * @throws ArgumentException if the number of arguments is not exactly three
   */
  @Override
  public RegexpExtract create(Object... parameters) throws ArgumentException {
    if (parameters.length != 3) {
      throw new ArgumentException("Incorrect number of arguments");
    }
    setParameters(parameters);
    return this;
  }

  /** Creates an empty pattern cache. */
  @Override
  protected void prepare() {
    patternsCache = Maps.newHashMap();
  }

  /**
   * Searches the subject for the first match of the pattern and returns the requested group.
   *
   * <p>The group index is honoured only when it resolves to an {@link Integer}; any other value
   * (including {@code null}) selects the whole match.
   *
   * @param tuple tuple from which {@link Field} arguments are resolved
   * @return the captured text, or {@code null} if the subject or pattern is {@code null}, the
   *     pattern does not match, the group index is negative or greater than the number of
   *     capturing groups, or the requested group did not participate in the match
   */
  @Override
  public String evaluate(GungnirTuple tuple) {
    String subject = asStringOrNull(resolveOperand(0, tuple));
    String pattern = asStringOrNull(resolveOperand(1, tuple));
    Object rawIndex = resolveOperand(2, tuple);
    Integer index = rawIndex instanceof Integer ? (Integer) rawIndex : null;

    if (subject == null || pattern == null) {
      return null;
    }

    Pattern compiled = patternsCache.get(pattern);
    if (compiled == null) {
      compiled = Pattern.compile(pattern);
      patternsCache.put(pattern, compiled);
    }

    Matcher matcher = compiled.matcher(subject);
    if (!matcher.find()) {
      return null;
    }
    if (index == null) {
      return matcher.group();
    }
    // Matcher.group(int) throws IndexOutOfBoundsException for any index outside
    // [0, groupCount()], so reject negative indexes as well as too-large ones.
    if (index < 0 || index > matcher.groupCount()) {
      return null;
    }
    return matcher.group(index);
  }

  /** Returns a new instance built with the copy constructor. */
  @Override
  public RegexpExtract clone() {
    return new RegexpExtract(this);
  }

  /** Returns the argument at {@code position}, reading it from the tuple when it is a Field. */
  private Object resolveOperand(int position, GungnirTuple tuple) {
    Object parameter = getParameter(position);
    if (parameter instanceof Field) {
      return ((Field) parameter).getValue(tuple);
    }
    return parameter;
  }

  /** Returns {@code value.toString()}, or {@code null} when {@code value} is {@code null}. */
  private static String asStringOrNull(Object value) {
    return value == null ? null : value.toString();
  }
}