/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.task.source.bundleizer;
import javax.annotation.Nullable;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.util.AutoField;
import com.addthis.bundle.util.CachingField;
import com.addthis.bundle.util.NoopField;
import com.addthis.bundle.value.ValueFactory;
import com.google.common.collect.ImmutableList;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class RegexBundleizer extends NewlineBundleizer {
private static final Logger log = LoggerFactory.getLogger(RegexBundleizer.class);
private static final Method GROUPS_METHOD = tryGetNamedGroupsMethod();
private static final Pattern TRYING_GROUPS_HEURISTIC = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]+)>");
private final ImmutableList<AutoField> fields;
private final Pattern regex;
public RegexBundleizer(@JsonProperty("fields") ImmutableList<AutoField> fields,
@JsonProperty("regex") Pattern regex) {
this.regex = regex;
Map<String, Integer> namedGroups = tryCallNamedGroups(regex);
if (namedGroups == null) {
if (estimateIfPatternHasNamedGroups(regex)) {
throw new IllegalArgumentException("Looks like named groups were used, but we can't support them");
} else if (fields.isEmpty()) {
throw new IllegalArgumentException("No fields were specified, and we can't support named groups");
} else {
this.fields = fields;
}
} else if (fields.isEmpty() == namedGroups.isEmpty()) {
throw new IllegalArgumentException("Must use (exactly one of) either named groups or fields");
} else if (!namedGroups.isEmpty()) {
int maxGroupIndex = namedGroups.values().stream().mapToInt(Integer::intValue).max().getAsInt() - 1;
AutoField[] fieldsFromGroups = new AutoField[maxGroupIndex + 1];
Arrays.fill(fieldsFromGroups, new NoopField());
namedGroups.forEach((key, value) -> fieldsFromGroups[value - 1] = new CachingField(key));
this.fields = ImmutableList.copyOf(fieldsFromGroups);
} else {
this.fields = fields;
}
}
@Override public Bundle bundleize(Bundle next, String line) {
Matcher lineMatcher = regex.matcher(line);
if (lineMatcher.matches()) {
for (int i = 0; i < fields.size(); i++) {
fields.get(i).setValue(next, ValueFactory.create(lineMatcher.group(i + 1)));
}
} else {
return null;
}
return next;
}
private static boolean estimateIfPatternHasNamedGroups(Pattern pattern) {
return TRYING_GROUPS_HEURISTIC.matcher(pattern.pattern()).find();
}
@Nullable private static Method tryGetNamedGroupsMethod() {
try {
Method namedGroupsMethod = Pattern.class.getDeclaredMethod("namedGroups");
namedGroupsMethod.setAccessible(true);
return namedGroupsMethod;
} catch (NoSuchMethodException | SecurityException ex) {
log.warn("Failed to reflect the Pattern.namedGroups method, so we cannot use them for field names.", ex);
return null;
}
}
@SuppressWarnings("unchecked") @Nullable private static Map<String, Integer> tryCallNamedGroups(Pattern pattern) {
if (GROUPS_METHOD != null) {
try {
return (Map<String, Integer>) GROUPS_METHOD.invoke(pattern);
} catch (ReflectiveOperationException ex) {
log.warn("Unexpected error invoking Pattern.namedGroups, so we cannot use them for field names.", ex);
}
}
return null;
}
}