/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.filter.value;
import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import java.util.regex.Pattern;

import com.addthis.ahocorasick.AhoCorasick;
import com.addthis.ahocorasick.SearchResult;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.util.AutoParam;
import com.addthis.bundle.util.ConstantTypedField;
import com.addthis.bundle.util.TypedField;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.bundle.value.ValueObject;
import com.addthis.codec.annotations.Time;
import com.addthis.codec.codables.SuperCodable;
import com.addthis.hydra.data.util.JSONFetcher;

import com.fasterxml.jackson.annotation.JsonProperty;
abstract class AbstractMatchStringFilter extends AbstractValueFilterContextual implements SuperCodable {
/**
* The input must match exactly to an element in this set.
*/
private TypedField<Set<String>> value;
/**
* A URL to retrieve the 'value' field.
*/
final private String valueURL;
/**
* The input must match to one of the regular expressions in this set.
*/
private HashSet<String> match;
/**
* A URL to retrieve the 'match' field.
*/
final private String matchURL;
/**
* A substring of the input must match to one of the regular expressions in this set.
*/
private HashSet<String> find;
/**
* A URL to retrieve the 'find' field.
*/
final private String findURL;
/**
* A substring of the input must match exactly to an element of this set.
*/
private TypedField<Set<String>> contains;
/**
* A URL to retrieve the 'contains' field.
*/
final private String containsURL;
/**
* If true, then interpret the payload from the URLs as CSV files. Default is false.
*/
final private boolean urlReturnsCSV;
/**
* If true, then convert the input to lowercase. The filter output will be in lowercase.
* Default is false.
*/
final private boolean toLower;
/**
* A timeout value if any of the URL fields are used. Default is 60000.
*/
final private int urlTimeout;
/**
* The number of retries if any of the URL fields are used. Default is 5.
*/
final private int urlRetries;
final private int urlMinBackoff;
final private int urlMaxBackoff;
final private boolean not;
private ArrayList<Pattern> pattern;
private ArrayList<Pattern> findPattern;
private AhoCorasick containsDictionary;
AbstractMatchStringFilter(TypedField<Set<String>> value,
String valueURL,
HashSet<String> match,
String matchURL,
HashSet<String> find,
String findURL,
TypedField<Set<String>> contains,
String containsURL,
boolean urlReturnsCSV,
boolean toLower,
int urlTimeout,
int urlRetries,
int urlMinBackoff,
int urlMaxBackoff,
boolean not) {
this.value = value;
this.valueURL = valueURL;
this.match = match;
this.matchURL = matchURL;
this.find = find;
this.findURL = findURL;
this.contains = contains;
this.containsURL = containsURL;
this.urlReturnsCSV = urlReturnsCSV;
this.toLower = toLower;
this.urlTimeout = urlTimeout;
this.urlRetries = urlRetries;
this.urlMinBackoff = urlMinBackoff;
this.urlMaxBackoff = urlMaxBackoff;
this.not = not;
if (match != null) {
ArrayList<Pattern> np = new ArrayList<>();
for (String s : match) {
np.add(Pattern.compile(s));
}
this.pattern = np;
}
if (find != null) {
ArrayList<Pattern> np = new ArrayList<>();
for (String s : find) {
np.add(Pattern.compile(s));
}
this.findPattern = np;
}
}
public boolean passedMatch(String sv) {
// match regex
if (pattern != null) {
for (Pattern pat : pattern) {
if (pat.matcher(sv).matches()) {
return true;
}
}
}
return false;
}
public boolean passedContains(String sv, Bundle context) {
// match contains
if (containsDictionary != null) {
Iterator<SearchResult> matcher = containsDictionary.progressiveSearch(sv);
return matcher.hasNext();
} else if (contains != null) {
for (String search : contains.getValue(context)) {
if (sv.contains(search)) {
return true;
}
}
}
return false;
}
public boolean passedValue(String sv, Bundle context) {
// match exact values
return (value != null) && value.getValue(context).contains(sv);
}
public boolean passedFind(String sv) {
// match regex
if (findPattern != null) {
for (Pattern pat : findPattern) {
if (pat.matcher(sv).find()) {
return true;
}
}
}
return false;
}
@Override public void postDecode() {
if (valueURL != null) {
JSONFetcher.SetLoader loader = new JSONFetcher.SetLoader(valueURL)
.setContention(urlTimeout, urlRetries, urlMinBackoff, urlMaxBackoff);
if (urlReturnsCSV) {
loader.setCsv(true);
}
value = new ConstantTypedField<>(loader.load());
}
if (matchURL != null) {
JSONFetcher.SetLoader loader = new JSONFetcher.SetLoader(matchURL)
.setContention(urlTimeout, urlRetries, urlMinBackoff, urlMaxBackoff).setTarget(match);
if (urlReturnsCSV) {
loader.setCsv(true);
}
match = loader.load();
if (match != null) {
ArrayList<Pattern> np = new ArrayList<>();
for (String s : match) {
np.add(Pattern.compile(s));
}
this.pattern = np;
}
}
if (findURL != null) {
JSONFetcher.SetLoader loader = new JSONFetcher.SetLoader(findURL)
.setContention(urlTimeout, urlRetries, urlMinBackoff, urlMaxBackoff).setTarget(find);
if (urlReturnsCSV) {
loader.setCsv(true);
}
find = loader.load();
if (find != null) {
ArrayList<Pattern> np = new ArrayList<>();
for (String s : find) {
np.add(Pattern.compile(s));
}
this.findPattern = np;
}
}
if (containsURL != null) {
JSONFetcher.SetLoader loader = new JSONFetcher.SetLoader(containsURL)
.setContention(urlTimeout, urlRetries, urlMinBackoff, urlMaxBackoff);
if (urlReturnsCSV) {
loader.setCsv(true);
}
contains = new ConstantTypedField<>(loader.load());
}
if (contains instanceof Supplier) {
Set<String> candidates = ((Supplier<Set<String>>) contains).get();
if (candidates != null) {
containsDictionary = AhoCorasick.builder().build();
candidates.forEach(containsDictionary::add);
containsDictionary.prepare();
}
}
}
@Override public void preEncode() {}
private static final class ValidationOnly extends AbstractMatchStringFilter {
public ValidationOnly(@AutoParam @JsonProperty("value") TypedField<Set<String>> value,
@JsonProperty("valueURL") String valueURL,
@JsonProperty("match") HashSet<String> match,
@JsonProperty("matchURL") String matchURL,
@JsonProperty("find") HashSet<String> find,
@JsonProperty("findURL") String findURL,
@AutoParam @JsonProperty("contains") TypedField<Set<String>> contains,
@JsonProperty("containsURL") String containsURL,
@JsonProperty("urlReturnsCSV") boolean urlReturnsCSV,
@JsonProperty("toLower") boolean toLower,
@Time(TimeUnit.MILLISECONDS) @JsonProperty("urlTimeout") int urlTimeout,
@JsonProperty("urlRetries") int urlRetries,
@Time(TimeUnit.MILLISECONDS) @JsonProperty("urlMinBackoff") int urlMinBackoff,
@Time(TimeUnit.MILLISECONDS) @JsonProperty("urlMaxBackoff") int urlMaxBackoff) {
super(value,
valueURL,
match,
matchURL,
find,
findURL,
contains,
containsURL,
urlReturnsCSV,
toLower,
urlTimeout,
urlRetries,
urlMinBackoff,
urlMaxBackoff,
false);
}
@Override public void postDecode() {
// intentionally do nothing
}
@Nullable @Override public ValueObject filterValue(@Nullable ValueObject value, @Nullable Bundle context) {
throw new UnsupportedOperationException("This class is only intended for use in construction validation.");
}
}
@Nullable @Override public ValueObject filterValue(@Nullable ValueObject value, @Nullable Bundle context) {
String sv = (value == null) ? null : value.toString();
if (sv != null && (not || !sv.isEmpty())) {
if (toLower) {
sv = sv.toLowerCase();
value = ValueFactory.create(sv);
}
boolean success = passedMatch(sv) || passedContains(sv, context) || passedValue(sv, context) || passedFind(sv);
if (not) {
return success ? null : value;
} else {
return success ? value : null;
}
} else {
return value;
}
}
}