/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.util;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import gobblin.configuration.State;
/**
 * Static helpers for filtering dataset (topic) names against a blacklist and a whitelist
 * of regular expressions.
 *
 * <p>All matching is done with {@link java.util.regex.Matcher#matches()}, i.e. a pattern must
 * match the <em>entire</em> name, not just a substring of it.
 */
public class DatasetFilterUtils {

  /**
   * Same as {@link #getPatternList(State, String, String)} with {@code StringUtils.EMPTY}
   * as the default property value.
   *
   * @param state the state to read the property from
   * @param propKey the property key holding the regex list
   * @return the compiled patterns, in the order returned by {@code state.getPropAsList}
   */
  public static List<Pattern> getPatternList(State state, String propKey) {
    return getPatternList(state, propKey, StringUtils.EMPTY);
  }

  /**
   * Reads {@code propKey} from {@code state} via {@code state.getPropAsList(propKey, def)} and
   * compiles every returned entry into a {@link Pattern}.
   *
   * @param state the state to read the property from
   * @param propKey the property key holding the regex list
   * @param def the default value handed to {@code getPropAsList} (presumably used when the
   *        property is absent; see {@code State} for the exact semantics)
   * @return the compiled patterns, in the order returned by {@code state.getPropAsList}
   * @throws java.util.regex.PatternSyntaxException if any entry is not a valid regex
   */
  public static List<Pattern> getPatternList(State state, String propKey, String def) {
    return getPatternsFromStrings(state.getPropAsList(propKey, def));
  }

  /**
   * Compiles each string into a {@link Pattern}, preserving the input order.
   *
   * @param strings the regex sources to compile
   * @return a new list holding one compiled pattern per input string
   * @throws java.util.regex.PatternSyntaxException if any string is not a valid regex
   */
  public static List<Pattern> getPatternsFromStrings(List<String> strings) {
    List<Pattern> compiled = Lists.newArrayList();
    for (String regex : strings) {
      Pattern pattern = Pattern.compile(regex);
      compiled.add(pattern);
    }
    return compiled;
  }

  /**
   * Returns the topics that pass {@link #survived(String, List, List)}, in their original order.
   *
   * @param topics the candidate topic names
   * @param blacklist patterns that exclude a topic
   * @param whitelist patterns that admit a topic; an empty list admits everything
   * @return a new list containing only the surviving topics
   */
  public static List<String> filter(List<String> topics, List<Pattern> blacklist, List<Pattern> whitelist) {
    List<String> survivors = Lists.newArrayList();
    for (String candidate : topics) {
      if (!survived(candidate, blacklist, whitelist)) {
        continue;
      }
      survivors.add(candidate);
    }
    return survivors;
  }

  /**
   * Returns the topics that pass {@link #survived(String, List, List)}.
   *
   * @param topics the candidate topic names
   * @param blacklist patterns that exclude a topic
   * @param whitelist patterns that admit a topic; an empty list admits everything
   * @return a new set (created with {@code Sets.newHashSet()}) containing only the surviving topics
   */
  public static Set<String> filter(Set<String> topics, List<Pattern> blacklist, List<Pattern> whitelist) {
    Set<String> survivors = Sets.newHashSet();
    for (String candidate : topics) {
      if (!survived(candidate, blacklist, whitelist)) {
        continue;
      }
      survivors.add(candidate);
    }
    return survivors;
  }

  /**
   * Decides whether a topic passes the filters. A topic survives when
   * (1) it matches no blacklist pattern, and
   * (2) the whitelist is empty or the topic matches at least one whitelist pattern.
   * Both lists hold regex patterns (NOT glob patterns).
   *
   * @param topic the topic name to test
   * @param blacklist patterns that exclude a topic; checked first
   * @param whitelist patterns that admit a topic; an empty list admits everything
   * @return {@code true} if the topic survives, {@code false} otherwise
   */
  public static boolean survived(String topic, List<Pattern> blacklist, List<Pattern> whitelist) {
    // The blacklist takes precedence: the whitelist is only consulted for non-blacklisted topics.
    return !stringInPatterns(topic, blacklist)
        && (whitelist.isEmpty() || stringInPatterns(topic, whitelist));
  }

  /**
   * Tells whether a string is fully matched by at least one of the given regex patterns.
   *
   * @param s the string to test
   * @param patterns the patterns to try, in order
   * @return {@code true} as soon as one pattern matches the whole string; {@code false} if none
   *         does (including when {@code patterns} is empty)
   */
  public static boolean stringInPatterns(String s, List<Pattern> patterns) {
    for (Pattern candidate : patterns) {
      boolean fullMatch = candidate.matcher(s).matches();
      if (fullMatch) {
        return true;
      }
    }
    return false;
  }
}