/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.io;
import org.apache.flink.annotation.Internal;
import org.apache.flink.core.fs.Path;
import org.apache.flink.util.Preconditions;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Class for determining if a particular file should be included or excluded
* based on a set of include and exclude glob filters.
*
* Glob filter support the following expressions:
* <ul>
* <li>* - matches any number of any characters including none</li>
* <li>** - matches any file in all subdirectories</li>
* <li>? - matches any single character</li>
* <li>[abc] - matches one of the characters listed in a brackets</li>
* <li>[a-z] - matches one character from the range given in the brackets</li>
* </ul>
*
* <p> If does not match an include pattern it is excluded. If it matches and include
* pattern but also matches an exclude pattern it is excluded.
*
* <p> If no patterns are provided all files are included
*/
@Internal
public class GlobFilePathFilter extends FilePathFilter {
private static final long serialVersionUID = 1L;
private final List<String> includePatterns;
private final List<String> excludePatterns;
// Path matchers are not serializable so we are delaying their
// creation until they are used
private transient ArrayList<PathMatcher> includeMatchers;
private transient ArrayList<PathMatcher> excludeMatchers;
/**
* Constructor for GlobFilePathFilter that will match all files
*/
public GlobFilePathFilter() {
this(Collections.<String>emptyList(), Collections.<String>emptyList());
}
/**
* Constructor for GlobFilePathFilter
*
* @param includePatterns glob patterns for files to include
* @param excludePatterns glob patterns for files to exclude
*/
public GlobFilePathFilter(List<String> includePatterns, List<String> excludePatterns) {
this.includePatterns = Preconditions.checkNotNull(includePatterns);
this.excludePatterns = Preconditions.checkNotNull(excludePatterns);
}
private ArrayList<PathMatcher> buildPatterns(List<String> patterns) {
FileSystem fileSystem = FileSystems.getDefault();
ArrayList<PathMatcher> matchers = new ArrayList<>(patterns.size());
for (String patternStr : patterns) {
matchers.add(fileSystem.getPathMatcher("glob:" + patternStr));
}
return matchers;
}
@Override
public boolean filterPath(Path filePath) {
if (getIncludeMatchers().isEmpty() && getExcludeMatchers().isEmpty()) {
return false;
}
// compensate for the fact that Flink paths are slashed
final String path = filePath.hasWindowsDrive() ?
filePath.getPath().substring(1) :
filePath.getPath();
final java.nio.file.Path nioPath = Paths.get(path);
for (PathMatcher matcher : getIncludeMatchers()) {
if (matcher.matches(nioPath)) {
return shouldExclude(nioPath);
}
}
return true;
}
private ArrayList<PathMatcher> getIncludeMatchers() {
if (includeMatchers == null) {
includeMatchers = buildPatterns(includePatterns);
}
return includeMatchers;
}
private ArrayList<PathMatcher> getExcludeMatchers() {
if (excludeMatchers == null) {
excludeMatchers = buildPatterns(excludePatterns);
}
return excludeMatchers;
}
private boolean shouldExclude(java.nio.file.Path nioPath) {
for (PathMatcher matcher : getExcludeMatchers()) {
if (matcher.matches(nioPath)) {
return true;
}
}
return false;
}
}