/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.dfs;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Stopwatch;
import com.google.common.base.Strings;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
/**
 * Jackson serializable description of a file selection.
 * <p>
 * A selection holds a list of {@link FileStatus file statuses} and/or a list of file path strings together with
 * the root path of the selection. Whichever of the two lists is missing is derived lazily from the other one, see
 * {@link #getStatuses(DrillFileSystem)} and {@link #getFiles()}.
 */
public class FileSelection {
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSelection.class);
  // Hadoop paths always use '/' regardless of the platform. The platform "file.separator" is '\' on Windows,
  // which neither matches Hadoop path strings nor is a valid regular expression for String.split().
  private static final String PATH_SEPARATOR = Path.SEPARATOR;
  private static final String WILD_CARD = "*";

  private List<FileStatus> statuses;

  public List<String> files;

  /**
   * root path for the selections
   */
  public final String selectionRoot;

  /**
   * root path for the metadata cache file (if any)
   */
  public final String cacheFileRoot;

  /**
   * metadata context useful for metadata operations (if any)
   */
  private MetadataContext metaContext = null;

  private enum StatusType {
    NOT_CHECKED,      // initial state
    NO_DIRS,          // no directories in this selection
    HAS_DIRS,         // directories were found in the selection
    EXPANDED_FULLY,   // whether selection fully expanded to files
    EXPANDED_PARTIAL  // whether selection partially expanded to only directories (not files)
  }

  private StatusType dirStatus;

  // whether this selection previously had a wildcard
  private boolean hadWildcard = false;

  // whether all partitions were previously pruned for this selection
  private boolean wasAllPartitionsPruned = false;

  /**
   * Creates a {@link FileSelection selection} out of given file statuses/files and selection root.
   *
   * @param statuses  list of file statuses
   * @param files  list of files
   * @param selectionRoot  root path for selections
   */
  public FileSelection(final List<FileStatus> statuses, final List<String> files, final String selectionRoot) {
    this(statuses, files, selectionRoot, null, false, StatusType.NOT_CHECKED);
  }

  /**
   * Creates a {@link FileSelection selection} whose directory status has not been checked yet.
   *
   * @param statuses  list of file statuses
   * @param files  list of files
   * @param selectionRoot  root path for selections, must not be null
   * @param cacheFileRoot  root path for the metadata cache file, may be null
   * @param wasAllPartitionsPruned  whether all partitions were previously pruned for this selection
   */
  public FileSelection(final List<FileStatus> statuses, final List<String> files, final String selectionRoot,
      final String cacheFileRoot, final boolean wasAllPartitionsPruned) {
    this(statuses, files, selectionRoot, cacheFileRoot, wasAllPartitionsPruned, StatusType.NOT_CHECKED);
  }

  /**
   * Creates a {@link FileSelection selection} with an explicit directory status.
   *
   * @param statuses  list of file statuses
   * @param files  list of files
   * @param selectionRoot  root path for selections, must not be null
   * @param cacheFileRoot  root path for the metadata cache file, may be null
   * @param wasAllPartitionsPruned  whether all partitions were previously pruned for this selection
   * @param dirStatus  initial directory status of the selection
   * @throws NullPointerException if selectionRoot is null
   */
  public FileSelection(final List<FileStatus> statuses, final List<String> files, final String selectionRoot,
      final String cacheFileRoot, final boolean wasAllPartitionsPruned, final StatusType dirStatus) {
    this.statuses = statuses;
    this.files = files;
    this.selectionRoot = Preconditions.checkNotNull(selectionRoot);
    this.dirStatus = dirStatus;
    this.cacheFileRoot = cacheFileRoot;
    this.wasAllPartitionsPruned = wasAllPartitionsPruned;
  }

  /**
   * Copy constructor for convenience. The lists are shared with the given selection, not copied.
   *
   * @param selection  selection to copy, must not be null
   */
  protected FileSelection(final FileSelection selection) {
    Preconditions.checkNotNull(selection, "selection cannot be null");
    this.statuses = selection.statuses;
    this.files = selection.files;
    this.selectionRoot = selection.selectionRoot;
    this.dirStatus = selection.dirStatus;
    this.cacheFileRoot = selection.cacheFileRoot;
    this.metaContext = selection.metaContext;
    this.hadWildcard = selection.hadWildcard;
    this.wasAllPartitionsPruned = selection.wasAllPartitionsPruned;
  }

  /**
   * @return root path for the selections
   */
  public String getSelectionRoot() {
    return selectionRoot;
  }

  /**
   * Returns the file statuses of this selection. When the selection holds only file paths, each path is
   * resolved through the given file system once and the result is kept for subsequent calls.
   *
   * @param fs  file system used to resolve file paths into statuses
   * @return list of file statuses
   * @throws IOException if the file system fails to resolve a path
   * @throws NullPointerException if the selection holds neither statuses nor files
   */
  public List<FileStatus> getStatuses(final DrillFileSystem fs) throws IOException {
    Stopwatch timer = Stopwatch.createStarted();

    if (statuses == null) {
      Preconditions.checkNotNull(files, "files cannot be null when statuses are null");
      final List<FileStatus> newStatuses = Lists.newArrayList();
      for (final String pathStr : files) {
        newStatuses.add(fs.getFileStatus(new Path(pathStr)));
      }
      statuses = newStatuses;
    }
    logger.info("FileSelection.getStatuses() took {} ms, numFiles: {}",
        timer.elapsed(TimeUnit.MILLISECONDS), statuses.size());

    return statuses;
  }

  /**
   * Returns the file paths of this selection. When the selection holds only statuses, the paths are derived
   * from them once and the result is kept for subsequent calls.
   *
   * @return list of file paths
   * @throws NullPointerException if the selection holds neither files nor statuses
   */
  public List<String> getFiles() {
    if (files == null) {
      Preconditions.checkNotNull(statuses, "statuses cannot be null when files are null");
      final List<String> newFiles = Lists.newArrayList();
      for (final FileStatus status : statuses) {
        newFiles.add(status.getPath().toString());
      }
      files = newFiles;
    }
    return files;
  }

  /**
   * Tells whether at least one entry of this selection is a directory. The statuses are inspected only while
   * the directory status is still unchecked; the outcome is remembered afterwards.
   *
   * @param fs  file system used to obtain statuses
   * @return true if the recorded directory status says directories were found
   * @throws IOException if statuses cannot be obtained
   */
  public boolean containsDirectories(DrillFileSystem fs) throws IOException {
    if (dirStatus == StatusType.NOT_CHECKED) {
      dirStatus = StatusType.NO_DIRS;
      for (final FileStatus status : getStatuses(fs)) {
        if (status.isDirectory()) {
          dirStatus = StatusType.HAS_DIRS;
          break;
        }
      }
    }
    return dirStatus == StatusType.HAS_DIRS;
  }

  /**
   * Lists the entries of this selection through the file system and returns a new, fully expanded selection
   * that contains only the non-directory entries. A selection that is already fully expanded is returned as is.
   *
   * @param fs  file system used to list the selection
   * @return fully expanded selection, or null if no files were found (e.g. when querying an empty folder)
   * @throws IOException if the file system fails to list the selection
   */
  public FileSelection minusDirectories(DrillFileSystem fs) throws IOException {
    if (isExpandedFully()) {
      return this;
    }
    Stopwatch timer = Stopwatch.createStarted();
    final List<FileStatus> currentStatuses = getStatuses(fs);
    final int total = currentStatuses.size();
    final Path[] paths = new Path[total];
    for (int i = 0; i < total; i++) {
      paths[i] = currentStatuses.get(i).getPath();
    }
    final List<FileStatus> allStats = fs.list(true, paths);
    final List<FileStatus> nonDirectories = Lists.newArrayList(Iterables.filter(allStats, new Predicate<FileStatus>() {
      @Override
      public boolean apply(@Nullable FileStatus status) {
        // the argument is declared nullable, so never dereference it blindly
        return status != null && !status.isDirectory();
      }
    }));

    final FileSelection fileSel = create(nonDirectories, null, selectionRoot);
    logger.debug("FileSelection.minusDirectories() took {} ms, numFiles: {}",
        timer.elapsed(TimeUnit.MILLISECONDS), total);

    // fileSel will be null if we query an empty folder
    if (fileSel != null) {
      fileSel.setExpandedFully();
    }

    return fileSel;
  }

  /**
   * @param fs  file system used to obtain statuses
   * @return status of the first entry of this selection
   * @throws IOException if statuses cannot be obtained
   * @throws IndexOutOfBoundsException if the selection has no entries
   */
  public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
    return getStatuses(fs).get(0);
  }

  /**
   * Marks this selection as fully expanded to files.
   */
  public void setExpandedFully() {
    this.dirStatus = StatusType.EXPANDED_FULLY;
  }

  /**
   * @return true if this selection was marked as fully expanded to files
   */
  public boolean isExpandedFully() {
    return dirStatus == StatusType.EXPANDED_FULLY;
  }

  /**
   * Marks this selection as partially expanded (to directories only, not files).
   */
  public void setExpandedPartial() {
    this.dirStatus = StatusType.EXPANDED_PARTIAL;
  }

  /**
   * @return true if this selection was marked as partially expanded
   */
  public boolean isExpandedPartial() {
    return dirStatus == StatusType.EXPANDED_PARTIAL;
  }

  /**
   * @return current directory status of this selection
   */
  public StatusType getDirStatus() {
    return dirStatus;
  }

  /**
   * @return whether all partitions were previously pruned for this selection
   */
  public boolean wasAllPartitionsPruned() {
    return this.wasAllPartitionsPruned;
  }

  /**
   * Returns longest common path for the given list of file statuses.
   *
   * @param statuses  list of file statuses
   * @return longest common path, empty string if the list is null or empty
   */
  private static String commonPath(final List<FileStatus> statuses) {
    if (statuses == null || statuses.isEmpty()) {
      return "";
    }

    final List<String> files = Lists.newArrayList();
    for (final FileStatus status : statuses) {
      files.add(status.getPath().toString());
    }
    return commonPathForFiles(files);
  }

  /**
   * Returns longest common path for the given list of files. Scheme and authority of the result are taken
   * from the first file.
   *
   * @param files  list of files.
   * @return  longest common path; the root if absolute paths share nothing but the root; empty string if the
   *          list is null or empty or if relative paths have no component in common
   */
  private static String commonPathForFiles(final List<String> files) {
    if (files == null || files.isEmpty()) {
      return "";
    }

    final int total = files.size();
    final String[][] folders = new String[total][];
    int shortest = Integer.MAX_VALUE;
    for (int i = 0; i < total; i++) {
      final Path path = new Path(files.get(i));
      folders[i] = Path.getPathWithoutSchemeAndAuthority(path).toString().split(PATH_SEPARATOR);
      shortest = Math.min(shortest, folders[i].length);
    }

    // number of leading path components shared by all files
    int latest;
    out:
    for (latest = 0; latest < shortest; latest++) {
      final String current = folders[0][latest];
      for (int i = 1; i < folders.length; i++) {
        if (!current.equals(folders[i][latest])) {
          break out;
        }
      }
    }

    final Path first = new Path(files.get(0));
    final URI uri = first.toUri();
    String pathString = buildPath(folders[0], latest);
    if (pathString.isEmpty()) {
      // Path cannot be created from an empty string: absolute paths still share the root,
      // relative paths share nothing at all.
      if (!Path.getPathWithoutSchemeAndAuthority(first).toString().startsWith(PATH_SEPARATOR)) {
        return "";
      }
      pathString = PATH_SEPARATOR;
    }
    return new Path(uri.getScheme(), uri.getAuthority(), pathString).toString();
  }

  /**
   * Joins the first folderIndex components of the given path with the path separator.
   *
   * @param path  path components
   * @param folderIndex  number of leading components to join
   * @return joined path without a trailing separator, empty string if nothing was joined
   */
  private static String buildPath(final String[] path, final int folderIndex) {
    final StringBuilder builder = new StringBuilder();
    for (int i = 0; i < folderIndex; i++) {
      if (i > 0) {
        builder.append(PATH_SEPARATOR);
      }
      builder.append(path[i]);
    }
    return builder.toString();
  }

  /**
   * Creates a {@link FileSelection selection} by globbing the given path, relative to the given parent, on the
   * file system. The selection remembers whether the path contained a wildcard.
   *
   * @param fs  file system used to expand the path
   * @param parent  parent path
   * @param path  path relative to the parent, leading slashes are ignored
   * @return null if the file system returns no glob result or no selection could be created,
   *         otherwise a new selection
   * @throws IOException if the file system fails to expand the path
   */
  public static FileSelection create(final DrillFileSystem fs, final String parent, final String path) throws IOException {
    Stopwatch timer = Stopwatch.createStarted();
    boolean hasWildcard = path.contains(WILD_CARD);

    final Path combined = new Path(parent, removeLeadingSlash(path));
    final FileStatus[] statuses = fs.globStatus(combined); // note: this would expand wildcards
    if (statuses == null) {
      return null;
    }
    final FileSelection fileSel = create(Lists.newArrayList(statuses), null, combined.toUri().toString());
    logger.debug("FileSelection.create() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS));
    if (fileSel == null) {
      return null;
    }
    fileSel.setHadWildcard(hasWildcard);
    return fileSel;
  }

  /**
   * Creates a {@link FileSelection selection} with the given file statuses/files and selection root.
   * Exactly one of statuses and files is expected to be non-empty. With files only, the selection root is the
   * longest common path of the files; with statuses, it is the given root (cut right before its first
   * wildcard, if any) combined with scheme and authority of the first status.
   *
   * @param statuses  list of file statuses
   * @param files  list of files
   * @param root  root path for selections
   * @param cacheFileRoot root path for metadata cache (null for no metadata cache)
   * @param wasAllPartitionsPruned  whether all partitions were previously pruned for this selection
   * @return  null if statuses and files are both empty or both non-empty, otherwise a new selection.
   * @throws DrillRuntimeException if statuses are given and root is null or empty
   *
   * @see FileSelection#FileSelection(List, List, String)
   */
  public static FileSelection create(final List<FileStatus> statuses, final List<String> files, final String root,
      final String cacheFileRoot, final boolean wasAllPartitionsPruned) {
    final boolean hasStatuses = statuses != null && !statuses.isEmpty();
    final boolean hasFiles = files != null && !files.isEmpty();

    // both empty or both non-empty: no unambiguous selection can be built
    if (hasStatuses == hasFiles) {
      return null;
    }

    final String selectionRoot;
    if (!hasStatuses) {
      selectionRoot = commonPathForFiles(files);
    } else {
      if (Strings.isNullOrEmpty(root)) {
        throw new DrillRuntimeException("Selection root is null or empty" + root);
      }
      final Path rootPath = handleWildCard(root);
      final URI uri = statuses.get(0).getPath().toUri();
      final Path path = new Path(uri.getScheme(), uri.getAuthority(), rootPath.toUri().getPath());
      selectionRoot = path.toString();
    }
    return new FileSelection(statuses, files, selectionRoot, cacheFileRoot, wasAllPartitionsPruned);
  }

  /**
   * Creates a {@link FileSelection selection} without metadata cache root.
   *
   * @param statuses  list of file statuses
   * @param files  list of files
   * @param root  root path for selections
   * @return  null if statuses and files are both empty or both non-empty, otherwise a new selection.
   */
  public static FileSelection create(final List<FileStatus> statuses, final List<String> files, final String root) {
    return FileSelection.create(statuses, files, root, null, false);
  }

  /**
   * Creates a {@link FileSelection selection} over directories, based on an existing selection. If the
   * existing selection had a wildcard, the directories are taken from its statuses, otherwise the given
   * directory paths are used. Scheme and authority of the new selection root are taken from the first status
   * of the existing selection, which therefore has to hold statuses.
   *
   * @param dirPaths  list of directory paths, must not be null or empty
   * @param selection  existing selection
   * @param cacheFileRoot  root path for metadata cache (null for no metadata cache)
   * @return new selection over directories
   * @throws DrillRuntimeException if the selection root or the list of directories is null or empty
   */
  public static FileSelection createFromDirectories(final List<String> dirPaths, final FileSelection selection,
      final String cacheFileRoot) {
    Stopwatch timer = Stopwatch.createStarted();
    final String root = selection.getSelectionRoot();
    if (Strings.isNullOrEmpty(root)) {
      throw new DrillRuntimeException("Selection root is null or empty" + root);
    }
    if (dirPaths == null || dirPaths.isEmpty()) {
      throw new DrillRuntimeException("List of directories is null or empty");
    }

    List<String> dirs = Lists.newArrayList();

    if (selection.hadWildcard()) { // for wildcard the directory list should have already been expanded
      for (FileStatus status : selection.getFileStatuses()) {
        dirs.add(status.getPath().toString());
      }
    } else {
      dirs.addAll(dirPaths);
    }

    final Path rootPath = handleWildCard(root);
    final URI uri = selection.getFileStatuses().get(0).getPath().toUri();
    final Path path = new Path(uri.getScheme(), uri.getAuthority(), rootPath.toUri().getPath());
    FileSelection fileSel = new FileSelection(null, dirs, path.toString(), cacheFileRoot, false);
    fileSel.setHadWildcard(selection.hadWildcard());
    logger.info("FileSelection.createFromDirectories() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS));
    return fileSel;
  }

  /**
   * Returns the given root as a path, cut right before the path component that contains the first wildcard.
   * For a root with a wildcard only the path component of the result is meaningful (scheme and authority are
   * not carried over); callers in this class use nothing else.
   *
   * @param root  root path, possibly containing wildcards
   * @return the root itself if it has no wildcard, otherwise the directory preceding the first wildcard:
   *         the file system root if the wildcard is directly under it, the current directory if no
   *         separator precedes the wildcard
   */
  private static Path handleWildCard(final String root) {
    if (!root.contains(WILD_CARD)) {
      return new Path(root);
    }
    // Look at the path component only, so that cutting never leaves a bare scheme or authority behind.
    final String rootPath = new Path(root).toUri().getPath();
    final int wildcardIdx = rootPath.indexOf(WILD_CARD); // first wild card in the path
    if (wildcardIdx < 0) {
      // the wildcard character is not part of the path component, nothing to cut
      return new Path(root);
    }
    final int separatorIdx = rootPath.lastIndexOf('/', wildcardIdx); // file separator right before the first wild card
    if (separatorIdx < 0) {
      return new Path(Path.CUR_DIR);
    }
    if (separatorIdx == 0) {
      // a Path cannot be created from an empty string, the parent here is the root itself
      return new Path(Path.SEPARATOR);
    }
    return new Path(rootPath.substring(0, separatorIdx));
  }

  /**
   * Strips all leading slashes from the given path.
   *
   * @param path  path to strip
   * @return path without leading slashes, empty string if the path consists of slashes only
   */
  private static String removeLeadingSlash(String path) {
    int start = 0;
    while (start < path.length() && path.charAt(start) == '/') {
      start++;
    }
    return start == 0 ? path : path.substring(start);
  }

  /**
   * @return file statuses currently held by this selection, null if they have not been provided or resolved
   */
  public List<FileStatus> getFileStatuses() {
    return statuses;
  }

  /**
   * Tells whether directory pruning can be applied to this selection: it has to be expanded, fully or
   * partially, and its partitions must not all have been pruned before.
   * The misspelled method name is kept for compatibility with existing callers.
   *
   * @return true if directory pruning is supported
   */
  public boolean supportDirPrunig() {
    return (isExpandedFully() || isExpandedPartial()) && !wasAllPartitionsPruned;
  }

  /**
   * @param wc  whether this selection previously had a wildcard
   */
  public void setHadWildcard(boolean wc) {
    this.hadWildcard = wc;
  }

  /**
   * @return whether this selection previously had a wildcard
   */
  public boolean hadWildcard() {
    return this.hadWildcard;
  }

  /**
   * @return root path for the metadata cache file, null if there is none
   */
  public String getCacheFileRoot() {
    return cacheFileRoot;
  }

  /**
   * @param context  metadata context to attach to this selection
   */
  public void setMetaContext(MetadataContext context) {
    metaContext = context;
  }

  /**
   * @return metadata context attached to this selection, null if there is none
   */
  public MetadataContext getMetaContext() {
    return metaContext;
  }

  /**
   * Describes the selection by its root and its files. If only statuses are held, their paths are printed
   * instead; the internal state of the selection is not modified.
   */
  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder();
    sb.append("root=").append(this.selectionRoot);

    sb.append("files=[");
    boolean isFirst = true;
    if (this.files != null) {
      for (final String file : this.files) {
        if (!isFirst) {
          sb.append(",");
        }
        isFirst = false;
        sb.append(file);
      }
    } else if (this.statuses != null) {
      for (final FileStatus status : this.statuses) {
        if (!isFirst) {
          sb.append(",");
        }
        isFirst = false;
        sb.append(status.getPath().toString());
      }
    }
    sb.append("]");

    return sb.toString();
  }
}