/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.store.dfs; import java.io.IOException; import java.net.URI; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Stopwatch; import com.google.common.base.Strings; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; /** * Jackson serializable description of a file selection. */ public class FileSelection { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSelection.class); private static final String PATH_SEPARATOR = System.getProperty("file.separator"); private static final String WILD_CARD = "*"; private List<FileStatus> statuses; public List<String> files; /** * root path for the selections */ public final String selectionRoot; /** * root path for the metadata cache file (if any) */ public final String cacheFileRoot; /** * metadata context useful for metadata operations (if any) */ private MetadataContext metaContext = null; private enum StatusType { NOT_CHECKED, // initial state NO_DIRS, // no directories in this selection HAS_DIRS, // directories were found in the selection EXPANDED_FULLY, // whether selection fully expanded to files EXPANDED_PARTIAL // whether selection partially expanded to only directories (not files) } private StatusType dirStatus; // whether this selection previously had a wildcard private boolean hadWildcard = false; // whether all partitions were previously pruned for this selection private boolean wasAllPartitionsPruned = false; /** * Creates a {@link FileSelection selection} out of given file statuses/files and selection root. * * @param statuses list of file statuses * @param files list of files * @param selectionRoot root path for selections */ public FileSelection(final List<FileStatus> statuses, final List<String> files, final String selectionRoot) { this(statuses, files, selectionRoot, null, false, StatusType.NOT_CHECKED); } public FileSelection(final List<FileStatus> statuses, final List<String> files, final String selectionRoot, final String cacheFileRoot, final boolean wasAllPartitionsPruned) { this(statuses, files, selectionRoot, cacheFileRoot, wasAllPartitionsPruned, StatusType.NOT_CHECKED); } public FileSelection(final List<FileStatus> statuses, final List<String> files, final String selectionRoot, final String cacheFileRoot, final boolean wasAllPartitionsPruned, final StatusType dirStatus) { this.statuses = statuses; this.files = files; this.selectionRoot = Preconditions.checkNotNull(selectionRoot); this.dirStatus = dirStatus; this.cacheFileRoot = cacheFileRoot; this.wasAllPartitionsPruned = wasAllPartitionsPruned; } /** * Copy constructor for convenience. */ protected FileSelection(final FileSelection selection) { Preconditions.checkNotNull(selection, "selection cannot be null"); this.statuses = selection.statuses; this.files = selection.files; this.selectionRoot = selection.selectionRoot; this.dirStatus = selection.dirStatus; this.cacheFileRoot = selection.cacheFileRoot; this.metaContext = selection.metaContext; this.hadWildcard = selection.hadWildcard; this.wasAllPartitionsPruned = selection.wasAllPartitionsPruned; } public String getSelectionRoot() { return selectionRoot; } public List<FileStatus> getStatuses(final DrillFileSystem fs) throws IOException { Stopwatch timer = Stopwatch.createStarted(); if (statuses == null) { final List<FileStatus> newStatuses = Lists.newArrayList(); for (final String pathStr:files) { newStatuses.add(fs.getFileStatus(new Path(pathStr))); } statuses = newStatuses; } logger.info("FileSelection.getStatuses() took {} ms, numFiles: {}", timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : statuses.size()); return statuses; } public List<String> getFiles() { if (files == null) { final List<String> newFiles = Lists.newArrayList(); for (final FileStatus status:statuses) { newFiles.add(status.getPath().toString()); } files = newFiles; } return files; } public boolean containsDirectories(DrillFileSystem fs) throws IOException { if (dirStatus == StatusType.NOT_CHECKED) { dirStatus = StatusType.NO_DIRS; for (final FileStatus status : getStatuses(fs)) { if (status.isDirectory()) { dirStatus = StatusType.HAS_DIRS; break; } } } return dirStatus == StatusType.HAS_DIRS; } public FileSelection minusDirectories(DrillFileSystem fs) throws IOException { if (isExpandedFully()) { return this; } Stopwatch timer = Stopwatch.createStarted(); final List<FileStatus> statuses = getStatuses(fs); final int total = statuses.size(); final Path[] paths = new Path[total]; for (int i=0; i<total; i++) { paths[i] = statuses.get(i).getPath(); } final List<FileStatus> allStats = fs.list(true, paths); final List<FileStatus> nonDirectories = Lists.newArrayList(Iterables.filter(allStats, new Predicate<FileStatus>() { @Override public boolean apply(@Nullable FileStatus status) { return !status.isDirectory(); } })); final FileSelection fileSel = create(nonDirectories, null, selectionRoot); logger.debug("FileSelection.minusDirectories() took {} ms, numFiles: {}", timer.elapsed(TimeUnit.MILLISECONDS), total); // fileSel will be null if we query an empty folder if (fileSel != null) { fileSel.setExpandedFully(); } return fileSel; } public FileStatus getFirstPath(DrillFileSystem fs) throws IOException { return getStatuses(fs).get(0); } public void setExpandedFully() { this.dirStatus = StatusType.EXPANDED_FULLY; } public boolean isExpandedFully() { return dirStatus == StatusType.EXPANDED_FULLY; } public void setExpandedPartial() { this.dirStatus = StatusType.EXPANDED_PARTIAL; } public boolean isExpandedPartial() { return dirStatus == StatusType.EXPANDED_PARTIAL; } public StatusType getDirStatus() { return dirStatus; } public boolean wasAllPartitionsPruned() { return this.wasAllPartitionsPruned; } private static String commonPath(final List<FileStatus> statuses) { if (statuses == null || statuses.isEmpty()) { return ""; } final List<String> files = Lists.newArrayList(); for (final FileStatus status : statuses) { files.add(status.getPath().toString()); } return commonPathForFiles(files); } /** * Returns longest common path for the given list of files. * * @param files list of files. * @return longest common path */ private static String commonPathForFiles(final List<String> files) { if (files == null || files.isEmpty()) { return ""; } final int total = files.size(); final String[][] folders = new String[total][]; int shortest = Integer.MAX_VALUE; for (int i = 0; i < total; i++) { final Path path = new Path(files.get(i)); folders[i] = Path.getPathWithoutSchemeAndAuthority(path).toString().split(PATH_SEPARATOR); shortest = Math.min(shortest, folders[i].length); } int latest; out: for (latest = 0; latest < shortest; latest++) { final String current = folders[0][latest]; for (int i = 1; i < folders.length; i++) { if (!current.equals(folders[i][latest])) { break out; } } } final Path path = new Path(files.get(0)); final URI uri = path.toUri(); final String pathString = buildPath(folders[0], latest); return new Path(uri.getScheme(), uri.getAuthority(), pathString).toString(); } private static String buildPath(final String[] path, final int folderIndex) { final StringBuilder builder = new StringBuilder(); for (int i=0; i<folderIndex; i++) { builder.append(path[i]).append(PATH_SEPARATOR); } builder.deleteCharAt(builder.length()-1); return builder.toString(); } public static FileSelection create(final DrillFileSystem fs, final String parent, final String path) throws IOException { Stopwatch timer = Stopwatch.createStarted(); boolean hasWildcard = path.contains(WILD_CARD); final Path combined = new Path(parent, removeLeadingSlash(path)); final FileStatus[] statuses = fs.globStatus(combined); // note: this would expand wildcards if (statuses == null) { return null; } final FileSelection fileSel = create(Lists.newArrayList(statuses), null, combined.toUri().toString()); logger.debug("FileSelection.create() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS)); if (fileSel == null) { return null; } fileSel.setHadWildcard(hasWildcard); return fileSel; } /** * Creates a {@link FileSelection selection} with the given file statuses/files and selection root. * * @param statuses list of file statuses * @param files list of files * @param root root path for selections * @param cacheFileRoot root path for metadata cache (null for no metadata cache) * @return null if creation of {@link FileSelection} fails with an {@link IllegalArgumentException} * otherwise a new selection. * * @see FileSelection#FileSelection(List, List, String) */ public static FileSelection create(final List<FileStatus> statuses, final List<String> files, final String root, final String cacheFileRoot, final boolean wasAllPartitionsPruned) { final boolean bothNonEmptySelection = (statuses != null && statuses.size() > 0) && (files != null && files.size() > 0); final boolean bothEmptySelection = (statuses == null || statuses.size() == 0) && (files == null || files.size() == 0); if (bothNonEmptySelection || bothEmptySelection) { return null; } final String selectionRoot; if (statuses == null || statuses.isEmpty()) { selectionRoot = commonPathForFiles(files); } else { if (Strings.isNullOrEmpty(root)) { throw new DrillRuntimeException("Selection root is null or empty" + root); } final Path rootPath = handleWildCard(root); final URI uri = statuses.get(0).getPath().toUri(); final Path path = new Path(uri.getScheme(), uri.getAuthority(), rootPath.toUri().getPath()); selectionRoot = path.toString(); } return new FileSelection(statuses, files, selectionRoot, cacheFileRoot, wasAllPartitionsPruned); } public static FileSelection create(final List<FileStatus> statuses, final List<String> files, final String root) { return FileSelection.create(statuses, files, root, null, false); } public static FileSelection createFromDirectories(final List<String> dirPaths, final FileSelection selection, final String cacheFileRoot) { Stopwatch timer = Stopwatch.createStarted(); final String root = selection.getSelectionRoot(); if (Strings.isNullOrEmpty(root)) { throw new DrillRuntimeException("Selection root is null or empty" + root); } if (dirPaths == null || dirPaths.isEmpty()) { throw new DrillRuntimeException("List of directories is null or empty"); } List<String> dirs = Lists.newArrayList(); if (selection.hadWildcard()) { // for wildcard the directory list should have already been expanded for (FileStatus status : selection.getFileStatuses()) { dirs.add(status.getPath().toString()); } } else { for (String s : dirPaths) { dirs.add(s); } } final Path rootPath = handleWildCard(root); // final URI uri = dirPaths.get(0).toUri(); final URI uri = selection.getFileStatuses().get(0).getPath().toUri(); final Path path = new Path(uri.getScheme(), uri.getAuthority(), rootPath.toUri().getPath()); FileSelection fileSel = new FileSelection(null, dirs, path.toString(), cacheFileRoot, false); fileSel.setHadWildcard(selection.hadWildcard()); logger.info("FileSelection.createFromDirectories() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS)); return fileSel; } private static Path handleWildCard(final String root) { if (root.contains(WILD_CARD)) { int idx = root.indexOf(WILD_CARD); // first wild card in the path idx = root.lastIndexOf('/', idx); // file separator right before the first wild card final String newRoot = root.substring(0, idx); return new Path(newRoot); } else { return new Path(root); } } private static String removeLeadingSlash(String path) { if (path.charAt(0) == '/') { String newPath = path.substring(1); return removeLeadingSlash(newPath); } else { return path; } } public List<FileStatus> getFileStatuses() { return statuses; } public boolean supportDirPrunig() { if (isExpandedFully() || isExpandedPartial()) { if (!wasAllPartitionsPruned) { return true; } } return false; } public void setHadWildcard(boolean wc) { this.hadWildcard = wc; } public boolean hadWildcard() { return this.hadWildcard; } public String getCacheFileRoot() { return cacheFileRoot; } public void setMetaContext(MetadataContext context) { metaContext = context; } public MetadataContext getMetaContext() { return metaContext; } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("root=" + this.selectionRoot); sb.append("files=["); boolean isFirst = true; for (final String file : this.files) { if (isFirst) { isFirst = false; sb.append(file); } else { sb.append(","); sb.append(file); } } sb.append("]"); return sb.toString(); } }