/* * Copyright 2013 Cloudera. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.spi.filesystem; import com.google.common.base.Predicate; import org.apache.avro.Schema; import org.kitesdk.data.DatasetException; import org.kitesdk.data.impl.Accessor; import org.kitesdk.data.PartitionStrategy; import org.kitesdk.data.spi.StorageKey; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Iterators; import com.google.common.collect.Sets; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Set; class FileSystemPartitionIterator implements Iterator<StorageKey>, Iterable<StorageKey> { private static final Logger LOG = LoggerFactory.getLogger( FileSystemPartitionIterator.class); private final FileSystem fs; private final Path rootDirectory; private final Iterator<StorageKey> iterator; private final PartitionStrategy strategy; private final Schema schema; private final Predicate<StorageKey> predicate; private final MakePartialKey makeKey; class FileSystemIterator extends MultiLevelIterator<String> { public FileSystemIterator(int depth) throws IOException { super(depth); } @Override @SuppressWarnings({"unchecked", "deprecation"}) public Iterable<String> getLevel(List<String> current) { final Set<String> dirs = Sets.newLinkedHashSet(); Path dir = rootDirectory; for (int i = 0, n = current.size(); i < n; i += 1) { dir = new Path(dir, current.get(i)); } StorageKey key = makeKey.apply(dir); boolean applies = true; //if the key doesn't have any values then do not apply the predicate and assume it is ok. The official //predicate applied on the full path will exclude the details. if(key.size() > 0){ applies = predicate.apply(key); }else{ LOG.debug("Not applying predicate proactively because path {} does not have any key values.", dir); } if(applies) { try { for (FileStatus stat : fs.listStatus(dir, PathFilters.notHidden())) { if (stat.isDir()) { // TODO: add a check here for range.couldContain(Marker) dirs.add(stat.getPath().getName()); } } } catch (IOException ex) { throw new DatasetException("Cannot list directory:" + dir, ex); } }else{ LOG.debug("Skipping exploring {} path as it did not match the predicate {}", dir, predicate); } return dirs; } } /** * Conversion function to transform a List into a {@link StorageKey}. */ private static class MakeKey implements Function<List<String>, StorageKey> { private final StorageKey reusableKey; private final PathConversion convert; public MakeKey(PartitionStrategy strategy, Schema schema) { this.reusableKey = new StorageKey(strategy); this.convert = new PathConversion(schema); } @Override @SuppressWarnings("unchecked") @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="NP_PARAMETER_MUST_BE_NONNULL_BUT_MARKED_AS_NULLABLE", justification="Non-null @Nullable parameter inherited from Function") public StorageKey apply(List<String> dirs) { return reusableKey.reuseFor(dirs, convert); } public StorageKey apply(Path path) { return reusableKey.reuseFor(path, convert); } } /** * Function that will create a {@link StorageKey} based on the values available instead * of requiring the entire path to create a key. */ private static class MakePartialKey implements Function<List<String>, StorageKey> { private final StorageKey reusableKey; private final PathConversion convert; public MakePartialKey(Path rootPath, PartitionStrategy strategy, Schema schema) { this.reusableKey = new StorageKey(strategy); this.convert = new PartialPathConversion(rootPath, schema); } @Override @SuppressWarnings("unchecked") @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="NP_PARAMETER_MUST_BE_NONNULL_BUT_MARKED_AS_NULLABLE", justification="Non-null @Nullable parameter inherited from Function") public StorageKey apply(List<String> dirs) { return reusableKey.reuseFor(dirs, convert); } public StorageKey apply(Path path) { return reusableKey.reuseFor(path, convert); } } @SuppressWarnings("deprecation") FileSystemPartitionIterator( FileSystem fs, Path root, PartitionStrategy strategy, Schema schema, final Predicate<StorageKey> predicate) throws IOException { Preconditions.checkArgument(fs.isDirectory(root)); this.fs = fs; this.strategy = strategy; this.schema = schema; this.predicate = predicate; this.rootDirectory = root; this.makeKey = new MakePartialKey(rootDirectory, strategy, schema); this.iterator = Iterators.filter( Iterators.transform( new FileSystemIterator( Accessor.getDefault().getFieldPartitioners(strategy).size()), new MakeKey(strategy, schema)), predicate); } @Override public boolean hasNext() { return iterator.hasNext(); } @Override public StorageKey next() { return iterator.next(); } @Override public void remove() { iterator.remove(); } @Override public Iterator<StorageKey> iterator() { return this; } }