/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.data.management.partition; import java.io.IOException; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import com.google.common.base.Function; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import gobblin.data.management.copy.CopyConfiguration; import gobblin.data.management.copy.CopyEntity; import gobblin.data.management.copy.CopyableDataset; import gobblin.data.management.copy.CopyableDatasetBase; import gobblin.data.management.copy.IterableCopyableDataset; import gobblin.data.management.copy.IterableCopyableDatasetImpl; import gobblin.data.management.copy.prioritization.PrioritizedCopyableDataset; import gobblin.util.request_allocation.PushDownRequestor; import javax.annotation.Nullable; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.extern.slf4j.Slf4j; /** * A wrapper around a {@link CopyableDatasetBase} that makes it a {@link PushDownRequestor} for prioritization. */ @Slf4j @AllArgsConstructor @Getter public class CopyableDatasetRequestor implements PushDownRequestor<FileSet<CopyEntity>> { @AllArgsConstructor public static class Factory implements Function<CopyableDatasetBase, CopyableDatasetRequestor> { private final FileSystem targetFs; private final CopyConfiguration copyConfiguration; private final Logger log; @Nullable @Override public CopyableDatasetRequestor apply(CopyableDatasetBase input) { IterableCopyableDataset iterableCopyableDataset; if (input instanceof IterableCopyableDataset) { iterableCopyableDataset = (IterableCopyableDataset) input; } else if (input instanceof CopyableDataset) { iterableCopyableDataset = new IterableCopyableDatasetImpl((CopyableDataset) input); } else { log.error(String.format("Cannot process %s, can only copy %s or %s.", input == null ? null : input.getClass().getName(), CopyableDataset.class.getName(), IterableCopyableDataset.class.getName())); return null; } return new CopyableDatasetRequestor(this.targetFs, this.copyConfiguration, iterableCopyableDataset); } } private final FileSystem targetFs; private final CopyConfiguration copyConfiguration; private final IterableCopyableDataset dataset; @Override public Iterator<FileSet<CopyEntity>> iterator() { try { return injectRequestor(this.dataset.getFileSetIterator(this.targetFs, this.copyConfiguration)); } catch (Throwable exc) { if (copyConfiguration.isAbortOnSingleDatasetFailure()) { throw new RuntimeException(String.format("Could not get FileSets for dataset %s", this.dataset.datasetURN()), exc); } log.error(String.format("Could not get FileSets for dataset %s. Skipping.", this.dataset.datasetURN()), exc); return Iterators.emptyIterator(); } } @Override public Iterator<FileSet<CopyEntity>> getRequests(Comparator<FileSet<CopyEntity>> prioritizer) throws IOException { if (this.dataset instanceof PrioritizedCopyableDataset) { return ((PrioritizedCopyableDataset) this.dataset) .getFileSetIterator(this.targetFs, this.copyConfiguration, prioritizer, this); } List<FileSet<CopyEntity>> entities = Lists.newArrayList(injectRequestor(this.dataset.getFileSetIterator(this.targetFs, this.copyConfiguration))); Collections.sort(entities, prioritizer); return entities.iterator(); } private Iterator<FileSet<CopyEntity>> injectRequestor(Iterator<FileSet<CopyEntity>> iterator) { return Iterators.transform(iterator, new Function<FileSet<CopyEntity>, FileSet<CopyEntity>>() { @Override public FileSet<CopyEntity> apply(FileSet<CopyEntity> input) { input.setRequestor(CopyableDatasetRequestor.this); return input; } }); } }