/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.data.management.partition; import lombok.AccessLevel; import lombok.Builder; import lombok.Data; import lombok.Getter; import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.Setter; import lombok.Singular; import java.io.IOException; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.concurrent.Callable; import com.beust.jcommander.internal.Maps; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import gobblin.data.management.copy.CopyEntity; import gobblin.data.management.copy.CopyableFile; import gobblin.dataset.Dataset; import gobblin.util.request_allocation.Request; import gobblin.util.request_allocation.Requestor; /** * A named subset of {@link File}s in a {@link Dataset}. (Useful for partitions, versions, etc.). * * The actual list of files in this {@link FileSet} is, in ideal circumstances, generated lazily. As such, the method * {@link #getFiles()} should only be called when the actual list of files is needed. 
*/
@RequiredArgsConstructor(access = AccessLevel.PROTECTED)
public abstract class FileSet<T extends CopyEntity> implements Request<FileSet<CopyEntity>> {

  /**
   * A builder for {@link StaticFileSet} provided for backwards compatibility. The output of this builder is not lazy.
   */
  public static class Builder<T extends CopyEntity> {

    private final String name;
    private final List<T> files = Lists.newArrayList();
    private final Dataset dataset;

    /**
     * @param name name of the file set to build; must not be null.
     * @param dataset dataset the file set is associated with; it is not validated here.
     * @throws RuntimeException if {@code name} is null.
     */
    public Builder(String name, Dataset dataset) {
      if (name == null) {
        throw new RuntimeException("Name cannot be null.");
      }
      this.name = name;
      this.dataset = dataset;
    }

    /**
     * Appends a single entity to the file set being built.
     *
     * @return this builder, for chaining.
     */
    public Builder<T> add(T t) {
      this.files.add(t);
      return this;
    }

    /**
     * Appends every entity in {@code collection} to the file set being built.
     *
     * @return this builder, for chaining.
     */
    public Builder<T> add(Collection<T> collection) {
      this.files.addAll(collection);
      return this;
    }

    /**
     * @return a {@link StaticFileSet} created from the name, dataset and entities accumulated so far.
     */
    public FileSet<T> build() {
      return new StaticFileSet<>(this.name, this.dataset, this.files);
    }
  }

  @Getter
  @NonNull
  private final String name;
  @Getter
  private final Dataset dataset;

  /** Entities produced by {@link #generateCopyEntities()}; {@code null} until they have been generated. */
  private ImmutableList<T> generatedEntities;
  /** Cached total size in bytes; negative while not computed. */
  private long totalSize = -1;
  /** Cached number of entities; negative while not computed. */
  private int totalEntities = -1;

  @Setter
  @Getter
  private Requestor<FileSet<CopyEntity>> requestor;

  /**
   * Returns the entities of this file set, generating them on the first call.
   *
   * @return the generated entities as an immutable list.
   * @throws RuntimeException if generating the entities fails.
   */
  public ImmutableList<T> getFiles() {
    ensureFilesGenerated();
    return this.generatedEntities;
  }

  /**
   * Returns the sum of the origin lengths of all {@link CopyableFile}s in this file set. Entities that are not
   * {@link CopyableFile}s do not contribute. Calling this method generates the entities if necessary.
   *
   * @throws RuntimeException if generating the entities fails.
   */
  public long getTotalSizeInBytes() {
    ensureStatsComputed();
    return this.totalSize;
  }

  /**
   * Returns the number of entities in this file set. Calling this method generates the entities if necessary.
   *
   * @throws RuntimeException if generating the entities fails.
   */
  public int getTotalEntities() {
    ensureStatsComputed();
    return this.totalEntities;
  }

  /**
   * Generates and caches the entities if that has not happened yet, then computes the cached statistics.
   * Any exception raised during generation is wrapped in a {@link RuntimeException}.
   */
  private void ensureFilesGenerated() {
    if (this.generatedEntities == null) {
      try {
        this.generatedEntities = ImmutableList.copyOf(generateCopyEntities());
      } catch (Exception exc) {
        throw new RuntimeException("Failed to generate files for file set " + name, exc);
      }
      recomputeStats();
    }
  }

  /**
   * Makes sure the entities exist and that both cached statistics hold a computed (non-negative) value.
   */
  private void ensureStatsComputed() {
    ensureFilesGenerated();
    if (this.totalEntities < 0 || this.totalSize < 0) {
      recomputeStats();
    }
  }

  /**
   * Recomputes the entity count and total size from {@link #generatedEntities}.
   *
   * The size is accumulated in a local variable and the fields are only written once the whole traversal has
   * succeeded. If reading an entity fails midway, the cached statistics keep their previous values instead of
   * exposing a partial sum that later calls would treat as valid.
   */
  private void recomputeStats() {
    long sizeInBytes = 0;
    for (CopyEntity copyEntity : this.generatedEntities) {
      if (copyEntity instanceof CopyableFile) {
        sizeInBytes += ((CopyableFile) copyEntity).getOrigin().getLen();
      }
    }
    this.totalEntities = this.generatedEntities.size();
    this.totalSize = sizeInBytes;
  }

  /**
   * This method is called lazily when needed and only once, it is intended to do the heavy work of generating the
   * {@link CopyEntity}s.
   * @return The {@link Collection} of {@link CopyEntity}s in this file set.
   * @throws IOException
   */
  protected abstract Collection<T> generateCopyEntities() throws IOException;

  /**
   * Renders this file set as {@code <dataset URN>@<name>}. The dataset is not required to be non-null, so a
   * missing dataset is rendered as {@code null} rather than causing a {@link NullPointerException}.
   */
  @Override
  public String toString() {
    return (this.dataset == null ? "null" : this.dataset.datasetURN()) + "@" + this.name;
  }
}