/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.copy;

import java.util.Map;

import lombok.Builder;
import lombok.Getter;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;

import gobblin.data.management.partition.FileSet;
import gobblin.source.workunit.WorkUnit;
import lombok.extern.slf4j.Slf4j;


/**
 * A {@link WorkUnit} container that is bounded, supports concurrent all-or-nothing adds of whole file sets, and
 * supports priority of file sets, i.e. attempting to add a file set with higher priority will automatically evict
 * lower priority {@link gobblin.data.management.partition.FileSet}s if necessary.
 *
 * <p>
 *   File sets in {@link CopySource} are handled as {@link gobblin.data.management.partition.FileSet}, so this class
 *   uses a {@link gobblin.data.management.partition.FileSet} comparator for priority. If
 *   {@code fileSetA < fileSetB}, then fileSetA has higher priority than fileSetB
 *   (similar to {@link java.util.PriorityQueue}).
 * </p>
 *
 * <p>
 *   Two limits apply: a soft limit ({@code maxSize}), which only drives {@link #isFull()}, and a hard limit
 *   ({@code strictMaxSize}), which is never exceeded by {@link #addFileSet}.
 * </p>
 */
@Slf4j
class ConcurrentBoundedWorkUnitList {

  /** File sets ordered by priority (highest priority first), each mapped to its work units. */
  private final TreeMap<FileSet<CopyEntity>, List<WorkUnit>> workUnitsMap;
  /** The priority comparator (user provided, or an all-equal comparator if none was given). */
  @Getter
  private final Comparator<FileSet<CopyEntity>> comparator;
  /** Soft limit: the container reports itself as full once it holds this many work units. */
  private final int maxSize;
  /** Hard limit: the container never holds more than this many work units. */
  private final int strictMaxSize;
  /** Total number of work units currently held; guarded by this object's monitor. */
  private int currentSize;
  /**
   * Set to true the first time a file set is rejected (i.e. doesn't fit in the container).
   * Volatile because it is written and read outside of the monitor.
   */
  private volatile boolean rejectedFileSet;

  /**
   * Comparator used for the keys of the backing {@link TreeMap}. It delegates to the user provided comparator and
   * breaks ties by dataset URN and then by file set name, so that distinct file sets with equal priority do not
   * collapse into a single map key.
   */
  private static class AugmentedComparator implements Comparator<FileSet<CopyEntity>> {
    private final Comparator<FileSet<CopyEntity>> userProvidedComparator;

    public AugmentedComparator(Comparator<FileSet<CopyEntity>> userProvidedComparator) {
      this.userProvidedComparator = userProvidedComparator;
    }

    @Override
    public int compare(FileSet<CopyEntity> p1, FileSet<CopyEntity> p2) {
      int userProvidedCompare = this.userProvidedComparator.compare(p1, p2);
      if (userProvidedCompare != 0) {
        return userProvidedCompare;
      }
      int datasetCompare = p1.getDataset().datasetURN().compareTo(p2.getDataset().datasetURN());
      if (datasetCompare != 0) {
        return datasetCompare;
      }
      return p1.getName().compareTo(p2.getName());
    }
  }

  /**
   * Creates a new {@link ConcurrentBoundedWorkUnitList}.
   * @param maxSize Maximum number of {@link WorkUnit}s to contain.
   * @param comparator {@link Comparator} for {@link gobblin.data.management.partition.FileSet}s to use for
   *                   {@link gobblin.data.management.partition.FileSet} priority. If null, all file sets have the
   *                   same priority.
   * @param strictLimitMultiplier the list will only start rejecting {@link WorkUnit}s if its capacity exceeds
   *                              maxSize * strictLimitMultiplier. If this parameter is &lt; 1, it will be auto-set
   *                              to 1.
   */
  @Builder
  public ConcurrentBoundedWorkUnitList(int maxSize, final Comparator<FileSet<CopyEntity>> comparator,
      double strictLimitMultiplier) {
    this.currentSize = 0;
    this.maxSize = maxSize;
    // Clamp the multiplier to [1, Integer.MAX_VALUE / maxSize] so the hard limit neither drops below the soft
    // limit nor overflows an int.
    double actualStrictLimitMultiplier =
        Math.min((Integer.MAX_VALUE / (double) this.maxSize), Math.max(1.0, strictLimitMultiplier));
    this.strictMaxSize = (int) (this.maxSize * actualStrictLimitMultiplier);
    this.comparator = comparator == null ? new AllEqualComparator<FileSet<CopyEntity>>() : comparator;
    this.workUnitsMap = new TreeMap<>(new AugmentedComparator(this.comparator));
    this.rejectedFileSet = false;
  }

  /**
   * Add a file set to the container.
   * @param fileSet File set, expressed as a {@link gobblin.data.management.partition.FileSet} of {@link CopyEntity}s.
   * @param workUnits List of {@link WorkUnit}s corresponding to this file set. The list is copied, it is never
   *                  retained or modified by this container.
   * @return true if the file set was added to the container, false otherwise (i.e. has reached max size).
   */
  public boolean addFileSet(FileSet<CopyEntity> fileSet, List<WorkUnit> workUnits) {
    boolean addedWorkunits = addFileSetImpl(fileSet, workUnits);
    if (!addedWorkunits) {
      this.rejectedFileSet = true;
    }
    return addedWorkunits;
  }

  /**
   * Adds the file set if it fits under the hard limit, evicting strictly lower priority file sets (lowest priority
   * first) if that is what it takes to make room. The operation is all-or-nothing: if the file set cannot be made
   * to fit, nothing is evicted and nothing is added.
   *
   * @return true if the work units were added, false if the file set was rejected.
   */
  private synchronized boolean addFileSetImpl(FileSet<CopyEntity> fileSet, List<WorkUnit> workUnits) {
    int incomingSize = workUnits.size();

    // A file set that is larger than the hard limit on its own can never fit, no matter how much is evicted.
    // Rejecting it here also guarantees that the eviction loop below only runs on a non-empty map and always
    // terminates by either rejecting or freeing enough room.
    if (incomingSize > this.strictMaxSize) {
      return false;
    }

    if (this.currentSize + incomingSize > this.strictMaxSize) {
      int sizeAfterEviction = this.currentSize;
      Set<FileSet<CopyEntity>> partitionsToDelete = Sets.newHashSet();

      // Walk existing file sets from lowest to highest priority, collecting eviction candidates until the new
      // file set fits. Nothing is removed until we know the eviction will succeed.
      for (Map.Entry<FileSet<CopyEntity>, List<WorkUnit>> candidate
          : this.workUnitsMap.descendingMap().entrySet()) {
        if (this.comparator.compare(candidate.getKey(), fileSet) <= 0) {
          // Only file sets of equal or higher priority are left: the new file set cannot displace them.
          return false;
        }
        sizeAfterEviction -= candidate.getValue().size();
        partitionsToDelete.add(candidate.getKey());
        if (sizeAfterEviction + incomingSize <= this.strictMaxSize) {
          break;
        }
      }

      for (FileSet<CopyEntity> fileSetToRemove : partitionsToDelete) {
        List<WorkUnit> workUnitsRemoved = this.workUnitsMap.remove(fileSetToRemove);
        this.currentSize -= workUnitsRemoved.size();
      }
    }

    // TreeMap determines key equality using provided comparator. If multiple fileSets have same priority, we need
    // to concat their work units, otherwise only the last one will survive. Obviously, the comparator must be
    // transitive, but it need not be consistent with equals.
    List<WorkUnit> existingWorkUnits = this.workUnitsMap.get(fileSet);
    if (existingWorkUnits == null) {
      // Store a private copy: a later add with an equal key appends to this list, which must neither mutate the
      // caller's list nor fail because the caller passed an immutable one.
      this.workUnitsMap.put(fileSet, new ArrayList<>(workUnits));
    } else {
      existingWorkUnits.addAll(workUnits);
    }

    this.currentSize += incomingSize;
    log.info(String.format("Added %d work units to bounded list. Total size: %d, soft limit: %d, hard limit: %d.",
        workUnits.size(), this.currentSize, this.maxSize, this.strictMaxSize));
    return true;
  }

  /**
   * @return Whether any calls to {@link #addFileSet} have returned false, i.e. some file set has been rejected due
   * to strict capacity issues.
   */
  public boolean hasRejectedFileSet() {
    return this.rejectedFileSet;
  }

  /**
   * @return Whether the list has reached its max size (the soft limit, not the strict one).
   */
  public synchronized boolean isFull() {
    return this.currentSize >= this.maxSize;
  }

  /**
   * Get the {@link List} of {@link WorkUnit}s in this container.
   * @return an immutable snapshot of all contained {@link WorkUnit}s, ordered by file set priority.
   */
  public synchronized List<WorkUnit> getWorkUnits() {
    // Synchronized so that the map is not modified by a concurrent addFileSet while it is being iterated.
    ImmutableList.Builder<WorkUnit> allWorkUnits = ImmutableList.builder();
    for (List<WorkUnit> workUnits : this.workUnitsMap.values()) {
      allWorkUnits.addAll(workUnits);
    }
    return allWorkUnits.build();
  }

  /**
   * Get the raw map backing this object. The returned map is the live internal map, not a copy, and accessing it
   * is not synchronized with {@link #addFileSet}.
   */
  public Map<FileSet<CopyEntity>, List<WorkUnit>> getRawWorkUnitMap() {
    return this.workUnitsMap;
  }
}