/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.aurora.scheduler.storage.log; import java.util.List; import java.util.Map.Entry; import com.google.common.base.Function; import com.google.common.collect.ListMultimap; import com.google.common.collect.Multimaps; import org.apache.aurora.codec.ThriftBinaryCodec.CodingException; import org.apache.aurora.common.inject.TimedInterceptor.Timed; import org.apache.aurora.gen.AssignedTask; import org.apache.aurora.gen.ScheduledTask; import org.apache.aurora.gen.TaskConfig; import org.apache.aurora.gen.storage.DeduplicatedScheduledTask; import org.apache.aurora.gen.storage.DeduplicatedSnapshot; import org.apache.aurora.gen.storage.Snapshot; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.apache.aurora.gen.AssignedTask._Fields.TASK; import static org.apache.aurora.gen.ScheduledTask._Fields.ASSIGNED_TASK; import static org.apache.aurora.gen.storage.Snapshot._Fields.TASKS; /** * Converter between denormalized storage Snapshots and de-duplicated snapshots. * * <p> * For information on the difference in the two formats see the documentation in storage.thrift. */ public interface SnapshotDeduplicator { /** * Convert a Snapshot to the deduplicated format. * * @param snapshot Snapshot to convert. * @return deduplicated snapshot. */ DeduplicatedSnapshot deduplicate(Snapshot snapshot); /** * Restore a deduplicated snapshot to its original denormalized form. * * @param snapshot Deduplicated snapshot to restore. * @return A full snapshot. * @throws CodingException when the input data is corrupt. */ Snapshot reduplicate(DeduplicatedSnapshot snapshot) throws CodingException; class SnapshotDeduplicatorImpl implements SnapshotDeduplicator { private static final Logger LOG = LoggerFactory.getLogger(SnapshotDeduplicatorImpl.class); private static final Function<ScheduledTask, TaskConfig> SCHEDULED_TO_CONFIG = task -> task.getAssignedTask().getTask(); private static ScheduledTask deepCopyWithoutTaskConfig(ScheduledTask scheduledTask) { ScheduledTask scheduledTaskCopy = new ScheduledTask(); for (ScheduledTask._Fields scheduledTaskField : ScheduledTask._Fields.values()) { if (scheduledTaskField == ASSIGNED_TASK) { AssignedTask assignedTask = scheduledTask.getAssignedTask(); AssignedTask assignedTaskCopy = new AssignedTask(); for (AssignedTask._Fields assignedTaskField : AssignedTask._Fields.values()) { // Copy all fields in AssignedTask except the TASK field. if (assignedTaskField != TASK && assignedTask.isSet(assignedTaskField)) { assignedTaskCopy.setFieldValue( assignedTaskField, assignedTask.getFieldValue(assignedTaskField)); } } scheduledTaskCopy.setAssignedTask(assignedTaskCopy); } else if (scheduledTask.isSet(scheduledTaskField)) { scheduledTaskCopy.setFieldValue( scheduledTaskField, scheduledTask.getFieldValue(scheduledTaskField)); } } return scheduledTaskCopy.deepCopy(); } // NOTE: We intentionally try to minimize the number of copies of the Snapshot#tasks field // we make. The simpler implementation of deepCopy followed by unsetTasks creates a // lot of GC pressure. private static Snapshot deepCopyWithoutTasks(Snapshot snapshot) { Snapshot snapshotCopy = new Snapshot(); for (Snapshot._Fields field : Snapshot._Fields.values()) { if (field != TASKS && snapshot.isSet(field)) { snapshotCopy.setFieldValue(field, snapshot.getFieldValue(field)); } } return snapshotCopy.deepCopy(); } @Override @Timed("snapshot_deduplicate") public DeduplicatedSnapshot deduplicate(Snapshot snapshot) { int numInputTasks = snapshot.getTasksSize(); LOG.info("Starting deduplication of a snapshot with {} tasks.", numInputTasks); DeduplicatedSnapshot deduplicatedSnapshot = new DeduplicatedSnapshot() .setPartialSnapshot(deepCopyWithoutTasks(snapshot)); // Nothing to do if we don't have any input tasks. if (!snapshot.isSetTasks()) { LOG.warn("Got snapshot with unset tasks field."); return deduplicatedSnapshot; } // Match each unique TaskConfig to its hopefully-multiple ScheduledTask owners. ListMultimap<TaskConfig, ScheduledTask> index = Multimaps.index( snapshot.getTasks(), SCHEDULED_TO_CONFIG); for (Entry<TaskConfig, List<ScheduledTask>> entry : Multimaps.asMap(index).entrySet()) { deduplicatedSnapshot.addToTaskConfigs(entry.getKey()); for (ScheduledTask scheduledTask : entry.getValue()) { deduplicatedSnapshot.addToPartialTasks(new DeduplicatedScheduledTask() .setPartialScheduledTask(deepCopyWithoutTaskConfig(scheduledTask)) .setTaskConfigId(deduplicatedSnapshot.getTaskConfigsSize() - 1)); } } int numOutputTasks = deduplicatedSnapshot.getTaskConfigsSize(); LOG.info(String.format( "Finished deduplicating snapshot. Deduplication ratio: %d/%d = %.2f%%.", numInputTasks, numOutputTasks, 100.0 * numInputTasks / numOutputTasks)); return deduplicatedSnapshot; } @Override @Timed("snapshot_reduplicate") public Snapshot reduplicate(DeduplicatedSnapshot deduplicatedSnapshot) throws CodingException { LOG.info("Starting reduplication."); Snapshot snapshot = new Snapshot(deduplicatedSnapshot.getPartialSnapshot()); if (!deduplicatedSnapshot.isSetTaskConfigs()) { LOG.warn("Got deduplicated snapshot with unset task configs."); return snapshot; } for (DeduplicatedScheduledTask partialTask : deduplicatedSnapshot.getPartialTasks()) { ScheduledTask scheduledTask = new ScheduledTask(partialTask.getPartialScheduledTask()); int taskConfigId = partialTask.getTaskConfigId(); TaskConfig config; try { config = deduplicatedSnapshot.getTaskConfigs().get(taskConfigId); } catch (IndexOutOfBoundsException e) { throw new CodingException( "DeduplicatedScheduledTask referenced invalid task index " + taskConfigId, e); } scheduledTask.getAssignedTask().setTask(config); snapshot.addToTasks(scheduledTask); } int numInputTasks = deduplicatedSnapshot.getTaskConfigsSize(); int numOutputTasks = snapshot.getTasksSize(); LOG.info(String.format( "Finished reduplicating snapshot. Compression ratio: %d/%d = %.2f%%.", numInputTasks, numOutputTasks, 100.0 * numInputTasks / numOutputTasks)); return snapshot; } } }