/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.data2.metadata.lineage;

import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.metadata.lineage.CollapseType;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import org.apache.twill.api.RunId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Static utility that merges {@link Relation Relations} into {@link CollapsedRelation}s
 * according to a set of {@link CollapseType}s.
 */
public final class LineageCollapser {

  private static final Logger LOG = LoggerFactory.getLogger(LineageCollapser.class);

  private LineageCollapser() {
    // static utility; not meant to be instantiated
  }

  /**
   * Groups the given relations by a key made of their data, their program and every attribute
   * (access, run, components) that is <em>not</em> listed in {@code collapseTypes}, then merges
   * each group into a single {@link CollapsedRelation}.
   *
   * @param relations lineage relations to collapse
   * @param collapseTypes attributes whose differences are ignored when grouping
   * @return one collapsed relation per group
   */
  public static Set<CollapsedRelation> collapseRelations(Iterable<Relation> relations,
                                                         Set<CollapseType> collapseTypes) {
    Multimap<CollapseKey, Relation> grouped = HashMultimap.create();
    for (Relation relation : relations) {
      grouped.put(getCollapseKey(relation, collapseTypes), relation);
    }

    Map<CollapseKey, Collection<Relation>> groups = grouped.asMap();
    LOG.trace("Collapsed relations: {}", groups);

    Set<CollapsedRelation> result = new HashSet<>();
    for (Map.Entry<CollapseKey, Collection<Relation>> group : groups.entrySet()) {
      result.add(mergeGroup(group.getKey(), group.getValue()));
    }
    return result;
  }

  /**
   * Merges all relations that share {@code key} into one collapsed relation by taking the union
   * of their access types, runs and components.
   */
  private static CollapsedRelation mergeGroup(CollapseKey key, Collection<Relation> members) {
    Set<AccessType> accessTypes = new HashSet<>();
    Set<RunId> runs = new HashSet<>();
    Set<Id.NamespacedId> components = new HashSet<>();
    for (Relation member : members) {
      accessTypes.add(member.getAccess());
      runs.add(member.getRun());
      components.addAll(member.getComponents());
    }
    return toCollapsedRelation(key.data, key.program, accessTypes, runs, components);
  }

  /**
   * Builds the grouping key for a relation. An attribute named in {@code collapseTypes} is left
   * {@code null} in the key, so relations differing only in that attribute get equal keys.
   */
  private static CollapseKey getCollapseKey(Relation relation, Set<CollapseType> collapseTypes) {
    Id.NamespacedId data = relation.getData();
    Id.Program program = relation.getProgram();
    AccessType access = collapseTypes.contains(CollapseType.ACCESS) ? null : relation.getAccess();
    RunId run = collapseTypes.contains(CollapseType.RUN) ? null : relation.getRun();
    Set<Id.NamespacedId> components =
      collapseTypes.contains(CollapseType.COMPONENT) ? null : relation.getComponents();
    return new CollapseKey(data, program, access, run, components);
  }

  /**
   * Creates a {@link CollapsedRelation} using the constructor that matches the runtime type of
   * {@code data}.
   *
   * @throws IllegalStateException if {@code data} is neither a dataset instance nor a stream
   */
  private static CollapsedRelation toCollapsedRelation(Id.NamespacedId data, Id.Program program,
                                                       Set<AccessType> accesses, Set<RunId> runs,
                                                       Set<Id.NamespacedId> components) {
    if (data instanceof Id.DatasetInstance) {
      return new CollapsedRelation((Id.DatasetInstance) data, program, accesses, runs, components);
    }
    // Not a dataset, so the only remaining valid type is a stream.
    Preconditions.checkState(data instanceof Id.Stream,
                             "%s should be an instance of dataset or stream", data);
    return new CollapsedRelation((Id.Stream) data, program, accesses, runs, components);
  }

  /**
   * Value object used as the grouping key; equality and hash code cover all five fields.
   */
  private static final class CollapseKey {
    private final Id.NamespacedId data;
    private final Id.Program program;
    private final AccessType access;
    private final RunId run;
    private final Set<? extends Id.NamespacedId> components;

    CollapseKey(Id.NamespacedId data, Id.Program program, AccessType access, RunId run,
                Set<? extends Id.NamespacedId> components) {
      this.data = data;
      this.program = program;
      this.access = access;
      this.run = run;
      this.components = components;
    }

    @Override
    public boolean equals(Object o) {
      if (o == this) {
        return true;
      }
      // The class is final, so instanceof matches exactly the same objects as a getClass() check.
      if (!(o instanceof CollapseKey)) {
        return false;
      }
      CollapseKey other = (CollapseKey) o;
      if (!Objects.equals(data, other.data)) {
        return false;
      }
      if (!Objects.equals(program, other.program)) {
        return false;
      }
      if (!Objects.equals(access, other.access)) {
        return false;
      }
      if (!Objects.equals(run, other.run)) {
        return false;
      }
      return Objects.equals(components, other.components);
    }

    @Override
    public int hashCode() {
      return Objects.hash(data, program, access, run, components);
    }

    @Override
    public String toString() {
      StringBuilder text = new StringBuilder("CollapseKey{");
      text.append("data=").append(data);
      text.append(", program=").append(program);
      text.append(", access=").append(access);
      text.append(", run=").append(run);
      text.append(", components=").append(components);
      text.append('}');
      return text.toString();
    }
  }
}