/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.metadata.lineage; import co.cask.cdap.proto.Id; import co.cask.cdap.proto.metadata.lineage.CollapseType; import co.cask.cdap.proto.metadata.lineage.DataRecord; import co.cask.cdap.proto.metadata.lineage.LineageRecord; import co.cask.cdap.proto.metadata.lineage.ProgramRecord; import co.cask.cdap.proto.metadata.lineage.RelationRecord; import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import org.apache.twill.api.RunId; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; /** * Serializes {@link Lineage} into a {@link LineageRecord}. */ // TODO: Clean up this class (make data/program keys) when new Id classes are used CDAP-4291 public final class LineageSerializer { private static final Function<Id.NamespacedId, String> ID_STRING_FUNCTION = new Function<Id.NamespacedId, String>() { @Override public String apply(Id.NamespacedId input) { return input.getId(); } }; private static final Function<RunId, String> RUN_ID_STRING_FUNCTION = new Function<RunId, String>() { @Override public String apply(RunId input) { return input.getId(); } }; private static final Function<AccessType, String> ACCESS_TYPE_STRING_FUNCTION = new Function<AccessType, String>() { @Override public String apply(AccessType input) { return input.toString().toLowerCase(); } }; private LineageSerializer() {} public static LineageRecord toLineageRecord(long start, long end, Lineage lineage, Set<CollapseType> collapseTypes) { Set<RelationRecord> relationBuilder = new HashSet<>(); Map<String, ProgramRecord> programBuilder = new HashMap<>(); Map<String, DataRecord> dataBuilder = new HashMap<>(); Set<CollapsedRelation> collapsedRelations = LineageCollapser.collapseRelations(lineage.getRelations(), collapseTypes); for (CollapsedRelation relation : collapsedRelations) { String dataKey = makeDataKey(relation.getData()); String programKey = makeProgramKey(relation.getProgram()); RelationRecord relationRecord = new RelationRecord(dataKey, programKey, convertAccessType(relation.getAccess()), convertRuns(relation.getRuns()), convertComponents(relation.getComponents())); relationBuilder.add(relationRecord); programBuilder.put(programKey, new ProgramRecord(relation.getProgram())); dataBuilder.put(dataKey, new DataRecord(relation.getData())); } return new LineageRecord(start, end, relationBuilder, programBuilder, dataBuilder); } private static Set<String> convertAccessType(Set<AccessType> accessTypes) { return ImmutableSet.copyOf(Iterables.transform(accessTypes, ACCESS_TYPE_STRING_FUNCTION)); } private static Set<String> convertRuns(Set<RunId> runIds) { return ImmutableSet.copyOf((Iterables.transform(runIds, RUN_ID_STRING_FUNCTION))); } private static Set<String> convertComponents(Set<Id.NamespacedId> components) { return Sets.newHashSet(Iterables.transform(components, ID_STRING_FUNCTION)); } private static String makeProgramKey(Id.Program program) { return Joiner.on('.').join(program.getType().getCategoryName().toLowerCase(), program.getNamespaceId(), program.getApplicationId(), program.getId()); } private static String makeDataKey(Id.NamespacedId data) { if (data instanceof Id.DatasetInstance) { return makeDatasetKey((Id.DatasetInstance) data); } if (data instanceof Id.Stream) { return makeStreamKey((Id.Stream) data); } throw new IllegalArgumentException("Unknown data object " + data); } private static String makeDatasetKey(Id.DatasetInstance datasetInstance) { return Joiner.on('.').join("dataset", datasetInstance.getNamespaceId(), datasetInstance.getId()); } private static String makeStreamKey(Id.Stream stream) { return Joiner.on('.').join("stream", stream.getNamespaceId(), stream.getId()); } }