/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.falcon.metadata; import com.tinkerpop.blueprints.Graph; import com.tinkerpop.blueprints.Vertex; import org.apache.falcon.FalconException; import org.apache.falcon.entity.CatalogStorage; import org.apache.falcon.entity.FeedHelper; import org.apache.falcon.entity.Storage; import org.apache.falcon.entity.common.FeedDataPath; import org.apache.falcon.entity.store.ConfigurationStore; import org.apache.falcon.entity.v0.EntityType; import org.apache.falcon.entity.v0.SchemaHelper; import org.apache.falcon.entity.v0.cluster.Cluster; import org.apache.falcon.entity.v0.feed.Feed; import org.apache.falcon.entity.v0.feed.LocationType; import org.apache.falcon.entity.v0.process.Process; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.net.URISyntaxException; import java.util.Map; /** * Instance Metadata relationship mapping helper. */ public class InstanceRelationshipGraphBuilder extends RelationshipGraphBuilder { private static final Logger LOG = LoggerFactory.getLogger(InstanceRelationshipGraphBuilder.class); private static final String PROCESS_INSTANCE_FORMAT = "yyyy-MM-dd-HH-mm"; // nominal time private static final String FEED_INSTANCE_FORMAT = "yyyyMMddHHmm"; // computed // process workflow properties from message private static final String[] INSTANCE_WORKFLOW_PROPERTIES = { LineageArgs.USER_WORKFLOW_NAME.getOptionName(), LineageArgs.USER_WORKFLOW_ENGINE.getOptionName(), LineageArgs.WORKFLOW_ID.getOptionName(), LineageArgs.RUN_ID.getOptionName(), LineageArgs.STATUS.getOptionName(), LineageArgs.WF_ENGINE_URL.getOptionName(), LineageArgs.USER_SUBFLOW_ID.getOptionName(), }; public InstanceRelationshipGraphBuilder(Graph graph, boolean preserveHistory) { super(graph, preserveHistory); } public Vertex addProcessInstance(Map<String, String> lineageMetadata) throws FalconException { String entityName = lineageMetadata.get(LineageArgs.ENTITY_NAME.getOptionName()); String processInstanceName = getProcessInstanceName(entityName, lineageMetadata.get(LineageArgs.NOMINAL_TIME.getOptionName())); LOG.info("Adding process instance: {}", processInstanceName); String timestamp = getTimestamp(lineageMetadata); Vertex processInstance = addVertex(processInstanceName, RelationshipType.PROCESS_INSTANCE, timestamp); addWorkflowInstanceProperties(processInstance, lineageMetadata); addInstanceToEntity(processInstance, entityName, RelationshipType.PROCESS_ENTITY, RelationshipLabel.INSTANCE_ENTITY_EDGE); addInstanceToEntity(processInstance, lineageMetadata.get(LineageArgs.CLUSTER.getOptionName()), RelationshipType.CLUSTER_ENTITY, RelationshipLabel.PROCESS_CLUSTER_EDGE); addInstanceToEntity(processInstance, lineageMetadata.get(LineageArgs.WORKFLOW_USER.getOptionName()), RelationshipType.USER, RelationshipLabel.USER); if (isPreserveHistory()) { Process process = ConfigurationStore.get().get(EntityType.PROCESS, entityName); addDataClassification(process.getTags(), processInstance); } return processInstance; } private String getTimestamp(Map<String, String> lineageMetadata) { String timestamp = lineageMetadata.get(LineageArgs.TIMESTAMP.getOptionName()); return SchemaHelper.formatDateUTCToISO8601(timestamp, PROCESS_INSTANCE_FORMAT); } public void addWorkflowInstanceProperties(Vertex processInstance, Map<String, String> lineageMetadata) { for (String instanceWorkflowProperty : INSTANCE_WORKFLOW_PROPERTIES) { addProperty(processInstance, lineageMetadata, instanceWorkflowProperty); } processInstance.setProperty(RelationshipProperty.VERSION.getName(), lineageMetadata.get(LineageArgs.USER_WORKFLOW_VERSION.getOptionName())); } public String getProcessInstanceName(String entityName, String nominalTime) { return entityName + "/" + SchemaHelper.formatDateUTCToISO8601(nominalTime, PROCESS_INSTANCE_FORMAT); } public void addInstanceToEntity(Vertex instanceVertex, String entityName, RelationshipType entityType, RelationshipLabel edgeLabel) { Vertex entityVertex = findVertex(entityName, entityType); LOG.info("Vertex exists? name={}, type={}, v={}", entityName, entityType, entityVertex); if (entityVertex == null) { // todo - throw new IllegalStateException(entityType + " entity vertex must exist " + entityName); LOG.error("Illegal State: {} vertex must exist for {}", entityType, entityName); return; } addEdge(instanceVertex, entityVertex, edgeLabel.getName()); } public void addOutputFeedInstances(Map<String, String> lineageMetadata, Vertex processInstance) throws FalconException { String outputFeedNamesArg = lineageMetadata.get(LineageArgs.FEED_NAMES.getOptionName()); if ("NONE".equals(outputFeedNamesArg)) { return; // there are no output feeds for this process } String[] outputFeedNames = outputFeedNamesArg.split(","); String[] outputFeedInstancePaths = lineageMetadata.get(LineageArgs.FEED_INSTANCE_PATHS.getOptionName()).split(","); for (int index = 0; index < outputFeedNames.length; index++) { String feedName = outputFeedNames[index]; String feedInstanceDataPath = outputFeedInstancePaths[index]; addFeedInstance(processInstance, RelationshipLabel.PROCESS_FEED_EDGE, lineageMetadata, feedName, feedInstanceDataPath); } } public void addInputFeedInstances(Map<String, String> lineageMetadata, Vertex processInstance) throws FalconException { String inputFeedNamesArg = lineageMetadata.get(LineageArgs.INPUT_FEED_NAMES.getOptionName()); if ("NONE".equals(inputFeedNamesArg)) { return; // there are no input feeds for this process } String[] inputFeedNames = lineageMetadata.get(LineageArgs.INPUT_FEED_NAMES.getOptionName()).split("#"); // Each input feed is separated by # String[] inputFeedInstancePaths = lineageMetadata.get(LineageArgs.INPUT_FEED_PATHS.getOptionName()).split("#"); for (int index = 0; index < inputFeedNames.length; index++) { String inputFeedName = inputFeedNames[index]; String inputFeedInstancePath = inputFeedInstancePaths[index]; // Multiple instance paths for a given feed is separated by "," String[] feedInstancePaths = inputFeedInstancePath.split(","); for (String feedInstanceDataPath : feedInstancePaths) { addFeedInstance(processInstance, RelationshipLabel.FEED_PROCESS_EDGE, lineageMetadata, inputFeedName, feedInstanceDataPath); } } } private void addFeedInstance(Vertex processInstance, RelationshipLabel edgeLabel, Map<String, String> lineageMetadata, String feedName, String feedInstanceDataPath) throws FalconException { String clusterName = lineageMetadata.get(LineageArgs.CLUSTER.getOptionName()); LOG.info("Computing feed instance for : name=" + feedName + ", path= " + feedInstanceDataPath + ", in cluster: " + clusterName); String feedInstanceName = getFeedInstanceName(feedName, clusterName, feedInstanceDataPath); LOG.info("Adding feed instance: " + feedInstanceName); Vertex feedInstance = addVertex(feedInstanceName, RelationshipType.FEED_INSTANCE, getTimestamp(lineageMetadata)); addProcessFeedEdge(processInstance, feedInstance, edgeLabel); addInstanceToEntity(feedInstance, feedName, RelationshipType.FEED_ENTITY, RelationshipLabel.INSTANCE_ENTITY_EDGE); addInstanceToEntity(feedInstance, clusterName, RelationshipType.CLUSTER_ENTITY, RelationshipLabel.FEED_CLUSTER_EDGE); addInstanceToEntity(feedInstance, lineageMetadata.get(LineageArgs.WORKFLOW_USER.getOptionName()), RelationshipType.USER, RelationshipLabel.USER); if (isPreserveHistory()) { Feed feed = ConfigurationStore.get().get(EntityType.FEED, feedName); addDataClassification(feed.getTags(), feedInstance); addGroups(feed.getGroups(), feedInstance); } } public String getFeedInstanceName(String feedName, String clusterName, String feedInstancePath) throws FalconException { try { Feed feed = ConfigurationStore.get().get(EntityType.FEED, feedName); Cluster cluster = ConfigurationStore.get().get(EntityType.CLUSTER, clusterName); Storage.TYPE storageType = FeedHelper.getStorageType(feed, cluster); return storageType == Storage.TYPE.TABLE ? getTableFeedInstanceName(feed, feedInstancePath, storageType) : getFileSystemFeedInstanceName(feedInstancePath, feed, cluster); } catch (URISyntaxException e) { throw new FalconException(e); } } private String getTableFeedInstanceName(Feed feed, String feedInstancePath, Storage.TYPE storageType) throws URISyntaxException { CatalogStorage instanceStorage = (CatalogStorage) FeedHelper.createStorage( storageType.name(), feedInstancePath); return feed.getName() + "/" + instanceStorage.toPartitionAsPath(); } private String getFileSystemFeedInstanceName(String feedInstancePath, Feed feed, Cluster cluster) throws FalconException { Storage rawStorage = FeedHelper.createStorage(cluster, feed); String feedPathTemplate = rawStorage.getUriTemplate(LocationType.DATA); String instance = feedInstancePath; String[] elements = FeedDataPath.PATTERN.split(feedPathTemplate); for (String element : elements) { instance = instance.replaceFirst(element, ""); } return feed.getName() + "/" + SchemaHelper.formatDateUTCToISO8601(instance, FEED_INSTANCE_FORMAT); } }