/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.resource.metadata;
import com.google.common.collect.Sets;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Element;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.VertexQuery;
import com.tinkerpop.blueprints.util.io.graphson.GraphSONMode;
import com.tinkerpop.blueprints.util.io.graphson.GraphSONUtility;
import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconWebException;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.metadata.GraphUtils;
import org.apache.falcon.metadata.RelationshipLabel;
import org.apache.falcon.metadata.RelationshipProperty;
import org.apache.falcon.metadata.RelationshipType;
import org.apache.falcon.monitors.Dimension;
import org.apache.falcon.monitors.Monitored;
import org.apache.falcon.resource.LineageGraphResult;
import org.apache.falcon.util.StartupProperties;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Jersey Resource for lineage metadata operations.
* Implements most of the GET operations of the Rexster API without the indexes.
* https://github.com/tinkerpop/rexster/wiki/Basic-REST-API
*/
@Path("metadata/lineage")
public class LineageMetadataResource extends AbstractMetadataResource {
private static final Logger LOG = LoggerFactory.getLogger(LineageMetadataResource.class);
/**
 * Dumps the whole lineage graph to a JSON file on the server.
 *
 * GET http://host/metadata/lineage/serialize
 * The file is written under the directory configured with *.falcon.graph.serialize.path in the
 * custom startup.properties and is named lineage-graph-TIMESTAMP.json, where TIMESTAMP is the
 * current time in milliseconds.
 * @return An empty OK response once the graph has been dumped; any failure is reported as an
 *         internal server error.
 */
@GET
@Path("/serialize")
@Produces({MediaType.APPLICATION_JSON})
public Response serializeGraph() {
    final String targetDir = StartupProperties.get().getProperty("falcon.graph.serialize.path");
    final String targetFile = targetDir + "/lineage-graph-" + System.currentTimeMillis() + ".json";
    LOG.info("Serialize Graph to: {}", targetFile);

    try {
        GraphUtils.dump(getGraph(), targetFile);
        return Response.ok().build();
    } catch (Exception failure) {
        throw FalconWebException.newAPIException(failure, Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Returns the graph depicting the relationship between the various processes and feeds in a given pipeline.
 *
 * A process belongs to the pipeline when one of the comma separated names in its pipelines attribute
 * equals the requested name (both sides are trimmed before comparing). Every matching process is
 * included exactly once, even if its pipelines attribute repeats the name.
 * @param pipeline Name of the pipeline; must not be blank.
 * @return A json graph whose vertices are the processes of the pipeline and whose edges are the feeds
 *         connecting them.
 */
@GET
@Path("/entities")
@Produces({MediaType.APPLICATION_JSON})
@Monitored(event = "entity-lineage")
public Response getEntityLineageGraph(@Dimension("pipeline") @QueryParam("pipeline") final String pipeline) {
    LOG.info("Get lineage Graph for pipeline:({})", pipeline);
    if (!StringUtils.isNotBlank(pipeline)) {
        throw FalconWebException.newAPIException("Pipeline name can not be blank");
    }

    // Loop invariant: trim the requested name once instead of once per tag.
    final String pipelineName = pipeline.trim();
    List<Process> processes = new ArrayList<>();
    try {
        Collection<String> processNames = ConfigurationStore.get().getEntities(EntityType.PROCESS);
        for (String processName : processNames) {
            Process process = EntityUtil.getEntity(EntityType.PROCESS, processName);
            String tags = process.getPipelines();
            if (StringUtils.isNotEmpty(tags)) {
                for (String tag : tags.split(",")) {
                    if (StringUtils.equals(tag.trim(), pipelineName)) {
                        processes.add(process);
                        // Stop at the first match: a repeated tag must not add the process
                        // (and therefore its vertex and edges) more than once.
                        break;
                    }
                }
            }
        }
    } catch (Exception e) {
        LOG.error("Error while fetching entity lineage: ", e);
        throw FalconWebException.newAPIException(e, Response.Status.INTERNAL_SERVER_ERROR);
    }

    if (processes.isEmpty()) {
        throw FalconWebException.newAPIException("No processes belonging to pipeline " + pipeline);
    }
    return Response.ok(buildJSONGraph(processes)).build();
}
/**
 * Lists every vertex of the lineage graph.
 *
 * GET http://host/metadata/lineage/vertices/all
 * graph.getVertices();
 * @return All vertices in lineage graph, together with their total count.
 */
@GET
@Path("/vertices/all")
@Produces({MediaType.APPLICATION_JSON})
public Response getVertices() {
    LOG.info("Get All Vertices");
    final Iterable<Vertex> allVertices = getGraph().getVertices();
    try {
        return Response.ok(buildJSONResponse(allVertices)).build();
    } catch (JSONException jsonFailure) {
        throw FalconWebException.newAPIException(jsonFailure, Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Looks up one vertex by its unique id.
 *
 * GET http://host/metadata/lineage/vertices/id
 * graph.getVertex(id);
 * @param vertexId The unique id of the vertex; must not be null or empty.
 * @return Vertex with the specified id, serialized under the results key.
 */
@GET
@Path("/vertices/{id}")
@Produces({MediaType.APPLICATION_JSON})
public Response getVertex(@PathParam("id") final String vertexId) {
    LOG.info("Get vertex for vertexId= {}", vertexId);
    validateInputs("Invalid argument: vertex id passed is null or empty.", vertexId);

    // The lookup reports a missing vertex itself, so it does not need the JSON error handling.
    final Vertex match = findVertex(vertexId);
    try {
        final JSONObject payload = new JSONObject();
        final JSONObject vertexJson = GraphSONUtility.jsonFromElement(
                match, getVertexIndexedKeys(), GraphSONMode.NORMAL);
        payload.put(RESULTS, vertexJson);
        return Response.ok(payload).build();
    } catch (JSONException jsonFailure) {
        throw FalconWebException.newAPIException(jsonFailure, Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Fetches a vertex from the graph, failing with a NOT_FOUND metadata resource exception when no
 * vertex has the given id.
 * @param vertexId The unique id of the vertex.
 * @return The vertex with the given id; never null.
 */
private Vertex findVertex(String vertexId) {
    final Vertex found = getGraph().getVertex(vertexId);
    if (found != null) {
        return found;
    }

    final String notFound = "Vertex with [" + vertexId + "] cannot be found.";
    LOG.info(notFound);
    throw FalconWebException.newMetadataResourceException(
            JSONObject.quote(notFound), Response.Status.NOT_FOUND);
}
/**
 * Returns the properties of the vertex with the given unique id.
 * This is NOT a rexster API.
 * <p/>
 * GET http://host/metadata/lineage/vertices/properties/id
 * @param vertexId The unique id of the vertex; must not be null or empty.
 * @param relationships Defaults to false. Pass true if properties derived from the relationships
 *                      of the vertex should be included as well.
 * @return Properties associated with the specified vertex, together with their count.
 */
@GET
@Path("/vertices/properties/{id}")
@Produces({MediaType.APPLICATION_JSON})
public Response getVertexProperties(@PathParam("id") final String vertexId,
                                    @DefaultValue("false") @QueryParam("relationships")
                                    final String relationships) {
    LOG.info("Get vertex for vertexId= {}", vertexId);
    validateInputs("Invalid argument: vertex id passed is null or empty.", vertexId);

    final Vertex match = findVertex(vertexId);
    final boolean withRelationships = Boolean.valueOf(relationships);
    final Map<String, String> properties = getVertexProperties(match, withRelationships);
    try {
        final JSONObject payload = new JSONObject();
        payload.put(RESULTS, new JSONObject(properties));
        payload.put(TOTAL_SIZE, properties.size());
        return Response.ok(payload).build();
    } catch (JSONException jsonFailure) {
        throw FalconWebException.newAPIException(jsonFailure, Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Collects the properties stored on a vertex and, on request, properties derived from the
 * vertices it points to.
 * Relationships are only followed for feed instance and process instance vertices.
 * @param vertex The vertex to read.
 * @param captureRelationships Whether the outgoing edges of the vertex should be followed.
 * @return A new map of property names to values.
 */
private Map<String, String> getVertexProperties(Vertex vertex, boolean captureRelationships) {
    final Map<String, String> properties = new HashMap<String, String>();
    for (String propertyKey : vertex.getPropertyKeys()) {
        properties.put(propertyKey, vertex.<String>getProperty(propertyKey));
    }

    final String typeName = vertex.getProperty(RelationshipProperty.TYPE.getName());
    final RelationshipType vertexType = RelationshipType.fromString(typeName);
    final boolean isInstance = vertexType == RelationshipType.FEED_INSTANCE
            || vertexType == RelationshipType.PROCESS_INSTANCE;
    if (!captureRelationships || !isInstance) {
        return properties;
    }

    // get the properties from relationships
    for (Edge outgoing : vertex.getEdges(Direction.OUT)) {
        addRelationships(vertexType, outgoing.getVertex(Direction.IN), properties);
    }
    return properties;
}
/**
 * Adds the property contributed by one related vertex to the property map of an instance vertex.
 * Cluster and user vertices contribute their name; feed and process entity vertices contribute
 * the tags and groups they point to. Other vertex types are ignored.
 * @param fromVertexType Type of the instance vertex the relationship starts from.
 * @param toVertex The related vertex.
 * @param vertexProperties The map to add to.
 */
private void addRelationships(RelationshipType fromVertexType, Vertex toVertex,
                              Map<String, String> vertexProperties) {
    final String targetName = toVertex.getProperty(RelationshipProperty.NAME.getName());
    final String targetTypeName = toVertex.getProperty(RelationshipProperty.TYPE.getName());

    switch (RelationshipType.fromString(targetTypeName)) {
    case CLUSTER_ENTITY: {
        // the key depends on which kind of instance points at the cluster
        final String clusterKey;
        if (fromVertexType == RelationshipType.FEED_INSTANCE) {
            clusterKey = RelationshipLabel.FEED_CLUSTER_EDGE.getName();
        } else {
            clusterKey = RelationshipLabel.PROCESS_CLUSTER_EDGE.getName();
        }
        vertexProperties.put(clusterKey, targetName);
        break;
    }
    case USER:
        vertexProperties.put(RelationshipLabel.USER.getName(), targetName);
        break;
    case FEED_ENTITY:
    case PROCESS_ENTITY:
        addEntityRelationships(toVertex, vertexProperties);
        break;
    default:
        break;
    }
}
/**
 * Adds the tags and groups an entity vertex points to into the given property map.
 * A tag is stored under the label of its edge, a group under the groups relationship label;
 * vertices of any other type are skipped.
 * @param vertex The entity vertex whose outgoing edges are inspected.
 * @param vertexProperties The map to add to.
 */
private void addEntityRelationships(Vertex vertex, Map<String, String> vertexProperties) {
    for (Edge outgoing : vertex.getEdges(Direction.OUT)) {
        final Vertex target = outgoing.getVertex(Direction.IN);
        final String targetName = target.getProperty(RelationshipProperty.NAME.getName());
        final String targetTypeName = target.getProperty(RelationshipProperty.TYPE.getName());

        final String propertyKey;
        switch (RelationshipType.fromString(targetTypeName)) {
        case TAGS:
            propertyKey = outgoing.getLabel();
            break;
        case GROUPS:
            propertyKey = RelationshipLabel.GROUPS.getName();
            break;
        default:
            // neither a tag nor a group: nothing to record for this edge
            continue;
        }
        vertexProperties.put(propertyKey, targetName);
    }
}
/**
 * Lists the vertices whose property with the given key has the given value.
 * <p/>
 * GET http://host/metadata/lineage/vertices?key=<key>&value=<value>
 * graph.getVertices(key, value);
 * @param key The key to be matched; must not be null or empty.
 * @param value The associated value of the key; must not be null or empty.
 * @return All vertices matching given property key and a value, together with their total count.
 */
@GET
@Path("/vertices")
@Produces({MediaType.APPLICATION_JSON})
public Response getVertices(@QueryParam("key") final String key,
                            @QueryParam("value") final String value) {
    LOG.info("Get vertices for property key= {}, value= {}", key, value);
    validateInputs("Invalid argument: key or value passed is null or empty.", key, value);

    final Iterable<Vertex> matches = getGraph().getVertices(key, value);
    try {
        return Response.ok(buildJSONResponse(matches)).build();
    } catch (JSONException jsonFailure) {
        final String reason = jsonFailure.getMessage();
        throw FalconWebException.newMetadataResourceException(reason,
                Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Get the elements adjacent to a vertex, selected by a direction segment.
 *
 * <br/>
 * Supported values of direction:<br/>
 * out, in, both : the adjacent vertices in that direction<br/>
 * outE, inE, bothE : the edges in that direction<br/>
 * outCount, inCount, bothCount : only the count for that direction, without a results array<br/>
 * outIds, inIds, bothIds : the identifiers of the adjacent vertices in that direction<br/>
 * Any other value is rejected as a bad request.<br/>
 * GET http://host/metadata/lineage/vertices/id/direction
 * graph.getVertex(id).get{Direction}Edges();
 * @param vertexId The id of the vertex; must not be null or empty.
 * @param direction The direction segment, one of the values listed above.
 * @return Adjacent vertices, edges, identifiers or count of the vertex for the specified direction.
 */
@GET
@Path("vertices/{id}/{direction}")
@Produces({MediaType.APPLICATION_JSON})
public Response getVertexEdges(@PathParam("id") String vertexId,
                               @PathParam("direction") String direction) {
    LOG.info("Get vertex edges for vertexId= {}, direction= {}", vertexId, direction);
    // Only emptiness is checked here; the value of direction is validated in VertexQueryArguments.
    validateInputs("Invalid argument: vertex id or direction passed is null or empty.", vertexId, direction);

    final Vertex match = findVertex(vertexId);
    try {
        return getVertexEdges(match, direction);
    } catch (JSONException jsonFailure) {
        final String reason = jsonFailure.getMessage();
        throw FalconWebException.newMetadataResourceException(reason,
                Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Runs the vertex query described by a direction segment and wraps the outcome in a response.
 * The response always carries the total size; the results array is left out for count-only
 * segments.
 * @param vertex The vertex to query from.
 * @param direction The direction segment, parsed by VertexQueryArguments.
 * @return An OK response holding the JSON result.
 * @throws JSONException if an element cannot be serialized.
 */
private Response getVertexEdges(Vertex vertex, String direction) throws JSONException {
    // the segment decides both the query direction (both, out, in) and what the client
    // wants back (vertices, edges, count, vertex identifiers)
    final VertexQueryArguments arguments = new VertexQueryArguments(direction);
    final VertexQuery query = vertex.query().direction(arguments.getQueryDirection());

    final JSONArray elements = new JSONArray();
    long total = 0;
    switch (arguments.getReturnType()) {
    case VERTICES:
        for (Vertex adjacent : query.vertices()) {
            elements.put(GraphSONUtility.jsonFromElement(
                    adjacent, getVertexIndexedKeys(), GraphSONMode.NORMAL));
            total++;
        }
        break;
    case VERTEX_IDS:
        for (Vertex adjacent : query.vertices()) {
            elements.put(adjacent.getId());
            total++;
        }
        break;
    case EDGES:
        for (Edge adjacentEdge : query.edges()) {
            elements.put(GraphSONUtility.jsonFromElement(
                    adjacentEdge, getEdgeIndexedKeys(), GraphSONMode.NORMAL));
            total++;
        }
        break;
    case COUNT:
        total = query.count();
        break;
    default:
        break;
    }

    final JSONObject payload = new JSONObject();
    // for a count-only query we don't bother to send back the result array
    if (!arguments.isCountOnly()) {
        payload.put(RESULTS, elements);
    }
    payload.put(TOTAL_SIZE, total);
    return Response.ok(payload).build();
}
/**
 * Lists every edge of the lineage graph.
 *
 * GET http://host/metadata/lineage/edges/all
 * graph.getEdges();
 * @return All edges in lineage graph, together with their total count.
 */
@GET
@Path("/edges/all")
@Produces({MediaType.APPLICATION_JSON})
public Response getEdges() {
    LOG.info("Get All Edges.");
    final Iterable<Edge> allEdges = getGraph().getEdges();
    try {
        return Response.ok(buildJSONResponse(allEdges)).build();
    } catch (JSONException jsonFailure) {
        throw FalconWebException.newAPIException(jsonFailure, Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Get a single edge with a unique id.
 *
 * GET http://host/metadata/lineage/edges/id
 * graph.getEdge(id);
 * A null or empty id is rejected as a bad request; an id that matches no edge is reported as
 * not found.
 * @param edgeId The unique id of the edge.
 * @return Edge with the specified id, serialized under the results key.
 */
@GET
@Path("/edges/{id}")
@Produces({MediaType.APPLICATION_JSON})
public Response getEdge(@PathParam("id") final String edgeId) {
    // This endpoint resolves an edge, so the log line names an edge rather than a vertex.
    LOG.info("Get edge for edgeId= {}", edgeId);
    validateInputs("Invalid argument: edge id passed is null or empty.", edgeId);
    try {
        Edge edge = getGraph().getEdge(edgeId);
        if (edge == null) {
            String message = "Edge with [" + edgeId + "] cannot be found.";
            LOG.info(message);
            // Not a JSONException, so this propagates past the catch below unchanged.
            throw FalconWebException.newMetadataResourceException(
                    JSONObject.quote(message), Response.Status.NOT_FOUND);
        }

        JSONObject response = new JSONObject();
        response.put(RESULTS, GraphSONUtility.jsonFromElement(
                edge, getEdgeIndexedKeys(), GraphSONMode.NORMAL));
        return Response.ok(response).build();
    } catch (JSONException e) {
        throw FalconWebException.newAPIException(e, Response.Status.INTERNAL_SERVER_ERROR);
    }
}
/**
 * Serializes graph elements into the common response envelope: the elements under the results
 * key and their number under the total size key.
 * Edges are serialized with the edge indexed keys and every other element with the vertex
 * indexed keys, matching what getEdge and getVertexEdges do for single elements.
 * @param elements The vertices or edges to serialize.
 * @param <T> The element type.
 * @return The JSON envelope.
 * @throws JSONException if an element cannot be serialized.
 */
private <T extends Element> JSONObject buildJSONResponse(Iterable<T> elements) throws JSONException {
    JSONArray elementArray = new JSONArray();
    long counter = 0;
    for (Element element : elements) {
        counter++;
        // The property keys must match the element kind; using the vertex keys for an edge
        // would filter its properties by the wrong key set.
        elementArray.put(GraphSONUtility.jsonFromElement(
                element,
                element instanceof Edge ? getEdgeIndexedKeys() : getVertexIndexedKeys(),
                GraphSONMode.NORMAL));
    }

    JSONObject response = new JSONObject();
    response.put(RESULTS, elementArray);
    response.put(TOTAL_SIZE, counter);
    return response;
}
/**
 * Builds the entity lineage graph of a set of processes.
 * Every process becomes a vertex. A feed becomes an edge when it is an output of one of the
 * processes and an input of at least one of them; the edge runs from the producer to each
 * consumer. If several processes output the same feed, only the last one seen is kept as producer.
 * @param processes The processes of the pipeline; null or empty yields an empty graph.
 * @return The vertices and edges of the pipeline.
 */
private LineageGraphResult buildJSONGraph(List<Process> processes) {
    final List<String> vertices = new LinkedList<String>();
    final List<LineageGraphResult.Edge> edges = new LinkedList<LineageGraphResult.Edge>();

    final boolean hasProcesses = processes != null && !processes.isEmpty();
    if (hasProcesses) {
        final Map<String, String> producerByFeed = new HashMap<String, String>();
        final Map<String, List<String>> consumersByFeed = new HashMap<String, List<String>>();

        for (Process process : processes) {
            final String processName = process.getName();
            vertices.add(processName);

            // remember which process produces each feed
            if (process.getOutputs() != null) {
                for (Output produced : process.getOutputs().getOutputs()) {
                    producerByFeed.put(produced.getFeed(), processName);
                }
            }

            // remember every process consuming each feed
            if (process.getInputs() != null) {
                for (Input consumed : process.getInputs().getInputs()) {
                    final String feedName = consumed.getFeed();
                    List<String> consumers = consumersByFeed.get(feedName);
                    if (consumers == null) {
                        consumers = new LinkedList<String>();
                        consumersByFeed.put(feedName, consumers);
                    }
                    consumers.add(processName);
                }
            }
        }
        LOG.debug("feedProducerMap = {}", producerByFeed);

        // only feeds that are both produced and consumed connect two processes
        final Set<String> connectingFeeds =
                Sets.intersection(producerByFeed.keySet(), consumersByFeed.keySet());
        for (String feedName : connectingFeeds) {
            final String producerName = producerByFeed.get(feedName);
            for (String consumerName : consumersByFeed.get(feedName)) {
                edges.add(new LineageGraphResult.Edge(producerName, consumerName, feedName));
            }
        }
    }

    final LineageGraphResult graph = new LineageGraphResult();
    graph.setEdges(edges.toArray(new LineageGraphResult.Edge[edges.size()]));
    graph.setVertices(vertices.toArray(new String[vertices.size()]));
    LOG.debug("result = {}", graph);
    return graph;
}
/**
 * Rejects the request with a BAD_REQUEST metadata resource exception as soon as one of the
 * inputs is null or empty.
 * @param errorMsg The message of the exception.
 * @param inputs The values to check.
 */
private static void validateInputs(String errorMsg, String... inputs) {
    for (int i = 0; i < inputs.length; i++) {
        final boolean missing = StringUtils.isEmpty(inputs[i]);
        if (missing) {
            throw FalconWebException.newMetadataResourceException(errorMsg, Response.Status.BAD_REQUEST);
        }
    }
}
// What a vertex query hands back to the client: adjacent vertices, edges, a bare count,
// or the identifiers of the adjacent vertices. Chosen by VertexQueryArguments.
private enum ReturnType {VERTICES, EDGES, COUNT, VERTEX_IDS}
// Direction segments accepted by VertexQueryArguments. Each one fixes a query direction
// (out, in or both) and a return type.
// Segments returning edges.
public static final String OUT_E = "outE";
public static final String IN_E = "inE";
public static final String BOTH_E = "bothE";
// Segments returning adjacent vertices.
public static final String OUT = "out";
public static final String IN = "in";
public static final String BOTH = "both";
// Segments returning only a count.
public static final String OUT_COUNT = "outCount";
public static final String IN_COUNT = "inCount";
public static final String BOTH_COUNT = "bothCount";
// Segments returning the identifiers of adjacent vertices.
public static final String OUT_IDS = "outIds";
public static final String IN_IDS = "inIds";
public static final String BOTH_IDS = "bothIds";
/**
 * Helper class for query arguments.
 * Parses a direction segment (for example outE, in or bothCount) into the direction to query,
 * the kind of result to return and whether only a count is wanted.
 */
public static final class VertexQueryArguments {
    private final Direction queryDirection;
    private final ReturnType returnType;
    private final boolean countOnly;

    /**
     * Parses the given direction segment.
     * A segment that is not one of the supported constants (including null) is rejected with a
     * BAD_REQUEST metadata resource exception.
     * @param directionSegment One of the direction segment constants of the enclosing class.
     */
    public VertexQueryArguments(String directionSegment) {
        // a switch on a String rejects null, so substitute a value that matches no constant
        final String segment = directionSegment == null ? "" : directionSegment;

        // the suffix of the segment selects what is returned ...
        final ReturnType kind;
        switch (segment) {
        case OUT_E:
        case IN_E:
        case BOTH_E:
            kind = ReturnType.EDGES;
            break;
        case OUT:
        case IN:
        case BOTH:
            kind = ReturnType.VERTICES;
            break;
        case OUT_COUNT:
        case IN_COUNT:
        case BOTH_COUNT:
            kind = ReturnType.COUNT;
            break;
        case OUT_IDS:
        case IN_IDS:
        case BOTH_IDS:
            kind = ReturnType.VERTEX_IDS;
            break;
        default:
            throw FalconWebException.newMetadataResourceException(
                    JSONObject.quote(directionSegment + " segment was invalid."), Response.Status.BAD_REQUEST);
        }

        // ... and its prefix selects the direction; anything invalid was rejected above,
        // so whatever is neither out nor in must be one of the both segments
        final Direction side;
        switch (segment) {
        case OUT_E:
        case OUT:
        case OUT_COUNT:
        case OUT_IDS:
            side = Direction.OUT;
            break;
        case IN_E:
        case IN:
        case IN_COUNT:
        case IN_IDS:
            side = Direction.IN;
            break;
        default:
            side = Direction.BOTH;
            break;
        }

        this.returnType = kind;
        this.queryDirection = side;
        this.countOnly = kind == ReturnType.COUNT;
    }

    /**
     * @return The direction to query in.
     */
    public Direction getQueryDirection() {
        return queryDirection;
    }

    /**
     * @return The kind of result the segment asks for.
     */
    public ReturnType getReturnType() {
        return returnType;
    }

    /**
     * @return True when the segment asks for a count only.
     */
    public boolean isCountOnly() {
        return countOnly;
    }
}
}