/* Copyright 2014 MITRE Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.mitre.provenance.dag;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import org.mitre.provenance.EdgeMarking;
import org.mitre.provenance.Metadata;
import org.mitre.provenance.PLUSException;
import org.mitre.provenance.db.neo4j.Neo4JPLUSObjectFactory;
import org.mitre.provenance.npe.NonProvenanceEdge;
import org.mitre.provenance.plusobject.FocusedCollection;
import org.mitre.provenance.plusobject.PLUSActor;
import org.mitre.provenance.plusobject.PLUSEdge;
import org.mitre.provenance.plusobject.PLUSObject;
import org.mitre.provenance.plusobject.ProvenanceCollection;
import org.mitre.provenance.plusobject.marking.Taint;
import org.mitre.provenance.surrogate.SignPost;
import org.mitre.provenance.surrogate.SurrogateDetail;
import org.mitre.provenance.tools.PLUSUtils;
import org.mitre.provenance.user.User;
/**
* This object represents a limited lineage DAG view composed of PLUSObjects and PLUSEdges.
* Actual DAGs could grow to be very large, so we'll always be functionally limited by a starting node,
* and a specified maximum number of nodes.
*
* <p>Important: DAGs are not tied to workflows per se, they are only bound by BLING and FLING. So provenance links that
* go outside of workflows will be honored and added to the DAG. (I.e. you create a workflow A->B. Later someone tacks
* on B->C that wasn't part of that workflow. A, B, and C are all in the same DAG)
*
* <p>This class contains most of the implementation of the surrogate function described in the "Surrogate Parenthood"
* paper. To generate new LineageDAGs from raw provenance, using the surrogate algorithm, see the fromCollection() method.
* @author moxious
*/
public class LineageDAG extends ViewedCollection implements FocusedCollection {
/** Class-wide logger, named after this class. */
private static final Logger log = Logger.getLogger(LineageDAG.class.getName());
/** This tag will be associated with a node OID in the graph, when it is known that
* the database contains more information than the graph.
* @see LineageDAG#getTags(String)
*/
public static final String TAG_MORE_AVAILABLE = "more";
/** An object that stores a statistical summary of various aspects of the graph. */
protected FingerPrint fingerPrint;
/**
* Nodes in DAG that are a source of taint, i.e. those directly tainted.
* This map is replaced with a fresh, empty map whenever {@code empty()} is called.
**/
protected Map <PLUSObject,List<Taint>> taintSources = new HashMap<PLUSObject,List<Taint>> ();
/** The starting node of the DAG; remains null until {@code setFocus(PLUSObject)} is called. */
protected PLUSObject focus;
/**
 * Builds an empty lineage DAG as seen by a particular user.
 * A fresh {@link Metadata} object and a fresh {@link FingerPrint} (stamped with this DAG's
 * ID and a creation mark) are installed before the contents are cleared via {@code empty()}.
 * @param viewer the user who is viewing the DAG
 */
public LineageDAG(User viewer) {
    super(viewer);

    this.metadata = new Metadata();

    // Set up the statistical summary for this graph.
    this.fingerPrint = new FingerPrint();
    this.fingerPrint.setDagId(getId());
    this.fingerPrint.setCreated();

    // Start from a clean slate.
    empty();
}
/**
 * Copies this DAG by delegating to the super-class clone.
 * Note that the copy refers to the very same focus, fingerprint and taint-source map
 * objects as this instance; those three members are shared, not duplicated.
 * @return the cloned DAG
 */
public LineageDAG clone() {
    LineageDAG copy = (LineageDAG) super.clone();
    copy.focus = this.focus;
    copy.fingerPrint = this.fingerPrint;
    copy.taintSources = this.taintSources;
    return copy;
} // End clone
/**
 * @return the fingerprint (statistical summary) associated with this DAG.
 */
public FingerPrint getFingerPrint() {
    return this.fingerPrint;
}
/**
 * Get the metadata associated with this DAG: the super-class metadata, augmented with every
 * entry of the fingerprint's metadata (each value stored in string form).
 * The fingerprint entries are written into the object returned by the super-class, so entries with
 * the same key are overwritten by the fingerprint's value.
 * @return a Metadata object.
 */
public Metadata getMetadata() {
    Metadata combined = super.getMetadata();
    Metadata fromFingerPrint = fingerPrint.asMetadata();

    for(String key : fromFingerPrint.keySet()) {
        combined.put(key, "" + fromFingerPrint.get(key));
    }

    return combined;
} // End getMetadata
/**
 * Lists the OIDs of the objects that taint the given object, according to this graph.
 * The OIDs are read from the object's taint tag, which holds them as a comma-separated string.
 * @param obj the object whose taint sources are wanted
 * @return an array of OIDs; an empty array when the object is not tainted in this graph.
 */
public String [] getTaintSources(PLUSObject obj) {
    if(isTainted(obj)) {
        String raw = getTags(obj.getId()).get(TAINT_FLAG);
        log.fine("getTaintSources raw string '" + raw + "'");
        return raw.split(",");
    }

    return new String[0];
}
/**
 * Reports whether the given object carries the taint tag in this graph.
 * Warning: because graph objects are not necessarily complete, this is not a guarantee of taint/no-taint;
 * it only says whether any taint is discoverable within this graph.
 * @param obj the object to check
 * @return true if tainted, false otherwise.
 */
public boolean isTainted(PLUSObject obj) {
    String oid = obj.getId();
    return hasTag(oid, TAINT_FLAG);
}
/**
 * Empty the contents of the DAG: clears whatever the super-class holds, then discards
 * the recorded taint sources by installing a new, empty map.
 */
protected void empty() {
    super.empty();
    this.taintSources = new HashMap<PLUSObject,List<Taint>>();
} // End empty()
/**
 * Builds a path object between two nodes of this DAG.
 * @param fromOID the OID of the node where the path starts
 * @param toOID the OID of the node where the path ends
 * @return a new DAGPath over this DAG connecting the two nodes
 * @throws PLUSException if either OID does not correspond to a node in this DAG
 */
public DAGPath getPath(String fromOID, String toOID) throws PLUSException {
    PLUSObject start = getNode(fromOID);
    PLUSObject end = getNode(toOID);

    if(start == null) {
        throw new PLUSException("No such object " + fromOID + " in DAG");
    }
    if(end == null) {
        throw new PLUSException("No such object " + toOID + " in DAG");
    }

    return new DAGPath(this, start, end);
}
/**
 * @return a ProvenanceCollection containing this object's graph feet, that is, the nodes tagged as
 * having no outbound provenance edges under this DAG. This is not a guarantee that there is no further
 * provenance available, only that there is none in this object instance.
 */
public ProvenanceCollection getGraphFeet() {
    ViewedCollection feet = new ViewedCollection(getViewer());

    for(PLUSObject candidate : getNodes()) {
        if(!isFoot(candidate.getId())) continue;
        feet.addNode(candidate);
    }

    return feet;
} // End getGraphFeet
/**
 * @return a ProvenanceCollection containing this object's graph heads, that is, the nodes tagged as
 * having no inbound provenance edges under this DAG. This is not a guarantee that there is no earlier
 * provenance available, only that there is none in this object instance.
 */
public ProvenanceCollection getGraphHeads() {
    ViewedCollection heads = new ViewedCollection(getViewer());

    for(PLUSObject candidate : getNodes()) {
        if(!isHead(candidate.getId())) continue;
        heads.addNode(candidate);
    }

    return heads;
} // End getGraphHeads
/**
 * @param oid the ID of the object to check
 * @return true if the object is in the graph and carries the head tag; false if the object is not a head (or isn't in the graph)
 */
protected boolean isHead(String oid) {
    if(!containsObjectID(oid)) return false;
    return hasTag(oid, TAG_HEAD);
}
/**
 * @param oid the ID of the object to check
 * @return true if the object is in the graph and carries the foot tag; false if the object is not a foot (or isn't in the graph)
 */
protected boolean isFoot(String oid) {
    if(!containsObjectID(oid)) return false;
    return hasTag(oid, TAG_FOOT);
}
/**
 * Removes a node via the super-class and, when something was actually removed,
 * tells the fingerprint about it (passing the node that was requested for removal).
 * @see ProvenanceCollection#removeNode(PLUSObject)
 */
public PLUSObject removeNode(PLUSObject node) {
    PLUSObject removed = super.removeNode(node);

    if(removed != null) {
        fingerPrint.nodeRemoved(node);
    }

    return removed;
}
/**
 * Removes the node with the given OID via the super-class, keeping the fingerprint up to date.
 * A null OID is logged and ignored. Removing the focus node is allowed, but logged as a warning.
 * @param oid the ID of the node to remove
 * @return the removed object, or null if the OID was null or the super-class removed nothing.
 * @see ProvenanceCollection#removeNode(String)
 */
public PLUSObject removeNode(String oid) {
    if(oid == null) {
        log.severe("Cannot remove null from the graph");
        return null;
    }

    boolean removingFocus = (focus != null) && focus.getId().equals(oid);
    if(removingFocus) {
        log.warning("LineageDAG#removeNode is removing the root!");
    }

    PLUSObject removed = super.removeNode(oid);
    if(removed != null) {
        fingerPrint.nodeRemoved(removed);
    }

    return removed;
} // End removeNode
/**
 * Adds everything in the given collection (nodes, edges, non-provenance edges, actors; in that order)
 * through the super-class methods, and notifies the fingerprint of each node and edge that was
 * actually added so the graph fingerprint stays up to date.
 * @param col the collection whose contents should be added
 * @param force passed through unchanged to each super-class add method
 * @return the total number of items that were added.
 * @see ProvenanceCollection#addAll(ProvenanceCollection, boolean)
 */
public int addAll(ProvenanceCollection col, boolean force) {
    int added = 0;

    for(PLUSObject node : col.getNodes()) {
        if(!super.addNode(node, force)) continue;
        fingerPrint.nodeAdded(node);
        added++;
    }

    for(PLUSEdge edge : col.getEdges()) {
        if(!super.addEdge(edge, force)) continue;
        fingerPrint.edgeAdded(edge);
        added++;
    }

    // Non-provenance edges and actors are counted, but not tracked by the fingerprint.
    for(NonProvenanceEdge npe : col.getNonProvenanceEdges()) {
        if(super.addNonProvenanceEdge(npe, force)) added++;
    }

    for(PLUSActor actor : col.getActors()) {
        if(super.addActor(actor, force)) added++;
    }

    return added;
} // End addAll
/**
 * Convenience form of {@link #addAll(ProvenanceCollection, boolean)} with {@code force} set to false,
 * so the graph fingerprint is kept up to date here as well.
 * @param col the collection whose contents should be added
 * @return the total number of items that were added.
 * @see ProvenanceCollection#addAll(ProvenanceCollection)
 */
public int addAll(ProvenanceCollection col) {
    final boolean force = false;
    return addAll(col, force);
}
/**
 * Add a particular node to the graph, informing the fingerprint when the node really was added.
 * @param obj the node to add.
 * @return true if it was added, false if it was already present in the DAG.
 */
public boolean addNode(PLUSObject obj) {
    boolean added = super.addNode(obj);

    if(added) {
        fingerPrint.nodeAdded(obj);
    }

    return added;
} // End addNode
/**
 * Modify the focus of the DAG, and record the new focus' ID as the fingerprint's start ID.
 * @param focus the new starting node; its ID is read, so it must not be null.
 */
public void setFocus(PLUSObject focus) {
    this.focus = focus;

    String startId = focus.getId();
    fingerPrint.setStartId(startId);
}
/**
 * Return true if the node in question has outbound or inbound edges under this graph, false otherwise.
 * @param node the node to check
 * @return false if the node is not in this graph or has no edges in either direction; true otherwise.
 */
protected boolean nodeIsConnected(PLUSObject node) {
    if(!contains(node)) return false;

    String oid = node.getId();
    if(getOutboundEdgesByNode(oid).size() > 0) return true;

    return getInboundEdgesByNode(oid).size() > 0;
} // End nodeIsConnected
/**
 * Add an edge to the DAG, informing the fingerprint when the edge really was added.
 * @param edge the edge to add.
 * @param force if true, this will be added overwriting anything similar already there. If false, the edge
 * will be added only if its candidate nodes aren't already connected.
 * @return true if the edge was added, false otherwise.
 */
public boolean addEdge(PLUSEdge edge, boolean force) {
    boolean added = super.addEdge(edge, force);

    if(added) {
        fingerPrint.edgeAdded(edge);
    }

    return added;
} // End addEdge
/**
 * Return a list of siblings of a particular node under this graph. Siblings are nodes that have a common parent.
 * Note that if the node you are asking about has more than one parent node, it may have siblings from multiple
 * different parents. Results are de-duplicated by object ID.
 * NOTE(review): every child of every parent is collected, so the node itself presumably appears in its own
 * sibling list -- confirm whether callers expect that.
 * @param obj the object in question.
 * @return an empty list if the node is not in the graph, otherwise a list of siblings.
 */
public List<PLUSObject> getSiblings(PLUSObject obj) {
    if(!contains(obj)) {
        return new ArrayList<PLUSObject>();
    }

    // Keyed by OID so that a child shared between several parents is only reported once.
    HashMap<String,PLUSObject> siblingsById = new HashMap<String,PLUSObject>();

    for(PLUSObject parent : getBLING(obj)) {
        for(PLUSObject child : getFLING(parent)) {
            siblingsById.put(child.getId(), child);
        }
    }

    return new ArrayList<PLUSObject>(siblingsById.values());
} // End getSiblings
/**
 * @param obj the object of interest
 * @return all objects that are one step away from the given object in this DAG, via either FLING or BLING.
 * In other words, this treats edges as undirected. The BLING objects come first, followed by the FLING objects.
 */
public List <PLUSObject> getNeighbors(PLUSObject obj) {
    List<PLUSObject> neighbors = getBLING(obj);
    List<PLUSObject> downstream = getFLING(obj);

    neighbors.addAll(downstream);
    return neighbors;
} // End getNeighbors
/**
 * @param obj the object of interest
 * @return all objects that are one step of BLING away from the given object in this DAG, i.e. the
 * objects at the "from" end of its inbound edges.
 */
public List <PLUSObject> getBLING(PLUSObject obj) {
    final String direction = "bling";
    return getLineageOfNode(obj, direction);
}
/**
 * @param obj the object of interest
 * @return all objects that are one step of FLING away from the given object in this DAG, i.e. the
 * objects at the "to" end of its outbound edges.
 */
public List <PLUSObject> getFLING(PLUSObject obj) {
    final String direction = "fling";
    return getLineageOfNode(obj, direction);
}
/**
 * Get the set of objects one step away in a particular lineage direction.
 * @param obj the starting point
 * @param direction "bling" selects inbound edges (and their "from" ends); any other value is
 * treated as "fling" and selects outbound edges (and their "to" ends).
 * @return a new list of the objects in the "bling" or "fling" of the given object in this DAG.
 */
private List <PLUSObject> getLineageOfNode(PLUSObject obj, String direction) {
    List<PLUSObject> related = new ArrayList<PLUSObject>();
    boolean upstream = "bling".equals(direction);

    List<PLUSEdge> edges;
    if(upstream) {
        edges = getInboundEdgesByNode(obj.getId());
    } else {
        edges = getOutboundEdgesByNode(obj.getId());
    }

    for(PLUSEdge edge : edges) {
        PLUSObject other = upstream ? edge.getFrom() : edge.getTo();

        // Some edges may be dangling so don't assume the other end of the edge is in the DAG.
        if(other == null) continue;
        related.add(other);
    }

    return related;
} // End getLineage
/**
 * Recursively collects the objects reachable by following outbound edges from the given object.
 * Objects found through recursion are only appended when not already in the result; direct
 * children are appended unconditionally.
 * @param obj the starting point
 * @return the list of objects found downstream of obj.
 * @deprecated
 */
public List <PLUSObject> getFullFlingForExperiments(PLUSObject obj) {
    List<PLUSObject> descendants = new ArrayList<PLUSObject>();

    for(PLUSEdge edge : getOutboundEdgesByNode(obj.getId())) {
        PLUSObject child = edge.getTo();

        // Some edges may be dangling so don't assume the other end of the edge is in the DAG.
        if(child == null) continue;

        descendants.add(child);

        for(PLUSObject deeper : getFullFlingForExperiments(child)) {
            if(!descendants.contains(deeper)) {
                descendants.add(deeper);
            }
        }
    }

    return descendants;
} // End getLineage
/**
 * Public pass-through to the private one-step lineage lookup.
 * @param obj the starting point
 * @param direction "bling" for upstream; anything else is treated as "fling"
 * @return the objects one step away from obj in the requested direction.
 * @deprecated
 */
public List <PLUSObject> getLineageOfMyNode(PLUSObject obj, String direction) {
    List<PLUSObject> lineage = getLineageOfNode(obj, direction);
    return lineage;
}
/**
 * Remove an edge from the DAG.
 * The fingerprint is told about the removal first, and unconditionally, before the
 * super-class performs the actual removal.
 * @param edge the edge to remove.
 */
public void removeEdge(PLUSEdge edge) {
    this.fingerPrint.edgeRemoved(edge);

    super.removeEdge(edge);
} // End removeEdge
/**
 * Traverse the DAG from a single starting node, in one direction, applying the provided
 * function upon visiting each node.
 * @param function the action to take upon visiting each node.
 * @param direction either "fling" or "bling"
 * @param startingPoint the node where the traversal begins
 * @throws PLUSException if the starting point is not in this DAG, or the direction is not recognized.
 */
public void traverse(LineageDAGTraverseFn function, String direction, PLUSObject startingPoint) throws PLUSException {
    if(!contains(startingPoint)) {
        throw new PLUSException("DAG doesn't contain that object!");
    }

    ArrayList<String> startIds = new ArrayList<String>();
    startIds.add(startingPoint.getId());

    traverse(function, startIds, direction);
} // End traverse
/**
 * Traverse the entire DAG, beginning with the heads. Apply the provided function upon visiting each node.
 * @param function the action to take upon visiting each node.
 * @throws PLUSException
 */
public void traverse(LineageDAGTraverseFn function) throws PLUSException {
    ArrayList<String> headIds = new ArrayList<String>();

    for(PLUSObject head : getGraphHeads().getNodes()) {
        headIds.add(head.getId());
    }

    assert !headIds.isEmpty();

    // Entire graph can be traversed by starting at the heads, and going FLING.
    traverse(function, headIds, "fling");
} // End traverse
/**
 * Breadth-first traversal worker. Starting from the queued OIDs, visits every reachable node once,
 * following edges in the requested direction.
 * Edges may dangle (point at objects that are not nodes of this DAG); such OIDs are skipped
 * rather than being handed to the visitor as null.
 * @param function the action to take upon visiting each node.
 * @param queue the OIDs to start from; this list is consumed (emptied) by the traversal.
 * @param direction either "fling" or "bling"
 * @throws PLUSException if there is nothing to start from although the DAG has nodes, or if the
 * direction is not recognized.
 */
private void traverse(LineageDAGTraverseFn function, ArrayList<String>queue, String direction) throws PLUSException {
    HashSet<String> seen = new HashSet<String>();

    if(queue.size() <= 0 && countNodes() > 0) throw new PLUSException("No nodes to traverse!");

    boolean forward = "fling".equals(direction);
    if(!forward && !"bling".equals(direction)) throw new PLUSException("Direction may only be 'fling' or 'bling'!");

    while(!queue.isEmpty()) {
        String oid = queue.remove(0);

        // add() returns false when the OID was already handled.
        if(!seen.add(oid)) continue;

        PLUSObject obj = getNode(oid);
        if(obj == null) {
            // The other end of a dangling edge: the object is not part of this DAG, so there is
            // nothing to visit and no edges to follow from it.
            log.fine("Skipping " + oid + " during traversal; it is not a node in this DAG");
            continue;
        }

        function.visitNode(this, obj);

        // Only follow edges in the requested direction; the starting points are already queued.
        List<PLUSObject> next = forward ? getFLING(obj) : getBLING(obj);
        for(PLUSObject o : next) {
            if(!seen.contains(o.getId())) queue.add(o.getId());
        }
    } // End while
} // End traverse
/**
 * Given a provenance collection, and a starting point ID that it was loaded from, determine
 * which object in the collection should be the focus. Only PLUSObjects can be the focus, but
 * this can be tricky because the starting point might have been an NPE-ID.
 * <p>Resolution order: (1) the starting point itself, when it is a PLUS OID present in the collection;
 * (2) when the starting point is not a PLUS OID, the first node (sorted by creation) among the nodes
 * joined to it by a non-provenance edge; (3) otherwise an arbitrary node of the collection.
 * @param col the collection
 * @param startingPointID the starting point used to load the collection
 * @return the suggested object to use as a focus. (This object may be null).
 */
public static PLUSObject chooseFocus(ProvenanceCollection col, String startingPointID) {
    // Can't be any focus in an empty collection.
    if(col.countNodes() <= 0) {
        return null;
    }

    boolean startIsOid = PLUSUtils.isPLUSOID(startingPointID);

    if(startIsOid) {
        // If the starting point is actually in the graph, that's the focus.
        if(col.containsObjectID(startingPointID)) {
            return col.getNode(startingPointID);
        }
    } else {
        // Collect the nodes sitting on the other end of any NPE that touches the starting point.
        ProvenanceCollection incidentToNPE = new ProvenanceCollection();

        for(NonProvenanceEdge npe : col.getNonProvenanceEdges()) {
            if(npe.getFrom().equals(startingPointID) && PLUSUtils.isPLUSOID(npe.getTo()) && col.containsObjectID(npe.getTo())) {
                incidentToNPE.addNode(col.getNode(npe.getTo()));
            } else if(npe.getTo().equals(startingPointID) && PLUSUtils.isPLUSOID(npe.getFrom()) && col.containsObjectID(npe.getFrom())) {
                incidentToNPE.addNode(col.getNode(npe.getFrom()));
            }
        } // End for

        if(incidentToNPE.countNodes() <= 0) {
            log.warning("THIS MAY BE A BUG. Starting point ID '" + startingPointID + "' isn't in this graph!");
        } else {
            // Take the first object incident to that NPE, ordered by creation.
            try {
                return incidentToNPE.getNodesInOrderedList(ProvenanceCollection.SORT_BY_CREATION).get(0);
            } catch(Exception exc) {
                log.severe("Failed to get first temporal object incident to NPE: " + exc.getMessage());
            }
        }
    }

    // No other trick worked, so we basically have to choose some focus somewhere.
    Object[] allNodes = col.getNodes().toArray();
    PLUSObject artificialFocus = (PLUSObject) allNodes[0];

    log.warning("Failed to discover node for starting point " + startingPointID +
            " assigning instead: " + artificialFocus.getName());

    return artificialFocus;
} // End chooseFocus
/**
 * Look through the edge list in a LineageDAG and tag nodes as a "head" or "foot" depending
 * on whether or not there is anything further upstream/downstream.
 * A node is a foot when none of its outbound edges leads to a node contained in the DAG, and a
 * head when none of its inbound edges comes from a node contained in the DAG. A node may be both.
 * @param col the LineageDAG to tag
 * @return the same DAG, with its markings updated.
 */
public static LineageDAG tagHeadAndFeet(LineageDAG col) {
    col.getFingerPrint().startTimer("TagHeadAndFeet");

    for(PLUSObject fo : col.getNodes()) {
        List<PLUSEdge> fling = col.getOutboundEdgesByNode(fo.getId());
        List<PLUSEdge> bling = col.getInboundEdgesByNode(fo.getId());

        // Downstream: dangling edges (target not in the DAG) do not count as a connection.
        boolean downstreamConnection = false;
        for(PLUSEdge e : fling) {
            if(col.contains(e.getTo())) { downstreamConnection = true; break; }
        }
        if(!downstreamConnection) col.tagNode(fo.getId(), LineageDAG.TAG_FOOT, "true");

        // Upstream: this must inspect the INBOUND edges. (Inspecting the outbound edges here
        // would wrongly tag nodes that do have in-graph parents as heads.)
        boolean upstreamConnection = false;
        for(PLUSEdge e : bling) {
            if(col.contains(e.getFrom())) { upstreamConnection = true; break; }
        }
        if(!upstreamConnection) col.tagNode(fo.getId(), LineageDAG.TAG_HEAD, "true");
    } // End for

    col.getFingerPrint().stopTimer("TagHeadAndFeet");
    return col;
} // End tagHeadAndFeet
/**
 * Given a LineageDAG, detect edges that are in the collection which "dangle", that is, whose
 * "from" or "to" end is an object that the collection doesn't contain.
 * @param col the DAG to inspect
 * @return a list of dangling edges.
 */
public static List<PLUSEdge>detectDanglers(LineageDAG col) {
    ArrayList<PLUSEdge> danglers = new ArrayList<PLUSEdge>();

    for(PLUSEdge edge : col.getEdges()) {
        boolean bothEndsPresent = col.contains(edge.getFrom()) && col.contains(edge.getTo());
        if(bothEndsPresent) continue;

        danglers.add(edge);
    }

    return danglers;
} // End detectDanglers
/**
 * Iterates through all of the nodes in the LineageDAG and tags the head and feet for quick retrieval later.
 * A node with no outbound edges is tagged as a foot; a node with no inbound edges is tagged as a head.
 * The two checks are independent, so a node with no edges at all is tagged as both.
 * @param col the DAG to tag
 * @return the same collection passed.
 */
protected static LineageDAG tagGraphHeadsAndFeet(LineageDAG col) {
    col.getFingerPrint().startTimer("HeadsAndFeet");

    for(PLUSObject o : col.getNodes()) {
        if(col.getOutboundEdgesByNode(o.getId()).size() <= 0)
            col.tagNode(o.getId(), ProvenanceCollection.TAG_FOOT, ProvenanceCollection.TAG_VALUE_TRUE);

        // Deliberately not an "else": an isolated node is both a head and a foot, and would
        // otherwise never show up among the graph heads.
        if(col.getInboundEdgesByNode(o.getId()).size() <= 0)
            col.tagNode(o.getId(), ProvenanceCollection.TAG_HEAD, ProvenanceCollection.TAG_VALUE_TRUE);
    }

    col.getFingerPrint().stopTimer("HeadsAndFeet");
    return col;
} // End tagGraphHeadsAndFeet
/**
 * Part of the surrogate algorithm calls for "edge voting", which is the process by which each node incident to an
 * edge gets to vote whether the edge is shown, hidden, or inferred. This function implements that voting: each
 * edge has its from/to markings set to the votes of its endpoints and is then forced back into the DAG.
 * A node that is not a surrogate, or whose surrogate detail yields no marking, votes "show".
 * Vote totals are recorded in the DAG's metadata under "Votes-Show", "Votes-Hide" and "Votes-Infer".
 * <b>This function modifies its argument and returns the same object.</b>
 */
public static LineageDAG computeEdgeVoting(LineageDAG dag) {
    dag.getFingerPrint().startTimer("EdgeVoting");

    int showCount = 0;
    int hideCount = 0;
    int inferCount = 0;

    for(PLUSEdge edge : dag.getEdges()) {
        PLUSObject upstream = edge.getFrom();
        PLUSObject downstream = edge.getTo();

        // This happens when the from or the to node was never added to the graph because
        // there was no suitable surrogate. (User not authorized to see any version)
        if(upstream == null || downstream == null) continue;

        // Only surrogates get to cast a real vote.
        EdgeMarking upstreamVote = null;
        if(upstream.isSurrogate()) {
            SurrogateDetail detail = upstream.getSurrogateDetail();
            upstreamVote = detail.getMarking(edge, downstream);
        }

        EdgeMarking downstreamVote = null;
        if(downstream.isSurrogate()) {
            SurrogateDetail detail = downstream.getSurrogateDetail();
            downstreamVote = detail.getMarking(edge, upstream);
        }

        // Abstaining counts as a vote to show the edge.
        if(upstreamVote == null) upstreamVote = EdgeMarking.SHOW;
        if(downstreamVote == null) downstreamVote = EdgeMarking.SHOW;

        // Tally the upstream vote...
        if(EdgeMarking.SHOW.equals(upstreamVote)) {
            showCount++;
        } else if(EdgeMarking.HIDE.equals(upstreamVote)) {
            hideCount++;
        } else {
            inferCount++;
        }

        // ...and the downstream vote.
        if(EdgeMarking.SHOW.equals(downstreamVote)) {
            showCount++;
        } else if(EdgeMarking.HIDE.equals(downstreamVote)) {
            hideCount++;
        } else {
            inferCount++;
        }

        edge.setFromMarking(upstreamVote);
        edge.setToMarking(downstreamVote);

        String summary = "computeEdgeVoting: " +
                edge.getFrom().getName() + " => " +
                edge.getTo().getName() + " " +
                edge.getFromMarking() + ", " + edge.getToMarking() + "=" +
                edge.getVerdict();
        log.fine(summary);

        // Add it back in its new form. True means force it in.
        dag.addEdge(edge, true);

        boolean shown = edge.getVerdict().equals(EdgeMarking.SHOW);
        if(!shown) {
            // Mark these nodes as connected. There is an edge that connects them to something
            // else.
            dag.tagNode(upstream.getId(), "connected", "true");
            dag.tagNode(downstream.getId(), "connected", "true");
        }
    } // End for

    dag.getMetadata().put("Votes-Show", "" + showCount);
    dag.getMetadata().put("Votes-Hide", "" + hideCount);
    dag.getMetadata().put("Votes-Infer", "" + inferCount);

    dag.getFingerPrint().stopTimer("EdgeVoting");
    return dag;
} // End computeEdgeVoting
/**
 * When a DAG is first built, the code comes across some list of nodes that are directly tainted.
 * This function traces forwards in the graph, and marks everything downstream of any directly tainted node
 * as also being tainted (indirectly).
 * <p>Every affected node receives a "tainted-by-&lt;OID&gt;" tag per taint source, and its taint flag tag holds
 * the comma-separated list of all source OIDs that reach it (the format read back by getTaintSources()).
 * Each node is processed at most once per taint source, so cycles and diamonds in the graph are safe.
 * TODO: Taint propagation "blockers". Shouldn't some nodes have some signal that prevents further
 * propagation? (I.e. the tainted input is too remote, or not important). This method needs to get smarter.
 * @param dag the DAG to mark -- this argument is modified and returned.
 * @return the same DAG, with taint tags applied.
 */
public static LineageDAG traceTaintSources(LineageDAG dag) {
    HashMap<String,ArrayList<Taint>> taintSources = Neo4JPLUSObjectFactory.getTaintSources(dag);
    Set <String> e = taintSources.keySet();

    dag.getFingerPrint().startTimer("TraceTaintSources");

    // Run through each object in the tainted sources, and mark everything forwards in the graph as tainted.
    // Note that some nodes will be tainted by multiple sources, and may get multiple markings.
    for(String taintedOID : e) {
        String taintedByTag = "tainted-by-" + taintedOID;
        dag.tagNode(taintedOID, taintedByTag, taintedOID);

        // Check to see if the original source of taint is itself marked as tainted.
        // It should be.
        if(!dag.hasTag(taintedOID, ProvenanceCollection.TAINT_FLAG))
            dag.tagNode(taintedOID, ProvenanceCollection.TAINT_FLAG, taintedOID);

        PLUSObject source = dag.getNode(taintedOID);
        if(source == null) {
            // Nothing to propagate from: the taint source is not a node of this DAG.
            log.warning("Taint source " + taintedOID + " is not a node in this DAG; skipping propagation");
            continue;
        }

        // Build a queue and a visitation list. The visitation list is keyed by OID and seeded
        // with the source itself, so a cycle leading back to the source terminates.
        List <PLUSObject> queue = dag.getFLING(source);
        HashSet <String> visited = new HashSet <String> ();
        visited.add(taintedOID);

        while(!queue.isEmpty()) {
            PLUSObject next = queue.remove(0);
            String nextOID = next.getId();

            // add() returns false if we've already seen it (avoids cycles and repeated work).
            if(!visited.add(nextOID)) continue;

            // Tag that it's tainted, and by which ID.
            dag.tagNode(nextOID, taintedByTag, taintedOID);

            HashMap<String,String> tflags = dag.getTags(nextOID);
            String existing = (tflags == null) ? null : tflags.get(ProvenanceCollection.TAINT_FLAG);

            if(existing == null || existing.isEmpty()) {
                // First taint source to reach this node.
                dag.tagNode(nextOID, ProvenanceCollection.TAINT_FLAG, taintedOID);
            } else if(!existing.contains(taintedOID)) {
                // Append to the sources already recorded instead of replacing them.
                dag.tagNode(nextOID, ProvenanceCollection.TAINT_FLAG, existing + "," + taintedOID);
            }

            queue.addAll(dag.getFLING(next));
        } // End while
    } // End for

    dag.getFingerPrint().stopTimer("TraceTaintSources");
    return dag;
} // End traceTaintSources
/**
* Given a LineageDAG that contains MarkedEdges, this function tries to draw new lines to cover up the ones
* that were inferred. It further DELETES all edges from the graph whose verdict is "infer" or "hide", and then
* removes any node touched by such an edge that is left with no edges at all.
* <p>Summary statistics (visible-set timings, edge counts before and after, number of inferred edges) are
* written into the DAG's metadata, and each phase is timed through the DAG's fingerprint.
* @param dag the LineageDAG to use -- this argument will be modified and returned.
* @return the same DAG that was passed in.
* @throws PLUSException
*/
public static LineageDAG drawInferrableEdges(LineageDAG dag) throws PLUSException {
Iterator <PLUSEdge> edgeIt = dag.getEdges().iterator();
// log.fine("Starting to look for inferrable edges in DAG...");
ArrayList <PLUSEdge> hitList = new ArrayList <PLUSEdge> (); // List of things we'll prune later.
ArrayList <PLUSEdge> toAdd = new ArrayList <PLUSEdge> (); // List of newly-generated edges.
// Keep track of a list of IDs that have removed edges. After the surrogate algorithm
// runs, we'll want to remove any orphaned nodes that got disconnected by the algorithm.
HashSet<String> checkForOrphanedNodes = new HashSet<String>();
dag.getFingerPrint().startTimer("NewEdgeComputing");
int inferredEdges = 0;
// There's a bunch of variables in here that are just performance profiling junk.
long visSetsTotal = 0;
long visSetsTimes = 0;
// Sentinel starting values; both are reset to 0 below if no visible set is ever built.
long visSetMax = -1000000;
long visSetMin = 100000000;
// General algorithm:
// For each edge in the graph marked "infer", do:
// (1) Get upstream visible nodes
// (2) Get downstream visible nodes.
// (3) Draw edges from all of (1) to all of (2)
while(edgeIt.hasNext()) {
PLUSEdge pedge = null;
try { pedge = (PLUSEdge)edgeIt.next(); }
catch(ClassCastException exc) {
throw new PLUSException("This DAG does not contain marked edges! Did you use computeEdgeVoting() first?");
} // End catch
// The verdict is the overall marking. So if one side of the edge votes "show" and another side votes
// "hide", the verdict is "hide" because that dominates show.
EdgeMarking mark = pedge.getVerdict();
if(!EdgeMarking.INFER.equals(mark)) {
// If the edge is hidden, add it to the list to be removed.
if(EdgeMarking.HIDE.equals(mark)) {
checkForOrphanedNodes.add(pedge.getFrom().getId());
checkForOrphanedNodes.add(pedge.getTo().getId());
hitList.add(pedge);
}
// Otherwise if it's not an infer edge, just skip it. This method
// is for drawing new edges that are hidden by infers, so "show" links
// don't matter here.
continue;
}
// OK so now we have to figure out which side of the edge wants this
// inferred.
EdgeMarking fMark = pedge.getFromMarking();
EdgeMarking tMark = pedge.getToMarking();
inferredEdges++;
//log.info("drawInferrableEdges: " +
// pedge.getFrom().getName() + " => " +
// pedge.getTo().getName() + " is inferred.");
// All inferred edges need to get pruned later, so this always needs to
// happen with inferred links.
hitList.add(pedge);
String from = pedge.getFrom().getId();
String to = pedge.getTo().getId();
checkForOrphanedNodes.add(from);
checkForOrphanedNodes.add(to);
String blingSetID = null;
String flingSetID = null;
// How you find the visible sets depends on how the edge is marked.
// Remember the visible set is the set of the nearest visible nodes
// upstream and downstream in the graph.
// At this point in the code, the edge could be marked:
// (infer, show), (show, infer), or (infer, infer).
// What we use as the basis for building the bling and fling visible sets depends on which case this is.
if(fMark.equals(EdgeMarking.SHOW) && tMark.equals(EdgeMarking.INFER)) {
// Get visible BLING and FLING only from to's perspective
blingSetID = to;
flingSetID = to;
} else if(fMark.equals(EdgeMarking.INFER) && tMark.equals(EdgeMarking.SHOW)) {
// Get visible BLING and FLING only from from's perspective.
blingSetID = from;
flingSetID = from;
} else {
// If both marked it infer, then get the visible BLING starting at the upstream node (from),
// and the visible FLING starting at the downstream node (to).
blingSetID = from;
flingSetID = to;
} // End else
long s = System.currentTimeMillis();
ArrayList <PLUSObject> blingSet = buildVisibleSet(dag, dag.getNode(blingSetID), "bling");
long e = System.currentTimeMillis();
// Timing/performance junk.
long i = e-s; visSetsTotal += i; visSetsTimes++;
if(i > visSetMax) visSetMax = i;
if(i < visSetMin) visSetMin = i;
s = System.currentTimeMillis();
ArrayList <PLUSObject> flingSet = buildVisibleSet(dag, dag.getNode(flingSetID), "fling");
e = System.currentTimeMillis();
i = e-s;
visSetsTotal += i; visSetsTimes++;
if(i > visSetMax) visSetMax=i;
if(i < visSetMin) visSetMin=i;
// Debugging...
// log.info("Bling visible set for " + dag.getNode(to).getName() + ": " + blingSet.size());
// for(PLUSObject o : blingSet) log.fine(o.getName());
// log.info("Fling visible set for " + dag.getNode(from).getName() + ": " + flingSet.size());
// for(PLUSObject o : flingSet) log.fine(o.getName());
// Now connect blingset * flingset with edges.
for(int x=0; x<blingSet.size(); x++) {
PLUSObject b = blingSet.get(x);
for(int y=0; y<flingSet.size(); y++) {
PLUSObject f = flingSet.get(y);
// log.info("Drawing computable edge " + b.getName() + " => " + f.getName());
// The new edge inherits the workflow of the inferred edge it stands in for.
PLUSEdge inferrable = new PLUSEdge(b, f, pedge.getWorkflow(), PLUSEdge.EDGE_TYPE_UNSPECIFIED);
inferrable.setSourceHints(new SignPost("Surrogate Algorithm"));
// Inferrable edges always are marked show.
inferrable.setFromMarking(EdgeMarking.SHOW);
inferrable.setToMarking(EdgeMarking.SHOW);
toAdd.add(inferrable);
// This shouldn't happen. But we want to know about it if it does.
// (The loop edge is only logged; it has already been queued for addition above.)
if(b.getId().equals(f.getId())) log.info("LOOP EDGE! " + b.getName());
} // End for
} // End for
} // End while
dag.getFingerPrint().stopTimer("NewEdgeComputing");
// More performance logging junk.
// This is float division, so a zero count yields NaN rather than an exception; the
// zero-count case is normalised immediately below.
float visSetAvg = ((float)visSetsTotal/(float)visSetsTimes);
if(visSetsTimes == 0) {
visSetMin = 0;
visSetMax = 0;
visSetAvg = 0;
} // End if
dag.getMetadata().put("VisibleSets", ""+visSetsTimes);
dag.getMetadata().put("VisibleSetAvg", ""+visSetAvg);
dag.getMetadata().put("VisibleSetMax", ""+visSetMax);
dag.getMetadata().put("VisibleSetMin", ""+visSetMin);
dag.getMetadata().put("preMarkEdges", ""+dag.countEdges());
dag.getMetadata().put("inferredEdges", ""+inferredEdges);
dag.getFingerPrint().startTimer("AddComputedEdges");
// Have to add these outside the loop to avoid concurrent modifications.
for(int x=0; x<toAdd.size(); x++) {
log.fine("Adding computed edge " +
toAdd.get(x).getFrom().getName() + " -> " +
toAdd.get(x).getTo().getName());
dag.addEdge(toAdd.get(x), true);
} // End for
dag.getFingerPrint().stopTimer("AddComputedEdges");
dag.getFingerPrint().startTimer("PruneInferredEdges");
// The hit list holds both the inferred edges and the hidden ones collected above.
for(int x=0; x<hitList.size(); x++) {
log.fine("LineageDAG: Removing inferred edge " +
hitList.get(x).getFrom().getName() + " -> " +
hitList.get(x).getTo().getName());
dag.removeEdge(hitList.get(x));
} // End for
dag.getFingerPrint().stopTimer("PruneInferredEdges");
dag.getFingerPrint().startTimer("PruneOrphanedNodes");
for(String id : checkForOrphanedNodes) {
if(dag.getEdgesByNode(id).size() == 0) {
// This node has no inbound or outbound edges. It got
// orphaned by the algorithm, and is now disconnected and should
// get pruned.
log.fine("Removing orphaned node " + dag.getNode(id));
// Best effort: a failure to remove one node is reported but does not abort the pruning.
try { dag.removeNode(id); } catch(Exception exc) {
exc.printStackTrace();
log.severe("Exception removing orphaned node: " + exc.getMessage());
}
}
}
dag.getFingerPrint().stopTimer("PruneOrphanedNodes");
dag.getMetadata().put("postMarkEdges", ""+dag.countEdges());
return dag;
} // End drawInferrableEdges
/**
* Given a particular node, find the "visible set" in a particular direction (bling or fling). The visible set
* is the list of nodes upstream or downstream whose outbound or inbound links are visible.
* <p>For each edge leaving the source in the chosen direction: hidden edges are skipped; if the far node's
* marking on the edge is "show" (or absent), the far node joins the set; if it is "infer", the search
* recurses through the far node and the recursive result is appended. The result may contain duplicates.
* @param dag the source DAG where the object exists
* @param source the object starting point
* @param operation "bling" or "fling"
* @return a list of objects that are related via that operation, that have no inferred links further upstream.
* @throws PLUSException if the operation is not "bling" or "fling", or if an edge leads straight back to the source.
*/
protected static ArrayList <PLUSObject> buildVisibleSet(LineageDAG dag, PLUSObject source, String operation) throws PLUSException {
if(!"bling".equals(operation) && !"fling".equals(operation))
throw new PLUSException("Illegal operation");
// NOTE(review): the same timer name is started again by each recursive call below, and it is
// not stopped when the exception further down is thrown -- confirm the timer tolerates both.
dag.getFingerPrint().startTimer("VisibleSet");
ArrayList <PLUSObject> visibleSet = new ArrayList <PLUSObject> ();
List<PLUSEdge> oedges = null;
// log.info("VISIBLE SET starting with " + source + " DIRECTION " + operation);
if(operation.equals("fling")) oedges = dag.getOutboundEdgesByNode(source.getId());
else oedges = dag.getInboundEdgesByNode(source.getId());
for(PLUSEdge e : oedges) {
PLUSObject nextNode = null;
// Re-fetch the edge from the DAG by its endpoints; the fetched copy is the one whose markings are read.
PLUSEdge me = dag.getEdge(e.getFrom(), e.getTo());
ArrayList <PLUSObject> appendList = null;
// Get the next upstream or downstream node.
if(me == null) {
log.warning("****** Marked Edge from " + e.getFrom().getName() + " => " +
e.getTo().getName() + " was null!");
continue;
}
if(me.getVerdict().equals(EdgeMarking.HIDE)) {
// log.info("buildVisibleSet: Skipping HIDE link");
continue;
}
if("bling".equals(operation)) nextNode = me.getFrom();
else nextNode = me.getTo();
// Identity comparison: this only catches an edge that leads back to the very same object instance.
if(nextNode == source)
throw new PLUSException("Horrors! " + source.getName() + " " + operation + " is topsy-turvy!");
// The marking that's relevant depends on whether it's upstream or downstream.
// If we're going BLING-direction, then we want to know if nextNode's outgoing marking is visible.
// If we're going FLING-direction, then we want to know if nextNode's incoming marking is visible.
EdgeMarking relevant = null;
if("bling".equals(operation)) relevant = me.getFromMarking();
else relevant = me.getToMarking();
// Remember that null also counts as visible. If the node didn't vote...
if(relevant == null) relevant = EdgeMarking.SHOW;
if(relevant.equals(EdgeMarking.SHOW))
visibleSet.add(nextNode);
else if(relevant.equals(EdgeMarking.INFER))
// The far node wants this edge inferred, so look past it for the nearest visible nodes.
appendList = buildVisibleSet(dag, nextNode, operation);
if(appendList != null) {
for(PLUSObject p : appendList) visibleSet.add(p);
} // End if
} // End for
dag.getFingerPrint().stopTimer("VisibleSet");
return visibleSet;
} // End buildVisibleSet
/**
 * Create a LineageDAG from a collection. This is subject to the surrogate algorithm, which runs as
 * a fixed pipeline over the new DAG: edge voting, taint tracing, inferred-edge drawing, and finally
 * head/foot tagging.
 * @param col the collection of objects (some of which may not be viewable by viewer)
 * @param viewer the viewer for the final LineageDAG
 * @return a LineageDAG consisting of an account of col viewable by viewer
 * @throws PLUSException
 */
public static LineageDAG fromCollection(ProvenanceCollection col, User viewer) throws PLUSException {
    LineageDAG result = new LineageDAG(viewer);
    result.addAll(col);

    // Each stage modifies the DAG it is given and hands the same object back.
    result = computeEdgeVoting(result);      // Edge voting for surrogates
    result = traceTaintSources(result);      // Trace indirect taints from direct taints
    result = drawInferrableEdges(result);    // Draw inferred edges based on surrogate alg.
    result = tagHeadAndFeet(result);

    return result;
} // End fromCollection
/**
 * @return the starting node of this DAG, or null if no focus has been set.
 */
public PLUSObject getFocus() {
    return this.focus;
}
} // End LineageDAG