package org.mitre.provenance.plusobject.prov; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.StringWriter; import java.net.MalformedURLException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.logging.Logger; import javax.xml.bind.JAXBException; import org.mitre.provenance.Metadata; import org.mitre.provenance.PLUSException; import org.mitre.provenance.dag.TraversalSettings; import org.mitre.provenance.db.neo4j.Neo4JPLUSObjectFactory; import org.mitre.provenance.npe.NonProvenanceEdge; import org.mitre.provenance.plusobject.PLUSActivity; import org.mitre.provenance.plusobject.PLUSActor; import org.mitre.provenance.plusobject.PLUSDataObject; import org.mitre.provenance.plusobject.PLUSEdge; import org.mitre.provenance.plusobject.PLUSFile; import org.mitre.provenance.plusobject.PLUSInvocation; import org.mitre.provenance.plusobject.PLUSObject; import org.mitre.provenance.plusobject.PLUSURL; import org.mitre.provenance.plusobject.PLUSWorkflow; import org.mitre.provenance.plusobject.ProvenanceCollection; import org.mitre.provenance.simulate.DAGAholic; import org.mitre.provenance.simulate.SyntheticGraphProperties; import org.mitre.provenance.tools.PLUSUtils; import org.mitre.provenance.user.PrivilegeClass; import org.mitre.provenance.user.User; import org.openprovenance.prov.interop.InteropFramework; import org.openprovenance.prov.model.Activity; import org.openprovenance.prov.model.Agent; import org.openprovenance.prov.model.Document; import org.openprovenance.prov.model.Entity; import org.openprovenance.prov.model.HasOther; import org.openprovenance.prov.model.Name; import org.openprovenance.prov.model.Namespace; import org.openprovenance.prov.model.Other; import org.openprovenance.prov.model.ProvFactory; import org.openprovenance.prov.model.QualifiedName; import org.openprovenance.prov.model.Statement; import org.openprovenance.prov.model.Used; import org.openprovenance.prov.model.WasDerivedFrom; import org.openprovenance.prov.model.WasGeneratedBy; import org.openprovenance.prov.model.WasInformedBy; /** * A class that knows how to convert ProvenanceCollection objects into a PROV-DM representation. * Information on PROV-DM can be found at http://www.w3.org/TR/prov-dm/ * * <p>This code follows mappings provided in the MAPPINGS-PLUS-TO-PROV-DM.txt file found in the source * distribution. The mappings in that file are necessary to understand what is happening with this code. * * <p>This is an initial cut - it is still in need of substantial development, testing, and verification. * * <p><strong>Warning</strong>: the PROV API doesn't do good input checking when you use the factory to * create object instances. Because the model objects need to get serialized several different ways, * this creates situations where the model is created just fine, but when you go to serialize, you get * various exceptions due to invalid data. PLUS data objects don't exist in the W3C/XML space, and so * when we do this translation we're creating artifacts like QNames that don't natively exist in PLUS. * Beware situations where data in PLUS translates into invalid XML NCNames, anyURI, and so on. This * won't be caught by the PROV API, but can cause failure to serialize. * * @author moxious */ public class PROVConverter { public static final String BASE_NAMESPACE = "http://github.com/plus-provenance/plus#"; public static final String METADATA_NAMESPACE = BASE_NAMESPACE + "metadata"; private static Logger log = Logger.getLogger(PROVConverter.class.getName()); public enum Format { RDF, XML, TTL }; protected ProvFactory factory = null; protected Name name = null; // Store mappings from the PLUS OID of the original object, to its PROV-DM counterpart type. // This lets us look up later on what the connection is between two PLUSObjects and their related types. protected HashMap<String,Activity> provActivities = new HashMap<String,Activity>(); protected HashMap<String,Entity> provEntities = new HashMap<String,Entity>(); protected HashMap<String,Agent> provAgents = new HashMap<String,Agent>(); protected HashMap<String,Statement> provStatements = new HashMap<String,Statement>(); protected static final String PLUSTYPE_PREF = "plustype"; protected QualifiedName INVOCATION_TYPE = new org.openprovenance.prov.xml.QualifiedName(BASE_NAMESPACE + "PLUSInvocation", "invocation", PLUSTYPE_PREF); protected QualifiedName DATA_TYPE = new org.openprovenance.prov.xml.QualifiedName(BASE_NAMESPACE + "PLUSDataObject", "data", PLUSTYPE_PREF); /** * Create a new converter object. */ public PROVConverter() { factory = new org.openprovenance.prov.xml.ProvFactory(); name = factory.getName(); } /** * This is the main method that most callers should use. * @param col a provenance collection * @return a PROV-DM Document object, suitable for serialization and writing to multiple formats. * @throws PLUSException */ public Document provenanceCollectionToPROV(ProvenanceCollection col) throws PLUSException { for(PLUSActor a : col.getActors()) { // System.out.println(a); Agent agent = actorToAgent(a); agent.getOther().add(makeObjectProperty("created", a.getCreatedAsDate())); provAgents.put(a.getId(), agent); } for(PLUSObject o : col.getNodes()) { HasOther item = null; // System.out.println(o); if(o.isActivity()) { Activity a = activityToActivity(o); item = a; provActivities.put(o.getId(), a); } else if(o.isWorkflow()) { Entity e = workflowToBundle(o); item = e; provEntities.put(o.getId(), e); } else if(o.isDataItem()) { Entity e = dataObjectToEntity(o); item = e; provEntities.put(o.getId(), e); } else if(o.isInvocation()) { Activity a = invocationToActivity(o); item = a; provActivities.put(o.getId(), a); } else { log.warning("Don't know what this object is, skipping: " + o); } // Log properties common to all PLUSObjects. if(item != null) { convertMetadata(o, item); convertOwnership(o); item.getOther().add(makeObjectProperty("name", o.getName())); item.getOther().add(makeObjectProperty("created", o.getCreatedAsDate())); item.getOther().add(makeObjectProperty("plus_type", o.getObjectType())); item.getOther().add(makeObjectProperty("plus_subtype", o.getObjectSubtype())); for(PrivilegeClass pc : o.getPrivileges().getPrivilegeSet()) { item.getOther().add(makeObjectProperty("requires", pc.toString())); } for(String sgf : o.getSGFs()) { item.getOther().add(makeObjectProperty("hasSGF", sgf)); } // System.out.println(item); } } for(PLUSEdge e : col.getEdges()) { // System.out.println(e); Statement stmt = edgeToStatement(e); provStatements.put(e.getFrom().getId() + "/" + e.getTo().getId(), stmt); } for(NonProvenanceEdge npe : col.getNonProvenanceEdges()) { // System.out.println(npe); String oid = npe.getFrom(); String npid = npe.getTo(); String type = npe.getType(); if(PLUSUtils.isPLUSOID(npid)) { log.warning("NPEs connecting two PLUSObjects are not yet supported: " + npe); continue; } PLUSObject node = col.getNode(oid); if(node == null) { log.warning("NPE " + npe + " references OID which isn't in collection; skipping"); continue; } HasOther o = getHasOther(node); if(o == null) { log.warning("NPE " + npe + " references HasOther which wasn't created; skipping"); continue; } // TODO NPEs are basically not yet supported. // makeObjectProperty(type, npid, "npe"); } // End for /* NamedBundle collectionBundle = factory.newNamedBundle(getQualifiedName(col), provActivities.values(), provEntities.values(), provAgents.values(), provStatements.values()); collectionBundle. */ // Assemble final document. Document d = factory.newDocument(provActivities.values(), provEntities.values(), provAgents.values(), provStatements.values()); Namespace n = Namespace.gatherNamespaces(d); n.addKnownNamespaces(); n.setDefaultNamespace(BASE_NAMESPACE); d.setNamespace(n); return d; } // End provenanceCollectionToPROV /** * Indicates whether or not a given edge can be converted. * @param e the edge * @param f the PROV object corresponding to the from part * @param t the PROV object corresponding to the to part * @return false if the edge cannot be converted; true if it can be. */ private boolean canConvert(PLUSEdge e, Object f, Object t) { if(f == null || t == null) { log.warning("Will not convert dangling edge " + e + " with PROV-from " + (f == null ? "null" : f.getClass().getSimpleName()) + " PROV-to " + (t == null ? "null" : t.getClass().getSimpleName())); return false; } return true; } // End canConvert /** * Locate the PROV "HasOther" object associated with a given PLUSObject in the collection being converted. * * @param obj input object * @return the HasOther instance associated with this in conversion. */ public HasOther getHasOther(PLUSObject obj) { if(provEntities.containsKey(obj.getId())) return provEntities.get(obj.getId()); if(provActivities.containsKey(obj.getId())) return provActivities.get(obj.getId()); return null; } public QualifiedName findEntityOrActivity(PLUSObject obj) { if(provEntities.containsKey(obj.getId())) return provEntities.get(obj.getId()).getId(); if(provActivities.containsKey(obj.getId())) return provActivities.get(obj.getId()).getId(); return null; } /** * Convert a general PLUSEdge object into a PROV Statement. * See the mappings file for description of which kinds of edges are translated into which kinds of statements. * @param e the edge to convert * @return a statement representing that edge * @throws PROVConversionException */ protected Statement edgeToStatement(PLUSEdge e) throws PROVConversionException { String edgeType = e.getType(); if(PLUSEdge.EDGE_TYPE_INPUT_TO.equals(edgeType)) { Activity act = provActivities.get(e.getTo().getId()); Entity ent = provEntities.get(e.getFrom().getId()); if(!canConvert(e, ent, act)) return null; Used u = factory.newUsed(getQualifiedName(e), act.getId(), ent.getId()); // System.out.println(u); return u; } else if(PLUSEdge.EDGE_TYPE_TRIGGERED.equals(edgeType)) { Activity act1 = provActivities.get(e.getFrom().getId()); Activity act2 = provActivities.get(e.getTo().getId()); if(!canConvert(e, act1, act2)) return null; WasInformedBy wib = factory.newWasInformedBy(getQualifiedName(e), act2.getId(), act1.getId()); // System.out.println(wib); return wib; } else if(PLUSEdge.EDGE_TYPE_GENERATED.equals(edgeType)) { Entity ent = provEntities.get(e.getTo().getId()); Activity act = provActivities.get(e.getFrom().getId()); if(!canConvert(e, act, ent)) return null; WasGeneratedBy wgb = factory.newWasGeneratedBy(getQualifiedName(e), ent.getId(), act.getId()); //wgb.setTime(factory.newTime(e.getTo().getCreatedAsDate())); //System.out.println(wgb); return wgb; } else if(PLUSEdge.EDGE_TYPE_MARKS.equals(edgeType) || PLUSEdge.EDGE_TYPE_UNSPECIFIED.equals(edgeType)) { QualifiedName q1 = findEntityOrActivity(e.getFrom()); QualifiedName q2 = findEntityOrActivity(e.getTo()); if(!canConvert(e, q1, q2)) return null; WasInformedBy wib = factory.newWasInformedBy(getQualifiedName(e), q2, q1); //System.out.println(wib); return wib; } else if(PLUSEdge.EDGE_TYPE_CONTRIBUTED.equals(edgeType)) { Entity e1 = provEntities.get(e.getFrom().getId()); Entity e2 = provEntities.get(e.getTo().getId()); if(!canConvert(e, e1, e2)) return null; WasDerivedFrom wdf = factory.newWasDerivedFrom(getQualifiedName(e), e2.getId(), e1.getId()); //System.out.println(wdf); return wdf; } else { log.warning("Don't understand edge " + e + " : skipping."); return null; } } // End edgeToStatement /** * If applicable, convert the ownership relationship between an object and its actor into a "wasAssociatedWith" statement, * or a "wasAttributedTo" statement, depending on the type of object. * @param o a PLUSObject. */ protected void convertOwnership(PLUSObject o) { // Ownership gets mapped onto a "wasAssociatedWith" or "wasAttributedTo" relationship if(o.getOwner() != null) { PLUSActor a = o.getOwner(); if(provAgents.containsKey(a.getId())) { Agent agent = provAgents.get(a.getId()); QualifiedName ownership = new org.openprovenance.prov.xml.QualifiedName(BASE_NAMESPACE, a.getId() + "/" + o.getId(), "owns"); QualifiedName other = findEntityOrActivity(o); if(other != null && provEntities.containsKey(o.getId())) { // Entities are related to agents via "wasAttributedTo" provStatements.put(a.getId(), factory.newWasAttributedTo(ownership, other, agent.getId())); } else if(other != null && provActivities.containsKey(o.getId())) { provStatements.put(a.getId(), factory.newWasAssociatedWith(ownership, other, agent.getId())); } else { log.warning("Could not find appropriate owner for other " + other + " on " + o); } } else { log.warning("Owner of " + o + " not in list of converted agents."); } } } // End convertOwnership protected void convertMetadata(PLUSObject obj, HasOther convertedObj) throws PROVConversionException { Metadata md = obj.getMetadata(); for(String key : md.keySet()) { String val = ""+md.get(key); // Sometimes metadata key names can contain invalid XML characters that cause syntax errors, because // the PROV library doesn't check for this. String local = key.replaceAll("[^A-Za-z0-9]", "_"); if(!key.equals(local)) { // System.out.println("METADATA: '" + key + "' '" + val + "'" + " local '" + local + "'"); } Other o = factory.newOther(BASE_NAMESPACE, local, "metadata", val, name.XSD_STRING); convertedObj.getOther().add(o); } } /** * Make a general object property under the prefix prop. * @param name * @param value * @return an Other object representing the object property */ protected Other makeObjectProperty(String name, Object value) { return makeObjectProperty(name, value, "prop"); } /** * Make a single object property into an "Other" statement. * @param name property name * @param value property value * @param prefix ns prefix * @return an Other object associated with the object property. */ protected Other makeObjectProperty(String name, Object value, String prefix) { QualifiedName nameType = this.name.XSD_STRING; if(value instanceof Date) { nameType = this.name.XSD_DATETIME; // Needs to be valid XML date format. SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); value = sdf.format((Date)value); } else if(value instanceof Integer) { nameType = this.name.XSD_INTEGER; } else if(value instanceof Double) { nameType = this.name.XSD_DOUBLE; } else { value = ""+value; } // if("npe".equals(prefix)) System.out.println("OBJECT PROPERTY: " + name + " " + value + " " + prefix + " " + nameType); return factory.newOther(BASE_NAMESPACE, name, prefix, value, nameType); } // End makeObjectProperty /** * TODO PLUSWorkflow objects should likely get converted into PROV Bundles. * Right now, workflows are just turned into generic entities, which often lack connection to other * Entities and so on. * @param obj the object to convert * @return a bundle entity * @throws PROVConversionException */ public Entity workflowToBundle(PLUSObject obj) throws PROVConversionException { if(!obj.isWorkflow()) throw new PROVConversionException("Object is not a workflow: " + obj); PLUSWorkflow w = (PLUSWorkflow)obj; Entity e = factory.newEntity(getQualifiedName(w), obj.getName()); e.getOther().add(makeObjectProperty("when_start", w.getWhenStart())); e.getOther().add(makeObjectProperty("when_end", w.getWhenEnd())); return e; } /** * Convert a PLUSActivity into a PROV Activity. * @param obj the object to convert * @return a PROV-DM Activity * @throws PROVConversionException if the input isn't a PLUSActivity. */ public Activity activityToActivity(PLUSObject obj) throws PROVConversionException { if(!obj.isActivity()) throw new PROVConversionException("Object is not an activity: " + obj); PLUSActivity act = (PLUSActivity)obj; Activity a = factory.newActivity(getQualifiedName(act), obj.getName()); a.getOther().add(makeObjectProperty("inputs", act.getInputs())); a.getOther().add(makeObjectProperty("outputs", act.getOutputs())); return a; } /** * Convert a PLUSInvocation to a PROV Activity. * @param obj the object to convert * @return an activity representing the invocation * @throws PROVConversionException if the input isn't an invocation. */ public Activity invocationToActivity(PLUSObject obj) throws PROVConversionException { if(!obj.isInvocation()) throw new PROVConversionException("Object is not an invocation: " + obj); PLUSInvocation inv = (PLUSInvocation)obj; Activity a = factory.newActivity(getQualifiedName(inv), obj.getName()); return a; } /** * Convert a PLUSDataObject into a PROV Entity. * @param obj the object to convert * @return a PROV Entity * @throws PROVConversionException if input isn't a data item. */ public Entity dataObjectToEntity(PLUSObject obj) throws PROVConversionException { if(!obj.isDataItem()) throw new PROVConversionException("Object is not a data item: " + obj); Entity e = factory.newEntity(getQualifiedName((PLUSDataObject)obj), obj.getName()); // factory.addType(e, DATA_TYPE, name.XSD_QNAME); if(obj instanceof PLUSFile) { PLUSFile f = (PLUSFile)obj; try { e.getOther().add(makeObjectProperty("path", f.getFile().getAbsolutePath())); } catch(Exception exc) { log.warning(exc.getMessage()); } } else if(obj instanceof PLUSURL) { PLUSURL u = (PLUSURL) obj; try { e.getOther().add(makeObjectProperty("url", u.getURL())); } catch (MalformedURLException e1) { log.severe(e1.getMessage()); } } return e; } // End dataObjectToEntity public Agent actorToAgent(PLUSActor actor) throws PROVConversionException { // TODO attributes Agent a = factory.newAgent(getQualifiedName(actor), actor.getName()); return a; } public QualifiedName getQualifiedName(PLUSEdge e) { return new org.openprovenance.prov.xml.QualifiedName(BASE_NAMESPACE + e.getClass().getSimpleName(), e.getFrom().getId() + ":" + e.getTo().getId(), e.getType().replaceAll(" ", "_")); } public QualifiedName getQualifiedName(ProvenanceCollection col) { return new org.openprovenance.prov.xml.QualifiedName(BASE_NAMESPACE + "ProvenanceCollection", col.getId(), "provcollection"); } public QualifiedName getQualifiedName(PLUSObject obj) { String className = obj.getClass().getSimpleName(); return new org.openprovenance.prov.xml.QualifiedName(BASE_NAMESPACE + className, obj.getId(), className.replaceAll("PLUS", "").toLowerCase()); } public QualifiedName getQualifiedName(PLUSActor actor) { return new org.openprovenance.prov.xml.QualifiedName(BASE_NAMESPACE + actor.getClass().getSimpleName(), actor.getId(), "actor"); } public static void main(String [] args) throws Exception { String oid = "urn:uuid:mitre:plus:bf894a51-3f5e-4134-ba62-bff5b24cd19a"; ProvenanceCollection col = null; if(oid != null) { col = Neo4JPLUSObjectFactory.newDAG(oid, User.DEFAULT_USER_GOD, new TraversalSettings()); } else { SyntheticGraphProperties props = new SyntheticGraphProperties().setConnectivity(0.5).setComponents(10).protectN(0).percentageData(0.5); col = new DAGAholic(props); } Document d = new PROVConverter().provenanceCollectionToPROV(col); System.out.println("Writing..."); System.out.println(consume(formatAs(Format.XML, d))); System.out.println(consume(formatAs(Format.RDF, d))); System.out.println(consume(formatAs(Format.TTL, d))); } public static BufferedReader formatAs(Format fmt, Document d) throws IOException { InteropFramework fmk = new InteropFramework(); String suffix = ".xml"; if(fmt == Format.XML) { suffix = ".xml"; } else if(fmt == Format.TTL) { suffix = ".ttl"; } else if(fmt == Format.RDF) { suffix = ".rdf"; } else { suffix = ".xml"; } File f = File.createTempFile("prov", suffix); f.deleteOnExit(); String target = f.getAbsolutePath(); // System.out.println("Tmpfile: " + target); fmk.writeDocument(target, d); BufferedReader br = new BufferedReader(new FileReader(target)); return br; } public static String consume(BufferedReader br) throws IOException { StringBuffer b = new StringBuffer(); char [] buf = new char[1024*4]; int x=0; while((x = br.read(buf)) > 0) { b.append(buf, 0, x); } try { br.close(); } catch(Exception exc) { exc.printStackTrace(); } return b.toString(); } /** * Serialize a document as XML, and then turn it into one large string. * @param d a document * @return an XML serialized form of the document. * @throws JAXBException */ public static String asXMLString(Document d) throws JAXBException { Namespace.withThreadNamespace(d.getNamespace()); org.openprovenance.prov.xml.ProvSerialiser serializer = new org.openprovenance.prov.xml.ProvSerialiser(); StringWriter sw = new StringWriter(); serializer.serialiseDocument(sw, d, true); return sw.toString(); } } // End PROVConverter