/* Copyright 2015 MITRE Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.mitre.provenance.asias;
import org.mitre.provenance.Metadata;
import org.mitre.provenance.PLUSException;
import org.mitre.provenance.client.AbstractProvenanceClient;
import org.mitre.provenance.client.LocalProvenanceClient;
import org.mitre.provenance.client.ProvenanceClientException;
import org.mitre.provenance.client.RESTProvenanceClient;
import org.mitre.provenance.plusobject.*;
import org.mitre.provenance.npe.*;
import org.mitre.provenance.plusobject.json.JSONConverter;
import org.mitre.provenance.user.User;
import org.mitre.provenance.contenthash.ContentHasher;
import org.mitre.provenance.contenthash.MD5ContentHasher;
import org.mitre.provenance.contenthash.SHA256ContentHasher;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.List;
import java.util.Iterator;
import javax.ws.rs.client.Client;
/**
 * Ingests the JSON that ASIAS processes generate and parses it into a provenance collection,
 * which is then reported (saved) to the PLUS repository through the configured {@link #client}.
 *
 * <p>The input is expected to hold one JSON object per line. The ingest attempts to capture the
 * content in as general a form as possible. However, actions for several of the top-level keys in
 * the ingested JSON <em>must</em> be hardcoded to determine specific aspects of the provenance graph
 * (e.g., "name" as invocation node name, "params_json" broken into nodes representing input
 * parameters, etc.)
 *
 * <p>All state is held in public static fields, so this class is not safe for concurrent use.
 *
 * @author piekut
 */
public class Ingest_AsiasJson {
    /**
     * Metadata key holding the hash of (parameter key + value + process name).
     * The spelling is part of the stored data and must not be "corrected".
     */
    private static final String PROCESS_SPECIFIC_HASH_KEY = "sha256hashSpecficProcess";

    /** Nodes created during the current ingest: meta_id -> data node, "name_meta_id" -> invocation node. */
    public static TreeMap<String,PLUSObject> nodeLookup = new TreeMap<String,PLUSObject>();
    /**
     * Parameter nodes created so far, grouped by the SHA-256 of "key_value".
     * NOTE(review): unlike nodeLookup/edgeMap this is never cleared, so it accumulates across
     * calls to processASIASJSON in the same JVM -- confirm that this is intended.
     */
    public static TreeMap<String,List<PLUSObject>> hashedParamNodeLookup = new TreeMap<String,List<PLUSObject>>();
    /** Pending lineage edges; each map holds a single entry: input meta_id -> invocation lookup key. */
    public static List<TreeMap<String,String>> edgeMap = new java.util.ArrayList<TreeMap<String,String>>();
    /** Client used both to search for previously loaded nodes and to report the result. Set by main(). */
    public static RESTProvenanceClient client;
    /** Shared Gson instance used for all parsing. */
    public static Gson g = new GsonBuilder().create();

    /**
     * Reads the ingest file from the user's Desktop, converts it to a provenance collection and
     * reports it through a REST client on localhost:8080.
     *
     * @param args unused
     * @throws Exception if the file cannot be read, a record cannot be parsed, or reporting fails
     */
    public static void main(String [] args) throws Exception {
        client = new RESTProvenanceClient("localhost", "8080");
        ArrayList<String> jsonContent = new ArrayList<String>();
        System.out.println("Opening JSON file.");

        // EDIT VALUE here if the file is to be loaded from another location.
        String ingestDIR = System.getProperty("user.home") + File.separator + "Desktop";
        // EDIT VALUE here if the ingest file has a different name.
        // Alternatives used previously: "/seq/metademo2.json", "/seq/metadev5.json"
        String ingestJSON = "/seq/dataManifestDownstream.json";
        File userDesktopJSON = new File(ingestDIR + File.separator + ingestJSON);

        // try-with-resources: both streams are closed even when opening or reading fails, and the
        // I/O error propagates instead of letting an empty or partial collection be reported.
        try (FileInputStream fis = new FileInputStream(userDesktopJSON);
             BufferedReader br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))) {
            String strLine;
            // Read file line by line; each non-empty line is one JSON record.
            while ((strLine = br.readLine()) != null) {
                if (strLine.contains("ObjectId")) {
                    // Strip the Mongo-style ObjectId( ... ) wrapper so the line is plain JSON.
                    strLine = strLine.replaceFirst("ObjectId\\(", "");
                    strLine = strLine.replaceFirst("\\),", ",");
                }
                if (strLine.isEmpty()) { continue; }
                jsonContent.add(strLine);
            }
        }

        // see processASIASJSON function directly below for details.
        ProvenanceCollection col = processASIASJSON(jsonContent);
        // REST client reports via localhost:8080/api/plus/graph/new/
        client.report(col);
        System.out.println("Done!");
        System.exit(0);
    }

    /**
     * Builds a provenance collection from ASIAS JSON records.
     *
     * <p>First every record is turned into an invocation node, its parameter nodes and an output
     * data node. Then the lineage edges recorded during that pass are instantiated; an input that
     * is not part of this load set is looked up in the datastore by its "joins" metadata value.
     * Requires {@link #client} to be set, because the datastore is searched through it.
     *
     * @param asiasJSON one JSON object per element
     * @return the collection holding all created nodes and edges
     * @throws Exception if a record cannot be parsed or a datastore search fails
     */
    public static ProvenanceCollection processASIASJSON(ArrayList<String> asiasJSON) throws Exception {
        ProvenanceCollection col = new ProvenanceCollection();
        System.out.println("Parsing submitted JSON.");
        try {
            System.out.println("Starting to add nodes...");
            for (String record : asiasJSON) {
                // Creates invocation node, params (if present) and output-data node for each JSON obj.
                createInvocationAndDataNode(record, col);
            }
            System.out.println("Done adding nodes.");

            // Edges connecting the DAG components were specified in the previous loop, and stored
            // in a Map until all nodes were processed. Instantiate them in the collection now.
            System.out.println("Starting to add edges...");
            for (TreeMap<String,String> edge : edgeMap) {
                String from = edge.firstKey();
                String to = edge.get(from);
                PLUSObject source = nodeLookup.get(from);
                // 'to' is always the invocation key registered by the record that declared the edge.
                PLUSObject target = nodeLookup.get(to);
                if (source != null) {
                    System.out.println(" adding edge from "+ source.getName() + " to " + target.getName());
                    col.addEdge(new PLUSEdge(source, target, PLUSWorkflow.DEFAULT_WORKFLOW, PLUSEdge.EDGE_TYPE_INPUT_TO));
                }
                else { // This handles nodes previously loaded.
                    System.out.println(" adding edge from '"+ from + "' to " + target.getName());
                    System.out.println(" INFO: input node not in load set. Checking datastore for preexisting match...");
                    Metadata parameters = new Metadata();
                    parameters.put("joins", from);
                    Iterator<PLUSObject> joinsNodes = client.search(parameters, 500).getNodes().iterator();
                    if (joinsNodes.hasNext()) {
                        // Only the first match is used.
                        PLUSObject previouslyLoadedNode = joinsNodes.next();
                        col.addNode(previouslyLoadedNode);
                        col.addEdge(new PLUSEdge(previouslyLoadedNode, target, PLUSWorkflow.DEFAULT_WORKFLOW, PLUSEdge.EDGE_TYPE_INPUT_TO));
                    }
                    else {
                        System.out.println(" WARNING: node with meta_id '" + from + "' was not found!");
                        System.out.println(" Skipping over edge joining '" + from + "' to '"+to+"'.");
                    }
                }
            }
        }
        finally {
            // Always reset per-ingest state so a failed run cannot leak edges/nodes into the next one.
            edgeMap.clear();
            nodeLookup.clear();
        }
        System.out.println("Done adding edges.");
        System.out.println("ASIAS ingest complete.");
        return col;
    } // End processASIASJSON

    /**
     * Converts one ASIAS JSON record into an invocation node, its parameter nodes and an output
     * data node, adds them to the collection and records the record's input edges in edgeMap.
     *
     * @param jsonString a single JSON object; "name", "meta_id" and "output_schemas_json" are required
     * @param col collection that receives the nodes and edges
     * @throws Exception if the record is not a JSON object or a required field is missing
     */
    private static void createInvocationAndDataNode(String jsonString, ProvenanceCollection col) throws Exception {
        JsonElement elem = g.fromJson(jsonString, JsonElement.class);
        // Gson yields null for an empty/blank document, so test for that before dereferencing.
        if (elem == null || !elem.isJsonObject()) throw new Exception("Server response wasn't a JSON object " + elem);
        JsonObject obj = elem.getAsJsonObject();

        // "name" is a required field for ASIAS invocation nodes.
        String analytic_name = requireString(obj, "name");
        String meta_id = requireString(obj, "meta_id");
        PLUSInvocation invocation = new PLUSInvocation(analytic_name);
        System.out.println("Adding Node '" + analytic_name+"'. ("+meta_id+")");

        String output_name = analytic_name+"_output";
        if (obj.get("output_name")!=null) { output_name = obj.get("output_name").getAsString(); }
        PLUSString data = new PLUSString(output_name);
        String content = requireString(obj, "output_schemas_json");
        data.setContent(content);
        SHA256ContentHasher myHasher = new SHA256ContentHasher();
        data.getMetadata().put(Metadata.CONTENT_HASH_SHA_256, sha256Hex(myHasher, content));

        /* Adding properties with as few exceptions as possible, in light of requirements. */
        for (Map.Entry<String,JsonElement> entry : obj.entrySet()) {
            String topLevelElement = entry.getKey();
            JsonElement jsonElement = entry.getValue();
            if (topLevelElement.equals("params_json")) {
                addParameterNodes(jsonElement, analytic_name, invocation, myHasher, col);
            }
            else if (topLevelElement.equals("job_counters")) {
                // only need special handling for "job_counters" because it comes in as a string.
                // if it changes to be JSON in its natural state, this branch can be removed.
                addProperty(invocation, topLevelElement, toJsonElement(jsonElement));
            }
            else if (topLevelElement.equals("output_schemas_json") || topLevelElement.equals("path")) {
                // special handling for the output data node's parameters.
                addProperty(data, topLevelElement, jsonElement);
            }
            else if (!topLevelElement.equals("name") && !topLevelElement.equals("output_name")) {
                // Everything that is not one of our special-use elements (meta_id included)
                // becomes a property of the invocation node.
                addProperty(invocation, topLevelElement, jsonElement);
            }
            if (topLevelElement.equals("input_schemas_json")) {
                // In addition to being stored as a property, the input schema is the invocation's content hash.
                invocation.getMetadata().put(Metadata.CONTENT_HASH_SHA_256, sha256Hex(myHasher, jsonElement.toString()));
            }
        }

        /* Lookups getting set here so that the backwards/forwards lineage edges can be determined afterward. */
        String invocationKey = analytic_name+"_" +meta_id;
        nodeLookup.put(meta_id, data); // might be non-intuitive, but we're assigning meta_id as key for data node.
        nodeLookup.put(invocationKey, invocation); // analytic name + meta_id is lookup key for invocation node.
        data.getMetadata().put("joins", meta_id); // This is for lookup later, should later runs reference this output.
        invocation.setCreated(); // timestamp "now".
        //invocation.getMetadata().put("ingest", "ASIAS-JSON"); // tag for deleting draft nodes later. Uncomment if desired.

        JsonElement inputMetaIds = obj.get("input_meta_ids");
        // A missing, null or non-array value means "no declared inputs".
        if (inputMetaIds != null && inputMetaIds.isJsonArray()) {
            for (JsonElement in_meta_id : inputMetaIds.getAsJsonArray()) {
                TreeMap<String,String> addEdge = new TreeMap<String,String>();
                addEdge.put(in_meta_id.getAsString(), invocationKey);
                edgeMap.add(addEdge);
            }
        }

        col.addNode(invocation);
        col.addNode(data);
        col.addEdge(new PLUSEdge(invocation, data, PLUSWorkflow.DEFAULT_WORKFLOW, PLUSEdge.EDGE_TYPE_GENERATED));
    }

    /** Returns the string value of a required top-level field, or fails with a descriptive message. */
    private static String requireString(JsonObject obj, String key) throws Exception {
        JsonElement value = obj.get(key);
        if (value == null || value.isJsonNull()) {
            throw new Exception("ASIAS record is missing required field '" + key + "': " + obj);
        }
        return value.getAsString();
    }

    /** Returns the SHA-256 of the UTF-8 bytes of the text, formatted as a hex string. */
    private static String sha256Hex(SHA256ContentHasher hasher, String text) throws Exception {
        return ContentHasher.formatAsHexString(hasher.hash(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8))));
    }

    /**
     * Creates one data node per entry of "params_json", links each to the invocation, and adds
     * non-provenance edges to parameter nodes with the same key/value (from this JVM's earlier
     * ingests and from the datastore).
     * Caveat: this assumes params_json depth is 1; nested values are stored as their JSON text.
     */
    private static void addParameterNodes(JsonElement paramsJson, String analyticName, PLUSInvocation invocation,
                                          SHA256ContentHasher hasher, ProvenanceCollection col) throws Exception {
        // first, transform out to a JSON object (the value usually arrives as a string).
        JsonElement parsed = toJsonElement(paramsJson);
        if (parsed == null || !parsed.isJsonObject()) {
            throw new Exception("params_json is not a JSON object: " + paramsJson);
        }

        // second, establish separate data nodes that feed into the invocation node.
        for (Map.Entry<String,JsonElement> param : parsed.getAsJsonObject().entrySet()) {
            String paramKey = param.getKey();
            JsonElement paramElement = param.getValue();
            String paramValue = paramElement.isJsonPrimitive() ? paramElement.getAsString() : paramElement.toString();

            PLUSString paramNode = new PLUSString(paramKey);
            paramNode.setContent(paramValue);
            // criteria for hashing/uniqueness of param nodes is key plus its value. (possibly open to revision)
            String paramValueUnique = paramKey+"_"+paramValue;
            String hashString = sha256Hex(hasher, paramValueUnique);
            paramNode.getMetadata().put(Metadata.CONTENT_HASH_SHA_256, hashString);
            // Also hash value plus the process it feeds, so the NPE types can be distinguished later.
            String processHash = sha256Hex(hasher, paramValueUnique + analyticName);
            paramNode.getMetadata().put(PROCESS_SPECIFIC_HASH_KEY, processHash);
            col.addNode(paramNode);

            // third, establish link from *parameter* node to *invocation* node.
            col.addEdge(new PLUSEdge(paramNode, invocation, PLUSWorkflow.DEFAULT_WORKFLOW, PLUSEdge.EDGE_TYPE_INPUT_TO));

            // Add non-provenance edges for params, but only if they match a previously-loaded parameter.
            if (hashString == null) { continue; }

            // First look through already loaded nodes to see if other params match.
            List<PLUSObject> sameValueNodes = hashedParamNodeLookup.get(hashString);
            if (sameValueNodes == null) {
                sameValueNodes = new ArrayList<PLUSObject>();
                hashedParamNodeLookup.put(hashString, sameValueNodes);
            }
            for (PLUSObject otherParam : sameValueNodes) {
                linkEquivalentParam(paramNode, otherParam, processHash, col);
            }
            sameValueNodes.add(paramNode);

            // Secondly, do the same thing for previously-loaded nodes in the database.
            Metadata parameters = new Metadata();
            parameters.put(Metadata.CONTENT_HASH_SHA_256, hashString);
            Iterator<PLUSObject> equivalentValues = client.search(parameters, 500).getNodes().iterator();
            while (equivalentValues.hasNext()) {
                PLUSObject otherParam = equivalentValues.next();
                if (!col.contains(otherParam)) {
                    col.addNode(otherParam); // duplicate, only to add NPE.
                }
                linkEquivalentParam(paramNode, otherParam, processHash, col);
            }
        }
    }

    /**
     * Adds a non-provenance edge from paramNode to otherParam, typed by whether both feed the same process.
     * The comparison is null-safe: a matched node without the process-specific hash counts as a different process.
     */
    private static void linkEquivalentParam(PLUSString paramNode, PLUSObject otherParam, String processHash,
                                            ProvenanceCollection col) throws Exception {
        String type = "Same Parameter Value, Different Process";
        if (processHash != null && processHash.equals(otherParam.getMetadata().get(PROCESS_SPECIFIC_HASH_KEY))) {
            type = "Same Parameter Value, Same Process";
        }
        NonProvenanceEdge npe = new NonProvenanceEdge(paramNode, otherParam, type);
        System.out.println("adding '"+type+"' NPE from "+ paramNode.getId()+ " to " +otherParam.getId());
        col.addNonProvenanceEdge(npe);
    }

    /**
     * Stores a JSON value in the node's metadata, flattening arrays and objects into first-level
     * properties whose names reflect the hierarchy.
     *
     * <ul>
     * <li>primitive: stored under propertyName as a string;</li>
     * <li>array of one element: that element is stored under propertyName;</li>
     * <li>array of several elements: stored as propertyName_1, propertyName_2, ...;</li>
     * <li>object: each member is stored as propertyName__memberName;</li>
     * <li>null / JSON null / empty array: nothing is stored.</li>
     * </ul>
     *
     * @param node node whose metadata receives the value
     * @param propertyName metadata key (or key prefix for nested values)
     * @param propertyValue value to store; may be null
     */
    public static void addProperty(PLUSObject node, String propertyName, JsonElement propertyValue) {
        if (propertyValue == null || propertyValue.isJsonNull()) {
            // Nothing to record; without this guard a JSON null would fail in getAsJsonObject() below.
            return;
        }
        if (propertyValue.isJsonPrimitive()) {
            node.getMetadata().put(propertyName, propertyValue.getAsString());
        }
        else if (propertyValue.isJsonArray()) {
            JsonArray arrayObj = propertyValue.getAsJsonArray();
            if (arrayObj.size()==1) {
                // A single-element array is stored without an index suffix.
                addProperty(node, propertyName, arrayObj.get(0));
            }
            else {
                int count = 1; // suffixes are 1-based
                for (JsonElement arrayElement : arrayObj) {
                    addProperty(node, propertyName +"_" + count, arrayElement);
                    count++;
                }
            }
        }
        else {
            for (Map.Entry<String,JsonElement> entry : propertyValue.getAsJsonObject().entrySet()) {
                addProperty(node, propertyName +"__" + entry.getKey(), entry.getValue());
            }
        }
    }

    /**
     * Parses a JSON value that was delivered as a string into a real JSON element.
     *
     * @param jsonElement either a primitive whose string form is JSON text, or an already-structured element
     * @return the parsed element for primitives (null if the text is empty); the argument itself otherwise
     */
    public static JsonElement toJsonElement(JsonElement jsonElement) {
        if (!jsonElement.isJsonPrimitive()) {
            return jsonElement;
        }
        // getAsString() already returns the un-escaped text, so it can be parsed directly.
        // (The previous replaceAll("\\\"", "\"") replaced '"' with '"' and therefore did nothing.)
        return g.fromJson(jsonElement.getAsString(), JsonElement.class);
    }
} // End ASIAS Ingest