package dgm.degraphmalizr.recompute;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Optional;
import com.google.common.collect.Iterables;
import com.tinkerpop.blueprints.*;
import dgm.*;
import dgm.configuration.*;
import dgm.exceptions.*;
import dgm.modules.elasticsearch.QueryFunction;
import dgm.modules.elasticsearch.ResolvedPathElement;
import dgm.GraphUtilities;
import dgm.modules.bindingannotations.Fetches;
import dgm.modules.bindingannotations.Recomputes;
import dgm.trees.*;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.Client;
import org.nnsoft.guice.sli4j.core.InjectLogger;
import org.slf4j.Logger;
import javax.inject.Inject;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import static dgm.GraphUtilities.toJSON;
public class RecomputerFactoryImpl implements Recomputer
{
// Logger injected by sli4j after construction (not available in the constructor).
@InjectLogger
Logger log;
// Elasticsearch client: reads source documents and writes recomputed target documents.
protected final Client client;
// Graph used to walk relationships between documents.
protected final Graph graph;
// Executor for recompute jobs. NOTE(review): stored but never used in this class — confirm intent.
protected final ExecutorService recomputeQueue;
// Executor used to fetch documents from Elasticsearch in parallel (see Trees.pmap in walkResults()).
protected final ExecutorService fetchQueue;
// Resolves an (edge, vertex) pair to its Elasticsearch document.
protected final QueryFunction queryFn;
// Shared Jackson mapper for parsing and serializing JSON documents.
protected final ObjectMapper objectMapper;
/**
 * Constructs the recomputer with all of its collaborators; invoked by Guice.
 *
 * @param client         Elasticsearch client for document reads and writes
 * @param graph          graph used to walk document relationships
 * @param fetchQueue     executor for parallel Elasticsearch fetches
 * @param recomputeQueue executor for recompute jobs
 * @param objectMapper   Jackson mapper for JSON (de)serialization
 * @param queryFunction  resolves graph elements to Elasticsearch documents
 */
@Inject
public RecomputerFactoryImpl(Client client, Graph graph,
                             @Fetches ExecutorService fetchQueue,
                             @Recomputes ExecutorService recomputeQueue,
                             ObjectMapper objectMapper,
                             QueryFunction queryFunction)
{
    this.client = client;
    this.graph = graph;
    this.fetchQueue = fetchQueue;
    this.recomputeQueue = recomputeQueue;
    this.objectMapper = objectMapper;
    this.queryFn = queryFunction;
}
/**
 * Performs the recompute of a single document: fetches the source from
 * Elasticsearch, applies the configured walks and their reductions, transforms
 * the document, and writes the result to the target index.
 */
class Recomputer
{
    protected final RecomputeRequest request;
    // NOTE(review): stored but never invoked anywhere in this class — confirm
    // whether progress callbacks were meant to be fired during recompute().
    protected final RecomputeCallback callback;

    public Recomputer(RecomputeRequest request, RecomputeCallback callback)
    {
        this.request = request;
        this.callback = callback;
    }

    /**
     * Executes every configured walk and reduces each walk property over the
     * resulting document tree.
     *
     * @return property name to reduced value; empty when no walks are
     *         configured; {@code null} when any document in a walk tree could
     *         not be resolved (the graph is "expired" for this node)
     */
    private HashMap<String, JsonNode> walkResults() throws ExecutionException, InterruptedException
    {
        final HashMap<String, JsonNode> walkResults = new HashMap<String, JsonNode>();
        if (request.config.walks().isEmpty())
            return walkResults;

        for (Map.Entry<String, WalkConfig> walkCfg : request.config.walks().entrySet())
        {
            // walk the graph, fetching all children in the opposite direction of the walk
            final Tree<Pair<Edge, Vertex>> tree =
                    GraphUtilities.childrenFrom(request.root.vertex(), walkCfg.getValue().direction());

            // write size information to log
            if (log.isDebugEnabled())
            {
                final int size = Iterables.size(Trees.bfsWalk(tree));
                log.debug("Retrieving {} documents from ES", size);
            }

            // get all documents in the tree from Elasticsearch (in parallel)
            final Tree<Optional<ResolvedPathElement>> docTree = Trees.pmap(fetchQueue, queryFn, tree);

            // if some value is absent from the tree, abort the whole re-computation
            // TODO split various failure modes
            final Optional<Tree<ResolvedPathElement>> fullTree = Trees.optional(docTree);
            if (!fullTree.isPresent())
            {
                log.debug("Some results were absent, aborting re-computation for {}", request.root.id());
                // TODO return list of expired nodes/IDs
                return null;
            }

            // reduce each property to a value based on the walk result
            for (final Map.Entry<String, ? extends PropertyConfig> propertyCfg : walkCfg.getValue().properties().entrySet())
                walkResults.put(propertyCfg.getKey(), propertyCfg.getValue().reduce(fullTree.get()));
        }
        return walkResults;
    }

    /**
     * Writes the recomputed document to the configured target index/type,
     * keyed by the source document's id.
     *
     * @param document the fully assembled target document
     * @return the Elasticsearch index response
     */
    private IndexResponse writeToES(ObjectNode document)
    {
        final TypeConfig conf = request.config;
        final ID sourceID = request.root.id();
        final ID targetID = sourceID.index(conf.targetIndex()).type(conf.targetType());

        // record which source version produced this target document
        document.put("_fromSource", toJSON(objectMapper, sourceID));
        final String documentSource = document.toString();

        // write document to Elasticsearch
        final IndexResponse ir = client.prepareIndex(targetID.index(), targetID.type(), targetID.id())
                .setSource(documentSource).execute().actionGet();

        log.debug("Written /{}/{}/{}, version={}",
                new Object[]{targetID.index(), targetID.type(), targetID.id(), ir.version()});
        log.debug("Content: {}", documentSource);
        return ir;
    }

    /**
     * Retrieves the raw source document for the root vertex from Elasticsearch.
     *
     * @return the parsed JSON source of the root document
     * @throws SourceMissingException when the document or its response is absent
     * @throws IOException when the source cannot be parsed as JSON
     */
    private JsonNode getFromES() throws IOException
    {
        // TODO oops this doesn't work at the moment, we have the REDUCE results in walkResults :(
        // (we are always on the root node of a walk result, so that could be used when present)
        // TODO handle this properly...
        // todo: queryFn.apply may produce null

        // retrieve the raw document from ES
        final Optional<ResolvedPathElement> r = queryFn.apply(new Pair<Edge, Vertex>(null, request.root.vertex()));
        if (!r.isPresent() || !r.get().getResponse().isPresent())
            throw new SourceMissingException();
        return objectMapper.readTree(r.get().getResponse().get().sourceAsString());
    }

    /**
     * Runs the full recompute pipeline for the request's root document.
     *
     * @return the result of writing the target document, including the raw
     *         source, the transformed document and the walk results
     * @throws DocumentFiltered when the configured filter rejects the document
     * @throws ExpiredException when a walk could not resolve all documents
     * @throws SourceNotObjectException when the transformed source is not a JSON object
     * @throws IOException when the source document cannot be read or parsed
     */
    public RecomputeResult recompute() throws IOException, ExecutionException, InterruptedException
    {
        log.info("Recompute {} started", request.root.id().toString());

        // fetch the current Elasticsearch document
        final JsonNode rawDocument = getFromES();

        // return early when this document does not need to be processed
        if (!request.config.filter(rawDocument))
        {
            log.info("Aborted recompute for {} because filter=false for this document", request.root.id().toString());
            throw new DocumentFiltered();
        }

        // For each walk configured for this input document:
        // - fetch a tree of children non-recursively from our document in the
        //   inverted direction of the walk, as graph vertices
        // - convert the tree of vertices to a tree of Elasticsearch documents
        // - call the walk's reduce() method with the tree of documents
        // - collect the result
        final HashMap<String, JsonNode> walkResults = walkResults();
        if (walkResults == null)
        {
            log.info("Aborted recompute for {} because graph is expired for this node", request.root.id().toString());
            throw new ExpiredException(Collections.<ID>emptyList());
        }

        // pre-process the document (transformation is configured per type, e.g. javascript)
        final JsonNode transformed = request.config.transform(rawDocument);
        if (!transformed.isObject())
        {
            log.info("Aborted recompute for {} because the source document is not a JSON object", request.root.id().toString());
            throw new SourceNotObjectException();
        }
        final ObjectNode document = (ObjectNode) transformed;

        // merge the walk results into the document
        for (Map.Entry<String, JsonNode> e : walkResults.entrySet())
            document.put(e.getKey(), e.getValue());

        // add a reference to the source document and store in the target index
        final IndexResponse ir = writeToES(document);
        log.info("Recompute completed for {}, wrote /{}/{}/{}/{}",
                new Object[]{request.root.id().toString(), ir.index(), ir.type(), ir.id(), ir.version()});
        return new RecomputeResult(ir, rawDocument, document, walkResults);
    }
}
/**
 * Performs the recompute of an individual document: applies the configured
 * transformation, executes the walks, and inserts/updates the target document.
 *
 * <p>Checked exceptions raised during the recompute are rethrown wrapped in a
 * {@code WrappedException}; {@code DegraphmalizerException}s propagate as-is.
 *
 * @param request  represents the source document and recompute configuration
 * @param callback passed through to the internal {@code Recomputer}
 * @return the ES IndexResponse to the insert of the target document
 */
@Override
public RecomputeResult recompute(final RecomputeRequest request, RecomputeCallback callback)
{
    try
    {
        return new Recomputer(request, callback).recompute();
    }
    catch (DegraphmalizerException e)
    {
        // domain exceptions pass through untouched
        throw e;
    }
    catch (Exception e)
    {
        // everything else is wrapped, preserving the original cause
        throw new WrappedException(e);
    }
}
}