//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.orderers;

import java.io.FileWriter;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.jcas.tcas.Annotation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.SparseMultigraph;
import edu.uci.ics.jung.graph.util.EdgeType;
import edu.uci.ics.jung.graph.util.Pair;
import uk.gov.dstl.baleen.core.pipelines.PipelineBuilder;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineActionStore;
import uk.gov.dstl.baleen.core.pipelines.orderers.IPipelineOrderer;

/**
 * Orders analysis engines by constructing a dependency graph and iteratively
 * removing analysis engines that have no dependencies.
 *
 * <p>An edge from engine A to engine B means that B consumes something A produces,
 * so A must run before B. Engines with no incoming edges are peeled off layer by
 * layer until the graph is empty.
 */
public class DependencyGraph implements IPipelineOrderer {
  private static final Logger LOGGER = LoggerFactory.getLogger(DependencyGraph.class);

  /** Timestamp pattern used as the prefix of the debug output file names. */
  private static final DateTimeFormatter TIMESTAMP_FORMAT =
      DateTimeFormatter.ofPattern("yyyyMMddHHmmss");

  /** Counter used to give every edge added to the graph a distinct identifier. */
  private int edgeId = 0;

  /**
   * Order the supplied analysis engines so that each engine comes after the engines
   * whose outputs it uses as inputs.
   *
   * <p>Engines that become free of dependencies at the same time are emitted in the
   * same relative order they have in {@code analysisEngines}.
   *
   * @param analysisEngines the engines to order
   * @return a new list holding the ordered engines, or {@code analysisEngines} itself
   *     when the dependency graph cannot be resolved
   */
  @Override
  public List<AnalysisEngine> orderPipeline(List<AnalysisEngine> analysisEngines) {
    Graph<AnalysisEngine, Integer> graph = createDependencyGraph(analysisEngines);
    removeLoops(graph);

    List<AnalysisEngine> ordered = new ArrayList<>(analysisEngines.size());

    while (true) {
      Set<AnalysisEngine> layer = removeLayer(graph);

      if (layer.isEmpty()) {
        if (graph.getVertexCount() == 0) {
          // Every engine has been placed
          break;
        }

        // Vertices remain but none of them is free of dependencies
        LOGGER.error("Unsolvable dependency graph. Original order will be used.");

        // The graph is non-empty here, so the list has at least one element.
        // String.valueOf guards against a missing pipeline name parameter.
        String pipeline =
            String.valueOf(
                analysisEngines.get(0).getConfigParameterValue(PipelineBuilder.PIPELINE_NAME));
        debugUnresolvedGraph(pipeline, graph, ordered);

        return analysisEngines;
      }

      // The layer is a hash set, so add its members by walking the original list;
      // this keeps the result stable between runs. remove() only succeeds once per
      // engine, so an engine listed twice is still only added once.
      for (AnalysisEngine ae : analysisEngines) {
        if (layer.remove(ae)) {
          ordered.add(ae);
        }
      }

      // Defensive: anything still left in the layer was not found in the original list
      ordered.addAll(layer);
    }

    return ordered;
  }

  /**
   * Build a graph with one vertex per analysis engine and a directed edge for every
   * dependency found between two distinct engines.
   *
   * @param analysisEngines the engines to place on the graph
   * @return the populated dependency graph
   */
  private Graph<AnalysisEngine, Integer> createDependencyGraph(
      List<AnalysisEngine> analysisEngines) {
    Graph<AnalysisEngine, Integer> graph = new SparseMultigraph<>();

    // First, add all annotators onto the graph
    for (AnalysisEngine ae : analysisEngines) {
      graph.addVertex(ae);
    }

    // Now add dependencies between annotators, visiting every ordered pair
    for (AnalysisEngine ae1 : analysisEngines) {
      for (AnalysisEngine ae2 : analysisEngines) {
        if (ae1 != ae2) {
          addAnnotatorDependencies(graph, ae1, ae2);
        }
      }
    }

    return graph;
  }

  /**
   * Add a directed edge from {@code ae2} to {@code ae1} when the inputs of {@code ae1}
   * overlap the outputs of {@code ae2}, i.e. when {@code ae1} depends on {@code ae2}.
   *
   * @param graph the graph to add the edge to
   * @param ae1 the potential consumer
   * @param ae2 the potential producer
   */
  private void addAnnotatorDependencies(
      Graph<AnalysisEngine, Integer> graph, AnalysisEngine ae1, AnalysisEngine ae2) {
    // If there's already a dependency, then just return as we don't want multiple edges.
    // NOTE(review): this looks up an edge for (ae1, ae2) while the edge added below
    // runs from ae2 to ae1 - confirm which direction is intended.
    if (graph.findEdge(ae1, ae2) != null) {
      return;
    }

    // If the inputs of ae1 match the outputs of ae2, then ae1 is dependent on ae2.
    // We don't need to check both ways as this will be caught by the loop, although
    // we could be more efficient here.
    AnalysisEngineAction a1 = getAction(ae1);
    AnalysisEngineAction a2 = getAction(ae2);

    if (overlaps(a1.getInputs(), a2.getOutputs())) {
      graph.addEdge(++edgeId, ae2, ae1, EdgeType.DIRECTED);
    }
  }

  /**
   * When debug logging is enabled, write the unresolved part of the graph to two files
   * whose names start with the current timestamp and the pipeline name: a CSV file
   * listing the end points of every remaining edge, and a text file listing the engines
   * that were ordered and those still left on the graph.
   *
   * <p>Failure to write either file is logged as a warning and otherwise ignored.
   *
   * @param pipeline the pipeline name, used as part of the file names
   * @param graph the graph that could not be fully resolved
   * @param ordered the engines that were ordered before resolution failed
   */
  private void debugUnresolvedGraph(
      String pipeline, Graph<AnalysisEngine, Integer> graph, List<AnalysisEngine> ordered) {
    if (!LOGGER.isDebugEnabled()) {
      return;
    }

    String prefix = LocalDateTime.now().format(TIMESTAMP_FORMAT) + "-" + pipeline;

    // Output unresolved analysis engines as CSV
    try (FileWriter fileWriter = new FileWriter(prefix + "-unsolvedDependencyGraph.csv")) {
      for (Integer id : graph.getEdges()) {
        Pair<AnalysisEngine> pair = graph.getEndpoints(id);

        fileWriter.append(describe(pair.getFirst()));
        fileWriter.append(',');
        fileWriter.append(describe(pair.getSecond()));
        fileWriter.append('\n');
      }
    } catch (IOException ioe) {
      LOGGER.warn("Unable to save unsolvable dependency graph to disk", ioe);
    }

    // Output original and ordered annotators
    try (FileWriter fileWriter = new FileWriter(prefix + "-unsolvedDependencyGraph.txt")) {
      fileWriter.write("Ordered annotators:\n");
      for (AnalysisEngine ae : ordered) {
        fileWriter.write("- " + describe(ae) + "\n");
      }

      fileWriter.write("\nRemaining annotators:\n");
      for (AnalysisEngine ae : graph.getVertices()) {
        fileWriter.write("- " + describe(ae) + "\n");
      }
    } catch (IOException ioe) {
      LOGGER.warn("Unable to save dependency information to disk", ioe);
    }
  }

  /** Format an analysis engine as {@code name#uuid} using its metadata. */
  private static String describe(AnalysisEngine ae) {
    return ae.getAnalysisEngineMetaData().getName()
        + "#"
        + ae.getAnalysisEngineMetaData().getUUID();
  }

  /**
   * Determine whether two sets of classes overlap (i.e. contain any of the same classes),
   * taking into account inheritance and allowing subclasses to count towards any overlap.
   *
   * <p>The check is one-directional: a pair matches when the class from {@code s1} is
   * the same as, or a supertype of, the class from {@code s2}.
   *
   * @param s1 the first set of classes
   * @param s2 the second set of classes
   * @return true if any class in {@code s1} is assignable from any class in {@code s2}
   */
  public static boolean overlaps(
      Set<Class<? extends Annotation>> s1, Set<Class<? extends Annotation>> s2) {
    for (Class<? extends Annotation> c1 : s1) {
      for (Class<? extends Annotation> c2 : s2) {
        if (c1.isAssignableFrom(c2)) {
          return true;
        }
      }
    }

    return false;
  }

  /**
   * Find and remove simple loops (e.g. a -> b -> a) from a Jung graph.
   *
   * <p>For every outgoing edge of every vertex, the edge running back from the
   * destination to the vertex is removed if one exists.
   *
   * @param graph the graph to modify in place
   */
  public static <V, E> void removeLoops(Graph<V, E> graph) {
    // Iterate over snapshots: edges are removed inside the loop, and the collections
    // handed out by the graph may be live views of its internal state.
    for (V v : new ArrayList<V>(graph.getVertices())) {
      for (E e : new ArrayList<E>(graph.getOutEdges(v))) {
        if (!graph.containsEdge(e)) {
          // Already removed earlier in this pass
          continue;
        }

        V dest = graph.getDest(e);
        E returnEdge = graph.findEdge(dest, v);

        if (returnEdge != null) {
          LOGGER.warn(
              "Loop detected between {} and {}. Original order will be preserved.",
              getName(v),
              getName(dest));
          graph.removeEdge(returnEdge);
        }
      }
    }
  }

  /**
   * Remove an outer layer of the graph (i.e. any nodes with an inDegree of 0).
   *
   * @param graph the graph to modify in place
   * @return the set of removed vertices
   */
  public static <V, E> Set<V> removeLayer(Graph<V, E> graph) {
    // Collect first and remove afterwards so the vertex collection is not modified
    // while it is being iterated.
    Set<V> toRemove = new HashSet<>();

    for (V v : graph.getVertices()) {
      if (graph.inDegree(v) == 0) {
        toRemove.add(v);
      }
    }

    for (V v : toRemove) {
      graph.removeVertex(v);
    }

    return toRemove;
  }

  /**
   * Produce a name for logging: the metadata name for an analysis engine, otherwise
   * the string form of the object.
   */
  private static String getName(Object o) {
    if (o instanceof AnalysisEngine) {
      return ((AnalysisEngine) o).getAnalysisEngineMetaData().getName();
    }

    return String.valueOf(o);
  }

  /**
   * Look up the action registered in the {@link AnalysisEngineActionStore} under the
   * engine's annotator UUID configuration parameter.
   */
  private AnalysisEngineAction getAction(AnalysisEngine ae) {
    String uuid = (String) ae.getConfigParameterValue(PipelineBuilder.ANNOTATOR_UUID);
    return AnalysisEngineActionStore.getInstance().get(uuid);
  }
}