package org.openbel.framework.api;

import static org.openbel.framework.api.EdgeDirectionType.FORWARD;
import static org.openbel.framework.api.EdgeDirectionType.REVERSE;
import static org.openbel.framework.api.KamUtils.copy;
import static org.openbel.framework.common.BELUtilities.constrainedHashSet;
import static org.openbel.framework.common.BELUtilities.noItems;
import static org.openbel.framework.common.BELUtilities.sizedHashMap;
import static org.openbel.framework.common.enums.RelationshipType.ACTS_IN;
import static org.openbel.framework.common.enums.RelationshipType.ORTHOLOGOUS;
import static org.openbel.framework.common.enums.RelationshipType.TRANSCRIBED_TO;
import static org.openbel.framework.common.enums.RelationshipType.TRANSLATED_TO;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.openbel.framework.api.Kam.KamEdge;
import org.openbel.framework.api.Kam.KamNode;
import org.openbel.framework.api.internal.KAMStoreDaoImpl.BelTerm;
import org.openbel.framework.api.internal.KAMStoreDaoImpl.Namespace;
import org.openbel.framework.api.internal.KAMStoreDaoImpl.TermParameter;
import org.openbel.framework.common.InvalidArgument;
import org.openbel.framework.common.enums.FunctionEnum;
import org.openbel.framework.common.enums.RelationshipType;
import org.openbel.framework.common.util.Pair;

/**
 * Default {@link Orthologize} implementation.
 *
 * <p>
 * Orthologous nodes are discovered from {@link RelationshipType#ORTHOLOGOUS}
 * edges, collapsed onto the node that matches one of the
 * {@link SpeciesDialect dialect}'s species namespaces, and further
 * orthologous nodes are then inferred by walking {@code actsIn},
 * {@code transcribedTo} and {@code translatedTo} edges away from each
 * species node.
 * </p>
 */
public class DefaultOrthologize implements Orthologize {

    /** Relationship types that are walked when inferring orthologs. */
    private static final RelationshipType[] INFERRED_ORTHOLOGIZED_EDGES =
            new RelationshipType[] { ACTS_IN, TRANSCRIBED_TO, TRANSLATED_TO };

    /**
     * {@inheritDoc}
     *
     * <p>
     * Every {@link RelationshipType#ORTHOLOGOUS} edge with two distinct
     * endpoints is inspected. The endpoint having a term parameter in one of
     * the dialect's species namespaces is treated as the species node and the
     * opposite endpoint is mapped to it. The edge's source is tested first;
     * the target is only tested when the source does not match. An edge with
     * no matching endpoint contributes nothing.
     * </p>
     *
     * @return {@link Map} of K: {@link KamNode orthologous node} and
     * V: {@link KamNode species node}
     */
    @Override
    public Map<KamNode, KamNode> orthologousNodes(Kam kam, KAMStore kAMStore,
            SpeciesDialect dialect) {
        final Set<String> rlocs = speciesResourceLocations(dialect);

        final Map<KamNode, KamNode> onodes = new HashMap<KamNode, KamNode>();
        for (final KamEdge e : kam.getEdges()) {
            // only evaluate orthologous edges
            if (!ORTHOLOGOUS.equals(e.getRelationshipType())) {
                continue;
            }

            final KamNode edgeSource = e.getSourceNode();
            final KamNode edgeTarget = e.getTargetNode();

            // invalid; skip orthologous self edges
            if (edgeSource == edgeTarget) {
                continue;
            }

            if (findParameter(kam, kAMStore, edgeSource, rlocs) != null) {
                // source node matches target species; target is the ortholog
                onodes.put(edgeTarget, edgeSource);
            } else if (findParameter(kam, kAMStore, edgeTarget, rlocs) != null) {
                // target node matches target species; source is the ortholog
                onodes.put(edgeSource, edgeTarget);
            }
        }

        return onodes;
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Works on a {@link KamUtils#copy(Kam) copy} of {@code kam}; the copy is
     * what ends up inside the returned {@link OrthologizedKam}.
     * </p>
     */
    @Override
    public OrthologizedKam orthologize(Kam kam, KAMStore kAMStore,
            SpeciesDialect dialect) {
        Kam copy = copy(kam);
        Map<KamNode, KamNode> ortho = orthologousNodes(copy, kAMStore, dialect);

        // edge filter restricted to the relationship types used for inference
        EdgeFilter inferf = copy.createEdgeFilter();
        final RelationshipTypeFilterCriteria c =
                new RelationshipTypeFilterCriteria();
        c.getValues().addAll(Arrays.asList(INFERRED_ORTHOLOGIZED_EDGES));
        inferf.add(c);

        // snapshot the distinct species nodes before the ortholog map is
        // extended by inference
        final Collection<KamNode> speciesNodes = ortho.values();
        final Set<KamNode> species = new LinkedHashSet<KamNode>(
                speciesNodes.size());
        species.addAll(speciesNodes);

        replaceOrthologousEdges(copy, ortho);
        removeOrthologousNodes(copy, ortho);
        Pair<Map<Integer, TermParameter>, Map<Integer, TermParameter>> tpm =
                inferOrthologs(copy, kAMStore, dialect, inferf, species, ortho);
        Map<Integer, TermParameter> ntp = tpm.getFirst();
        Map<Integer, TermParameter> etp = tpm.getSecond();
        return new OrthologizedKam(copy, dialect, ntp, etp, ortho);
    }

    /**
     * Collects the resource location of every species namespace declared by
     * the {@link SpeciesDialect dialect}.
     *
     * @param dialect {@link SpeciesDialect}
     * @return {@link Set} of {@link String resource locations}
     */
    private static Set<String> speciesResourceLocations(
            final SpeciesDialect dialect) {
        final List<org.openbel.framework.common.model.Namespace> spl = dialect
                .getSpeciesNamespaces();
        final Set<String> rlocs = constrainedHashSet(spl.size());
        for (final org.openbel.framework.common.model.Namespace n : spl) {
            rlocs.add(n.getResourceLocation());
        }
        return rlocs;
    }

    /**
     * Replace orthologous relationships by collapsing to the species
     * {@link KamNode node}. In particular:
     * <ul>
     * <li>Remove {@link RelationshipType#ORTHOLOGOUS} edges.</li>
     * <li>Redirect {@link KamNode ortholog node}'s edges to the
     * {@link KamNode species replacement node}.</li>
     * </ul>
     *
     * <p>
     * A redirected edge is removed and re-created with the same edge id and
     * relationship type.
     * </p>
     *
     * @param kam {@link Kam}
     * @param ortho {@link Map} of K: {@link KamNode ortho node} and
     * V: {@link KamNode species target node}
     */
    private static void replaceOrthologousEdges(Kam kam,
            Map<KamNode, KamNode> ortho) {
        // NOTE(review): edges are removed/created while iterating; this
        // presumably relies on getEdges() not being a live view - confirm.
        final Collection<KamEdge> edges = kam.getEdges();
        for (final KamEdge edge : edges) {
            if (ORTHOLOGOUS.equals(edge.getRelationshipType())) {
                // remove all orthologous edges
                kam.removeEdge(edge);
                continue;
            }

            // redirect ortholog's relationships to species replacement
            final KamNode sn = edge.getSourceNode();
            final KamNode tn = edge.getTargetNode();

            // when edge's source node is the orthologous node
            // find the corresponding species
            KamNode species = ortho.get(sn);
            if (species != null) {
                // replace the edge's source as the species node
                //
                // NOTE(review): the target is not examined in this branch, so
                // an edge whose source AND target are both orthologs keeps an
                // ortholog as its target, which removeOrthologousNodes removes
                // afterwards - confirm this is intended.
                KamNode speciesNode = kam.findNode(species.getId());
                kam.removeEdge(edge);
                kam.createEdge(edge.getId(), speciesNode,
                        edge.getRelationshipType(), tn);
                continue;
            }

            // when edge's target node is the orthologous node
            // find the corresponding species
            species = ortho.get(tn);
            if (species != null) {
                // replace the edge's target as the species node
                KamNode speciesNode = kam.findNode(species.getId());
                kam.removeEdge(edge);
                kam.createEdge(edge.getId(), sn,
                        edge.getRelationshipType(), speciesNode);
            }
        }
    }

    /**
     * Remove {@link KamNode orthologous nodes}.
     *
     * @param kam {@link Kam}
     * @param ortho {@link Map} of K: {@link KamNode orthologous node} and
     * V: {@link KamNode species replacement node}; every key is removed from
     * {@code kam}
     */
    private static void removeOrthologousNodes(Kam kam,
            Map<KamNode, KamNode> ortho) {
        for (final KamNode orthoNode : ortho.keySet()) {
            kam.removeNode(orthoNode);
        }
    }

    /**
     * Infers orthologous {@link KamEdge edges} downstream and upstream from
     * all {@link KamNode species replacement nodes}.
     *
     * @param kam {@link Kam}
     * @param kAMStore {@link KAMStore} used to look up each species node's
     * {@link TermParameter}
     * @param dialect {@link SpeciesDialect} supplying the species namespaces
     * @param inferf {@link EdgeFilter} selecting the edges to walk
     * @param species {@link Set} of {@link KamNode species replacement nodes};
     * {@code null} elements are skipped
     * @param ortho {@link Map} of orthologous node to species node; inferred
     * orthologs are added to it
     * @return {@link Pair} of (first) node id to {@link TermParameter} and
     * (second) edge id to {@link TermParameter}
     */
    private static Pair<Map<Integer, TermParameter>, Map<Integer, TermParameter>> inferOrthologs(
            Kam kam, KAMStore kAMStore, SpeciesDialect dialect,
            EdgeFilter inferf, Set<KamNode> species,
            Map<KamNode, KamNode> ortho) {
        final Set<String> rlocs = speciesResourceLocations(dialect);

        Map<Integer, TermParameter> ntp = sizedHashMap(species.size());
        Map<Integer, TermParameter> etp = sizedHashMap(species.size());
        for (final KamNode snode : species) {
            if (snode == null) {
                continue;
            }

            // XXX term parameter looked up 2x; may impact perf/determinism
            // TODO redesign orthologousNodes / inferOrthologs
            TermParameter p = findParameter(kam, kAMStore, snode, rlocs);

            // recurse incoming connections from species node
            recurseConnections(kam, snode, p, inferf, REVERSE, ortho, ntp, etp);
            // recurse outgoing connections from species node
            recurseConnections(kam, snode, p, inferf, FORWARD, ortho, ntp, etp);
        }

        return new Pair<Map<Integer, TermParameter>, Map<Integer, TermParameter>>(
                ntp, etp);
    }

    /**
     * Walks the edges admitted by {@code inferf} and infers orthologous edges
     * based on matching relationships.
     *
     * <p>
     * For instance if there are two {@code transcribedTo} edges from an
     * orthologized {@code geneAbundance} then we infer that the downstream
     * {@code rnaAbundance}s are also orthologous and collapse to the first
     * one.
     * </p>
     *
     * <p>
     * {@code actsIn} edges are grouped by the function of the opposite node;
     * all other edges are grouped by relationship type. The first opposite
     * node seen in a group is kept and recorded in {@code ntp}/{@code etp};
     * each later one is collapsed into it and recorded in {@code ortho}. The
     * walk then continues, in the same direction, from every kept node.
     * </p>
     *
     * @param kam {@link Kam}
     * @param snode {@link KamNode} species node to walk from
     * @param param {@link TermParameter} for orthologous species node
     * @param inferf {@link EdgeFilter} selecting the edges to walk
     * @param direction {@link EdgeDirectionType} direction to walk
     * @param ortho {@link Map} of orthologous node to species node
     * @param ntp {@link Map} of node id to {@link TermParameter}
     * @param etp {@link Map} of edge id to {@link TermParameter}
     */
    private static void recurseConnections(Kam kam, KamNode snode,
            TermParameter param, EdgeFilter inferf,
            EdgeDirectionType direction, Map<KamNode, KamNode> ortho,
            Map<Integer, TermParameter> ntp, Map<Integer, TermParameter> etp) {
        // get adjacent edges that can be inferred
        final Set<KamEdge> out = kam.getAdjacentEdges(snode, direction, inferf);

        // map ACTS_IN edges by activity function
        final Map<FunctionEnum, KamNode> acts =
                new HashMap<FunctionEnum, KamNode>();
        // map every other edge by relationship type
        final Map<RelationshipType, KamNode> rels =
                new HashMap<RelationshipType, KamNode>();
        for (final KamEdge e : out) {
            // get correct edge opposite node based on search direction
            final KamNode opnode = (direction == FORWARD ? e.getTargetNode()
                    : e.getSourceNode());

            // handle ACTS_IN edge independently since we care about similar
            // activity functions
            if (e.getRelationshipType() == ACTS_IN) {
                final FunctionEnum actfun = opnode.getFunctionType();

                // lookup first seen node for activity function
                final KamNode node = acts.get(actfun);

                // if not yet seen mark opposite node and edge as species
                // collapse target. continue to next edge.
                if (node == null) {
                    acts.put(actfun, opnode);
                    ntp.put(opnode.getId(), param);
                    etp.put(e.getId(), param);
                    continue;
                }

                kam.collapseNode(opnode, node);

                // hang on to collapsed node
                ortho.put(opnode, node);
            } else {
                // handle all other edges by relationship type
                final RelationshipType rel = e.getRelationshipType();

                // lookup first seen relationship type
                final KamNode node = rels.get(rel);

                // if not yet seen mark opposite node and edge as species
                // collapse target. continue to next edge.
                if (node == null) {
                    rels.put(rel, opnode);
                    ntp.put(opnode.getId(), param);
                    etp.put(e.getId(), param);
                    continue;
                }

                // hang on to collapsed node
                ortho.put(opnode, node);

                kam.collapseNode(opnode, node);
            }
        }

        // recursively process every node that was kept as a collapse target
        Collection<KamNode> actn = acts.values();
        Collection<KamNode> reln = rels.values();
        final Set<KamNode> nodes = constrainedHashSet(actn.size()
                + reln.size());
        nodes.addAll(actn);
        nodes.addAll(reln);
        for (final KamNode n : nodes) {
            recurseConnections(kam, n, param, inferf, direction, ortho, ntp,
                    etp);
        }
    }

    /**
     * Matches {@link BelTerm terms} for a {@link KamNode node} against a
     * species-specific {@link Namespace}.
     *
     * @param kam {@link Kam} whose {@link Kam#getKamInfo() kam info} is used
     * to retrieve term parameters
     * @param kAMStore {@link KAMStore} queried for supporting terms and their
     * parameters
     * @param node {@link KamNode} kam node to match
     * @param rlocs {@link Set} of {@link String resource location}, matches
     * against nodes to find orthologous targets
     * @return first species-matching {@link TermParameter term parameter} or
     * {@code null} if one could not be found
     * @throws InvalidArgument Thrown if {@code node} is null while retrieving
     * supporting terms
     * @throws KAMStoreException Thrown if the {@link Kam kam} could not be
     * determined while retrieving supporting terms or parameters
     */
    private static TermParameter findParameter(final Kam kam,
            final KAMStore kAMStore, final KamNode node,
            final Set<String> rlocs) throws KAMStoreException {
        // no resource locations to match against, no match
        if (noItems(rlocs)) {
            return null;
        }

        final List<BelTerm> terms = kAMStore.getSupportingTerms(node);
        for (final BelTerm term : terms) {
            final List<TermParameter> params = kAMStore.getTermParameters(
                    kam.getKamInfo(), term);
            for (final TermParameter p : params) {
                // skip empty namespace, continue
                final Namespace ns = p.getNamespace();
                if (ns == null) {
                    continue;
                }

                // if parameter namespace in rlocs, match
                if (rlocs.contains(ns.getResourceLocation())) {
                    return p;
                }
            }
        }

        return null;
    }
}