/** * Copyright (C) 2012-2013 Selventa, Inc. * * This file is part of the OpenBEL Framework. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The OpenBEL Framework is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the OpenBEL Framework. If not, see <http://www.gnu.org/licenses/>. * * Additional Terms under LGPL v3: * * This license does not authorize you and you are prohibited from using the * name, trademarks, service marks, logos or similar indicia of Selventa, Inc., * or, in the discretion of other licensors or authors of the program, the * name, trademarks, service marks, logos or similar indicia of such authors or * licensors, in any marketing or advertising materials relating to your * distribution of the program or any covered product. This restriction does * not waive or limit your obligation to keep intact all copyright notices set * forth in the program as delivered to you. * * If you distribute the program in whole or in part, or any modified version * of the program, and you assume contractual liability to the recipient with * respect to the program or modified version, then you will indemnify the * authors and licensors of the program for any liabilities that these * contractual assumptions directly impose on those licensors and authors. */ package org.openbel.framework.compiler; import static java.util.Collections.emptySet; import static org.openbel.framework.common.BELUtilities.constrainedHashSet; import static org.openbel.framework.common.BELUtilities.hasItems; import static org.openbel.framework.common.BELUtilities.noItems; import static org.openbel.framework.common.BELUtilities.sizedArrayList; import static org.openbel.framework.common.BELUtilities.sizedHashMap; import static org.openbel.framework.common.enums.FunctionEnum.PROTEIN_ABUNDANCE; import static org.openbel.framework.common.enums.RelationshipType.IS_A; import static org.openbel.framework.common.protonetwork.model.ProtoNetwork.NAMESPACE_INDEX; import static org.openbel.framework.common.protonetwork.model.ProtoNetwork.PARAM_INDEX; import static org.openbel.framework.common.protonetwork.model.ProtoNetwork.TERM_INDEX; import static org.openbel.framework.common.protonetwork.model.TermTable.PARAMETER_SUBSTITUTION; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.openbel.framework.common.enums.FunctionEnum; import org.openbel.framework.common.enums.RelationshipType; import org.openbel.framework.common.lang.ComplexAbundance; import org.openbel.framework.common.lang.ProteinAbundance; import org.openbel.framework.common.model.DataFileIndex; import org.openbel.framework.common.model.Document; import org.openbel.framework.common.model.EquivalenceDataIndex; import org.openbel.framework.common.model.Namespace; import org.openbel.framework.common.model.Parameter; import org.openbel.framework.common.model.Statement; import org.openbel.framework.common.model.Statement.Object; import org.openbel.framework.common.model.StatementGroup; import org.openbel.framework.common.model.Term; import org.openbel.framework.common.protonetwork.model.NamespaceTable; import org.openbel.framework.common.protonetwork.model.NamespaceTable.TableNamespace; import org.openbel.framework.common.protonetwork.model.ParameterTable; import org.openbel.framework.common.protonetwork.model.ParameterTable.TableParameter; import org.openbel.framework.common.protonetwork.model.ProtoNetwork; import org.openbel.framework.common.protonetwork.model.ProtoNetworkError; import org.openbel.framework.common.protonetwork.model.SkinnyUUID; import org.openbel.framework.common.protonetwork.model.TermParameterMapTable; import org.openbel.framework.common.protonetwork.model.TermTable; import org.openbel.framework.core.equivalence.EquivalenceMapResolutionFailure; import org.openbel.framework.core.indexer.JDBMEquivalenceLookup; import org.openbel.framework.core.protonetwork.ProtoNetworkDescriptor; import org.openbel.framework.core.protonetwork.ProtoNetworkService; /** * BEL compiler phase three implementation. */ public class PhaseThreeImpl implements DefaultPhaseThree { private final ProtoNetworkService protoNetworkService; private final DefaultPhaseTwo p2; private final static String PF_SHORT; private final static String PF_LONG; private final static String NC_SHORT; private final static String NC_LONG; static { String suffix = "(" + PARAMETER_SUBSTITUTION + ")"; // proteinAbundance(#) - the p() form w/in the proto-network PF_SHORT = ProteinAbundance.ABBREVIATION.concat(suffix); PF_LONG = ProteinAbundance.NAME.concat(suffix); // complexAbundance(#) - the complex() form w/in the proto-network NC_SHORT = ComplexAbundance.ABBREVIATION.concat(suffix); NC_LONG = ComplexAbundance.NAME.concat(suffix); } /** * Creates the phase three implementation with the associated * {@link ProtoNetworkService}. * * @param protoNetSvc Proto-network service * @param p2 Selventa phase two */ public PhaseThreeImpl(final ProtoNetworkService protoNetSvc, final DefaultPhaseTwo p2) { this.protoNetworkService = protoNetSvc; this.p2 = p2; } /** * {@inheritDoc} */ @Override public DocumentModificationResult pruneFamilies(boolean expand, final Document families, final ProtoNetwork network) { DocumentModificationResult pr = new DocumentModificationResult(); // Search for all p(#) parameters within the network. Set<TableEntry> networkLiterals = search(network, PF_SHORT, PF_LONG); // Load the equivalences Set<EquivalenceDataIndex> equivs; try { equivs = p2.stage2LoadNamespaceEquivalences(); } catch (EquivalenceMapResolutionFailure f) { // Unrecoverable error pr.addError(f.getUserFacingMessage()); pr.setSuccess(false); return pr; } // Map namespace to lookup Map<String, JDBMEquivalenceLookup> lookups = sizedHashMap(equivs.size()); for (final EquivalenceDataIndex edi : equivs) { String rl = edi.getNamespaceResourceLocation(); DataFileIndex dfi = edi.getEquivalenceIndex(); lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath())); } // Open the indices for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.open(); } catch (IOException e) { pr.addError(e.getMessage()); pr.setSuccess(false); return pr; } } // It's helpful to reference the contents of the protein families // BEL document in understanding this section. // Establish set of UUIDs relevant for the document's member parameters // (this only matters if the expand flag is set) Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>(); final List<Statement> familyStmts = families.getAllStatements(); if (expand) { uuids = findUUIDs(network, lookups, familyStmts); } boolean haveUUIDs = !uuids.isEmpty(); int pruned = 0, total = familyStmts.size(); List<Parameter> parameters; // Pruning is simply a matter of iterating all statements in families... FAMILIES: for (final Statement stmt : familyStmts) { // ... all protein family parameters parameters = stmt.getSubject().getAllParameters(); for (final Parameter p : parameters) { if (!validParameter(p)) { continue; } Namespace ns = p.getNamespace(); String val = p.getValue(); String rl = ns.getResourceLocation(); // ... do we have the family? if (networkLiterals.contains(new TableEntry(val, rl))) { continue FAMILIES; } } Object stmtObj = stmt.getObject(); if (haveUUIDs && stmtObj != null && stmtObj.getTerm() != null) { // ... all family members parameters = stmtObj.getTerm().getAllParameters(); for (final Parameter p : parameters) { if (!validParameter(p)) { continue; } Namespace ns = p.getNamespace(); String val = p.getValue(); String rl = ns.getResourceLocation(); // ... do we have set membership? JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup != null) { SkinnyUUID uuid = jdbmLookup.lookup(val); // Check for set membership, uuid ∊ uuids if (uuids.contains(uuid)) { // This indicates equivalent parameters are // used by both document and network. This // statement will remain (i.e., not be pruned). continue FAMILIES; } } } } // Map statement group to statement set Map<StatementGroup, Set<Statement>> groupMap = families.mapStatements(); // Failed to match both either family or family member. // Prune the statement. Set<Entry<StatementGroup, Set<Statement>>> entries = groupMap.entrySet(); for (final Entry<StatementGroup, Set<Statement>> entry : entries) { StatementGroup group = entry.getKey(); Set<Statement> stmts = entry.getValue(); if (stmts.contains(stmt)) { group.getStatements().remove(stmt); pruned++; break; } } } // represent pruned statements as negative pr.setDeltaStatements(-(pruned)); pr.setTotalStatements(total); pr.setSuccess(true); for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.close(); } catch (IOException e) { pr.addWarning(e.getMessage()); } } return pr; } /** * {@inheritDoc} */ @Override public DocumentModificationResult inferFamilies(Document d, ProtoNetwork p) { final DocumentModificationResult pr = new DocumentModificationResult(); // Load the equivalences Set<EquivalenceDataIndex> equivs; try { equivs = p2.stage2LoadNamespaceEquivalences(); } catch (EquivalenceMapResolutionFailure f) { // Unrecoverable error pr.addError(f.getUserFacingMessage()); pr.setSuccess(false); return pr; } // Map namespace to lookup Map<String, JDBMEquivalenceLookup> lookups = sizedHashMap(equivs.size()); for (final EquivalenceDataIndex edi : equivs) { String rl = edi.getNamespaceResourceLocation(); DataFileIndex dfi = edi.getEquivalenceIndex(); lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath())); } // Open the indices for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.open(); } catch (IOException e) { pr.addError(e.getMessage()); pr.setSuccess(false); return pr; } } // get all statements for protein family document final List<Statement> stmts = d.getAllStatements(); // the number of inferred statements int inferred = 0; // find molecular activity functions referencing protein family param TableEntry entry; // iterate all activity functions of a proteinAbundance final List<Statement> isaStmts = new ArrayList<Statement>(); final Set<FunctionEnum> actFunctions = FunctionEnum.getActivities(); for (final FunctionEnum f : actFunctions) { final String actShort = f.getShortestForm().concat("(") .concat(PF_SHORT).concat(")"); final String actLong = f.getDisplayValue().concat("(") .concat(PF_LONG).concat(")"); // find parameters for this activity function in the proto network final Set<TableEntry> entries = search(p, actShort, actLong); // if no results for activity function, continue to next one if (entries.isEmpty()) { continue; } Set<SkinnyUUID> eu = null; // entries found, iterate protein family statements for (final Statement stmt : stmts) { // inspect protein family parameter final List<Parameter> sp = stmt.getSubject().getAllParameters(); for (final Parameter fp : sp) { if (!validParameter(fp)) { continue; } Namespace ns = fp.getNamespace(); String val = fp.getValue(); String rl = ns.getResourceLocation(); // have we seen a molecular activity function for this // protein family parameter entry = new TableEntry(val, rl); if (entries.contains(entry)) { // construct family term once, in case we find matches final Term fpt = new Term(PROTEIN_ABUNDANCE); fpt.addFunctionArgument(new Parameter(ns, val)); final Term fat = new Term(f); fat.addFunctionArgument(fpt); // since we found a molecular activity function for a // protein family look up UUIDs for all entries // ... loading uuids on demand to save resources if (eu == null) { eu = findUUIDs(entries, lookups); } // search protein family member parameters for same // molecular activity function final List<Parameter> op = stmt.getObject().getTerm() .getAllParameters(); for (final Parameter mp : op) { Namespace mns = mp.getNamespace(); String mval = mp.getValue(); String mrl = mns.getResourceLocation(); // lookup UUID for protein family member JDBMEquivalenceLookup jdbmLookup = lookups.get(mrl); if (jdbmLookup == null) { continue; } SkinnyUUID uuid = jdbmLookup.lookup(mval); // do we have protein family member parameter uuid // in molecular activity function entries? if (eu.contains(uuid)) { // construct member term final Term mpt = new Term(PROTEIN_ABUNDANCE); mpt.addFunctionArgument(new Parameter(mns, mval)); final Term mat = new Term(f); mat.addFunctionArgument(mpt); // construct isA statement between member and family isaStmts.add(new Statement(mat, null, null, new Object(fat), IS_A)); inferred++; } } } } } } // add isA relationships to protein family document, if necessary if (!isaStmts.isEmpty()) { final StatementGroup isaStmtGroup = new StatementGroup(); isaStmtGroup.setStatements(isaStmts); d.addStatementGroup(isaStmtGroup); } pr.setDeltaStatements(inferred); pr.setTotalStatements(d.getStatementCount()); pr.setSuccess(true); // Close the indices for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.close(); } catch (IOException e) { pr.addWarning(e.getMessage()); } } return pr; } /** * {@inheritDoc} */ @Override public DocumentModificationResult pruneComplexes(boolean expand, final Document complexes, final ProtoNetwork network) { DocumentModificationResult pr = new DocumentModificationResult(); // Search for all complex(#) parameters within the network. Set<TableEntry> networkLiterals = search(network, NC_SHORT, NC_LONG); // Load the equivalences Set<EquivalenceDataIndex> equivs; try { equivs = p2.stage2LoadNamespaceEquivalences(); } catch (EquivalenceMapResolutionFailure f) { // Unrecoverable error pr.addError(f.getUserFacingMessage()); pr.setSuccess(false); return pr; } // Map namespace to lookup Map<String, JDBMEquivalenceLookup> lookups = sizedHashMap(equivs.size()); for (final EquivalenceDataIndex edi : equivs) { String rl = edi.getNamespaceResourceLocation(); DataFileIndex dfi = edi.getEquivalenceIndex(); lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath())); } // Open the indices for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.open(); } catch (IOException e) { pr.addError(e.getMessage()); pr.setSuccess(false); return pr; } } // It's helpful to reference the contents of the protein families // BEL document in understanding this section. // Establish set of UUIDs relevant for the document's member parameters // (this only matters if the expand flag is set) Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>(); final List<Statement> complexStmts = complexes.getAllStatements(); if (expand) { uuids = findUUIDs(network, lookups, complexStmts); } boolean haveUUIDs = !uuids.isEmpty(); int pruned = 0, total = complexStmts.size(); List<Parameter> parameters; // Pruning is simply a matter of iterating all statements in document... COMPLEXES: for (final Statement stmt : complexStmts) { // ... all named complex parameters parameters = stmt.getSubject().getAllParameters(); for (final Parameter p : parameters) { if (!validParameter(p)) { continue; } Namespace ns = p.getNamespace(); String val = p.getValue(); String rl = ns.getResourceLocation(); // ... do we have the complex? if (networkLiterals.contains(new TableEntry(val, rl))) { continue COMPLEXES; } } Object stmtObj = stmt.getObject(); if (haveUUIDs && stmtObj != null && stmtObj.getTerm() != null) { // ... all complex components parameters = stmtObj.getTerm().getAllParameters(); for (final Parameter p : parameters) { if (!validParameter(p)) { continue; } Namespace ns = p.getNamespace(); String val = p.getValue(); String rl = ns.getResourceLocation(); // ... do we have set membership? JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup != null) { SkinnyUUID uuid = jdbmLookup.lookup(val); // Check for set membership, uuid ∊ uuids if (uuids.contains(uuid)) { // This indicates equivalent parameters are being // used by both document and network. This // statement will remain (i.e., not be pruned). continue COMPLEXES; } } } } // Map statement group to statement set Map<StatementGroup, Set<Statement>> groupMap = complexes.mapStatements(); // Failed to match both either complex or component. // Prune the statement. Set<Entry<StatementGroup, Set<Statement>>> entries = groupMap.entrySet(); for (final Entry<StatementGroup, Set<Statement>> entry : entries) { StatementGroup group = entry.getKey(); Set<Statement> stmts = entry.getValue(); if (entry.getValue().contains(stmt)) { stmts.remove(stmt); group.getStatements().remove(stmt); pruned++; break; } } } pr.setDeltaStatements(-(pruned)); pr.setTotalStatements(total); pr.setSuccess(true); for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.close(); } catch (IOException e) { pr.addWarning(e.getMessage()); } } return pr; } /** * {@inheritDoc} */ @Override public DocumentModificationResult pruneGene(final Document genes, final ProtoNetwork network) { DocumentModificationResult pr = new DocumentModificationResult(); // Load the equivalences Set<EquivalenceDataIndex> equivs; try { equivs = p2.stage2LoadNamespaceEquivalences(); } catch (EquivalenceMapResolutionFailure f) { // Unrecoverable error pr.addError(f.getUserFacingMessage()); pr.setSuccess(false); return pr; } // Map namespace to lookup Map<String, JDBMEquivalenceLookup> lookups = sizedHashMap(equivs.size()); for (final EquivalenceDataIndex edi : equivs) { String rl = edi.getNamespaceResourceLocation(); DataFileIndex dfi = edi.getEquivalenceIndex(); lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath())); } // Open the indices for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.open(); } catch (IOException e) { pr.addError(e.getMessage()); pr.setSuccess(false); return pr; } } // It's helpful to reference the contents of the gene scaffolding // BEL document in understanding this section. // Establish set of UUIDs relevant for the document's member parameters Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>(); pr.setTotalStatements(genes.getNumberOfStatements()); for (final Statement stmt : genes) { // Add all the parameters final List<Parameter> parameters = stmt.getAllParameters(); for (final Parameter p : parameters) { if (!validParameter(p)) { continue; } Namespace ns = p.getNamespace(); String val = p.getValue(); String rl = ns.getResourceLocation(); JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup == null) { continue; } SkinnyUUID uuid = jdbmLookup.lookup(val); if (uuid != null) { uuids.add(uuid); } } } // Establish set of UUIDs relevant for the proto-network Set<SkinnyUUID> pnUUIDs = new HashSet<SkinnyUUID>(); ParameterTable paramTbl = network.getParameterTable(); TableParameter[] paramArr = paramTbl.getTableParameterArray(); for (final TableParameter tp : paramArr) { if (!validParameter(tp)) { continue; } TableNamespace namespace = tp.getNamespace(); String value = tp.getValue(); String rl = namespace.getResourceLocation(); JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup == null) { continue; } SkinnyUUID uuid = jdbmLookup.lookup(value); if (uuid != null) { pnUUIDs.add(uuid); } } // Two sets of UUIDs have been established at this point. // The first set, uuids, is the set based on the genes. // The second set, pnUUIDs, is the set based on the proto-network. // Make uuids the intersection of the two: (uuids ∩ pnUUIDs) uuids.retainAll(pnUUIDs); final Set<Statement> toPrune = new HashSet<Statement>(); int pruned = 0; // Pruning is simply a matter of iterating all statements in genes... GENES: for (final Statement stmt : genes) { List<Parameter> parameters = stmt.getAllParameters(); // ... and all parameters in each statement... for (final Parameter p : parameters) { if (!validParameter(p)) { continue; } Namespace ns = p.getNamespace(); String val = p.getValue(); String rl = ns.getResourceLocation(); // ... do we have set membership? JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup == null) { continue; } SkinnyUUID uuid = jdbmLookup.lookup(val); // Check for set membership, uuid ∊ uuids if (uuids.contains(uuid)) { // This indicates equivalent parameters are being // used by both document and network. This // statement will remain (i.e., not be pruned). continue GENES; } } toPrune.add(stmt); } for (final StatementGroup sg : genes.getStatementGroups()) { List<Statement> curstmts = sg.getStatements(); List<Statement> newstmts = sizedArrayList(curstmts.size()); for (final Statement stmt : curstmts) { if (!toPrune.contains(stmt)) { newstmts.add(stmt); } else { pruned++; } } sg.setStatements(newstmts); } pr.setDeltaStatements(-(pruned)); pr.setSuccess(true); for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.close(); } catch (IOException e) { pr.addWarning(e.getMessage()); } } return pr; } /** * Finds the {@link SkinnyUUID} for every {@link Parameter parameter} in * the {@link ProtoNetwork proto network} that is also referenced in the * {@link Term object term} of each {@link Statement statement}. * * @param network {@link ProtoNetwork}, the proto network containing * {@link Parameter parameters} that we'll get {@link String uuids} for * @param lookups {@link Map} of {@link JDBMEquivalenceLookup} keyed by * resource location and assumed to be open / ready for lookups * @param stmts {@link List} of {@link Statement}, the statements * that contain object-term {@link Parameter parameters} that we'll get * {@link SkinnyUUID}s for * @return the {@link SkinnyUUID} intersection between the statement's * object term {@link Parameter parameters} and the proto network's * {@link Parameter parameters} */ private Set<SkinnyUUID> findUUIDs(final ProtoNetwork network, Map<String, JDBMEquivalenceLookup> lookups, final List<Statement> stmts) { final Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>(); for (final Statement stmt : stmts) { // Add all the object's parameters Object stmtObj = stmt.getObject(); if (stmtObj == null || stmtObj.getTerm() == null) { continue; } for (final Parameter p : stmtObj.getTerm()) { if (!validParameter(p)) { continue; } Namespace ns = p.getNamespace(); String val = p.getValue(); String rl = ns.getResourceLocation(); JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup == null) { continue; } SkinnyUUID uuid = jdbmLookup.lookup(val); if (uuid != null) { uuids.add(uuid); } } } // Establish set of UUIDs relevant for the proto-network Set<SkinnyUUID> pnUUIDs = new HashSet<SkinnyUUID>(); ParameterTable paramTbl = network.getParameterTable(); TableParameter[] paramArr = paramTbl.getTableParameterArray(); for (final TableParameter tp : paramArr) { if (!validParameter(tp)) { continue; } TableNamespace namespace = tp.getNamespace(); String value = tp.getValue(); String rl = namespace.getResourceLocation(); JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup == null) { continue; } SkinnyUUID uuid = jdbmLookup.lookup(value); if (uuid != null) { pnUUIDs.add(uuid); } } // Two sets of UUIDs have been established at this point. // The first set, uuids, is the set based on the statements. // The second set, pnUUIDs, is the set based on the proto-network. // Make uuids the intersection of the two: (uuids ∩ pnUUIDs) uuids.retainAll(pnUUIDs); return uuids; } private Set<SkinnyUUID> findUUIDs(final Set<TableEntry> entries, Map<String, JDBMEquivalenceLookup> lookups) { final Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>(); for (final TableEntry e : entries) { String val = e.value; String rl = e.resourceLocation; JDBMEquivalenceLookup jdbmLookup = lookups.get(rl); if (jdbmLookup == null) { continue; } SkinnyUUID uuid = jdbmLookup.lookup(val); if (uuid != null) { uuids.add(uuid); } } return uuids; } /** * Aggregate the set of all table entries for a proto-network that match at * least one of the provided term literals. * * @param literals Literals, e.g., {@code proteinAbundance(#)} * @return Table entry set */ private Set<TableEntry> search(final ProtoNetwork pn, String... literals) { Set<TableEntry> ret; if (noItems(literals)) { return emptySet(); } // An initial capacity would be ideal for this set, but we don't // have any reasonable estimations. ret = new HashSet<TableEntry>(); // Establish access to the relevant table objects final ParameterTable paramTbl = pn.getParameterTable(); final TermTable termTbl = pn.getTermTable(); final List<String> terms = termTbl.getTermValues(); final NamespaceTable nsTbl = pn.getNamespaceTable(); final int[][] indata = pn.getTermIndices(); final Set<String> literalSet = constrainedHashSet(literals.length); for (final String s : literals) { literalSet.add(s); } // Search indata for elements of the term literal set for (int i = 0; i < indata.length; i++) { int tid = indata[i][TERM_INDEX]; String string = terms.get(tid); if (!literalSet.contains(string)) { continue; } int pid = indata[i][PARAM_INDEX]; final TableParameter tp = paramTbl.getTableParameter(pid); final String inval = tp.getValue(); int nid = indata[i][NAMESPACE_INDEX]; final TableNamespace tns = nsTbl.getTableNamespace(nid); String inrl = null; if (tns != null) inrl = tns.getResourceLocation(); final TableEntry te = new TableEntry(inval, inrl); ret.add(te); } return ret; } /** * {@inheritDoc} */ @Override public DocumentModificationResult pruneOrthologyDocument(final Document d, final ProtoNetwork pn) { DocumentModificationResult result = new DocumentModificationResult(); // Load the equivalences Set<EquivalenceDataIndex> equivs; try { equivs = p2.stage2LoadNamespaceEquivalences(); } catch (EquivalenceMapResolutionFailure f) { // Unrecoverable error result.addError(f.getUserFacingMessage()); result.setSuccess(false); return result; } // Map namespace to lookup Map<String, JDBMEquivalenceLookup> lookups = sizedHashMap(equivs.size()); for (final EquivalenceDataIndex edi : equivs) { String rl = edi.getNamespaceResourceLocation(); DataFileIndex dfi = edi.getEquivalenceIndex(); lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath())); } // Open the indices for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.open(); } catch (IOException e) { result.addError(e.getMessage()); result.setSuccess(false); return result; } } // Approach // 1. map UUID to list of terms for proto network // 2. iterate ortho statements // a. if valid orthologous statement // - lookup exact match of subject term in proto network, if found // continue (orthologous statement intersects the proto network) // - find subject parameter's uuid in a proto network term, if found // continue (orthologous statement intersects the proto network) // - lookup exact match of object term in proto network, if found // continue (orthologous statement intersects the proto network) // - find object parameter's uuid in a proto network term, if found // continue (orthologous statement intersects the proto network) // 1. Map parameter UUIDs to containing term ids in the proto network final TermTable tt = pn.getTermTable(); final TermParameterMapTable tpmt = pn.getTermParameterMapTable(); final ParameterTable pt = pn.getParameterTable(); final Map<Integer, Integer> pglob = pt.getGlobalIndex(); final Map<Integer, SkinnyUUID> puuid = pt.getGlobalUUIDs(); final Set<Integer> tidset = tt.getIndexedTerms().keySet(); final Map<SkinnyUUID, Set<Integer>> uuidterms = sizedHashMap(puuid .size()); final Set<Term> pnterms = new HashSet<Term>(tt.getVisitedTerms().keySet()); // for each term for (final Integer tid : tidset) { // get its parameters final List<Integer> pids = tpmt.getParameterIndexes(tid); for (final Integer pid : pids) { // find global parameter index for pid Integer globalpid = pglob.get(pid); // find UUID for global parameter index final SkinnyUUID pu = puuid.get(globalpid); // save this term to this UUID Set<Integer> terms = uuidterms.get(pu); if (terms == null) { terms = new HashSet<Integer>(); uuidterms.put(pu, terms); } terms.add(tid); } } // get all statement in orthology document final List<Statement> orthoStmts = d.getAllStatements(); // map them to statement groups for efficient pruning Map<StatementGroup, Set<Statement>> orthomap = d.mapStatements(); // set up pruning result int total = orthoStmts.size(); int pruned = 0; // establish cache to skinny uuids to avoid superfluous jdbm lookups final Map<Parameter, SkinnyUUID> paramcache = sizedHashMap(total * 2); // iterate all statements in the orthology document ORTHO_STATEMENT: for (final Statement orthoStmt : orthoStmts) { // rule out invalid or non-orthologous statements if (validOrthologousStatement(orthoStmt)) { // break down subject final Term sub = orthoStmt.getSubject(); final FunctionEnum subf = sub.getFunctionEnum(); final List<Parameter> subp = sub.getParameters(); final Parameter subjectParam = subp.get(0); // break down object final Term obj = orthoStmt.getObject().getTerm(); final FunctionEnum objf = obj.getFunctionEnum(); final List<Parameter> objp = obj.getParameters(); final Parameter objectParam = objp.get(0); // lookup exact match of subject term if (pnterms.contains(sub)) { pnterms.add(obj); continue; } // find UUID for subject parameter SkinnyUUID uuid = paramcache.get(subjectParam); if (uuid == null) { final Namespace ns = subjectParam.getNamespace(); final JDBMEquivalenceLookup lookup = lookups.get(ns .getResourceLocation()); if (lookup == null) { continue; } uuid = lookup.lookup(subjectParam.getValue()); paramcache.put(subjectParam, uuid); } // if there is a proto network term with this UUID contained, then // this orthologous statement intersects the proto network, continue if (uuid != null) { Set<Integer> tids = uuidterms.get(uuid); if (hasItems(tids)) { for (final Integer tid : tids) { final Term t = tt.getIndexedTerms().get(tid); if (t.getFunctionEnum() == subf) { pnterms.add(sub); pnterms.add(t); pnterms.add(obj); continue ORTHO_STATEMENT; } } } } // lookup exact match of object term if (pnterms.contains(obj)) { pnterms.add(sub); continue; } // find UUID for object parameter uuid = paramcache.get(objectParam); if (uuid == null) { final Namespace ns = objectParam.getNamespace(); final JDBMEquivalenceLookup lookup = lookups.get(ns .getResourceLocation()); if (lookup == null) { continue; } uuid = lookup.lookup(objectParam.getValue()); paramcache.put(objectParam, uuid); } // if there is a proto network term with this UUID contained, then // this orthologous statement intersects the proto network, continue if (uuid != null) { Set<Integer> tids = uuidterms.get(uuid); if (hasItems(tids)) { for (final Integer tid : tids) { final Term t = tt.getIndexedTerms().get(tid); if (t.getFunctionEnum() == objf) { pnterms.add(obj); pnterms.add(t); pnterms.add(sub); continue ORTHO_STATEMENT; } } } } // proto network does not contain either equivalent term so prune // from its parent statement group Set<Entry<StatementGroup, Set<Statement>>> entries = orthomap .entrySet(); for (final Entry<StatementGroup, Set<Statement>> e : entries) { StatementGroup group = e.getKey(); Set<Statement> stmts = e.getValue(); if (stmts.contains(orthoStmt)) { group.getStatements().remove(orthoStmt); pruned++; break; } } } } // close equivalences for (final JDBMEquivalenceLookup jl : lookups.values()) { try { jl.close(); } catch (IOException e) { result.addWarning(e.getMessage()); } } // set result data result.setDeltaStatements(-pruned); result.setTotalStatements(total); result.setSuccess(true); return result; } /** * Determines whether the {@link Statement statement} is a valid * {@link RelationshipType#ORTHOLOGOUS orthologous} statement. * * @param stmt {@link Statement} to evaluate * @return {@code true} if it is valid, {@code false} otherwise */ private boolean validOrthologousStatement(final Statement stmt) { // test statement is well-formed if (stmt.getRelationshipType() == RelationshipType.ORTHOLOGOUS && stmt.getObject() != null && stmt.getObject().getTerm() != null) { // test subject has only one namespace parameter final Term subject = stmt.getSubject(); List<Parameter> subparams = subject.getParameters(); if (subparams == null || subparams.size() != 1 || subparams.get(0).getNamespace() == null) { return false; } // test object has only one namespace parameter final Term object = stmt.getObject().getTerm(); List<Parameter> objparams = object.getParameters(); if (objparams == null || objparams.size() != 1 || objparams.get(0).getNamespace() == null) { return false; } return true; } return false; } /** * {@inheritDoc} */ @Override public ProtoNetwork compile(final Document d) { return protoNetworkService.compile(d); } /** * {@inheritDoc} */ @Override public void merge( final ProtoNetwork pnDestination, final ProtoNetwork pndSource) throws ProtoNetworkError { protoNetworkService.merge(pnDestination, pndSource); } /** * {@inheritDoc} */ @Override public ProtoNetworkDescriptor write(final String path, final ProtoNetwork network) throws ProtoNetworkError { return protoNetworkService.write(path, network); } /** * Returns true if the parameter has a namespace and value, false if not. * * @param p Parameter */ private static boolean validParameter(final Parameter p) { if (p.getNamespace() == null) { return false; } if (p.getValue() == null) { return false; } return true; } /** * Returns true if the table parameter has a namespace and value, false if * not. * * @param tp Table parameter */ private static boolean validParameter(final TableParameter tp) { if (tp.getNamespace() == null) { return false; } if (tp.getValue() == null) { return false; } return true; } /** * Table entry. */ private static class TableEntry { /** Parameter value. */ private final String value; /** Namespace containing this parameter value. */ private final String resourceLocation; private final int hash; /** * Creates a table entry; establishes a hash code. * * @param val String value * @param rl String resource location */ private TableEntry(final String val, final String rl) { this.value = val; this.resourceLocation = rl; int prime = 31; int result = prime; result *= prime; result += value.hashCode(); if (rl != null) { result *= prime; result += rl.hashCode(); } hash = result; } /** * {@inheritDoc} */ @Override public int hashCode() { return hash; } /** * {@inheritDoc} */ @Override public boolean equals(final java.lang.Object o) { if (this == o) { return true; } if (!(o instanceof TableEntry)) { return false; } final TableEntry te = (TableEntry) o; if (!value.equals(te.value)) { return false; } if (resourceLocation == null) { if (te.resourceLocation != null) { return false; } } else if (!resourceLocation.equals(te.resourceLocation)) { return false; } return true; } } }