/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.metadata;

import act.server.NoSQLAPI;
import act.shared.Reaction;
import org.apache.commons.lang3.tuple.Pair;
import org.json.JSONObject;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Orders {@link ProteinMetadata} instances by a heuristic score, highest score first.
 *
 * <p>The score is contextualized on a {@link Host} and on a desired {@link Localization}
 * within that host; see {@link #score(ProteinMetadata)} for the individual contributions.
 *
 * <p>The class implements the raw {@link Comparator} type (kept for source compatibility with
 * existing callers); both arguments of {@link #compare(Object, Object)} must be
 * {@link ProteinMetadata} instances.
 */
public class ProteinMetadataComparator implements Comparator {

  /** Placeholder database name used by {@link #main(String[])} when no argument is given. */
  private static final String DEFAULT_DB_NAME = "SHOULD_COME_FROM_CMDLINE";

  // The ranking is contextualized on a host
  private Host host;

  // The ranking is contextualized on a location within that host
  private Localization localization;

  /**
   * @param host the host used to look up per-host cloning and localization predictions
   * @param localization the location within the host that a protein should ideally match
   */
  public ProteinMetadataComparator(Host host, Localization localization) {
    this.host = host;
    this.localization = localization;
  }

  public Host getHost() {
    return host;
  }

  public void setHost(Host host) {
    this.host = host;
  }

  public Localization getLocalization() {
    return localization;
  }

  public void setLocalization(Localization localization) {
    this.localization = localization;
  }

  /**
   * Compares two {@link ProteinMetadata} objects so that the one with the higher score sorts
   * first (descending score order).
   *
   * @param o1 first object, must be a {@link ProteinMetadata}
   * @param o2 second object, must be a {@link ProteinMetadata}
   * @return a negative value if {@code o1} scores higher than {@code o2}, a positive value if it
   *     scores lower, and zero if the scores are equal
   * @throws ClassCastException if either argument is not a {@link ProteinMetadata}
   */
  @Override
  public int compare(Object o1, Object o2) {
    int score1 = score((ProteinMetadata) o1);
    int score2 = score((ProteinMetadata) o2);
    // Do not subtract: score() can saturate at Integer.MIN_VALUE (e.g. log(0) = -Infinity),
    // in which case "score2 - score1" overflows and reports the wrong ordering.
    return Integer.compare(score2, score1);
  }

  /**
   * Computes the heuristic score of a protein for the configured host and localization.
   *
   * <p>Contributions, each skipped when the corresponding field is {@code null}:
   * <ul>
   *   <li>{@code kcatkm} and {@code specificActivity}: {@code 20 * ln(value)} each;</li>
   *   <li>{@code modifications}: -50 if true, +30 if false;</li>
   *   <li>{@code heteroSubunits}: -10 if true, +30 if false;</li>
   *   <li>{@code cloned}: {@code 20 *} the value stored for the host, if any;</li>
   *   <li>{@code localization}: for the value stored for the host, nothing if absent or
   *       {@code unknown}, -20 if {@code questionable}, -30 if it differs from the desired
   *       localization, +20 if it matches.</li>
   * </ul>
   *
   * <p>A zero kinetic value makes the logarithm {@code -Infinity}, which saturates the returned
   * score at {@link Integer#MIN_VALUE}; a negative one makes the sum {@code NaN}, which rounds
   * to 0.
   *
   * @param pmd the protein metadata to score, must not be {@code null}
   * @return the rounded score
   */
  public int score(ProteinMetadata pmd) {
    double out = 0.0;

    // Score enzyme efficiency; this is the dominant consideration and favors the highest values.
    if (pmd.kcatkm != null) {
      out += Math.log(pmd.kcatkm) * 20;
    }
    if (pmd.specificActivity != null) {
      out += Math.log(pmd.specificActivity) * 20;
    }

    // Score modifications: null means no prediction, hence no change.
    if (pmd.modifications != null) {
      out += pmd.modifications ? -50 : 30;
    }

    // Score subunits: needing multiple subunits is potentially problematic, while a clear
    // indication that there are none is a bonus. Null means no prediction.
    if (pmd.heteroSubunits != null) {
      out += pmd.heteroSubunits ? -10 : 30;
    }

    // Score cloned: the per-host value may be positive or negative.
    // NOTE(review): the original comment says this scales with organism similarity up to
    // +/-140; the value range is not visible here - confirm against ProteinMetadata.
    if (pmd.cloned != null) {
      Integer cloned = pmd.cloned.get(host);
      if (cloned != null) {
        out += cloned * 20;
      }
    }

    // Score localization
    if (pmd.localization != null) {
      Localization prediction = pmd.localization.get(host);
      if (prediction == null || prediction == Localization.unknown) {
        // No prediction for this host, no change
      } else if (prediction == Localization.questionable) {
        out += -20; // Small penalty for ambiguity about the location
      } else if (prediction != localization) {
        out += -30; // Larger penalty if the prediction is not where you want it
      } else {
        out += 20; // Small bonus if things match up
      }
    }

    // Go through double on purpose: Math.round yields a long, and narrowing a double to int
    // saturates at the int bounds, whereas narrowing a long to int would just drop high bits.
    double round = Math.round(out);
    return (int) round;
  }

  /**
   * Debugging entry point: builds the protein metadata table for the given databases.
   *
   * @param args optional {@code [sourceDB, destDB]}; a missing entry falls back to the
   *     placeholder name {@code "SHOULD_COME_FROM_CMDLINE"}
   * @throws Exception if building the table fails
   */
  public static void main(String[] args) throws Exception {
    String inDb = (args != null && args.length > 0) ? args[0] : DEFAULT_DB_NAME;
    String outDb = (args != null && args.length > 1) ? args[1] : DEFAULT_DB_NAME;
    createProteinMetadataTable(inDb, outDb);
  }

  /**
   * Scans the reactions of a database and scores the protein metadata attached to the
   * BRENDA-sourced ones, using an E. coli / cytoplasm comparator.
   *
   * <p>Metadata for which {@code isValid(Host.Ecoli)} is false is left out of the result.
   *
   * @param sourceDB name passed to {@link NoSQLAPI} as the first constructor argument
   * @param destDB name passed to {@link NoSQLAPI} as the second constructor argument
   * @return for each reaction id, the valid protein metadata of that reaction paired with its
   *     score, in the order encountered
   * @throws Exception propagated from the database access or metadata parsing
   */
  public static Map<Long, List<Pair<ProteinMetadata, Integer>>> createProteinMetadataTable(
      String sourceDB, String destDB) throws Exception {
    ProteinMetadataComparator comp =
        new ProteinMetadataComparator(Host.Ecoli, Localization.cytoplasm);

    NoSQLAPI api = new NoSQLAPI(sourceDB, destDB);
    Iterator<Reaction> iterator = api.readRxnsFromInKnowledgeGraph();

    // Create a single instance of the factory to use for all json
    ProteinMetadataFactory factory = ProteinMetadataFactory.initiate();

    // Aggregate the results of the database scan
    List<ProteinMetadata> agg = new ArrayList<>();

    while (iterator.hasNext()) {
      Reaction rxn = iterator.next();

      // Only BRENDA reactions are considered; constant-first equals also skips a null source.
      if (!Reaction.RxnDataSource.BRENDA.equals(rxn.getDataSource())) {
        continue;
      }

      Set<JSONObject> jsons = rxn.getProteinData();
      for (JSONObject json : jsons) {
        ProteinMetadata meta = factory.create(json);

        // NOTE(review): a negative UUID presumably marks a reversed reaction, which is mapped
        // back through Reaction.reverseID - confirm against Reaction.
        Long rxnId;
        if (rxn.getUUID() < 0) {
          rxnId = (long) Reaction.reverseID(rxn.getUUID());
        } else {
          rxnId = (long) rxn.getUUID();
        }
        meta.setReactionId(rxnId);
        agg.add(meta);
      }
    }

    System.out.println("All Metadata's parsed: " + agg.size());

    // Score every valid protein and group the (metadata, score) pairs by reaction id
    Map<Long, List<Pair<ProteinMetadata, Integer>>> reactionIdToScore = new HashMap<>();
    for (ProteinMetadata pmd : agg) {
      // Ignore invalid metadata (meaning a really poor enzyme)
      if (!pmd.isValid(Host.Ecoli)) {
        continue;
      }

      int score = comp.score(pmd);
      reactionIdToScore
          .computeIfAbsent(pmd.reactionId, id -> new ArrayList<>())
          .add(Pair.of(pmd, score));
    }

    return reactionIdToScore;
  }
}