/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.reachables;
import act.shared.Chemical;
import act.shared.helpers.P;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
/**
 * Reachability expansion that runs in two phases.
 *
 * <p>Phase 1 delegates to the superclass expansion until no reaction is enabled any more.
 * Phase 2 ("conditional reachability") snapshots the reached set and the outstanding reaction
 * needs, then, for one {@link EnvCond} at a time, restores that snapshot, assumes the
 * condition's chemicals to be reachable, re-runs the expansion until nothing is enabled and
 * records which chemicals became reachable only under that assumption.
 */
public class ConditionalReachable extends OutdatedWavefrontExpansion {

  /** False while the ordinary expansion is running; true once conditional evaluation began. */
  boolean conditionalReachPhase;

  /** Snapshot of the reached set taken when the conditional phase starts. */
  Set<Long> R_saved;

  /** Snapshot of the per-reaction needs taken when the conditional phase starts. */
  HashMap<Long, List<Long>> rxn_needs_saved;

  /** Chemicals referenced in reactions that were not in {@link #R_saved} at snapshot time. */
  Set<Long> unR_saved;

  /** For each newly reached chemical id, the conditions under which it was reached. */
  HashMap<Long, List<EnvCond>> reachableUnder;

  /** For each evaluated condition, the number of additional chemicals it made reachable. */
  HashMap<EnvCond, Integer> extraReached;

  /** {@link #extraReached} as pairs sorted ascending by count; populated by finalize. */
  List<P<EnvCond, Integer>> extraReachedSortedBySize;

  /** Conditions still waiting to be evaluated; null until the conditional phase starts. */
  List<EnvCond> guesses;

  /** Number of conditions queued when the conditional phase started; -1 before that. */
  int size_guesses;

  int partial; // >0 indicates do only those partial many, 0 indicates not partial

  boolean debug = false;

  /**
   * @param partial when positive, only the first {@code partial} candidate conditions are
   *     evaluated; otherwise all of them are
   */
  public ConditionalReachable(int partial) {
    super(); // will get me R, and substrate preconditions of rxns (rxn_needs)
    this.conditionalReachPhase = false; // first do normal reachability, then conditional
    this.extraReached = new HashMap<EnvCond, Integer>(); // the number of nodeMapping reached if this node is enabled
    this.reachableUnder = new HashMap<Long, List<EnvCond>>();
    this.guesses = null;
    this.size_guesses = -1;
    this.partial = partial;
  }

  /**
   * Reports progress and, as a side effect, switches to the conditional phase the first time
   * no reaction is enabled during the ordinary phase.
   *
   * @return 0 while the ordinary phase still has enabled reactions, 50 on the call that starts
   *     the conditional phase, and afterwards a value rising from 50 to 100 as the queue of
   *     conditions drains
   */
  @Override
  public double percentDone() {
    if (!conditionalReachPhase) {
      if (anyEnabledReactions(null)) {
        return 0; // if any rxns are enabled then we are still in normal phase
      }

      conditionalReachPhase = true;
      List<EnvCond> candidates = getEnvironmentConditionTuples();
      // picking which partial set to lookup is pretty arbitrary, the
      // getEnvCondTuples function above returns them in ascending order
      // of their immediate fanout. But that does not mean those preconditions
      // will eventually lead to a large subtree. So we just pick the first-partial num.
      if (partial > 0) {
        // Clamp so that asking for more conditions than exist does not throw, and copy so the
        // work queue is an independent list rather than a view over the sorted list.
        int limit = Math.min(partial, candidates.size());
        candidates = new ArrayList<EnvCond>(candidates.subList(0, limit));
      }
      this.guesses = candidates;
      // Remember the starting queue length; the progress formula below divides by it.
      this.size_guesses = candidates.size();

      // save the current state by doing a deep copy
      saveState();
      return 50; // normal reachability done, move onto conditional reachability phase
    }

    if (debug) logProgress("At " + this.guesses.size() + "/" + this.size_guesses);
    if (this.size_guesses <= 0) {
      return 100; // nothing was queued, so there is nothing left to do
    }
    return 100 - 50 * ((double) this.guesses.size() / this.size_guesses);
  }

  /**
   * Builds one candidate condition per distinct "needs" list found in the superclass's
   * reaction needs and orders the candidates by how many reactions share that needs list.
   *
   * @return the distinct conditions, ascending by occurrence count
   */
  private List<EnvCond> getEnvironmentConditionTuples() {
    HashMap<EnvCond, Integer> counts = new HashMap<EnvCond, Integer>();
    for (Long r : super.rxn_needs.keySet()) {
      EnvCond tuple = new EnvCond(super.rxn_needs.get(r));
      Integer seen = counts.get(tuple);
      counts.put(tuple, seen == null ? 1 : seen + 1);
    }
    return sortByCounts(counts);
  }

  /**
   * @param counts occurrence count per condition
   * @return the conditions ordered ascending by their count
   */
  private List<EnvCond> sortByCounts(HashMap<EnvCond, Integer> counts) {
    List<P<EnvCond, Integer>> pairs = new ArrayList<P<EnvCond, Integer>>();
    for (Entry<EnvCond, Integer> entry : counts.entrySet()) {
      pairs.add(new P<EnvCond, Integer>(entry.getKey(), entry.getValue()));
    }
    Collections.sort(pairs, new PairComparator<EnvCond>());

    List<EnvCond> ordered = new ArrayList<EnvCond>();
    for (P<EnvCond, Integer> pair : pairs) {
      ordered.add(pair.fst());
    }
    return ordered;
  }

  /** Orders pairs ascending by their second (integer) component. */
  public class PairComparator<T> implements Comparator<P<T,Integer>> {
    @Override
    public int compare(P<T,Integer> o1, P<T,Integer> o2) {
      return o1.snd().compareTo(o2.snd());
    }
  }

  /** Snapshots the reached set and reaction needs, and derives the not-reached set. */
  private void saveState() {
    this.R_saved = deepCopy(super.R);
    this.rxn_needs_saved = deepCopy(super.rxn_needs);
    this.unR_saved = new HashSet<Long>(ActData.instance().chemsReferencedInRxns);
    this.unR_saved.removeAll(this.R_saved);
  }

  /** Resets the superclass's reached set and reaction needs to copies of the snapshot. */
  private void restoreState() {
    super.R = deepCopy(this.R_saved);
    super.rxn_needs = deepCopy(this.rxn_needs_saved);
  }

  /** Copies the map and each of its value lists, so the copy can be mutated independently. */
  private HashMap<Long, List<Long>> deepCopy(HashMap<Long, List<Long>> map) {
    HashMap<Long, List<Long>> copy = new HashMap<Long, List<Long>>();
    for (Entry<Long, List<Long>> entry : map.entrySet()) {
      copy.put(entry.getKey(), new ArrayList<Long>(entry.getValue()));
    }
    return copy;
  }

  /** Copies the set into a fresh {@link HashSet}. */
  private Set<Long> deepCopy(Set<Long> parentR) {
    return new HashSet<Long>(parentR);
  }

  private static final String _fileloc = "com.act.reachables.ConditionalReachable";

  /** Writes a formatted message to stderr when progress logging is enabled. */
  private static void logProgress(String format, Object... args) {
    if (!GlobalParams.LOG_PROGRESS)
      return;
    System.err.format(_fileloc + ": " + format, args);
  }

  /** Writes a plain message line to stderr when progress logging is enabled. */
  private static void logProgress(String msg) {
    if (!GlobalParams.LOG_PROGRESS)
      return;
    System.err.println(_fileloc + ": " + msg);
  }

  /**
   * Performs one unit of work: a superclass expansion step during the ordinary phase, or the
   * complete evaluation of the next queued condition during the conditional phase.
   */
  @Override
  public void doMoreWork() {
    if (!conditionalReachPhase) {
      super.doMoreWork();
      return;
    }

    if (this.guesses == null || this.guesses.isEmpty()) {
      return; // queue drained (or never filled): nothing to evaluate
    }

    EnvCond envCond = this.guesses.remove(0);
    if (debug) logProgress("Assume: " + envCond);

    // pop the stack back to normal reachability
    restoreState();
    super.R.addAll(envCond.speculatedChems()); // ASSUME(reachable(new node))
    super.updateEnabled(envCond.speculatedChems());
    while (super.anyEnabledReactions(null)) {
      super.doMoreWork(); // compute reachability
    }

    // delta from saved_R, modulo the chems we assumed and added as is
    int newReachCount = super.R.size() - this.R_saved.size() - envCond.speculatedChems().size();
    storeNewlyReached(envCond, newReachCount, super.R, this.R_saved);
  }

  /**
   * Records {@code ec} against every id present in {@code newReach} but absent from
   * {@code oldReach}, and stores {@code N} as the number of chemicals {@code ec} enables.
   */
  private void storeNewlyReached(EnvCond ec, int N, Set<Long> newReach, Set<Long> oldReach) {
    for (Long id : newReach) {
      if (oldReach.contains(id))
        continue;
      List<EnvCond> conditions = this.reachableUnder.get(id);
      if (conditions == null) {
        conditions = new ArrayList<EnvCond>();
        this.reachableUnder.put(id, conditions);
      }
      conditions.add(ec);
    }
    // save the number of nodeMapping, and nodeMapping themselves, that are enabled by ec
    this.extraReached.put(ec, N);
    if (debug) logProgress("\t-> " + N);
  }

  /**
   * Publishes the results: sets the "ifReachThenEnables" and "reachabilityEase" node
   * attributes, caches the sorted condition list, logs a report, selects the nodes of the
   * condition that enabled the most chemicals, and stores this object as the last
   * reachability computation.
   *
   * @param tm monitor that receives a status line and per-condition progress
   */
  @Override
  public void finalize(TaskMonitor tm) {
    ActData data = ActData.instance();
    int N = this.extraReached.size();
    int i = 0;
    EnvCond high = null;
    int highest = Integer.MIN_VALUE;
    tm.setStatus("Conditional Reachability evaluated for " + N + " nodeMapping. Setting isConditionalReachable.");

    List<P<EnvCond, Integer>> sc = new ArrayList<P<EnvCond, Integer>>();
    HashMap<Long, Integer> chemEnvironmentalImp = new HashMap<Long, Integer>();
    for (Entry<EnvCond, Integer> reached : this.extraReached.entrySet()) {
      EnvCond c = reached.getKey();
      int enables = reached.getValue();
      sc.add(new P<EnvCond, Integer>(c, enables));
      tm.setPercentCompleted((int)(100 * ((double)(i++)/N)));

      for (Long cc : c.speculatedChems()) {
        Node node = data.chemsInAct.get(cc);
        if (node == null)
          continue; // in cases where the native is also a cofactor, it would not have a node.
        Long n1 = node.getIdentifier();
        Integer enables_through_some_other_pairing = (Integer)Node.getAttribute(n1, "ifReachThenEnables");
        if (enables_through_some_other_pairing != null && enables < enables_through_some_other_pairing)
          continue;

        // new max enables found through this pairing....
        // set the attributes in the act network
        Node.setAttribute(n1, "ifReachThenEnables", enables);
        // log it
        chemEnvironmentalImp.put(cc, enables);
      }

      if (high == null || highest < enables) {
        high = c;
        highest = enables;
      }
    }

    // cache the sorted sized clusters
    Collections.sort(sc, new PairComparator<EnvCond>());
    this.extraReachedSortedBySize = sc;

    // dump to log... <install loc>/output.log
    logEnvCondsAndNodes(sc, chemEnvironmentalImp);

    // add reachability ease
    addReachabilityEase(sc);

    // highlight the chemicals of the best condition; there is none when nothing was evaluated
    if (high != null) {
      data.Act.setSelectedNodeState(allNodes(data.chemsInAct, high), true);
    }

    // cache this reachability computation (useful in other actions later)
    data._LastReachabilityComputation = this;

    // announce it to the user
    // "Computed ifReachThenEnables values. Highest enabler is node " +
    // high + " who makes " + highest + " others reachable." );
  }

  /**
   * Sets the "reachabilityEase" attribute on every chemical that has a node: 1000 when the
   * chemical is reachable unconditionally, otherwise the largest enable-count among the
   * conditions it is reachable under, or -1 when no such condition is known.
   */
  private void addReachabilityEase(List<P<EnvCond, Integer>> sc) {
    ActData data = ActData.instance();
    HashMap<EnvCond, Integer> precondition_ease = new HashMap<EnvCond, Integer>();
    for (P<EnvCond, Integer> p : sc)
      precondition_ease.put(p.fst(), p.snd());

    for (Long cid : data.chemsReferencedInRxns) {
      Node node = data.chemsInAct.get(cid);
      if (node == null)
        continue;

      int ease;
      if (isReachable(cid)) {
        // reachable without any preconditions
        ease = 1000;
      } else {
        // null when the chemical was not reached under any evaluated condition,
        // e.g. nodeMapping that are only consumed
        List<EnvCond> underConditions = envCondForReachability(cid);
        if (underConditions == null) {
          ease = -1;
        } else {
          int max = 0;
          for (EnvCond ec : underConditions) {
            Integer pc = precondition_ease.get(ec);
            // a condition missing from the table contributes nothing instead of failing on unboxing
            if (pc != null && max < pc) max = pc;
          }
          ease = max;
        }
      }

      Long n1 = node.getIdentifier();
      Node.setAttribute(n1, "reachabilityEase", ease);
    }
  }

  /**
   * Logs a three-part report: per-chemical reachability conditions, conditions enabling at
   * least 5 chemicals, and individual chemicals enabling at least 5 chemicals.
   */
  private void logEnvCondsAndNodes(List<P<EnvCond, Integer>> ecs, HashMap<Long, Integer> chemImp) {
    List<Long> chems = new ArrayList<Long>(ActData.instance().chemsReferencedInRxns);
    Collections.sort(chems);

    logProgress("========================================");
    logProgress("===========Chemical Metadata============");
    logProgress("No chemical metadata loaded.");

    logProgress("===============================================");
    logProgress("====Reasons for chemicals being unreachable====");
    logProgress("Chemical ID\tWould be reachable if these other groups are reachable");
    for (Long id : chems) {
      logProgress("%d\t%s\n", id, namify(GetChemReachability(id)));
    }

    logProgress("===============================================");
    logProgress("===How many chemicals are enabled by a tuple===");
    logProgress("Number of new reachables\tIf this tuple is reachable");
    for (P<EnvCond, Integer> ec : ecs) {
      int num_enabled = ec.snd();
      if (num_enabled < 5) continue;
      logProgress("%d\t%s\n", num_enabled, ec.fst());
    }

    logProgress("===============================================");
    logProgress("==== What enabling chemicals have the most ====");
    logProgress("=== potential reachables (potential because ===");
    logProgress("=== they may always need another substrate) ==");
    logProgress("Number of new reachables\tIf this chem is reachable\tIs chem reachable itself\tInChI\tNames");
    List<Entry<Long, Integer>> m2l = new ArrayList<Entry<Long, Integer>>(chemImp.entrySet());
    Collections.sort(m2l, new CmpSnd<Long>());
    for (Entry<Long, Integer> e : m2l) {
      int num_enabled = e.getValue();
      Long chemid = e.getKey();
      if (num_enabled < 5)
        continue; // not worth making an exception for something that enables less than 5 chemicals
      // only three columns are emitted; no chemical metadata is loaded for InChI/Names
      logProgress("%d\t%s\t%s\n", num_enabled, chemid, isReachable(chemid));
    }
    logProgress("========================================");
  }

  /** Renders a list of conditions for the log; a null list is rendered as "null". */
  private String namify(List<EnvCond> conditions) {
    if (conditions == null)
      return "null";
    List<String> names = new ArrayList<String>();
    for (EnvCond e : conditions)
      names.add(namify(e));
    return names.toString();
  }

  /** Renders one condition as a quoted, " + "-separated list of "id(id)" items. */
  private String namify(EnvCond e) {
    StringBuilder print = new StringBuilder();
    for (Long id : e.speculatedChems()) {
      /* We do not load chemical metadata in one big blob anymore. If needed, query the db to get the specific data you
       * need on individual chemicals. Until then the id doubles as the display name. */
      String name = id.toString();
      if (print.length() > 0)
        print.append(" + ");
      print.append(name).append("(").append(id).append(")");
    }
    return "\"" + print + "\"";
  }

  /**
   * @param id chemical id
   * @return an empty list when the chemical is reachable without preconditions; otherwise the
   *     conditions it was reached under, or null when none were recorded
   */
  public List<EnvCond> GetChemReachability(Long id) {
    if (isReachable(id))
      // reachable without any preconditions
      return new ArrayList<EnvCond>();
    // null when the chemical was not reached under any evaluated condition
    return envCondForReachability(id);
  }

  /**
   * @param id chemical id
   * @return readable renderings of the conditions from {@link #GetChemReachability(Long)};
   *     empty when there are none
   */
  public List<String> GetChemReachabilityReadable(Long id) {
    List<String> all_opts = new ArrayList<String>();
    List<EnvCond> r = GetChemReachability(id);
    if (r == null)
      return all_opts; // no recorded conditions for this chemical
    for (EnvCond e : r) {
      all_opts.add(e.toReadableString(15));
    }
    return all_opts;
  }

  /** Looks up the conditions recorded for {@code id}; null when there are none. */
  private List<EnvCond> envCondForReachability(Long id) {
    return this.reachableUnder.get(id);
  }

  /**
   * @return whether {@code id} is in the snapshot of the reached set; the snapshot exists only
   *     after the conditional phase has started
   */
  public boolean isReachable(Long id) {
    return this.R_saved.contains(id);
  }

  /** Orders map entries ascending by their integer value. */
  public class CmpSnd<T> implements Comparator<Entry<T,Integer>> {
    @Override
    public int compare(Entry<T,Integer> o1, Entry<T,Integer> o2) {
      return o1.getValue().compareTo(o2.getValue());
    }
  }

  /** Collects the nodes of the condition's chemicals, skipping chemicals without a node. */
  private Set<Node> allNodes(HashMap<Long, Node> map, EnvCond tuple) {
    Set<Node> n = new HashSet<Node>();
    for (Long c : tuple.speculatedChems()) {
      Node node = map.get(c);
      if (node != null)
        n.add(node);
    }
    return n;
  }

  /** @return the superclass's {@code R_by_layers} map */
  public HashMap<Integer, Set<Long>> getL12Layers() {
    return super.R_by_layers;
  }
}