package com.plectix.simulator.util.string;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.plectix.simulator.interfaces.ConnectedComponentInterface;
import com.plectix.simulator.simulator.ThreadLocalData;
import com.plectix.simulator.staticanalysis.Agent;
import com.plectix.simulator.staticanalysis.Link;
import com.plectix.simulator.staticanalysis.LinkStatus;
import com.plectix.simulator.staticanalysis.Site;
import com.plectix.simulator.util.io.PlxLogger;
/**
* This class is Singleton that creates a unique String from a ConnectedComponent's list of Agents.
*
* <br><br>
* Here is a brief description of the algorithm:
* <br><br>
* We first sort the Agents according to some "graph theoretical invariants"
* using {@link AgentInvariant.AgentInvariantComparator}. Please see that class for sorting rules.
*
* <br><br>
* If no Agents are "equal" according to these initial sorting rules, we are done.
* We can print the Agents in that order which should be unique.
*
* <br><br>
* But no "perfect" set of invariants is known that will distinguish all possible graph symmetries.
* Even though the initial sorting would discriminate Agents in most cases, there would still be
* some cases that some Agents would have equivalent invariants. Some of these equivalent cases
* are real symmetries and some are not. For example, consider the following polymer:
*
* <br><br>
* <center><tt>A-A-A-A-A-A-A-A</tt></center>
*
* <br><br>
* The initial sorting will create two categories: The Agents at both ends will be in the first
* category. The Agents in the middle will be in the second category. The Agents at the ends
* are really symmetrical. But the Agents in the middle are not all symmetrical, some are closer
* to the ends than the others. And once we choose which end to start writing the unique String,
* then none of the Agents are symmetrical anymore.
*
* <br><br>
* If we have any equivalent Agent, we proceed as follows until we discriminate all equivalent cases:
* First, we assign a rank to each agent. The initial ranks for the above example is:
*
* <br><br>
* <center><tt>1-2-2-2-2-2-2-1</tt></center>
*
* <br><br>
* Then, we compute the product of prime numbers corresponding to the rank of the Agent's neighbors.
* For example, if an Agent whose neighbors' ranks are 2, 2, 5 then the product of their corresponding
* primes is 3 x 3 x 11 = 99. According to the prime factorization theorem, this procedure will always
* provide an unambiguous result for any set of input ranks.
*
* <br><br>
* After computing the product of primes for each Agent, we resort the Agents using
* {@link AgentInvariant.AgentInvariantRankComparator} and re-rank them. Note that
* this resorting preserves the previous ranks but only discriminates equivalent cases.
* If no Agents have equivalent ranks, we are done.
*
* <br><br>
* If there are real symmetries (hence equivalent ranks such as the end Agents in the polymer example above),
* then these resorting would not change the previous ranks. This situation is called an "invariant partitioning".
* In order to "break ties", the algorithm proceeds by doubling all ranks and reducing the rank of the first Agent
* with the equivalent rank by one. These new ranks are treated as a new invariant set, and the previous steps
* of computing the product of prime numbers and resorting is repeated until there are no more equivalent
* ranks.
*
* <br><br>
* Some properties of the algorithm:
*
* <ul>
* <li>
* It uses the passed ConnectedComponent in one statement: <code>connectedComponent.getAgents()</code>
* and works with the returned list of Agents. No other data member in CConnectedComponent is used.
* <li>
* It doesn't change this list of Agents, e.g. it doesn't reorder them in any way, it works on its separate copy.
* <li>
* It doesn't change the list of Sites in Agents either, it works on its separate copy.
* The only things that the algorithm changes is <code>linkIndex</code> of Sites in the ConnectedComponent.
* The initial values are not read. It calls <code>site.setLinkIndex()</code> on all sites and sets
* the <code>linkIndex</code> data to -1 or a positive integer it computes.
* </ul>
*
* <br>
* The class doesn't have any data member and uses static private methods only. Therefore the access to the public method is through a Singleton.
*
* <br><br>
*
* @author ecemis
*/
public class ConnectedComponentToSmilesString implements ConnectedComponentToStringInterface {
private static final PlxLogger LOGGER = ThreadLocalData.getLogger(ConnectedComponentToSmilesString.class);
private static final ConnectedComponentToSmilesString INSTANCE = new ConnectedComponentToSmilesString();
private ConnectedComponentToSmilesString() {
super();
}
public static final ConnectedComponentToSmilesString getInstance() {
return INSTANCE;
}
public final String toUniqueString(final ConnectedComponentInterface connectedComponent) {
List<Agent> agentList = connectedComponent.getAgents();
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("ConnectedComponent has " + agentList.size() + " Agents");
}
List<AgentInvariant> agentInvariantList = new ArrayList<AgentInvariant>(agentList.size());
for (Agent agent : agentList) {
agentInvariantList.add(new AgentInvariant(agent));
}
if (agentInvariantList.size() == 1) {
// there is only one agent so we don't need any ranking!
return toKappa(agentInvariantList);
}
// The following sets the new ranks for each AgentInvariant
// These new ranks are our new invariants...
boolean rankEquivalence = sortAndComputeRanks(agentInvariantList, AgentInvariant.AGENT_INVARIANT_COMPARATOR);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("--> Rank Equivalence = " + rankEquivalence + " Ranks: " + getRanksAsString(agentInvariantList));
}
if (rankEquivalence == false) {
// we are done:
return toKappa(agentInvariantList);
}
// let's have each AgentInvariant know its neighbors
computeNeighbors(agentInvariantList);
// infinite loop! hope it will finish...
int iterationCount = 0;
while (true) {
for (AgentInvariant agentInvariant : agentInvariantList) {
agentInvariant.computeProductOfNeighborPrimes();
}
// Let's save the ranks before computing the new ones...
saveRanks(agentInvariantList);
rankEquivalence = sortAndComputeRanks(agentInvariantList, AgentInvariant.AGENT_INVARIANT_RANK_COMPARATOR);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("--> Iteration= " + iterationCount++
+ " [Rank Equivalence= " + rankEquivalence
+ "] Ranks= " + getRanksAsString(agentInvariantList));
}
if (!rankEquivalence) {
// we are done with the infinite loop, the ranks are not equivalent ;-)
// return Kappa string now:
return toKappa(agentInvariantList);
}
// the ranks are equivalent... do we have invariant partitioning?
boolean invariantPartitioning = true;
for (AgentInvariant agentInvariant : agentInvariantList) {
if (agentInvariant.areRanksEqual() == false) {
invariantPartitioning = false;
break;
}
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("----> Invariant Partitioning= " + invariantPartitioning);
}
if (invariantPartitioning) {
// extended connectivity method is complete and an invariant partitioning has been developed
// we still have rank equivalence so we have to break ties now...
breakTies(agentInvariantList);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("--> Broken Ties... New Ranks= " + getRanksAsString(agentInvariantList));
}
}
}
// we can never be here cause there is no break from the infinite loop above...
}
private static final String toKappa(final List<AgentInvariant> agentInvariantList) {
// set all link indices to -1
for (AgentInvariant agentInvariant : agentInvariantList) {
for (Site site : agentInvariant.getSortedSites()) {
site.setLinkIndex(-1);
}
}
StringBuffer stringBuffer = new StringBuffer();
int linkIndexCounter = 0;
for (AgentInvariant agentInvariant : agentInvariantList) {
Agent agent = agentInvariant.getAgent();
stringBuffer.append(agent.getName() + "(");
boolean firstSite = true;
for (Site site : agentInvariant.getSortedSites()) {
if (firstSite) {
firstSite = false;
} else {
stringBuffer.append(",");
}
stringBuffer.append(site.getName());
if (!site.getInternalState().hasDefaultName()) {
stringBuffer.append("~" + site.getInternalState().getName());
}
Link linkState = site.getLinkState();
LinkStatus statusLink = linkState.getStatusLink();
if (statusLink == LinkStatus.BOUND) {
if (site.getLinkIndex() == -1) {
// let's find the site we are bound to
Site connectedSite = linkState.getConnectedSite();
if (connectedSite == null) {
// we don't know what Agent we are bound to!
stringBuffer.append("!_");
} else {
// let's now set this site's link index
site.setLinkIndex(linkIndexCounter);
// also set the link index of the site we are bound to...
connectedSite.setLinkIndex(linkIndexCounter);
// let's increment the link count:
linkIndexCounter++;
// let's dump it:
stringBuffer.append("!" + site.getLinkIndex());
}
} else {
// let's dump our link index:
stringBuffer.append("!" + site.getLinkIndex());
}
} else if (statusLink == LinkStatus.WILDCARD) {
stringBuffer.append("?");
} else if (statusLink != LinkStatus.FREE) {
// we expect that the site will be either BOUND, WILDCARD or FREE. throw exception otherwise:
throw new RuntimeException("Unexpected State: Link state is neither BOUND, nor WILDCARD, nor FREE.");
}
}
stringBuffer.append("),");
}
stringBuffer.deleteCharAt(stringBuffer.length()-1);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("----> Kappa String= " + stringBuffer);
}
return stringBuffer.toString();
}
private static final void breakTies(List<AgentInvariant> agentInvariantList) {
// The SMILES algorithm is not explained well here...
// It doubles all ranks and reduces the value of the first node, which is tied, by one.
// The set is then treated as a new invariant set...
// But here biology differs from chemistry, we have site names which makes the problem harder in some ways
List<AgentInvariant> equivalantAgents = new ArrayList<AgentInvariant>();
int nodetoTieIndex = -1;
for (int i = 1; i < agentInvariantList.size(); i++) {
AgentInvariant agentInvariantCurrent = agentInvariantList.get(i);
if (nodetoTieIndex == -1) {
if (AgentInvariant.AGENT_INVARIANT_RANK_COMPARATOR.compare(agentInvariantCurrent, agentInvariantList.get(i-1)) == 0) {
// this node is the first node in a series of symmetrical Agents...
nodetoTieIndex = i-1;
equivalantAgents.add(agentInvariantList.get(i-1));
equivalantAgents.add(agentInvariantCurrent);
}
} else {
if (AgentInvariant.AGENT_INVARIANT_RANK_COMPARATOR.compare(agentInvariantCurrent, agentInvariantList.get(nodetoTieIndex)) == 0) {
equivalantAgents.add(agentInvariantCurrent);
} else {
break;
}
}
}
if (equivalantAgents.size() < 2) {
throw new RuntimeException("Unexpected number of equivalent Agents: " + equivalantAgents.size());
}
// Let's double the ranks...
for (int i = 0; i < agentInvariantList.size(); i++) {
agentInvariantList.get(i).doubleRankNew();
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("----> breakTies: Number of agents with equivalent ranks: " + equivalantAgents.size() + " -> " + getRanksAsString(equivalantAgents));
}
// we should differentiate them here!
int previousRank = equivalantAgents.get(0).getRankNew();
sortAndComputeRanks(equivalantAgents, AgentInvariant.AGENT_INVARIANT_NEIGHBOR_SITE_COMPARATOR);
// let's lower the rank of the first node by one...
equivalantAgents.get(0).setRankNew(previousRank -1);
for (int i = 1; i < equivalantAgents.size(); i++) {
equivalantAgents.get(i).setRankNew(previousRank);
}
if (LOGGER.isDebugEnabled()) {
if (equivalantAgents.get(0).getRankNew() == equivalantAgents.get(1).getRankNew()) {
LOGGER.debug("----> We have equivalent ranks while breaking ties: " + getRanksAsString(equivalantAgents));
} else {
LOGGER.debug("----> breakTies: Successfully differentiated: " + getRanksAsString(equivalantAgents));
}
}
}
private static final void saveRanks(final List<AgentInvariant> agentInvariantList) {
for (AgentInvariant agentInvariant : agentInvariantList) {
agentInvariant.saveRank();
}
}
private static final String getRanksAsString(final List<AgentInvariant> agentInvariantList) {
StringBuffer stringBuffer = new StringBuffer();
for (AgentInvariant agentInvariant : agentInvariantList) {
stringBuffer.append(agentInvariant.getRankNew() + "(" + agentInvariant.getProductOfNeighborPrimes() + ")-");
}
return stringBuffer.toString();
}
/**
* Returns true if there are equivalent ranks, false otherwise.
*
* @param agentInvariantList
* @param agentInvariantComparator
* @return
*/
private static final boolean sortAndComputeRanks(final List<AgentInvariant> agentInvariantList, final Comparator<AgentInvariant> agentInvariantComparator) {
Collections.sort(agentInvariantList, agentInvariantComparator);
boolean rankEquivalence = false;
int rankCounter = 1;
agentInvariantList.get(0).setRankTemp(rankCounter);
for (int i = 1; i < agentInvariantList.size(); i++) {
AgentInvariant agentInvariantPrevious = agentInvariantList.get(i-1);
AgentInvariant agentInvariantCurrent = agentInvariantList.get(i);
if (agentInvariantComparator.compare(agentInvariantPrevious, agentInvariantCurrent) == 0) {
rankEquivalence = true;
} else {
// they are not equal so let's increase the rank
rankCounter += agentInvariantPrevious.getNumberOfConnections();
}
agentInvariantCurrent.setRankTemp(rankCounter);
}
for (AgentInvariant agentInvariant : agentInvariantList) {
agentInvariant.setRankNew(agentInvariant.getRankTemp());
}
return rankEquivalence;
}
private static final void computeNeighbors(final List<AgentInvariant> agentInvariantList) {
Map<Agent, AgentInvariant> targetAgentAgentInvariantMap = new LinkedHashMap<Agent, AgentInvariant>(agentInvariantList.size());
for (AgentInvariant agentInvariant : agentInvariantList) {
targetAgentAgentInvariantMap.put(agentInvariant.getAgent(), agentInvariant);
}
for (AgentInvariant agentInvariant : agentInvariantList) {
agentInvariant.computeNeighbors(targetAgentAgentInvariantMap);
}
}
}