package statalign.postprocess.plugins.contree;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import statalign.postprocess.plugins.TreeNode;
import statalign.postprocess.plugins.contree.hash.HashEntry;
import statalign.postprocess.plugins.contree.hash.HashTable;
import statalign.postprocess.plugins.contree.hash.HashUtils;
/**
* The main thread for calculating consensus trees and networks.
* Note that networks is dependent on the initial tree...
*
* @author wood, eiriksson
*
*/
public class CTMain {
// Constants
/** The default double collision constant - the larger, the better. */
private final static int C = 1000;
/** The default resolution rate in percentage.
* 33.34 - splits must occur in over a third of the trees for it to ALLWAYS be possible to draw in 2D, no matter what the input TREES are.
* */
private final static double DEFAULT_RES_PERCENTAGE = 33.34;
// Variables
private int noOfSamples; // Number of samples.
private int noOfTaxa; // Number of taxa.
private double resPercentage; // The resolution rate percentage (0 < x <= 100).
private long seed; // The seed used in the random number generator.
private TaxaMap taxa; // HashMap containing mapping from taxon names -> indices
private HashUtils hashUtils; // Holds stuff concerning the hashing.
private HashTable hashTable; // The hash table.
private int noOfTrees; // Current number of trees.
private int majorityThreshold; // Declares in how many trees partitions need to appear to be a majority partition.
private double interestThreshold; // Declares in how many trees partitions need to appear to be of interest.
private LinkedList<HashEntry> partitions;// Holds the partitions of interest.
private int noOfPartitions; // Holds the current number of partitions.
private double[] leafEdgeLengths; // Array of leaf edge lengths
private TreeNode root; // The tree root of the input tree.
// Functions
/**
* Initialise the class
*/
public CTMain() {
// Initializes some defaults (can be changed via setters).
Random random = new Random();
this.resPercentage = DEFAULT_RES_PERCENTAGE;
this.seed = random.nextLong();
// Object initialisation
partitions = new LinkedList<HashEntry>();
}
/**
* Print some configuration information
*
*/
public void printConfig() {
System.out.printf("Number of taxa: %d - ", noOfTaxa);
System.out.printf("Number of trees: %d - ", noOfTrees);
System.out.printf("Resolution: %d\n", interestThreshold);
}
/**
* Initialise a CTMain before usage with a first tree, going through and setting up the hash table etc.
*
* @param root Root of the initial tree to initialise with
* @param noOfSamples Current number of samples taken
*
*/
public void initialize(TreeNode root, int noOfSamples) {
// Parameter initialisation
this.noOfSamples = noOfSamples;
noOfTrees = 0;
// TaxaMap initialisation
List<TreeNode> leaves = root.getLeaves();
noOfTaxa = leaves.size();
// Hash initialisation
hashUtils = new HashUtils();
hashUtils.initialize(noOfTaxa, noOfSamples, C, seed);
hashTable = new HashTable(hashUtils.m1);
//Taxamap initialisation
taxa = new TaxaMap(noOfTaxa);
for (int i = 0; i < leaves.size(); i++) {
taxa.put(leaves.get(i).name, i);
}
leafEdgeLengths = new double[noOfTaxa];
// Adds a single star partition, once and for all.
BitSet star = new BitSet(noOfTaxa);
star.flip(0, noOfTaxa);
HashEntry entry = new HashEntry(-1, star, 0.0d);
entry.count = noOfSamples + 1;
partitions.add(entry);
// Majority threshold initialisation
updateInterestThreshold();
}
/**
* Simply update our interest threshold
*/
// Update the Interest Threshold
public void updateInterestThreshold() {
interestThreshold = (double) ((double) noOfTrees * (resPercentage / 100.0d));
}
/**
* Create partitions from an input tree - recursive so will be called by many nodes, beginning with root but with calculations actually beginning on leaves.
*
* @param node Node in tree with above split to add to the hash table
*
*/
private BitSet createPartitions(TreeNode node) {
if (node.isLeaf()) { // Leaf node.
int index = taxa.get(node.name);
// leaf simply has it's keys stored in the hash utilities
node.addProperty("tableHashKey", hashUtils.a1[index]);
node.addProperty("bucketHashKey", hashUtils.a2[index]);
// Updates the edge length array.
leafEdgeLengths[index] += node.edgeLength;
// Create a new partition from scratch to represent it
BitSet partition = new BitSet(noOfTaxa);
partition.set(index);
assert partition.cardinality() == 1 : "There should be exactly a single bit set.";
return partition;
} else { // An internal node: Traverses the tree in post order.
BitSet partition = new BitSet(noOfTaxa);
// Get the node's partition representation from its children
List<TreeNode> children = node.children;
for (TreeNode child : children) {
partition.or(createPartitions(child));
}
// if this node is NOT to the right side of the root then add it... (Avoid adding splits twice for CNetworks)
if (node.parent != root || root.getRight() != node) {
noOfPartitions++;
long tableKey = 0;
long bucketKey = 0;
long tableKey2 = 0;
long bucketKey2 = 0;
// Calculate the hash keys for this partition
for (TreeNode child : children) {
tableKey += child.getIntProperty("tableHashKey");
bucketKey += child.getIntProperty("bucketHashKey");
}
// if first is one then we need to store the flipped version so we store each split in one representation only.
if (partition.get(0) == true) {
// copy to a new partition that is the flipped version
BitSet partitionF = new BitSet(noOfTaxa);
for(int l=0;l<noOfTaxa;l++){
if(partition.get(l)==false)partitionF.set(l);
}
// calculate the hash keys for the flipped partition...
for (int k=0;k<noOfTaxa;k++) {
if(partitionF.get(k)==true){
tableKey2 += hashUtils.a1[k];
bucketKey2 += hashUtils.a2[k];
}
}
// store the properties in the node
node.addProperty("tableHashKey", (int) (tableKey2 % hashUtils.m1));
node.addProperty("bucketHashKey", (int) (bucketKey2 % hashUtils.m2));
if (noOfPartitions < noOfTaxa - 2) { // Avoids the addition of the star partition
hashTable.put(partitionF, node.edgeLength,node.getIntProperty("tableHashKey"), node.getIntProperty("bucketHashKey"), interestThreshold, partitions);
}
// remember to still return the original partition with its appropriate keys...
node.addProperty("tableHashKey", (int) (tableKey % hashUtils.m1));
node.addProperty("bucketHashKey", (int) (bucketKey % hashUtils.m2));
return partition;
}
// if first is zero then simply add the partition, recursively calculating the hash
else{
node.addProperty("tableHashKey", (int) (tableKey % hashUtils.m1));
node.addProperty("bucketHashKey", (int) (bucketKey % hashUtils.m2));
if (noOfPartitions < noOfTaxa - 2) { // Avoids the addition of the star partition
hashTable.put(partition, node.edgeLength, node.getIntProperty("tableHashKey"),node.getIntProperty("bucketHashKey"), interestThreshold, partitions);
}
return partition;
}
}
// still return the partition even if node was not added as was right of root...
return partition;
}
}
/**
* Add a new tree to the hash table
*
* @param root Root of new tree to add
*
*/
public void addNewTree(TreeNode root) {
// Updates the number of trees and the threshold.
noOfTrees++;
updateInterestThreshold();
// Hashes the partitions of the trees.
noOfPartitions = 0;
this.root = root;
createPartitions(root);
}
/**
* Create partitions from an input tree - recursive so will be called by many nodes, beginning with root but with calculations actually beginning on leaves.
*
* @param partitions Partitions in the form of entries in the hash table
* @param curInterestPercentage Current percentage of interest that we want splits to occur above to view in the network later
*
*/
private ArrayList<Cluster> constructClusters(LinkedList<HashEntry> partitions, double curInterestPercentage) {
ArrayList<Cluster> clusters = new ArrayList<Cluster>();
// Thresholds for the current run of the cluster builder (c.f. the threshold for the partitions list).
double curInterestThreshold = (double) (noOfTrees * (curInterestPercentage / 100.0d));
int majInterestThreshold = (int) (noOfTrees * (50.0 / 100.0d));
for (Iterator<HashEntry> it = partitions.iterator(); it.hasNext();) {
HashEntry entry = it.next();
// Checks if this partition is still above threshold... partition.
// If not: remove it - O(1).
if ((double)entry.count <= curInterestThreshold) {
// correct the isMajority flag to now refer to majority not if of interest or not..
if (entry.count <= majInterestThreshold) {
entry.isMajority = false;
}
it.remove();
continue;
}
// Constructs clusters (list of TreeNode's for each set bit) from each partition.
if ((double)entry.count > curInterestThreshold) {
Cluster cluster = new Cluster();
if (entry.count > majInterestThreshold) {
cluster.isMajority = true;
}
cluster.aboveSplit = entry.partition;
cluster.noOfOccurrences = entry.count;
cluster.edgeLength = entry.edgeLengthsSum / entry.count;
for (int i = 0; i < entry.partition.size(); i++) {
if (entry.partition.get(i)) {
TreeNode node = new TreeNode(taxa.getName(i));
node.edgeLength = leafEdgeLengths[i] / noOfTrees;
cluster.add(node);
}
}
clusters.add(cluster);
}
}
// Sort by number of taxa.
// TODO: This might obviously be optimized a bit, e.g. with a PriorityQueue. - Eiriksson
Collections.sort(clusters);
return clusters;
}
/**
* Constructs the majorityTree. Used to make Network so remember to call this first!
*/
public CTree constructMajorityTree() {
int numOfNode = 0;
CTree tree = new CTree();
// Creates the clusters.
tree.clusters = constructClusters(partitions,resPercentage);
// Begins by constructing the star tree.
for (TreeNode node : tree.clusters.get(0)) {
TreeNode root = tree.getRoot(); // Retrieves the root,
node.parent = root; // Parent of this node -> root.
root.children.add(node); // Adds this node as the children of the root.
tree.nodeList.add(node); // Adds this node to the list of nodes.
assert tree.nodeList.get(0).name.equals("root");
tree.parentList.put(node.name, 0); // Adds "this node -> root" parent mapping.
}
// Constructs internal nodes for the rest of the majority bi-partitions and rewires them.
for (int z = 1; z < tree.clusters.size(); z++) {
Cluster cluster = tree.clusters.get(z);
// only take the majority ones....!
if(cluster.isMajority == true){
// 1. Retrieves the parent of the first node in this cluster.
TreeNode parent = tree.nodeList.get(tree.parentList.get(cluster.get(0).name));
// 2. Constructs a new internal node.
String nodeName = "int" + Integer.toString(numOfNode);
TreeNode internalNode = new TreeNode(nodeName);
internalNode.addProperty("noOfOccurrences", cluster.noOfOccurrences);
internalNode.edgeLength = cluster.edgeLength;
internalNode.parent = parent;
// 3. Insert the new node into the node list.
tree.nodeList.add(internalNode);
assert tree.nodeList.get(tree.nodeList.size() - 1).name.equals(internalNode.name);
tree.parentList.put(nodeName, tree.nodeList.size() - 1);
// update the clusters node references...
// (use a method of storing edges that referenced the positions in the Tree's nodelists for reference later.)
tree.clusters.get(z).nodeRefA = tree.parentList.get(cluster.get(0).name);
tree.clusters.get(z).nodeRefB = (tree.nodeList.size()-1);
for (TreeNode node : cluster) {
// 4. Makes this node the child of the new internal node.
node.parent = internalNode;
assert node.parent.name.equals(tree.nodeList.get(tree.nodeList.size() - 1).name);
tree.parentList.put(node.name, tree.nodeList.size() - 1);
internalNode.children.add(node);
// 5. Delete the moved node(s) from the parent's children.
// TODO: optimize? probably not.
for (int i = 0; i < parent.children.size(); i++) {
if (parent.children.get(i).name.equals(node.name)) {
parent.children.remove(i);
break;
}
}
}
// Wires up the internal node.
parent.children.add(internalNode);
numOfNode++;
}
}
return tree;
}
/**
* Constructs the network beginning with the consensus tree and then calls the network drawing function to calculate positions
*
* @param tree Consensus tree already calculated
*/
public CNetwork constructNetwork(CTree tree){
// Create a new network...
CNetwork network = new CNetwork(noOfTaxa);
// Let's copy over all of the splits from the tree into the network format.
// Until noted, the node lists in tree and network MUST match and the first lot of nodes (after the root) in the tree are assumed to be the taxa.
for(int i=0;i<tree.nodeList.size();i++){
TreeNode treeNode = tree.nodeList.get(i);
// create a new node
CNetworkNode networkNode = new CNetworkNode();
// copy over the required data and add it...
networkNode.Taxaname = treeNode.name;
network.nodes.add(networkNode);
}
// Now add in the splits... (ignore 1st cluster as is for star tree)
for(int i=1;i<tree.clusters.size();i++){
Cluster cluster = tree.clusters.get(i);
if (cluster.isMajority == true){
CNetworkSplit networkSplit = new CNetworkSplit();
CNetworkEdge networkEdge = new CNetworkEdge();
networkEdge.split = networkSplit;
networkEdge.networkNodeA = network.nodes.get(cluster.nodeRefA);
networkEdge.networkNodeB = network.nodes.get(cluster.nodeRefB);
networkSplit.edges.add(networkEdge);
// Maybe we shouldn't be copying this over... but it at least clarifies things in code.
networkSplit.edgelength = cluster.edgeLength;
networkSplit.noOfOccurences = cluster.noOfOccurrences;
networkSplit.split = cluster.aboveSplit;
network.nodes.get(cluster.nodeRefA).joins.add(networkEdge);
network.nodes.get(cluster.nodeRefB).joins.add(networkEdge);
network.splits.add(networkSplit);
cluster.added = true;
}
}
// Now add in the taxa length splits.... NOTE i=0 is root?! Tried and tested anyway...
for(int i=1;i<=noOfTaxa;i++){
CNetworkSplit networkSplit = new CNetworkSplit();
CNetworkEdge networkEdge = new CNetworkEdge();
networkEdge.split = networkSplit;
int taxonParent = tree.parentList.get(tree.nodeList.get(i).name);
networkEdge.networkNodeA = network.nodes.get(taxonParent);
networkEdge.networkNodeB = network.nodes.get(i);
networkSplit.edges.add(networkEdge);
networkSplit.edgelength = tree.nodeList.get(i).edgeLength;
networkSplit.noOfOccurences = noOfTrees;
BitSet trivialSplit = new BitSet(noOfTaxa);
trivialSplit.set(i-1);
networkSplit.split = trivialSplit;
network.nodes.get(taxonParent).joins.add(networkEdge);
network.nodes.get(i).joins.add(networkEdge);
network.splits.add(networkSplit);
}
// NOTE: From now on the indices on Network & Tree may no longer match!!
// Remove the root if it is simply disecting an edge (only 2 edges on either side).
// NOTE: root is assumed to be the first position in the node list in CNetwork and CTree
if(network.nodes.get(0).joins.size() == 2){
// find the nodes on either side of the root...
CNetworkNode[] redundant = new CNetworkNode[2];
for(int h=0;h<2;h++){
if(network.nodes.get(0).joins.get(h).networkNodeA == network.nodes.get(0)){
redundant[h] = network.nodes.get(0).joins.get(h).networkNodeB;
}
else{
redundant[h] = network.nodes.get(0).joins.get(h).networkNodeA;
}
}
// correct the edge in the one list to go to the zero node.
if(network.nodes.get(0).joins.get(1).networkNodeA == network.nodes.get(0)){
network.nodes.get(0).joins.get(1).networkNodeA = redundant[0];
}
else{
network.nodes.get(0).joins.get(1).networkNodeB = redundant[0];
}
// find the edge in the list in zero and remove it..
for(int h=0;h<redundant[0].joins.size();h++){
if(redundant[0].joins.get(h).networkNodeA == network.nodes.get(0) || redundant[0].joins.get(h).networkNodeB == network.nodes.get(0)){
redundant[0].joins.remove(h);
h--;
}
}
// remove the edge from the split list
for(int h=0;h<network.nodes.get(0).joins.get(0).split.edges.size();h++){
if(network.nodes.get(0).joins.get(0).split.edges.get(h) == network.nodes.get(0).joins.get(0)){
network.nodes.get(0).joins.get(0).split.edges.remove(h);
h--;
}
}
// add the other edge in the zero list
redundant[0].joins.add(network.nodes.get(0).joins.get(1));
// remove the node at last, and any reference to the removed edge also goes!
network.nodes.remove(0);
}
// TODO: Optional: sort the list in terms of frequency of occurrence to select only most frequent if less than 33.34% occurences....!
//IGNORE the first one as it is the star network...
//Now let's go through the remaining splits and try and add them into the network.
for(int i=0;i<tree.clusters.size();i++){
Cluster cluster = tree.clusters.get(i);
// for clusters that haven't been added yet, let's add them...
if (cluster.added == false){
//define new split and copy over info.
CNetworkSplit addSplit = new CNetworkSplit();
addSplit.split = cluster.aboveSplit;
addSplit.edgelength = cluster.edgeLength;
// now findout if is incompatible with any existing splits in the network
ArrayList<CNetworkSplit> incompatibleSplits = new ArrayList<CNetworkSplit>();
for(CNetworkSplit compSplit : network.splits){
if(compSplit.isCompatible(addSplit,noOfTaxa)==false){
incompatibleSplits.add(compSplit);
}
}
// Now we need to find the nodes we need to copy, so create a list of them
ArrayList<CNetworkNode> nodesToCopy = new ArrayList<CNetworkNode>();
// go through the incompatible splits one by one and add them to chains.
ArrayList<CNetworkPath> chains = new ArrayList<CNetworkPath>();
// note all nodes that will be stored in paths for more efficient path claering up
ArrayList<CNetworkNode> nodesToClearUp = new ArrayList<CNetworkNode>();
// Loop through incompatible splits
for(CNetworkSplit currentSplit : incompatibleSplits){
// Loop through all the edges in this split
for(CNetworkEdge currentEdge : currentSplit.edges){
// create a new path and add it to the list...
CNetworkPath newPath = new CNetworkPath();
newPath.pathOfEdges.add(currentEdge);
newPath.pathOfNodes.add(currentEdge.networkNodeA);
newPath.pathOfNodes.add(currentEdge.networkNodeB);
currentEdge.networkNodeA.AddToPaths(newPath,nodesToClearUp);
currentEdge.networkNodeB.AddToPaths(newPath,nodesToClearUp);
chains.add(newPath);
// Now extend any existing paths...
// create a list of paths to extend found paths with...
ArrayList<CNetworkPath> extendPathsWith = new ArrayList<CNetworkPath>();
extendPathsWith.add(newPath);
// Hack to go from NodeA & NodeB to a for loop...
CNetworkNode[] edgeNodes = new CNetworkNode[]{currentEdge.networkNodeA, currentEdge.networkNodeB};
// For the first node, add the standard
for(int l=0; l<=1;l++){
// get ready to make this part of the nodes with paths to clear up... (should be already done?)
if(edgeNodes[l].paths.isEmpty()==true){
nodesToClearUp.add(edgeNodes[l]);
}
// Look at each path that the node is involved in...
int pathCount = edgeNodes[l].paths.size();
for(int g= 0; g <pathCount; g++){
CNetworkPath currentPath = edgeNodes[l].paths.get(g);
// for the case that we may want to add the edge to the start/end of a path on the node...
if(currentPath.endNode()==edgeNodes[l] || currentPath.startNode()==edgeNodes[l]){
//check we haven't already added this split yet...
if (currentPath.checkForSplit(currentEdge)==false){
//create a new combined path with all paths to extend with...
//Don't forget to make this add the new path to all the nodes as well as combining over the correct node & removing one instance of it...
for(CNetworkPath pathToAdd : extendPathsWith){
chains.add(currentPath.combineWith(pathToAdd,edgeNodes[l]));
}
}
}
}
}
}
}
for(CNetworkNode nodeToClearUp : nodesToClearUp){
nodeToClearUp.paths.clear();
}
ArrayList<CNetworkEdge> incompatibleEdges = new ArrayList<CNetworkEdge>();
//Case that there are no incompatible splits... just add in the split!
// Note that SOME splits appear to contain the whole tree (perhaps due to a hack earlier), so lets just ignore these.... This could be described as a hack induced hack...
if(incompatibleSplits.isEmpty() == true && (addSplit.split.cardinality() != noOfTaxa && addSplit.split.cardinality() !=0)){
// we have no idea which taxon is actually on the side we want, but we will use some cool functions of BitSet to help reduce the time it takes...
CNetworkNode beginNode;
// Start searching from the adding side of the split containing zeros?
boolean zeroSide;
if (addSplit.split.cardinality() <= (noOfTaxa)/2){
// if there are more set to 0 than 1 go from a taxon on the 1 side:
// Search through the nodes for this using predefined function...
beginNode = TaxonRefToNode(addSplit.split.nextSetBit(0), network);
zeroSide = false;
}
else{
// if there are more set to 1 than 0, then go from a taxon on the 0 side:
beginNode = TaxonRefToNode(addSplit.split.nextClearBit(0), network);
zeroSide = true;
}
// recursively go along the edges until a node is found that has an edge that satisfies.
ArrayList<CNetworkNode> consideredNodes = new ArrayList<CNetworkNode>();
CNetworkNode CopyNode2 = beginNode.findFirstNonSubset(addSplit,consideredNodes, zeroSide,noOfTaxa);
nodesToCopy.add(CopyNode2);
}
else{
//Copy over the list of nodes from the longest path...ERROR if we have a too big path...
if(chains.size()>0){
// Store the max paths...
ArrayList<CNetworkPath> currentMaxPaths = new ArrayList<CNetworkPath>();
// initialise with first one, which only serves as to store length...:
currentMaxPaths.add(chains.get(0));
for(CNetworkPath chain : chains){
if(chain.pathOfNodes.size()>currentMaxPaths.get(0).pathOfNodes.size()){
currentMaxPaths.clear();
currentMaxPaths.add(chain);
}
// Yes, still add it if we already added it!
if(chain.pathOfNodes.size()==currentMaxPaths.get(0).pathOfNodes.size()){
currentMaxPaths.add(chain);
}
}
// now remove any max path that requires all edges to be copied or none at all...
for(int m=1;m<currentMaxPaths.size();m++){
if(currentMaxPaths.get(m).isSplitSide(addSplit, noOfTaxa)==false){
currentMaxPaths.remove(m);
m--;
}
}
if(currentMaxPaths.size()>2){
//ERROR!!! Now in >2d....!
System.out.println("Potential Error: Network now in > 2 dimensions....!");
}
// Now create the copyNode list:
// Store those that have been copied.... err
// Go through all the paths
// DOES START AT 1 as first just stored length!
for(int m=1;m<currentMaxPaths.size();m++){
incompatibleEdges.addAll(currentMaxPaths.get(m).pathOfEdges);
CNetworkPath currentMaxPath = currentMaxPaths.get(m);
// Go through all the nodes
for(CNetworkNode currentNodeCopy : currentMaxPath.pathOfNodes){
if(currentNodeCopy.consideration == false){
currentNodeCopy.consideration = true;
nodesToCopy.add(currentNodeCopy);
}
}
}
// remove the considered flag from the node:
for(CNetworkNode currentNodeClean : nodesToCopy){
currentNodeClean.consideration = false;
}
}
}
// actually call the function that copies the nodes
CopyNode(nodesToCopy,addSplit,network,incompatibleEdges);
// and add the new split at last to the network.
network.splits.add(addSplit);
}
}
// fill in the network positions (hopefully)...
network.FindPositions(noOfTaxa);
//printDetails(tree,network);
return network;
}
/**
* Copy the list of nodes that need copying when adding a new split into a network.
*
* @param nodesToCopy ArrayList of nodes in the network that need to be copied
* @param network Network we are working in
* @param incompatibleEdges Edges that are in the path that joins up the nodes that are being copied
* @param splitToAdd Split that we are currently adding in by copying these nodes
*
*/
public void CopyNode(ArrayList<CNetworkNode> nodesToCopy, CNetworkSplit splitToAdd, CNetwork network, ArrayList<CNetworkEdge> incompatibleEdges){
// Copy Node:
for(CNetworkNode nodeToCopy: nodesToCopy){
CNetworkNode newNode = new CNetworkNode();
// create a new node with a name that indicates it has been added (we can output node names by modifying later code)
newNode.Taxaname = "added0"+Integer.toString(network.nodes.size());
// below should NEVER happen... but will leave in just in case as was at one time an issue:
if (nodeToCopy == null || newNode == null){
System.out.println("Null pointer in terms of node that we want to copy to...");
}
// Reference the new node on the old one
nodeToCopy.CopiedToNode = newNode;
// Add new node to network
network.nodes.add(newNode);
// Add an edge between old and new node
CNetworkEdge edgeToAdd = new CNetworkEdge();
edgeToAdd.networkNodeA = newNode;
edgeToAdd.networkNodeB = nodeToCopy;
edgeToAdd.split = splitToAdd;
splitToAdd.edges.add(edgeToAdd);
}
// Go through and move links over depending on which side the splits are a subset of.
for(CNetworkNode nodeToCopy: nodesToCopy){
// look at each join on each old node
for(int i=0;i<nodeToCopy.joins.size();i++){
CNetworkEdge edgeToConsider = nodeToCopy.joins.get(i);
// if it joins another copied node then we will duplicate the edge later...
if(edgeToConsider.networkNodeA.CopiedToNode == null || edgeToConsider.networkNodeB.CopiedToNode == null){
//if on "0" side of new split then keep it, otherwise move it to the new node.
if(splitToAdd.OnZeroSide(edgeToConsider.split,noOfTaxa)==false){
// Replace the appropriate edge reference...
if(edgeToConsider.networkNodeA == nodeToCopy){
edgeToConsider.networkNodeA = nodeToCopy.CopiedToNode;
//System.out.println("moved link to "+edgeToConsider.networkNodeB.Taxaname+" from "+nodeToCopy.Taxaname+" to "+nodeToCopy.CopiedToNode.Taxaname);
}
else if(edgeToConsider.networkNodeB == nodeToCopy){
edgeToConsider.networkNodeB = nodeToCopy.CopiedToNode;
//System.out.println("moved link to "+edgeToConsider.networkNodeA.Taxaname+" from "+nodeToCopy.Taxaname+" to "+nodeToCopy.CopiedToNode.Taxaname);
}
// Add it to the new node list
nodeToCopy.CopiedToNode.joins.add(edgeToConsider);
// delete it from the list of the old nodes:
nodeToCopy.joins.remove(i);
// go back one in the list!
i--;
}
}
}
}
// Add in the edges between copied nodes to the old nodes to their node join lists now we have done all the moving about!
for(CNetworkEdge edgeToLink : splitToAdd.edges){
edgeToLink.networkNodeA.joins.add(edgeToLink);
edgeToLink.networkNodeB.joins.add(edgeToLink);
}
// Add in the edges to the copied nodes that are in the list of incompatible splits.
for(CNetworkEdge incompatibleEdge : incompatibleEdges){
// Check to see if we have an edge that is incompatible (this SHOULD always be the case... )
if(incompatibleEdge.networkNodeA.CopiedToNode !=null && incompatibleEdge.networkNodeB.CopiedToNode != null){
// Create new edge
CNetworkEdge newEdge = new CNetworkEdge();
// Update node info.
newEdge.networkNodeA = incompatibleEdge.networkNodeA.CopiedToNode;
newEdge.networkNodeB = incompatibleEdge.networkNodeB.CopiedToNode;
newEdge.networkNodeA.joins.add(newEdge);
newEdge.networkNodeB.joins.add(newEdge);
// Update split info.
newEdge.split = incompatibleEdge.split;
incompatibleEdge.split.edges.add(newEdge);
}
}
// Now remove the CopiedToNode link now we are done...
for(CNetworkNode nodeToCopy: nodesToCopy){
nodeToCopy.CopiedToNode = null;
}
}
/**
* Find the taxon node in the network.
*
* @param network The network to find the taxon in
* @param taxonRef Integer representation of taxon in the TaxaMap
*
*/
public CNetworkNode TaxonRefToNode(int taxonRef, CNetwork network){
//function to return the node that is of the taxon
for(CNetworkNode currentNode : network.nodes){
if(currentNode.Taxaname == taxa.getName(taxonRef)){
return currentNode;
}
}
// Notify that method failed... return null...
System.out.println("Error - taxon reference not found. Maybe duplicate names?");
return null;
}
public static String[] readTreesFromFile(String fileName, int n) throws IOException {
BufferedReader reader = new BufferedReader(new FileReader(fileName));
String[] trees = new String[n];
for (int i = 0; i < n; i++) {
trees[i] = reader.readLine();
}
return trees;
}
private static void printUsage() {
System.out.println("Usage: java CTMain <file containing trees> <no. of trees>");
}
public static void main(String[] args) throws IOException {
if (args.length != 2) {
printUsage();
System.exit(-1);
}
//CTMain main = new CTMain();
//String[] trees = readTreesFromFile(args[0], Integer.parseInt(args[1]));
//System.out.println(main.start(trees));
}
// Getters and setters
public double getResPercentage() {
return resPercentage;
}
public void setResPercentage(double resPercentage) {
this.resPercentage = resPercentage;
}
public long getSeed() {
return seed;
}
public void setSeed(long seed) {
this.seed = seed;
}
//THE following 3 functions are used for testing only....
/**
* FOR TESTING ONLY
* Function called to initialise the Network Tester hash table & rest of CTMain
*
* @param noOfTestTaxa Number of taxa
* @param noOfTestSamples number of trees input
*
*/
public void InitialiseNetworkTester(int noOfTestTaxa,int noOfTestSamples){
// TaxaMap initialisation
noOfTrees = 0;
noOfTaxa = noOfTestTaxa;
noOfSamples = noOfTestSamples;
// Hash initialisation
hashUtils = new HashUtils();
hashUtils.initialize(noOfTaxa, noOfSamples, C, seed);
hashTable = new HashTable(hashUtils.m1);
leafEdgeLengths = new double[noOfTaxa];
taxa = new TaxaMap(noOfTaxa);
for (int i = 0; i < noOfTaxa; i++) {
taxa.put(""+i, i);
leafEdgeLengths[i] = 1;
}
// Adds a single star partition, once and for all.
BitSet star = new BitSet(noOfTaxa);
star.flip(0, noOfTaxa);
HashEntry entry = new HashEntry(-1, star, 0.0d);
entry.count = noOfSamples+1;
partitions.add(entry);
updateInterestThreshold();
}
/**
* FOR TESTING ONLY
* Add a tree to the hash table
*
* @param splits Tree to add consisting of an arraylist of BitSet splits
*
*/
public void addTestTree(ArrayList<BitSet> splits){
noOfTrees++;
updateInterestThreshold();
// Hashes the partitions of the trees.
noOfPartitions = 0;
for(BitSet partition : splits){
addTestSplit(partition);
}
}
/**
* FOR TESTING ONLY
* Add a single split to the hash table
*
* @param partition BitSet split to add
*
*/
public void addTestSplit(BitSet partition){
long tableKey2 = 0;
long bucketKey2 = 0;
// always calculate the split directly
for (int k=0;k<noOfTaxa;k++) {
if(partition.get(k)==true){
tableKey2 += hashUtils.a1[k];
bucketKey2 += hashUtils.a2[k];
}
}
hashTable.put(partition, 1, (int) (tableKey2 % hashUtils.m1), (int) (bucketKey2 % hashUtils.m2), interestThreshold,partitions);
}
/**
* FOR TESTING ONLY
* Print details from the network - call after it has been constructed!
*
* @param tree Standard consensus tree
* @param network The network
*
*/
public void printDetails(CTree tree,CNetwork network){
// Print out the clusters:
System.out.println("ALL CLUSTERS:");
for(int i=0;i<tree.clusters.size();i++){
System.out.println(i+" "+tree.clusters.get(i).aboveSplit.toString()+" "+tree.clusters.get(i).noOfOccurrences);
}
// Print out the tree copied into network format:
System.out.println("TREE IN NETWORK, splits:");
for(int i=0;i<network.splits.size();i++){
System.out.println(i+" "+network.splits.get(i).split.toString()+" "+network.splits.get(i).noOfOccurences);
for(int j=0;j<network.splits.get(i).edges.size();j++){
System.out.println(j+" "+network.splits.get(i).edges.get(j).networkNodeA.Taxaname+" to "+network.splits.get(i).edges.get(j).networkNodeB.Taxaname);
}
}
System.out.println("The following comes from the network format...");
System.out.println("NODES:");
for (int i = 0; i< network.nodes.size();i++){
System.out.println(""+network.nodes.get(i).hashCode()+" "+network.nodes.get(i).Taxaname);
}
System.out.println("EDGES:");
for (int i = 0; i < network.splits.size(); i++){
for(int j=0; j<network.splits.get(i).edges.size();j++){
System.out.println(network.splits.get(i).split.toString()+": "
+network.splits.get(i).edges.get(j).networkNodeA.Taxaname+" TO "+
network.splits.get(i).edges.get(j).networkNodeB.Taxaname);
}
}
System.out.println("The following comes from the original tree...(!)");
for (int i = 1; i < tree.clusters.size(); i++) {
Cluster cluster2 = tree.clusters.get(i);
//if(cluster2.isMajority == false){
network.outputString = "1 ";
for(TreeNode node :cluster2){
network.outputString += " " + (taxa.get(node.name)+1);
}
network.outputString += ",";
System.out.println(network.outputString);
//}
}
//printing to a file
try {
network.PrintOut(1,noOfTaxa,taxa);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
//network in nex format
try {
network.PrintOut(2,noOfTaxa,taxa);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return;
}
}