CTMain.java example

Explorer
statalign-master
- src
package statalign.postprocess.plugins.contree;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;

import statalign.postprocess.plugins.TreeNode;
import statalign.postprocess.plugins.contree.hash.HashEntry;
import statalign.postprocess.plugins.contree.hash.HashTable;
import statalign.postprocess.plugins.contree.hash.HashUtils;
/**
 * The main class for calculating consensus trees and networks.
 * Note that the networks are dependent on the initial tree...
 * 
 * @author wood, eiriksson
 *
 */
public class CTMain {

    // Constants

    /**
     * The default double collision constant - the larger, the better.
     * Handed to HashUtils.initialize() when the hashing is set up in initialize().
     */
    private final static int C = 1000;

    /**
     * The default resolution rate in percent.
     * 33.34 - splits must occur in over a third of the trees for it to always be possible
     * to draw the network in 2D, no matter what the input trees are.
     */
    private final static double DEFAULT_RES_PERCENTAGE = 33.34; 


    // Variables
    private int noOfSamples;                // Number of samples, as passed to initialize().
    private int noOfTaxa;                   // Number of taxa (number of leaves of the tree given to initialize()).
    private double resPercentage;           // The resolution rate percentage (0 < x <= 100).
    private long seed;                      // The seed handed to HashUtils.initialize(); random unless set via setSeed().
    private TaxaMap taxa;                   // Mapping from taxon names -> indices (and back via getName()).

    private HashUtils hashUtils;            // Holds the hashing parameters (a1, a2, m1, m2 are read in this class).
    private HashTable hashTable;            // The hash table the partitions are put into.

    private int noOfTrees;                  // Number of trees added so far via addNewTree().
    private int majorityThreshold;          // Declares in how many trees partitions need to appear to be a majority partition.
                                            // NOTE(review): not read or written by the code visible in this part of the file.
    private double interestThreshold;          // noOfTrees * resPercentage / 100; see updateInterestThreshold().

    private LinkedList<HashEntry> partitions;// Holds the partitions of interest (seeded with the star partition in initialize()).
    private int noOfPartitions;             // Internal nodes processed so far for the tree currently being hashed; reset in addNewTree().

    private double[] leafEdgeLengths;		// Per taxon index: sum of the leaf edge lengths over all added trees.
    
    private TreeNode root; 	// Root of the tree most recently passed to addNewTree().

    // Functions
    /**
     * Creates an instance with the default resolution percentage, a freshly drawn
     * random seed and an empty list of partitions of interest. Both the resolution
     * and the seed can be changed afterwards through the setters.
     */
    public CTMain() {
        // Container for the partitions of interest.
        this.partitions = new LinkedList<HashEntry>();

        // Defaults.
        this.resPercentage = DEFAULT_RES_PERCENTAGE;
        this.seed = new Random().nextLong();
    }
    /**
     * Prints the number of taxa, the number of trees and the current interest
     * threshold (labelled "Resolution") to standard output on a single line.
     */
    public void printConfig() {
        System.out.printf("Number of taxa: %d - ", noOfTaxa);
        System.out.printf("Number of trees: %d - ", noOfTrees);
        // interestThreshold is a double: formatting it with "%d" throws
        // IllegalFormatConversionException, so a floating point conversion is used.
        System.out.printf("Resolution: %.2f\n", interestThreshold);
    }
    /**
     * Prepares this instance for use, taking the taxon set from a first tree.
     * Resets the tree counter, sets up the hashing helper and the hash table, maps
     * every leaf name to an index (in leaf order), allocates the leaf edge length
     * array, registers the star partition and recomputes the interest threshold.
     *
     * @param root Root of the initial tree to initialise with
     * @param noOfSamples Current number of samples taken
     */
    public void initialize(TreeNode root, int noOfSamples) {
        // Run parameters.
        this.noOfSamples = noOfSamples;
        this.noOfTrees = 0;

        // The leaves of the first tree define the taxa.
        final List<TreeNode> leafNodes = root.getLeaves();
        this.noOfTaxa = leafNodes.size();

        // Hashing helper and hash table.
        this.hashUtils = new HashUtils();
        this.hashUtils.initialize(this.noOfTaxa, noOfSamples, C, this.seed);
        this.hashTable = new HashTable(this.hashUtils.m1);

        // Taxon name -> index.
        this.taxa = new TaxaMap(this.noOfTaxa);
        int taxonIndex = 0;
        for (TreeNode leaf : leafNodes) {
            this.taxa.put(leaf.name, taxonIndex);
            taxonIndex++;
        }
        this.leafEdgeLengths = new double[this.noOfTaxa];

        // The star partition (every taxon bit set) is added once and for all; its
        // count is set to noOfSamples + 1.
        final BitSet allTaxa = new BitSet(this.noOfTaxa);
        allTaxa.set(0, this.noOfTaxa);
        final HashEntry starEntry = new HashEntry(-1, allTaxa, 0.0d);
        starEntry.count = noOfSamples + 1;
        this.partitions.add(starEntry);

        // Threshold for the (still empty) set of trees.
        updateInterestThreshold();
    }
    /**
     * Recomputes the interest threshold: the current number of trees scaled by
     * the resolution percentage.
     */
    public void updateInterestThreshold() {
        final double fraction = resPercentage / 100.0d;
        interestThreshold = noOfTrees * fraction;
    }
    /**
     * Recursively (post order) computes the set of taxa below a node and registers the
     * corresponding partition in the hash table.
     * <p>
     * Leaves store their hash keys (taken from hashUtils.a1/a2) as node properties and add
     * their edge length to leafEdgeLengths. Internal nodes OR together their children's
     * bit sets and derive their hash keys from the children's keys. A partition that has
     * bit 0 set is put into the hash table in its complemented form, so that each split is
     * stored in one representation only; the node itself keeps the keys of the
     * non-complemented partition because its parent sums them up.
     *
     * @param node Node in tree with above split to add to the hash table
     * @return bit set with one bit set per taxon index found below (or at) the node
     */
	private BitSet createPartitions(TreeNode node) { 
		if (node.isLeaf()) { // Leaf node. 
        	int index = taxa.get(node.name);
        	// A leaf simply uses the keys stored for its taxon in the hash utilities.
            node.addProperty("tableHashKey", hashUtils.a1[index]);
            node.addProperty("bucketHashKey", hashUtils.a2[index]);
            // Accumulates the edge length for this taxon (summed over all trees added so far).
            leafEdgeLengths[index] += node.edgeLength; 
            // Create a new partition from scratch to represent it
            BitSet partition = new BitSet(noOfTaxa); 
            partition.set(index);
            assert partition.cardinality() == 1 : "There should be exactly a single bit set."; 
            return partition;
		} else { // An internal node: Traverses the tree in post order. 
        	BitSet partition = new BitSet(noOfTaxa);
        	// Get the node's partition representation from its children
        	// (the recursive calls also set the children's hash key properties read below).
        	List<TreeNode> children = node.children;
        	for (TreeNode child : children) {
                partition.or(createPartitions(child)); 
        	}
        	// if this node is NOT to the right side of the root then add it... (Avoid adding splits twice for CNetworks)
        	// Only the node that is both a child of root and root's right child is skipped.
        	if (node.parent != root || root.getRight() != node) { 
                noOfPartitions++; 
                long tableKey = 0; 
                long bucketKey = 0; 
                long tableKey2 = 0; 
                long bucketKey2 = 0; 
                // Calculate the hash keys for this partition as the sum of the children's keys
                // (reduced modulo m1 / m2 further down).
                for (TreeNode child : children) { 
                    tableKey += child.getIntProperty("tableHashKey");
                    bucketKey += child.getIntProperty("bucketHashKey");
                } 
                // if first is one then we need to store the flipped version so we store each split in one representation only.
                if (partition.get(0) == true) { 
                    // copy to a new partition that is the flipped version
                    BitSet partitionF = new BitSet(noOfTaxa); 
                    for(int l=0;l<noOfTaxa;l++){ 
                    	if(partition.get(l)==false)partitionF.set(l); 
                    } 
                    // calculate the hash keys for the flipped partition directly from the per-taxon values...
                    for (int k=0;k<noOfTaxa;k++) { 
                        if(partitionF.get(k)==true){ 
                        	tableKey2 += hashUtils.a1[k]; 
                        	bucketKey2 += hashUtils.a2[k]; 
                        } 
                    } 
                    // store the flipped keys in the node temporarily, so they can be read back for the put below
                    node.addProperty("tableHashKey", (int) (tableKey2 % hashUtils.m1));  
                    node.addProperty("bucketHashKey", (int) (bucketKey2 % hashUtils.m2));
                    if (noOfPartitions < noOfTaxa - 2) { // Avoids the addition of the star partition 
                            hashTable.put(partitionF, node.edgeLength,node.getIntProperty("tableHashKey"), node.getIntProperty("bucketHashKey"), interestThreshold, partitions); 
                    } 
                    // remember to still return the original partition with its appropriate keys...
                    // (the properties are overwritten with the keys of the non-flipped partition)
                    node.addProperty("tableHashKey", (int) (tableKey % hashUtils.m1));  
                    node.addProperty("bucketHashKey", (int) (bucketKey % hashUtils.m2)); 
                    return partition; 
                } 
                // if first is zero then simply add the partition, recursively calculating the hash
                else{ 
                    node.addProperty("tableHashKey", (int) (tableKey % hashUtils.m1));  
                    node.addProperty("bucketHashKey", (int) (bucketKey % hashUtils.m2)); 
                    if (noOfPartitions < noOfTaxa - 2) { // Avoids the addition of the star partition 
                    	hashTable.put(partition, node.edgeLength, node.getIntProperty("tableHashKey"),node.getIntProperty("bucketHashKey"), interestThreshold, partitions); 
                    } 
                    return partition; 
                } 
        	} 
        	// still return the partition even if node was not added as was right of root...
        	// NOTE(review): no hash key properties are set on the node on this path.
        	return partition; 
		} 
	} 
	 /**
     * Add a new tree to the hash table
     * 
     * @param root Root of new tree to add
     *
     */
    public void addNewTree(TreeNode root) {
        // One more tree: the interest threshold depends on the tree count.
        noOfTrees += 1;
        updateInterestThreshold();

        // Remember the root (createPartitions compares nodes against it) and
        // restart the per-tree partition counter before hashing the tree.
        this.root = root;
        this.noOfPartitions = 0;
        createPartitions(root);
    }
    /**
     * Turns the partitions of interest into clusters.
     * Entries whose count is not above the current interest threshold are removed from
     * the given list; every remaining entry yields one cluster that holds a fresh leaf
     * node per set bit of its partition. The result is sorted using the clusters'
     * natural ordering.
     *
     * @param partitions Partitions in the form of entries in the hash table (pruned in place)
     * @param curInterestPercentage Current percentage of interest that we want splits to occur above to view in the network later
     * @return the sorted clusters built from the retained partitions
     */
    private ArrayList<Cluster> constructClusters(LinkedList<HashEntry> partitions, double curInterestPercentage) {
        // Thresholds for this run of the cluster builder, expressed as occurrence counts.
        final double interestCutoff = (double) (noOfTrees * (curInterestPercentage / 100.0d));
        final int majorityCutoff = (int) (noOfTrees * (50.0 / 100.0d));

        ArrayList<Cluster> result = new ArrayList<Cluster>();
        Iterator<HashEntry> cursor = partitions.iterator();
        while (cursor.hasNext()) {
            HashEntry entry = cursor.next();

            // Not above the interest threshold any more: drop it from the list.
            if ((double) entry.count <= interestCutoff) {
                if (entry.count <= majorityCutoff) {
                    // The flag now refers to majority, not to "of interest".
                    entry.isMajority = false;
                }
                cursor.remove();
                continue;
            }
            // Same second check as before; it can only fail for a NaN threshold.
            if (!((double) entry.count > interestCutoff)) {
                continue;
            }

            Cluster cluster = new Cluster();
            if (entry.count > majorityCutoff) {
                cluster.isMajority = true;
            }
            cluster.aboveSplit = entry.partition;
            cluster.noOfOccurrences = entry.count;
            cluster.edgeLength = entry.edgeLengthsSum / entry.count;

            // One new leaf node per taxon in the partition, carrying the mean leaf edge length.
            for (int bit = entry.partition.nextSetBit(0); bit >= 0; bit = entry.partition.nextSetBit(bit + 1)) {
                TreeNode leaf = new TreeNode(taxa.getName(bit));
                leaf.edgeLength = leafEdgeLengths[bit] / noOfTrees;
                cluster.add(leaf);
            }
            result.add(cluster);
        }

        // Sort by number of taxa.
        // TODO: This might obviously be optimized a bit, e.g. with a PriorityQueue. - Eiriksson
        Collections.sort(result);
        return result;
    }
    /**
     * Constructs the majority tree. The network construction reads the node references
     * stored on the clusters here, so remember to call this first!
     *
     * @return the tree built from the star cluster and all majority clusters
     */
    public CTree constructMajorityTree() {
        CTree tree = new CTree();
        // Creates the clusters.
        tree.clusters = constructClusters(partitions, resPercentage);

        // Star tree: every node of the first cluster hangs directly under the root.
        for (TreeNode leaf : tree.clusters.get(0)) {
            TreeNode treeRoot = tree.getRoot();
            leaf.parent = treeRoot;
            treeRoot.children.add(leaf);
            tree.nodeList.add(leaf);
            assert tree.nodeList.get(0).name.equals("root");
            tree.parentList.put(leaf.name, 0);          // "this node -> root" parent mapping.
        }

        // One internal node per remaining majority cluster; its members are rewired under it.
        int internalCount = 0;
        for (int idx = 1; idx < tree.clusters.size(); idx++) {
            Cluster cluster = tree.clusters.get(idx);
            if (!cluster.isMajority) {
                continue;
            }

            // 1. Current parent of the first member of the cluster.
            TreeNode parent = tree.nodeList.get(tree.parentList.get(cluster.get(0).name));

            // 2. The new internal node.
            String nodeName = "int" + internalCount;
            TreeNode internalNode = new TreeNode(nodeName);
            internalNode.addProperty("noOfOccurrences", cluster.noOfOccurrences);
            internalNode.edgeLength = cluster.edgeLength;
            internalNode.parent = parent;

            // 3. Register it at the end of the node list.
            tree.nodeList.add(internalNode);
            final int internalIndex = tree.nodeList.size() - 1;
            assert tree.nodeList.get(internalIndex).name.equals(internalNode.name);
            tree.parentList.put(nodeName, internalIndex);

            // Remember which node list positions the cluster's edge connects.
            cluster.nodeRefA = tree.parentList.get(cluster.get(0).name);
            cluster.nodeRefB = internalIndex;

            for (TreeNode member : cluster) {
                // 4. The member becomes a child of the new internal node.
                member.parent = internalNode;
                assert member.parent.name.equals(tree.nodeList.get(internalIndex).name);
                tree.parentList.put(member.name, internalIndex);
                internalNode.children.add(member);

                // 5. Detach the first same-named child from the old parent.
                for (Iterator<TreeNode> siblings = parent.children.iterator(); siblings.hasNext();) {
                    if (siblings.next().name.equals(member.name)) {
                        siblings.remove();
                        break;
                    }
                }
            }

            // Wires up the internal node.
            parent.children.add(internalNode);
            internalCount++;
        }
        return tree;
    }
    
    /**
     * Constructs the network beginning with the consensus tree and then calls the network drawing function to calculate positions.
     * <p>
     * Steps, in order: (1) one network node per tree node; (2) one split and edge per majority
     * cluster, using the node list positions stored on the cluster (nodeRefA / nodeRefB);
     * (3) one trivial split per taxon; (4) removal of the root node if it has exactly two
     * joins; (5) insertion of every cluster not yet marked as added, by duplicating nodes
     * through CopyNode; (6) network.FindPositions.
     * 
     * @param tree Consensus tree already calculated (its clusters' nodeRefA / nodeRefB are read here)
     * @return the network built from the tree and its clusters
     */
    public CNetwork constructNetwork(CTree tree){
    	// Create a new network...
    	CNetwork network = new CNetwork(noOfTaxa);
        // Let's copy over all of the splits from the tree into the network format.
    	// Until noted, the node lists in tree and network MUST match and the first lot of nodes (after the root) in the tree are assumed to be the taxa.
    	for(int i=0;i<tree.nodeList.size();i++){
    		TreeNode treeNode = tree.nodeList.get(i);
    		// create a new node
    		CNetworkNode networkNode = new CNetworkNode();
    		// copy over the required data and add it...
    		networkNode.Taxaname = treeNode.name;
    		network.nodes.add(networkNode);  		
    	}
    	// Now add in the splits... (ignore 1st cluster as is for star tree)
    	for(int i=1;i<tree.clusters.size();i++){
    		Cluster cluster = tree.clusters.get(i);
    		if (cluster.isMajority == true){
	    		CNetworkSplit networkSplit = new CNetworkSplit();
	    		CNetworkEdge networkEdge = new CNetworkEdge();
	    		networkEdge.split = networkSplit;
	    		networkEdge.networkNodeA = network.nodes.get(cluster.nodeRefA);
	    		networkEdge.networkNodeB = network.nodes.get(cluster.nodeRefB);
	    		networkSplit.edges.add(networkEdge);
	    		// Maybe we shouldn't be copying this over... but it at least clarifies things in code.
	    		networkSplit.edgelength = cluster.edgeLength;
	    		networkSplit.noOfOccurences = cluster.noOfOccurrences;
	    		networkSplit.split = cluster.aboveSplit;
	    		network.nodes.get(cluster.nodeRefA).joins.add(networkEdge);
	    		network.nodes.get(cluster.nodeRefB).joins.add(networkEdge);
	    		network.splits.add(networkSplit);
	    		cluster.added = true;
    		}
    	}
    	// Now add in the taxa length splits.... NOTE i=0 is root?!  Tried and tested anyway...
    	// Node list position i is treated as taxon index i-1 (see trivialSplit below).
    	for(int i=1;i<=noOfTaxa;i++){
    		CNetworkSplit networkSplit = new CNetworkSplit();
    		CNetworkEdge networkEdge = new CNetworkEdge();
    		networkEdge.split = networkSplit;
    		int taxonParent = tree.parentList.get(tree.nodeList.get(i).name);
    		networkEdge.networkNodeA = network.nodes.get(taxonParent);
    		networkEdge.networkNodeB = network.nodes.get(i);
    		networkSplit.edges.add(networkEdge);
    		networkSplit.edgelength = tree.nodeList.get(i).edgeLength;
    		networkSplit.noOfOccurences = noOfTrees;
    		BitSet trivialSplit = new BitSet(noOfTaxa);
    		trivialSplit.set(i-1);
    		networkSplit.split = trivialSplit;
    		network.nodes.get(taxonParent).joins.add(networkEdge);
    		network.nodes.get(i).joins.add(networkEdge);
    		network.splits.add(networkSplit);
    	}
    	// NOTE: From now on the indices on Network & Tree may no longer match!!
    	// Remove the root if it is simply dissecting an edge (only 2 edges on either side).
		// NOTE: root is assumed to be the first position in the node list in CNetwork and CTree
    	if(network.nodes.get(0).joins.size() == 2){
    		// find the nodes on either side of the root...
    		CNetworkNode[] redundant = new CNetworkNode[2];
    		for(int h=0;h<2;h++){
				if(network.nodes.get(0).joins.get(h).networkNodeA == network.nodes.get(0)){
					redundant[h] = network.nodes.get(0).joins.get(h).networkNodeB;
				}
				else{
					redundant[h] = network.nodes.get(0).joins.get(h).networkNodeA;
				}
    		}
    		// correct the root's second edge so that its root end points to the neighbour on the first edge.
    		if(network.nodes.get(0).joins.get(1).networkNodeA == network.nodes.get(0)){
    			network.nodes.get(0).joins.get(1).networkNodeA = redundant[0];
    		}
    		else{
    			network.nodes.get(0).joins.get(1).networkNodeB = redundant[0];
    		}
    		// find the edge(s) in that neighbour's join list that still touch the root and remove them..
    		for(int h=0;h<redundant[0].joins.size();h++){
    			if(redundant[0].joins.get(h).networkNodeA == network.nodes.get(0) || redundant[0].joins.get(h).networkNodeB == network.nodes.get(0)){
    				redundant[0].joins.remove(h);
    				h--;
    			}
    		}
    		// remove the root's first edge from the edge list of its split
    		for(int h=0;h<network.nodes.get(0).joins.get(0).split.edges.size();h++){
    			if(network.nodes.get(0).joins.get(0).split.edges.get(h) == network.nodes.get(0).joins.get(0)){
    				network.nodes.get(0).joins.get(0).split.edges.remove(h);
    				h--;
    			}
    		}
    		// add the root's second (re-pointed) edge to that neighbour's join list
    		redundant[0].joins.add(network.nodes.get(0).joins.get(1));
    		// remove the node at last, and any reference to the removed edge also goes!
    		network.nodes.remove(0);
    	}

    	// TODO: Optional: sort the list in terms of frequency of occurrence to select only most frequent if less than 33.34% occurrences....!
        //IGNORE the first one as it is the star network...
    	//Now let's go through the remaining splits and try and add them into the network.
    	// NOTE(review): the loop starts at index 0, so the first cluster is visited as well; the loop
    	// above that sets cluster.added starts at 1. Presumably it falls through to the else branch
    	// below because of the cardinality check - confirm.
    	for(int i=0;i<tree.clusters.size();i++){
    		Cluster cluster = tree.clusters.get(i);
    		// for clusters that haven't been added yet, let's add them...
    		if (cluster.added == false){
    			//define new split and copy over info.
	    		CNetworkSplit addSplit = new CNetworkSplit(); 
	    		addSplit.split = cluster.aboveSplit;
	    		addSplit.edgelength = cluster.edgeLength;
	    		// now find out if it is incompatible with any existing splits in the network
	    		ArrayList<CNetworkSplit> incompatibleSplits = new ArrayList<CNetworkSplit>();
	    		for(CNetworkSplit compSplit : network.splits){
		    		if(compSplit.isCompatible(addSplit,noOfTaxa)==false){
		    			incompatibleSplits.add(compSplit);
		    		} 			
	    		}
	    		// Now we need to find the nodes we need to copy, so create a list of them
	    		ArrayList<CNetworkNode> nodesToCopy = new ArrayList<CNetworkNode>();
    			// go through the incompatible splits one by one and add them to chains.
    			ArrayList<CNetworkPath> chains = new ArrayList<CNetworkPath>();
    			// note all nodes that will be stored in paths for more efficient path clearing up
    			ArrayList<CNetworkNode> nodesToClearUp = new ArrayList<CNetworkNode>();
    			// Loop through incompatible splits
    			for(CNetworkSplit currentSplit : incompatibleSplits){
    				// Loop through all the edges in this split
    				for(CNetworkEdge currentEdge : currentSplit.edges){
    					// create a new path and add it to the list...
						CNetworkPath newPath = new CNetworkPath();
						newPath.pathOfEdges.add(currentEdge);
						newPath.pathOfNodes.add(currentEdge.networkNodeA);
						newPath.pathOfNodes.add(currentEdge.networkNodeB);
						currentEdge.networkNodeA.AddToPaths(newPath,nodesToClearUp);
						currentEdge.networkNodeB.AddToPaths(newPath,nodesToClearUp);
						chains.add(newPath);
						// Now extend any existing paths...
    					// create a list of paths to extend found paths with...
    	    			ArrayList<CNetworkPath> extendPathsWith = new ArrayList<CNetworkPath>();
    	    			extendPathsWith.add(newPath);
    					// Hack to go from NodeA & NodeB to a for loop...
    					CNetworkNode[] edgeNodes = new CNetworkNode[]{currentEdge.networkNodeA, currentEdge.networkNodeB};
    					// For the first node, add the standard
    					 for(int l=0; l<=1;l++){
							// get ready to make this part of the nodes with paths to clear up... (should be already done?)
							if(edgeNodes[l].paths.isEmpty()==true){
								nodesToClearUp.add(edgeNodes[l]);
							}
    						// Look at each path that the node is involved in...
    						// (the count is taken once up front, so only the first pathCount entries are visited)
							int pathCount = edgeNodes[l].paths.size();
	    					for(int g= 0; g <pathCount; g++){
	    						 CNetworkPath currentPath = edgeNodes[l].paths.get(g);
	    						 // for the case that we may want to add the edge to the start/end of a path on the node...
								 if(currentPath.endNode()==edgeNodes[l] || currentPath.startNode()==edgeNodes[l]){
									 //check we haven't already added this split yet...
									 if (currentPath.checkForSplit(currentEdge)==false){
										 //create a new combined path with all paths to extend with...
										 //Don't forget to make this add the new path to all the nodes as well as combining over the correct node & removing one instance of it...
										for(CNetworkPath pathToAdd : extendPathsWith){
											chains.add(currentPath.combineWith(pathToAdd,edgeNodes[l]));
										}
									 }
								 }
	    					 }
    					 }
    				}
    			}
    			// the per-node path lists were only needed while building the chains
    			for(CNetworkNode nodeToClearUp : nodesToClearUp){
    				nodeToClearUp.paths.clear();
    			}
    			ArrayList<CNetworkEdge> incompatibleEdges = new ArrayList<CNetworkEdge>();
    			//Case that there are no incompatible splits... just add in the split!
    			// Note that SOME splits appear to contain the whole tree (perhaps due to a hack earlier), so lets just ignore these....  This could be described as a hack induced hack...
    			if(incompatibleSplits.isEmpty() == true && (addSplit.split.cardinality() != noOfTaxa && addSplit.split.cardinality() !=0)){
    				// we have no idea which taxon is actually on the side we want, but we will use some cool functions of BitSet to help reduce the time it takes...
    				CNetworkNode beginNode;
    				// Start searching from the adding side of the split containing zeros?
    				boolean zeroSide;    				
    				if (addSplit.split.cardinality() <= (noOfTaxa)/2){
    					// if there are more set to 0 than 1 go from a taxon on the 1 side:
    					// Search through the nodes for this using predefined function...
    					beginNode = TaxonRefToNode(addSplit.split.nextSetBit(0), network);
    					zeroSide = false;
    				}
    				else{
    					// if there are more set to 1 than 0, then go from a taxon on the 0 side:
    					beginNode = TaxonRefToNode(addSplit.split.nextClearBit(0), network);
    					zeroSide = true;
    				}
    				// recursively go along the edges until a node is found that has an edge that satisfies.
    				// NOTE(review): TaxonRefToNode can return null, which would make the next call throw.
    				ArrayList<CNetworkNode> consideredNodes = new ArrayList<CNetworkNode>();
    				CNetworkNode CopyNode2 = beginNode.findFirstNonSubset(addSplit,consideredNodes, zeroSide,noOfTaxa);
    				nodesToCopy.add(CopyNode2);
    			}
    			else{
    				//Copy over the list of nodes from the longest path...ERROR if we have a too big path...
    				if(chains.size()>0){
    					// Store the max paths...
    					ArrayList<CNetworkPath> currentMaxPaths = new ArrayList<CNetworkPath>();
    					// initialise with first one, which only serves as to store length...:
    					// (element 0 is a length marker; the real candidates start at index 1)
    							currentMaxPaths.add(chains.get(0));
	    				for(CNetworkPath chain : chains){
	    					if(chain.pathOfNodes.size()>currentMaxPaths.get(0).pathOfNodes.size()){
	    						currentMaxPaths.clear();
	    						currentMaxPaths.add(chain);
	    					}
	    					// Yes, still add it if we already added it!
	    					if(chain.pathOfNodes.size()==currentMaxPaths.get(0).pathOfNodes.size()){
	    						currentMaxPaths.add(chain);
	    					}
	    				}
	    				// now remove any max path that requires all edges to be copied or none at all...
	    				for(int m=1;m<currentMaxPaths.size();m++){
	    					if(currentMaxPaths.get(m).isSplitSide(addSplit, noOfTaxa)==false){
	    						currentMaxPaths.remove(m);
		    					m--;
	    					}
	    				}
	    				if(currentMaxPaths.size()>2){
	        				//ERROR!!! Now in >2d....!
	    					System.out.println("Potential Error: Network now in > 2 dimensions....!");
	        			}
	    				// Now create the copyNode list:
	    				// Store those that have been copied.... err
	    				// Go through all the paths
	    				// DOES START AT 1 as first just stored length!
	    				for(int m=1;m<currentMaxPaths.size();m++){
	    					incompatibleEdges.addAll(currentMaxPaths.get(m).pathOfEdges);
	    					CNetworkPath currentMaxPath = currentMaxPaths.get(m);
	    					// Go through all the nodes
	    					for(CNetworkNode currentNodeCopy : currentMaxPath.pathOfNodes){
	    						// the consideration flag keeps a node from being listed twice
	    						if(currentNodeCopy.consideration == false){
	    							currentNodeCopy.consideration = true;
	    							nodesToCopy.add(currentNodeCopy);
	    						}
	    					}
	    				}
	    				// remove the considered flag from the node:
	    				for(CNetworkNode currentNodeClean : nodesToCopy){
	    					currentNodeClean.consideration = false;
	    				}
    				}

    			}
    			// actually call the function that copies the nodes
    			CopyNode(nodesToCopy,addSplit,network,incompatibleEdges);
    			// and add the new split at last to the network.
    			network.splits.add(addSplit);
    		}
    	}
        // fill in the network positions (hopefully)...
        network.FindPositions(noOfTaxa);
        //printDetails(tree,network);
    	return network;
    }

    
    /**
     * Copy the list of nodes that need copying when adding a new split into a network.
     * <p>
     * For every node to copy a new node is created and joined to the original by an edge of
     * the new split. Edges of the original node that are not on the "0" side of the new
     * split (and do not lead to another copied node) are moved to the copy. Finally the
     * given incompatible edges that run between two copied nodes are duplicated between the
     * copies, and the temporary CopiedToNode links are cleared again.
     *  
     * @param nodesToCopy ArrayList of nodes in the network that need to be copied
     * @param splitToAdd Split that we are currently adding in by copying these nodes
     * @param network Network we are working in
     * @param incompatibleEdges Edges that are in the path that joins up the nodes that are being copied
     *
     */
    public void CopyNode(ArrayList<CNetworkNode> nodesToCopy, CNetworkSplit splitToAdd, CNetwork network, ArrayList<CNetworkEdge> incompatibleEdges){
    	// Copy Node:
    	for(CNetworkNode nodeToCopy: nodesToCopy){
    		CNetworkNode newNode = new CNetworkNode();
    		// create a new node with a name that indicates it has been added (we can output node names by modifying later code)
    		newNode.Taxaname = "added0"+Integer.toString(network.nodes.size());
    		// below should NEVER happen... but will leave in just in case as was at one time an issue:
    		// NOTE(review): this only prints; a null nodeToCopy still fails on the assignment below.
	    	if (nodeToCopy == null || newNode == null){
	    		System.out.println("Null pointer in terms of node that we want to copy to...");
	    	}
	    	// Reference the new node on the old one
    		nodeToCopy.CopiedToNode = newNode;
    		// Add new node to network
    		network.nodes.add(newNode);
    		// Add an edge between old and new node
    		// (it is only added to the nodes' join lists after the edge moving below)
    		CNetworkEdge edgeToAdd = new CNetworkEdge();
    		edgeToAdd.networkNodeA = newNode;
    		edgeToAdd.networkNodeB = nodeToCopy;
    		edgeToAdd.split = splitToAdd;
    		splitToAdd.edges.add(edgeToAdd);
    	}
    	// Go through and move links over depending on which side the splits are a subset of.
    	for(CNetworkNode nodeToCopy: nodesToCopy){
    		// look at each join on each old node
			for(int i=0;i<nodeToCopy.joins.size();i++){
    			CNetworkEdge edgeToConsider = nodeToCopy.joins.get(i);
    			// if it joins another copied node then we will duplicate the edge later...
    			if(edgeToConsider.networkNodeA.CopiedToNode == null || edgeToConsider.networkNodeB.CopiedToNode == null){
            		//if on "0" side of new split then keep it, otherwise move it to the new node.
    				if(splitToAdd.OnZeroSide(edgeToConsider.split,noOfTaxa)==false){
    					// Replace the appropriate edge reference...
    					if(edgeToConsider.networkNodeA == nodeToCopy){
    						edgeToConsider.networkNodeA = nodeToCopy.CopiedToNode;
    						//System.out.println("moved link to "+edgeToConsider.networkNodeB.Taxaname+" from "+nodeToCopy.Taxaname+" to "+nodeToCopy.CopiedToNode.Taxaname);
    					}
    					else if(edgeToConsider.networkNodeB == nodeToCopy){
    						edgeToConsider.networkNodeB = nodeToCopy.CopiedToNode;
    						//System.out.println("moved link to "+edgeToConsider.networkNodeA.Taxaname+" from "+nodeToCopy.Taxaname+" to "+nodeToCopy.CopiedToNode.Taxaname);
    					}
    					// Add it to the new node list
						nodeToCopy.CopiedToNode.joins.add(edgeToConsider);
    					// delete it from the list of the old nodes:
						nodeToCopy.joins.remove(i);
						// go back one in the list, as the following entries have shifted down!
						i--;
    				}
    			}
    		}
    	}
    	// Add in the edges between copied nodes to the old nodes to their node join lists now we have done all the moving about!
    	for(CNetworkEdge edgeToLink : splitToAdd.edges){
    		edgeToLink.networkNodeA.joins.add(edgeToLink);
    		edgeToLink.networkNodeB.joins.add(edgeToLink);
    	}
    	// Add in the edges to the copied nodes that are in the list of incompatible splits.
		for(CNetworkEdge incompatibleEdge : incompatibleEdges){
			// Check to see if we have an edge that is incompatible (this SHOULD always be the case... )
			// i.e. both of its end nodes have been copied
			if(incompatibleEdge.networkNodeA.CopiedToNode !=null && incompatibleEdge.networkNodeB.CopiedToNode != null){
				// Create new edge
				CNetworkEdge newEdge = new CNetworkEdge();
				// Update node info.
				newEdge.networkNodeA = incompatibleEdge.networkNodeA.CopiedToNode;
				newEdge.networkNodeB = incompatibleEdge.networkNodeB.CopiedToNode;
				newEdge.networkNodeA.joins.add(newEdge);
				newEdge.networkNodeB.joins.add(newEdge);
				// Update split info.
				newEdge.split = incompatibleEdge.split;
				incompatibleEdge.split.edges.add(newEdge);
			}
		}
    	// Now remove the CopiedToNode link now we are done...
    	for(CNetworkNode nodeToCopy: nodesToCopy){
    		nodeToCopy.CopiedToNode = null;
    	}
    	
    }
    /**
     * Finds the node of a network that carries a given taxon.
     * <p>
     * The taxon reference is translated to a name through the TaxaMap and the
     * network's nodes are scanned in order; the first node whose
     * {@code Taxaname} has the same content as that name is returned.
     * Names are compared by content, not by reference, so a match does not
     * depend on both names being the very same String object. A null
     * looked-up name still matches a node whose name is null, exactly as the
     * earlier identity-based comparison did.
     *
     * @param taxonRef Integer representation of taxon in the TaxaMap
     * @param network The network to find the taxon in
     * @return the first matching node, or {@code null} (after printing an
     *         error message to standard output) when no node matches
     */
    public CNetworkNode TaxonRefToNode(int taxonRef, CNetwork network){
        // The wanted name does not change while scanning, so look it up once.
        String wantedName = taxa.getName(taxonRef);
        for(CNetworkNode currentNode : network.nodes){
            String nodeName = currentNode.Taxaname;
            // Identity first (covers null == null), then content equality.
            if(nodeName == wantedName || (wantedName != null && wantedName.equals(nodeName))){
                return currentNode;
            }
        }
        // Notify that method failed... return null...
        System.out.println("Error - taxon reference not found.  Maybe duplicate names?");
        return null;
    }

    /**
     * Reads the first {@code n} lines of a file, one tree description per line.
     * <p>
     * The reader is always closed before this method returns, whether reading
     * succeeds or fails. If the file holds fewer than {@code n} lines, the
     * remaining array slots are {@code null} (this is what
     * {@link BufferedReader#readLine()} yields at end of file).
     *
     * @param fileName path of the file to read
     * @param n number of lines to read
     * @return an array of length {@code n} holding the lines in file order
     * @throws IOException if the file cannot be opened or read
     */
    public static String[] readTreesFromFile(String fileName, int n) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            String[] trees = new String[n];
            for (int i = 0; i < n; i++) {
                trees[i] = reader.readLine();
            }
            return trees;
        } finally {
            // Release the file handle even when readLine() throws.
            reader.close();
        }
    }

    /** Writes the one-line command-line usage summary to standard output. */
    private static void printUsage() {
        final String usage = "Usage: java CTMain <file containing trees> <no. of trees>";
        System.out.println(usage);
    }

    /**
     * Command-line entry point.
     * <p>
     * Expects exactly two arguments. With any other argument count the usage
     * summary is printed and the JVM exits with status -1. With two arguments
     * nothing is done at present: the code that would read the trees and run
     * the consensus calculation is commented out.
     *
     * @param args command-line arguments: tree file name and number of trees
     * @throws IOException declared for the (currently disabled) file reading
     */
    public static void main(String[] args) throws IOException {
        final int expectedArgCount = 2;
        if (args.length == expectedArgCount) {
            // Disabled driver code, kept for reference:
            //CTMain main = new CTMain();
            //String[] trees = readTreesFromFile(args[0], Integer.parseInt(args[1]));
            //System.out.println(main.start(trees));
            return;
        }

        printUsage();
        System.exit(-1);
    }

    // Getters and setters

    /**
     * Returns the resolution rate percentage currently stored in this object.
     *
     * @return the resolution rate percentage
     */
    public double getResPercentage() {
        return this.resPercentage;
    }

    /**
     * Stores a new resolution rate percentage.
     * <p>
     * The field's declaration documents the expected range as
     * {@code 0 < x <= 100}; this setter stores the value as given and does
     * not validate it.
     *
     * @param resPercentage the resolution rate percentage to store
     */
    public void setResPercentage(double resPercentage) {
        this.resPercentage = resPercentage;
    }

    /**
     * Returns the seed stored for the random number generator.
     *
     * @return the seed value
     */
    public long getSeed() {
        return this.seed;
    }

    /**
     * Stores the seed to be used for the random number generator.
     *
     * @param seed the seed value to store
     */
    public void setSeed(long seed) {
        this.seed = seed;
    }

    // The following 4 methods (InitialiseNetworkTester, addTestTree, addTestSplit, printDetails) are used for testing only.
    /**
     * FOR TESTING ONLY
     * Prepares this object for the network tester: resets the tree counter,
     * records the taxa/sample counts, builds the hashing helpers and hash
     * table, fills the TaxaMap with numeric names, and registers the star
     * partition.
     *
     * @param noOfTestTaxa Number of taxa
     * @param noOfTestSamples number of trees input
     *
     */
    public void InitialiseNetworkTester(int noOfTestTaxa,int noOfTestSamples){
        // Counters and sizes
        noOfTrees = 0;
        noOfTaxa = noOfTestTaxa;
        noOfSamples = noOfTestSamples;

        // Hashing helpers and the table sized from them
        hashUtils = new HashUtils();
        hashUtils.initialize(noOfTaxa, noOfSamples, C, seed);
        hashTable = new HashTable(hashUtils.m1);

        // Every taxon is named after its own index and gets a leaf edge length of 1.
        leafEdgeLengths = new double[noOfTaxa];
        taxa = new TaxaMap(noOfTaxa);
        int taxon = 0;
        while (taxon < noOfTaxa) {
            taxa.put(String.valueOf(taxon), taxon);
            leafEdgeLengths[taxon] = 1.0;
            taxon++;
        }

        // The star partition (all taxa bits set) is added once and for all,
        // with a count one higher than the number of samples.
        BitSet star = new BitSet(noOfTaxa);
        star.set(0, noOfTaxa);
        HashEntry starEntry = new HashEntry(-1, star, 0.0d);
        starEntry.count = noOfSamples + 1;
        partitions.add(starEntry);
        updateInterestThreshold();
    }
    /**
     * FOR TESTING ONLY
     * Registers one more tree: bumps the tree counter, refreshes the interest
     * threshold, resets the partition counter and then hashes each split of
     * the tree in list order.
     *
     * @param splits Tree to add consisting of an arraylist of BitSet splits
     *
     */
    public void addTestTree(ArrayList<BitSet> splits){
        noOfTrees += 1;
        updateInterestThreshold();

        noOfPartitions = 0;
        // Hash the partitions of the tree one by one.
        Iterator<BitSet> remaining = splits.iterator();
        while (remaining.hasNext()) {
            addTestSplit(remaining.next());
        }
    }
    /**
     * FOR TESTING ONLY
     * Hashes a single split into the hash table.
     * <p>
     * Both keys are always computed directly from the split: for every taxon
     * index below {@code noOfTaxa} whose bit is set, the matching entries of
     * {@code hashUtils.a1} and {@code hashUtils.a2} are summed, and the sums
     * are reduced modulo {@code hashUtils.m1} and {@code hashUtils.m2}.
     *
     * @param partition BitSet split to add
     *
     */
    public void addTestSplit(BitSet partition){
        long tableSum = 0;
        long bucketSum = 0;
        // Visit only the set bits, in ascending order, stopping at noOfTaxa.
        for (int taxon = partition.nextSetBit(0);
                taxon >= 0 && taxon < noOfTaxa;
                taxon = partition.nextSetBit(taxon + 1)) {
            tableSum += hashUtils.a1[taxon];
            bucketSum += hashUtils.a2[taxon];
        }

        int tableKey = (int) (tableSum % hashUtils.m1);
        int bucketKey = (int) (bucketSum % hashUtils.m2);
        hashTable.put(partition, 1, tableKey, bucketKey, interestThreshold, partitions);
    }
	
    /**
     * FOR TESTING ONLY
     * Dumps the consensus tree and the network to standard output and then
     * asks the network to print itself in two formats - call after the
     * network has been constructed!
     * <p>
     * Printed in order: every cluster of the tree, every split of the network
     * with its edges, every network node, every edge grouped by split, and
     * finally one line per tree cluster (skipping cluster 0) listing its taxa
     * as 1-based indices. That last line is also left in
     * {@code network.outputString}.
     *
     * @param tree Standard consensus tree
     * @param network The network
     *
     */
    public void printDetails(CTree tree,CNetwork network){
        // Clusters of the consensus tree.
        System.out.println("ALL CLUSTERS:");
        for (int c = 0; c < tree.clusters.size(); c++) {
            Cluster cluster = tree.clusters.get(c);
            System.out.println(c + " " + cluster.aboveSplit.toString() + " " + cluster.noOfOccurrences);
        }

        // The tree as copied into network format: each split followed by its edges.
        System.out.println("TREE IN NETWORK, splits:");
        for (int s = 0; s < network.splits.size(); s++) {
            System.out.println(s + " " + network.splits.get(s).split.toString() + " " + network.splits.get(s).noOfOccurences);
            for (int e = 0; e < network.splits.get(s).edges.size(); e++) {
                CNetworkEdge edge = network.splits.get(s).edges.get(e);
                System.out.println(e + " " + edge.networkNodeA.Taxaname + " to " + edge.networkNodeB.Taxaname);
            }
        }

        System.out.println("The following comes from the network format...");
        System.out.println("NODES:");
        for (CNetworkNode node : network.nodes) {
            System.out.println(node.hashCode() + " " + node.Taxaname);
        }

        System.out.println("EDGES:");
        for (int s = 0; s < network.splits.size(); s++) {
            for (CNetworkEdge edge : network.splits.get(s).edges) {
                System.out.println(network.splits.get(s).split.toString() + ": "
                        + edge.networkNodeA.Taxaname + " TO " + edge.networkNodeB.Taxaname);
            }
        }

        // One line per cluster, starting at index 1 (cluster 0 is skipped).
        System.out.println("The following comes from the original tree...(!)");
        for (int c = 1; c < tree.clusters.size(); c++) {
            StringBuilder line = new StringBuilder("1 ");
            for (TreeNode member : tree.clusters.get(c)) {
                // Taxon indices are printed 1-based.
                line.append(" ").append(taxa.get(member.name) + 1);
            }
            line.append(",");
            network.outputString = line.toString();
            System.out.println(network.outputString);
        }

        // Format 1 (printing to a file), then format 2 (network in nex format).
        // A failure in one format does not prevent the other from being attempted.
        for (int format = 1; format <= 2; format++) {
            try {
                network.PrintOut(format, noOfTaxa, taxa);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}