package OperonEvolutionInHalos;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedList;
/**
 * Computes statistics on predicted operons, looking for particular
 * cross-species changes.
 *
 * <p>Uses JCE data structures, minus the GUI - background-type tasks etc.
 *
 * <p>The active pipeline in {@link #main(String[])} imports the genomes, builds
 * the basic operons, wraps them in an {@code OperonSet}, builds the
 * phylogenetic distance mapping and the operon trajectories, and prints the
 * operonicity and variety of every trajectory that is not flagged
 * {@code AlwaysASingleGene}. The commented-out snippets are earlier or
 * alternative analyses kept for reference; they refer to the local variable
 * {@code BasicSet}, so that name is preserved.
 */
public class OperonStats extends LoadData {

    /**
     * Runs the operon statistics pipeline and prints the results to standard
     * output.
     *
     * @param args command-line arguments (not read)
     */
    public static void main(String[] args) {

        // ======= For Histograms - March 17, 2014 ==== //

        //dummy operon set
        //OperonSet OS = new OperonSet();

        // //data files
        // String HaloDistFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Phylogeny/cyano_gamma_halo_dist/halo.dist";
        // String GammaDistFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Phylogeny/cyano_gamma_halo_dist/gamma.dist";
        // String CyanoDistFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Phylogeny/cyano_gamma_halo_dist/cyano.dist";
        //
        // //data in appropriate format
        // PhyloHistData Halos = OS.BuildGenericDistanceMapping(HaloDistFile);
        // PhyloHistData Gamma = OS.BuildGenericDistanceMapping(GammaDistFile);
        // PhyloHistData Cyano = OS.BuildGenericDistanceMapping(CyanoDistFile);
        //
        // //export files
        // String HaloHist = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Phylogeny/HistogramData_Mar17/HaloHistData.txt";
        // String GammaHist = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Phylogeny/HistogramData_Mar17/GammaHistData.txt";
        // String CyanoHist = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Phylogeny/HistogramData_Mar17/CyanoHistData.txt";
        //
        // //constants
        // int bins = 100;
        //
        // //determine the largest of the maximum distances.
        // Double LargestMaxDist = Math.max(Halos.MaxDist, Gamma.MaxDist);
        // LargestMaxDist = Math.max(LargestMaxDist, Cyano.MaxDist);
        //
        // //export data as file
        // OS.PhyloHist(HaloHist, bins, LargestMaxDist, Halos.PhyDistHash);
        // OS.PhyloHist(GammaHist, bins, LargestMaxDist, Gamma.PhyDistHash);
        // OS.PhyloHist(CyanoHist, bins, LargestMaxDist, Cyano.PhyDistHash);

        // ======= Build Data Set =========== //

        // Load the genomic data, then create the basic operons.
        // NOTE(review): the meaning of the argument 50 is defined by
        // BasicOperons, which is not visible in this file - confirm there.
        ImportGenomes();
        BasicOperons(50);

        //ShowLocalOperonDuplications(50); //display duplication cases

        //String ContextSetFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/JCE/Current/CS_BasicOperons_NoSingleGenes.txt";
        //ExportOperonsAsContextSet(ContextSetFile,"BasicOperons",false); //Export set

        // Convert to an operon set for trajectory analysis.
        OperonSet BasicSet = new OperonSet(OS, "BasicOperons");
        BasicSet.BuildPhylogeneticDistanceMapping();

        // String HistDataFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Phylogeny/HistogramData_200.txt";
        // int bins = 200;
        // BasicSet.PhyloHist(HistDataFile, bins, BasicSet.MaxDist, BasicSet.PhyDistHash);

        BasicSet.BuildOperonTrajectories();

        // Report every trajectory except those flagged as always being a single gene.
        for (int trajectoryKey : BasicSet.Trajectories.keySet()) {
            OperonTrajectory trajectory = BasicSet.Trajectories.get(trajectoryKey);
            if (trajectory.AlwaysASingleGene) {
                continue;
            }
            String report = trajectoryKey
                    + " Operonicity: " + trajectory.Operonicity
                    + " Variety: " + trajectory.Variety;
            System.out.println(report);
        }

        //export single gene modifications counts
        // String ExportFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/Miscellaneous/ClearAIP_v2.txt";
        // BasicSet.ExportClearSingleGeneModifications(ExportFile);

        //Export a query set for context forest analysis (Feb 6, 2014)
        //String QuerySetFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/JCE/Current/QS_60Orgs_Op50.txt";
        //BasicSet.ExportQuerySet(QuerySetFile, BasicSet.Trajectories, 0.50, 1.1, 60);

        // //Export a query set for gene order analysis
        // String QuerySetFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/JCE/Current/QS_AdjacentGenePairs.txt";
        // BasicSet.ExportAdjacentGenePairs(QuerySetFile, BasicSet.Trajectories);

        //BasicSet.ExportQuerySet(QuerySetFile, Trajectories, MinOperonicity, MaxNovelty)
        // String QuerySetFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/QS/QS_Operonicity_0_90.txt";
        // BasicSet.ExportQuerySet(QuerySetFile, BasicSet.Trajectories, 0.90, 1.1);

        //export, excluding trajectories where the gene is always a singleton
        //String StatsTxt = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/OperonTrajectories/D50";
        //BasicSet.ExportByDifferentVariables(StatsTxt,false);

        // //determine agreement at this level
        // double margin = 0.10;
        // int NumAgree = 0;
        // for (Integer x : BasicSet.Trajectories.keySet()){
        // OperonTrajectory OT = BasicSet.Trajectories.get(x);
        // BasicSet.AddLowestLevelPhyloDisagreement(OT, margin, true);
        // if (OT.AgreesWithPhylogenyAtLowestLevel && !OT.AlwaysASingleGene){
        // NumAgree++;
        // } else {
        // if (!OT.AlwaysASingleGene){
        // System.out.println(OT.ClusterID + " disagrees with the phylogeny (ignore single genes).");
        // }
        // }
        // }
        // System.out.println(NumAgree + " agree in total.");

        // //new list
        // LinkedList<OperonTrajectory> Trajs = new LinkedList<OperonTrajectory>(BasicSet.Trajectories.values());
        // Collections.sort(Trajs, new OperonSet.SortbyEvoRate());
        //
        // for (OperonTrajectory OT : Trajs){
        // if (!OT.AlwaysASingleGene){
        // System.out.println(OT.ClusterID + "\t" + OT.EvoRate);
        // }
        // }

        // //determine level of agreement at various levels
        // LinkedHashMap<Double,Integer> AgreementCounts = new LinkedHashMap<Double,Integer>();
        // LinkedHashMap<Double,Integer> SingleAgreementCounts = new LinkedHashMap<Double,Integer>();
        //
        // for (int i = 0; i <= 19; i++){
        //
        // //re-set agreement counter
        // int NumAgree = 0;
        // int NumSingleAgree = 0;
        // int NumNonSingle = 0;
        //
        // //turn integer to double
        // double margin = 0.05* (double) i;
        // margin = (double)Math.round(margin * 100) / 100;
        //
        // //determine agreement at this level
        // for (Integer x : BasicSet.Trajectories.keySet()){
        // OperonTrajectory OT = BasicSet.Trajectories.get(x);
        // BasicSet.AddLowestLevelPhyloDisagreement(OT, margin, true); //added 3rd argument 5/2/2014
        // if (OT.AgreesWithPhylogenyAtLowestLevel && !OT.AlwaysASingleGene){
        // NumAgree++;
        // }
        // BasicSet.AddLowestLevelPhyloDisagreement(OT, margin, false);
        // if (OT.AgreesWithPhylogenyAtLowestLevel && ! OT.AlwaysASingleGene){
        // NumSingleAgree++;
        // }
        // if (!OT.AlwaysASingleGene){
        // NumNonSingle++;
        // }
        // }
        //
        // //store in hash
        // AgreementCounts.put(margin,NumAgree);
        // SingleAgreementCounts.put(margin, NumSingleAgree);
        //
        // //debugging
        // //System.out.println(NumNonSingle + " non-single");
        // }

        // //print
        // System.out.println("margin\tagree\tsingle_agree");
        // for (double d : AgreementCounts.keySet()){
        // System.out.println(d + "\t" + AgreementCounts.get(d) + "\t" + SingleAgreementCounts.get(d));
        // }

        //BasicSet.AddLowestLevelPhyloDisagreement(BasicSet.Trajectories.get(1500), 0.01);

        //Max Dist: 0.94212093 for Halococcus_hamelinensis,Haloquadratum_walsbyi

        // BistableParams BP = new BistableParams();
        // BP.MaxSameGrpPhyloDist = 99;
        // BP.MinDiffGrpPhyloDist = 0.0;
        // BP.MaxContentDiss = 0.1;
        // BP.MinOpSize = 2;
        // BP.MinGrpMemSize = 1;
        //
        // LinkedList<Integer> X = BasicSet.FindMultiStableOperonTopologies(BP);
        //
        // System.out.println(X.size() + " in total.");

        // Helpful output message marking the end of the run.
        System.out.println("All Processes Successfully Completed!");

        // ================================== //
        // ======= Temporarily Unused ======= //
        // ================================== //

        //Export sets

        //basic
        //String ContextSetFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/JCE/CS_BasicOperons.txt";
        //BasicSet.ExportTrajectoriesAsContextSet(ContextSetFile, true, BasicSet.Trajectories); //about ~30 min

        // Export Query Sets -> for use with analysis with JCE
        //String QuerySetFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/QS/QS_AllOperons.txt";
        //BasicSet.ExportQuerySet(QuerySetFile, BasicSet.Trajectories, 0.001, 1.1);

        // ======= Analyze Data Set =========== //

        //String StatsTxt = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/OperonTrajectories/D50";
        //BasicSet.ExportByDifferentVariables(StatsTxt,false);

        // //COD analysis
        // CODParameters COD = new CODParameters();
        // COD.RangeAroundOperon = 5000;
        // COD.RequireSameStrand = true;
        // LinkedHashMap<Integer, OperonTrajectory> COD_Hash = BasicSet.CreateCODSet(COD);
        //
        // //COD set file name
        // String ContextSetFileCODs = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/JCE/CS_CODs_5K_t.txt";
        // BasicSet.ExportTrajectoriesAsContextSet(ContextSetFileCODs, true, COD_Hash);

        //Debugging of file parsing
        //LinkedHashMap<String,Integer> ComparisonHash = BasicSet.BuildPhylogeneticDistanceMapping();

        // //print data - working!
        // for (String s : ComparisonHash.keySet()){
        // System.out.println(s + ": " + ComparisonHash.get(s));
        // }
        // System.out.println(ComparisonHash.size());

        // double MaxDist = -1.0;
        // LinkedList<String> Lmax = new LinkedList<String>();
        // for (LinkedList<String> L : BasicSet.PhyDistHash.keySet()){
        // double d = BasicSet.PhyDistHash.get(L);
        // if (d > MaxDist){
        // MaxDist = d;
        // Lmax = L;
        // }
        // }
        //
        // System.out.println("Max Dist: " + MaxDist + " for " + Lmax.get(0) + " , " + Lmax.get(1));

        //Monday, Jan 7, 2013
        // //Import a list of all clusters of interest in this investigation
        // String ClustersUsedFile = "/Users/phillipseitzer/Dropbox/OperonEvolutionInHalophiles/JCE/Current/QS_NonSingle.txt";
        // ImportClustersToInclude(ClustersUsedFile);
    }
}