package haloGUI;
import genomeObjects.AnnotatedGenome;
import genomeObjects.ContextSet;
import genomeObjects.ContextSetDescription;
import genomeObjects.OrganismSet;
//import importExport.FitxerDades;
import inicial.JContextExplorer;
//import inicial.Language;
import javax.swing.*;
import javax.swing.filechooser.FileFilter;
import javax.swing.text.BadLocationException;
import javax.swing.text.Style;
import javax.swing.text.StyleConstants;
import javax.swing.text.StyleContext;
import javax.swing.text.StyledDocument;
import contextViewer.DrawGene;
//import methods.Reagrupa;
//import moduls.frm.FrmPrincipalDesk;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.WindowEvent;
import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map.Entry;
import java.util.Iterator;
import java.awt.Window;
@SuppressWarnings("serial")
public class LoadGenomesPanelv2 extends JLayeredPane implements ActionListener,
PropertyChangeListener {
// parent frame: disposed when the user submits, and used as the owner of
// the AWT file dialogs opened by this panel
private StartFrame sf;
// created each time the "Load" button is pressed; its submit button is what
// actually triggers the genome import (see actionPerformed)
private GFFChecker gffc;
// GUI components
// section banners
private JLabel Genomes, GeneClusters;
// progress bars share a grid cell with the corresponding file-name field
private JProgressBar progressBar, progressBarClusters;
private JButton btnLoad, btnClusterLoad, btnSubmit;
// read-only fields showing the name of the loaded file (or a placeholder)
private JTextField GenomeWorkingSetFileName, ClusterFileName;
// banner captions
private String strGWS = " Genomic Working Set (required)";
private String strHC = " Homology Clusters (optional)";
// button captions
private String strLoad = "Load";
private String clusterLoad = "Load";
// placeholder shown in the file-name fields when nothing is loaded; the
// same text is reused when a load is cancelled
private String strNoFileLoaded = "No file currently loaded.";
private String strCancelled = strNoFileLoaded;
// info buttons placed at the right end of each banner
private JButton LoadInfo, ClusterInfo;
private Font HeaderFont = new Font("Arial", 1, 13);
// extra internal vertical padding (GridBagConstraints.ipady) for banners
private int HeaderPadding = 11;
// caption of the info buttons
private String strInfo = "???";
// Switches to determine operations able to be performed
private boolean LoadingGenomeFiles = false;
private boolean LoadingGeneClusters = false;
private boolean GenomeWorkingSetLoaded = false;
private boolean GeneClustersLoaded = false;
// checked by the Submit button before the main window is opened
private boolean ReadyToSubmit = false;
// read in files or directories
// true: the selection is a single genomic-working-set file;
// false: the selection is a directory of genome files
private boolean GenomesAsSingleFile = false;
// improperly-loaded switches
private boolean ClusterFileImproperlyLoaded = false;
private boolean GenomeWorkingSetFileImproperlyLoaded = false;
// Loaded Organism Set + corresponding information
private OrganismSet OS;
private int TotalOrganisms;
// feature-type lists handed to the OrganismSet and to every AnnotatedGenome
// during import; NOTE(review): not assigned in the visible part of the file
private LinkedList<String> IncludeTypes;
private LinkedList<String> DisplayOnlyTypes;
// loaded file names, with path
private String GenomeWorkingSetFile;
private String ClustersFile;
// directory the genome chooser was last showing; used as the starting
// directory of later file dialogs
private File ReferenceDirectory;
// loaded file names no path
private String GenomeWorkingSetFile_NoPath;
private String ClustersFile_NoPath;
// dummy labels for spacing columns
private JLabel d1, d2, d3, d4, d5;
// directory listing captured when a directory (rather than a file) is chosen
private File[] GenomeFiles;
// ----- Building this frame ----------------------------------//
// constructor
/**
 * Creates the panel: builds and lays out all widgets, shows the panel, and
 * remembers the enclosing start frame.
 *
 * @param startframe frame hosting this panel; it is disposed when the user
 *                   submits and is used as the owner of file dialogs
 */
public LoadGenomesPanelv2(StartFrame startframe) {
    // widgets first: building them does not need the parent reference
    getPanel();
    setVisible(true);
    sf = startframe;
}
// panel components
/**
 * Builds every widget of this panel and places it on a five-column
 * {@link GridBagLayout}.
 *
 * <p>Rows, top to bottom: genome banner (with its info button overlaid at the
 * right end), genome "Load" button plus file-name field, cluster banner (with
 * info button), cluster "Load" button plus file-name field, and the Submit
 * button. Each file-name field shares its grid cell with a hidden progress
 * bar. A single {@link GridBagConstraints} instance is reused and mutated
 * between {@code add} calls, so the order of the statements below matters.
 */
public void getPanel() {
    setLayout(new GridBagLayout());
    final GridBagConstraints c = new GridBagConstraints();
    int gridy = 0;

    // constraints that hold for the first row
    c.anchor = GridBagConstraints.FIRST_LINE_START;
    c.weightx = 1;
    c.insets = new Insets(6, 3, 3, 3);

    // row 0: one invisible single-cell label per column, to artificially
    // normalize the column widths
    c.gridy = gridy;
    c.gridheight = 1;
    c.gridwidth = 1;
    c.fill = GridBagConstraints.HORIZONTAL;
    d1 = new JLabel(" ");
    d2 = new JLabel(" ");
    d3 = new JLabel(" ");
    d4 = new JLabel(" ");
    d5 = new JLabel(" ");
    final JLabel[] spacers = { d1, d2, d3, d4, d5 };
    for (int col = 0; col < spacers.length; col++) {
        c.gridx = col;
        spacers[col].setBackground(Color.LIGHT_GRAY);
        spacers[col].setOpaque(false);
        add(spacers[col], c);
    }

    // row 0 (same cells as the spacers): genome section banner, all 5 columns
    c.gridx = 0;
    c.gridwidth = 5;
    c.ipady = HeaderPadding;
    Genomes = new JLabel(strGWS);
    Genomes.setBackground(Color.LIGHT_GRAY);
    Genomes.setOpaque(true);
    Genomes.setFont(HeaderFont);
    add(Genomes, c);

    // info button on top of the banner, anchored to its right end.
    // The fill is NONE: the previous code assigned
    // GridBagConstraints.REMAINDER here, which is a gridwidth/gridheight
    // constant that merely shares NONE's numeric value.
    c.anchor = GridBagConstraints.FIRST_LINE_END;
    c.gridx = 4;
    c.gridwidth = 1;
    c.ipady = 0;
    c.fill = GridBagConstraints.NONE;
    LoadInfo = new JButton(strInfo);
    LoadInfo.addActionListener(this);
    add(LoadInfo, c);
    moveToFront(LoadInfo);
    gridy++;

    // row 1, column 0: genome "Load" button
    c.insets = new Insets(3, 3, 3, 3);
    c.gridx = 0;
    c.gridy = gridy;
    c.gridwidth = 1;
    c.gridheight = 1;
    c.fill = GridBagConstraints.HORIZONTAL;
    btnLoad = new JButton(strLoad);
    btnLoad.addActionListener(this);
    add(btnLoad, c);

    // row 1, columns 1-4: genomic working set file name
    c.ipady = 5;
    c.gridx = 1;
    c.gridwidth = 4;
    GenomeWorkingSetFileName = new JTextField();
    GenomeWorkingSetFileName.setText(strNoFileLoaded); // No file loaded
    GenomeWorkingSetFileName.addActionListener(this);
    GenomeWorkingSetFileName.setEditable(false);
    add(GenomeWorkingSetFileName, c);

    // same cell as the file name: genome loading progress bar, hidden
    // until a load is running
    progressBar = new JProgressBar(0, 100);
    progressBar.setBorderPainted(false);
    progressBar.setStringPainted(false);
    progressBar.setValue(0);
    progressBar.setForeground(Color.BLUE);
    progressBar.setVisible(false);
    add(progressBar, c);
    gridy++;

    // row 2: gene clusters section banner, all 5 columns
    c.insets = new Insets(10, 3, 3, 3);
    c.gridx = 0;
    c.gridy = gridy;
    c.gridwidth = 5;
    c.gridheight = 1;
    c.ipady = HeaderPadding;
    c.fill = GridBagConstraints.HORIZONTAL;
    GeneClusters = new JLabel(strHC);
    GeneClusters.setBackground(Color.LIGHT_GRAY);
    GeneClusters.setOpaque(true);
    GeneClusters.setFont(HeaderFont);
    add(GeneClusters, c);

    // info button on top of the cluster banner (fill NONE, see above)
    c.anchor = GridBagConstraints.FIRST_LINE_END;
    c.gridx = 4;
    c.gridwidth = 1;
    c.ipady = 0;
    c.fill = GridBagConstraints.NONE;
    ClusterInfo = new JButton(strInfo);
    ClusterInfo.addActionListener(this);
    add(ClusterInfo, c);
    moveToFront(ClusterInfo);
    gridy++;

    // row 3, column 0: cluster "Load" button
    c.insets = new Insets(3, 3, 3, 3);
    c.gridx = 0;
    c.gridy = gridy;
    c.gridwidth = 1;
    c.gridheight = 1;
    c.fill = GridBagConstraints.HORIZONTAL;
    btnClusterLoad = new JButton(clusterLoad);
    btnClusterLoad.addActionListener(this);
    add(btnClusterLoad, c);

    // row 3, columns 1-4: cluster loading progress bar, hidden by default
    c.ipady = 5;
    c.gridx = 1;
    c.gridwidth = 4;
    progressBarClusters = new JProgressBar(0, 100);
    progressBarClusters.setStringPainted(false);
    progressBarClusters.setBorderPainted(false);
    progressBarClusters.setValue(0);
    progressBarClusters.setForeground(Color.BLUE);
    progressBarClusters.setVisible(false);
    add(progressBarClusters, c);

    // same cell as the progress bar: clusters file name
    ClusterFileName = new JTextField();
    ClusterFileName.setText(strNoFileLoaded); // No file loaded
    ClusterFileName.setEditable(false);
    add(ClusterFileName, c);
    gridy++;

    // row 4, middle column: Submit button at its natural size
    c.insets = new Insets(10, 3, 3, 3);
    c.gridx = 2;
    c.gridy = gridy;
    c.gridwidth = 1;
    c.gridheight = 1;
    c.ipady = 0;
    c.fill = GridBagConstraints.NONE;
    btnSubmit = new JButton("Submit");
    btnSubmit.addActionListener(this);
    add(btnSubmit, c);
}
// Instructions for launching program
/**
 * Builds the read-only help text explaining what a genomic working set is
 * and how to load one.
 *
 * @return a non-editable {@link JTextPane} whose styled document holds the
 *         instructions, with key terms rendered in bold
 */
public JTextPane getGenomicWorkingSetInfo() {
    final JTextPane infoPane = new JTextPane();
    infoPane.setEditable(false);

    // register the two styles used below: "regular" derives from the
    // default style, "bold" derives from "regular"
    final StyledDocument doc = infoPane.getStyledDocument();
    final Style base = StyleContext.getDefaultStyleContext().getStyle(
            StyleContext.DEFAULT_STYLE);
    final Style plainStyle = doc.addStyle("regular", base);
    StyleConstants.setFontFamily(base, "SansSerif");
    StyleConstants.setBold(doc.addStyle("bold", plainStyle), true);

    final String bold = "bold";
    final String plain = "regular";

    // the help text as an ordered list of { style name, text } runs
    final String[][] passages = {
        { bold, "Instructions:\n\n" },
        { plain, "A " },
        { bold, "Genomic Working Set" },
        { plain, " is a collection of annotated genomes. When performing searches in JContextExplorer, "
                + "JContextExplorer will query all genomes in the loaded genomic working set.\n\n" },
        { plain, "To load a genomic working set, push the \"load\" button below and either\n" },
        { bold, "(1) Select a directory containing individual annotated genome files\n" },
        { plain, "or\n" },
        { bold, "(2) Select a genomic working set file.\n\n" },
        { plain, "Individual annotated genomes should be formatted in " },
        { bold, "General Feature Format (or .GFF) [version 2]," },
        { plain, " a standard tab-delimited text file format. GFF files should have the file extension \".gff\". "
                + "\n\nEach line in the GFF file describes a single annotated feature, and is split into 9 columns."
                + " This program only reads in columns " },
        { bold, "1, 3, 4, 5, 7," },
        { plain, " and " },
        { bold, "9, " },
        { plain, "which contain the following information:\n" },
        { bold, "Column 1:" },
        { plain, " Sequence name\n" },
        { bold, "Column 3:" },
        { plain, " Feature Type\n" },
        { bold, "Column 4:" },
        { plain, " Feature Start Position\n" },
        { bold, "Column 5:" },
        { plain, " Feature End Position\n" },
        { bold, "Column 7:" },
        { plain, " Strand\n" },
        { bold, "Column 9:" },
        { plain, " Annotation\n\n" },
        { plain, "If you specify a directory of .GFF files,"
                + " JContextExplorer will name each genome according to the name of the file. "
                + "\n\nFor example," },
        { bold, "\n/SomeDirectory/CollectionOfGenomes/Organism1.gff" },
        { plain, "\nwill be named " },
        { bold, "\nOrganism1" },
        { plain, ".\nPlease avoid names containing "
                + "white spaces (instead, use underscores).\n\n" },
        { plain, "Instead of specifying a directory of .gff files, you may specify a single"
                + " genomic working set file. This file must be a 1 or 2-column tab-delimited text file. In the first column,"
                + " please specify the file path to all annotated genome files you would like to include in your genomic working set."
                + " If you do not include a second column, each genome will be named according to the name of the file."
                + " The optional second column consists of a customized name for each genome.\n\n" },
        { bold, "WARNING!\n\n" },
        { plain, "When specifying file paths of individual genome files, please be sure to either specify " },
        { bold, "(1) The absolute path" },
        { plain, " or " },
        { bold, " (2) The path relative to the directory from which JContextExplorer was launched." },
        { plain, " JContextExplorer will be unable to import files if the file paths are not correctly specified.\n\n" },
        { plain, "For additional help and examples, please consult the JContextExplorer manual." },
    };

    // append each run at the current end of the document
    try {
        for (String[] passage : passages) {
            doc.insertString(doc.getLength(), passage[1],
                    doc.getStyle(passage[0]));
        }
    } catch (BadLocationException e) {
        e.printStackTrace();
    }
    return infoPane;
}
// Instructions for cluster file
/**
 * Builds the read-only help text explaining homology clusters and the five
 * accepted line formats of a homology-cluster file.
 *
 * <p>Compared with the earlier wording this fixes several mistakes in the
 * displayed text: the misspelling "JContextExpolorer", the doubled word
 * "contains contains", "it's" used as a possessive, the second column of
 * the two-column format being labelled "Column 3:", "Feature Stop Position"
 * (the column is called "Feature End Position"), and a stray capital in
 * "line, This entry".
 *
 * @return a non-editable {@link JTextPane} whose styled document holds the
 *         instructions, with key terms rendered in bold
 */
public JTextPane getClusterInfo() {
    final JTextPane infoPane = new JTextPane();
    infoPane.setEditable(false);

    // register the two styles used below: "regular" derives from the
    // default style, "bold" derives from "regular"
    final StyledDocument doc = infoPane.getStyledDocument();
    final Style base = StyleContext.getDefaultStyleContext().getStyle(
            StyleContext.DEFAULT_STYLE);
    final Style plainStyle = doc.addStyle("regular", base);
    StyleConstants.setFontFamily(base, "SansSerif");
    StyleConstants.setBold(doc.addStyle("bold", plainStyle), true);

    final String bold = "bold";
    final String plain = "regular";

    // the help text as an ordered list of { style name, text } runs
    final String[][] passages = {
        { bold, "Instructions:\n\n" },
        { plain, "Within a single genomic working set, certain annotated features may be "
                + "homologous to one another. This may occur both within a single species and across multiple species. "
                + "A group of homologous features is often referred to as a " },
        { bold, "Homologous Gene Cluster" },
        { plain, ", or simply a " },
        { bold, "Homology Cluster" },
        { plain, ". Numerous methods exist to detect homology across and within genomes,"
                + " and to cluster annotated features in a set of genomes into homology cluster groups. Often, but "
                + "not necessarily, these homology cluster groups are " },
        { bold, "non-overlapping" },
        { plain, ". That is, each annotated feature may belong to a maximum of one homology cluster.\n\n" },
        { bold, "For all homology cluster-associated processes, JContextExplorer assumes non-overlapping homology clusters" },
        { plain, ".\n\n" },
        { plain, "When JContextExplorer searches for annotated features in a genomic working set, "
                + "it may do so either by\n" },
        { bold, "(1) Matching a textual query to individual genomic feature annotations" },
        { plain, "\nor\n" },
        { bold, "(2) Matching a homology cluster ID number.\n\n" },
        { plain, "Textual annotations may be unreliable (especially if a "
                + "genomic working set contains genomes annotated by different groups), so it may be worthwhile to compute homology clusters"
                + " and load these computed homology clusters into JContextExplorer." },
        { bold, "\n\nWARNING!\n\n" },
        { plain, "JContextExplorer cannot compute homology clusters from a set of sequenced genomes, only search a set"
                + " of pre-computed, loaded homology clusters.\n\n" },
        { bold, "To load a set of pre-computed homology clusters" },
        { plain, ", click the " },
        { bold, "load" },
        { plain, " button below the banner, and select the appropriate file. Homology clusters may be defined according to gene name "
                + "or precise feature coordinates." },
        { plain, " All files must be tab-delimited, and " },
        { bold, "each line in the file "
                + "describes an individual feature - homology group relationship." },
        { plain, " Depending on the number of columns provided, each line is parsed differently. "
                + "Lines in the file that do not "
                + "follow the specifications described below will be ignored." },
        { bold, "\n\nThere are 5 acceptable line formats:\n\n" },

        // (1) five columns: exact coordinates
        { bold, "(1) Five-Column Format\n" },
        { plain, "If there are 5 tab-delimited entries in the line, entries take on the following values:\n" },
        { bold, "Column 1:" },
        { plain, " Genome Name\n" },
        { bold, "Column 2:" },
        { plain, " Sequence Name\n" },
        { bold, "Column 3:" },
        { plain, " Feature Start Position\n" },
        { bold, "Column 4:" },
        { plain, " Feature End Position\n" },
        { bold, "Column 5:" },
        { plain, " Homology Cluster ID Number\n\n" },
        { plain, "If a feature starts at " },
        { bold, "Feature Start Position" },
        { plain, " and stops at " },
        { bold, "Feature End Position" },
        { plain, ", on the sequence named " },
        { bold, "Sequence Name" },
        { plain, ", in the genome named " },
        { bold, "Genome Name" },
        { plain, ", this feature is assigned the provided " },
        { bold, "Homology Cluster ID Number" },
        { plain, ".\n\n" },

        // (2) four columns: genome + sequence + annotation key
        { bold, "(2) Four-Column Format\n" },
        { plain, "If there are 4 tab-delimited entries in the line, entries take on the following values:\n" },
        { bold, "Column 1:" },
        { plain, " Genome Name\n" },
        { bold, "Column 2:" },
        { plain, " Sequence Name\n" },
        { bold, "Column 3:" },
        { plain, " Annotation Key\n" },
        { bold, "Column 4:" },
        { plain, " Homology Cluster ID Number\n\n" },
        { plain, "If a feature contains the string " },
        { bold, "Annotation Key" },
        { plain, " in its annotation, and is found on the sequence named " },
        { bold, "Sequence Name" },
        { plain, " in the genome named " },
        { bold, "Genome Name" },
        { plain, ", this feature is assigned the provided " },
        { bold, "Homology Cluster ID Number" },
        { plain, ".\n\nIn the Annotation Key field, please use underscores instead of spaces.\n\n" },

        // (3) three columns: genome + annotation key
        { bold, "(3) Three-Column Format\n" },
        { plain, "If there are 3 tab-delimited entries in the line, entries take on the following values:\n" },
        { bold, "Column 1:" },
        { plain, " Genome Name\n" },
        { bold, "Column 2:" },
        { plain, " Annotation Key\n" },
        { bold, "Column 3:" },
        { plain, " Homology Cluster ID Number\n\n" },
        { plain, "This format is identical to Four-column format, however does not check for agreement in the sequence name.\n\n" },

        // (4) two columns: annotation key + cluster id
        { bold, "(4) Two-Column Format\n" },
        { plain, "If there are 2 tab-delimited entries in the line, entries take on the following values:\n" },
        { bold, "Column 1:" },
        { plain, " Annotation Key\n" },
        { bold, "Column 2:" },
        { plain, " Homology Cluster ID Number\n\n" },
        { plain, "All features in all genomes in the genomic working set with an annotation that"
                + " contains the " },
        { bold, "Annotation Key" },
        { plain, " are assigned the provided " },
        { bold, "Homology Cluster ID Number" },
        { plain, ".\n\n" },

        // (5) single column: annotation key only
        { bold, "(5) Single Column Format\n" },
        { plain, "If there is only a single entry in the line, this entry is taken to be the " },
        { bold, "Annotation Key" },
        { plain, ". All annotated features that contain the annotation key are given a homology cluster ID number,"
                + " which is determined by the line number in the file.\n\n" },
        { plain, "For additional help and examples, please consult the JContextExplorer manual." },
    };

    // append each run at the current end of the document
    try {
        for (String[] passage : passages) {
            doc.insertString(doc.getLength(), passage[1],
                    doc.getStyle(passage[0]));
        }
    } catch (BadLocationException e) {
        e.printStackTrace();
    }
    return infoPane;
}
// All Actions
/**
 * Dispatches every button press of this panel, plus the submit button of
 * the {@link GFFChecker} created when "Load" is pressed.
 *
 * <ul>
 * <li>info buttons open an {@code InfoFrame} with the matching help text;</li>
 * <li>"Load" creates a new {@code GFFChecker};</li>
 * <li>the checker's submit button asks for a genome directory/file and
 * starts a {@code LoadGenomesWorker};</li>
 * <li>the cluster "Load" button asks for a cluster file and starts a
 * {@code LoadClustersWorker} (only once a working set is loaded);</li>
 * <li>"Submit" closes the start frame and opens the main window when a
 * working set is ready.</li>
 * </ul>
 *
 * @param evt the action event; its source decides which branch runs
 */
@Override
public void actionPerformed(ActionEvent evt) {
    final Object source = evt.getSource();

    // retrieve info
    if (source.equals(LoadInfo)) {
        new InfoFrame(getGenomicWorkingSetInfo(),
                "Genomic Working Set Info", this, -170);
    }
    if (source.equals(ClusterInfo)) {
        new InfoFrame(getClusterInfo(), "Homology Clusters Info", this, 70);
    }

    // specify GFF file format details
    if (source.equals(btnLoad)) {
        gffc = new GFFChecker(this);
    }

    // load genome files, after determining GFF file format stuff.
    // Until "Load" has been pressed there is no GFFChecker and therefore no
    // submit button to match. This used to be detected through a
    // NullPointerException, which wiped the load state on every click.
    if (gffc != null) {
        boolean backToSquareOne = false;
        try {
            if (source.equals(gffc.getBtnSubmit())) {
                // set switches to appropriate state
                LoadingGenomeFiles = true;
                GenomeWorkingSetLoaded = false;
                LoadingGeneClusters = false;
                ReadyToSubmit = false;
                GeneClustersLoaded = false;

                // reset clusters
                progressBarClusters.setValue(0);
                progressBarClusters.setStringPainted(false);

                String fileName = this.getGenomes();
                if (fileName == null) {
                    // nothing was selected
                    backToSquareOne = true;
                } else if (!fileName.equals(GenomeWorkingSetFile)) {
                    // a different selection: import it in the background
                    ClusterFileName.setText(strNoFileLoaded);
                    LoadGenomesWorker lg = new LoadGenomesWorker(fileName);
                    lg.addPropertyChangeListener(this);
                    lg.execute();
                } else {
                    // same selection as the one already loaded
                    GenomeWorkingSetLoaded = true;
                    ReadyToSubmit = true;
                }
            }
        } catch (Exception ex) {
            // any failure while selecting or starting the import is treated
            // like a cancelled selection
            backToSquareOne = true;
        }

        if (backToSquareOne) {
            // set everything back to zero
            progressBar.setValue(0);
            progressBar.setStringPainted(false);
            GenomeWorkingSetFileName.setText(strCancelled);
            ClusterFileName.setText(strCancelled);

            // turn everything off - back to square one
            LoadingGenomeFiles = false;
            GenomeWorkingSetLoaded = false;
            LoadingGeneClusters = false;
            ReadyToSubmit = false;
            GeneClustersLoaded = false;
            GenomeWorkingSetFile = null;
            ClustersFile = null;
        }
    }

    // load clusters file
    if (source.equals(btnClusterLoad)) {
        LoadingGenomeFiles = false;
        LoadingGeneClusters = true;

        String clusterfileName;
        if (GenomeWorkingSetLoaded) {
            clusterfileName = getClustersFile();
        } else {
            JOptionPane
                    .showMessageDialog(
                            null,
                            "You must load a genomic working set before loading pre-computed gene clusters.",
                            "No Valid Genomic Working Set Loaded",
                            JOptionPane.ERROR_MESSAGE);
            clusterfileName = null;
        }

        if (clusterfileName != null) {
            // only reload when a different file was chosen
            if (!clusterfileName.equals(ClustersFile)) {
                LoadClustersWorker lc = new LoadClustersWorker(
                        clusterfileName);
                lc.addPropertyChangeListener(this);
                lc.execute();
            }
        } else {
            // no cluster file: clear all cluster-related state
            progressBarClusters.setValue(0);
            progressBarClusters.setStringPainted(false);
            // NOTE(review): LoadingGeneClusters stays true here; this line
            // may have been meant to clear that flag instead - confirm.
            LoadingGenomeFiles = false;
            if (OS != null) {
                OS.setGeneClustersLoaded(false);
            }
            ClusterFileName.setText(strCancelled);
            ClustersFile = null;
            GeneClustersLoaded = false;
        }
    }

    if (source.equals(btnSubmit)) {
        if (ReadyToSubmit) {
            // close this window
            sf.dispose();
            // open the dendrogram window
            invokeDendrograma();
        } else {
            JOptionPane
                    .showMessageDialog(
                            null,
                            "You must load a genomic working set before proceeding to the main window.",
                            "No Valid Genomic Working Set Loaded",
                            JOptionPane.ERROR_MESSAGE);
        }
    }
}
// ----- Import Data Files ----------------------------------//
// retrieve a data file
/**
 * Asks the user for a single genomic-working-set file through an AWT
 * {@link FileDialog} owned by the start frame.
 *
 * <p>Side effects, also when the dialog is cancelled: the bare file name
 * (possibly {@code null}) is stored in {@code GenomeWorkingSetFile_NoPath}
 * and {@code GenomesAsSingleFile} is set to {@code true}.
 *
 * @return directory plus file name of the chosen file, or {@code null} when
 *         no file was chosen
 */
private String getGenomeWorkingSetFile() {
    // use pre-existing 'FileDialog' GUI window to retrieve file
    final FileDialog chooser = new FileDialog(sf, "English", FileDialog.LOAD);
    chooser.setVisible(true);

    final String chosenName = chooser.getFile();
    this.GenomeWorkingSetFile_NoPath = chosenName;
    GenomesAsSingleFile = true;

    // getFile() is null when the dialog was dismissed without a choice
    return (chosenName == null) ? null : chooser.getDirectory() + chosenName;
}
// retrieve either directory or data file
/**
 * Asks the user for either a directory of annotated genome files or a single
 * genomic-working-set file.
 *
 * <p>On an approved selection this records the bare name in
 * {@code GenomeWorkingSetFile_NoPath}, remembers the chooser's directory in
 * {@code ReferenceDirectory} for the next dialog, sets
 * {@code GenomesAsSingleFile} according to whether a file or a directory was
 * picked, and for a directory stores its listing in {@code GenomeFiles}.
 * When the dialog is cancelled or closed none of that state is touched.
 *
 * @return absolute path of the selected file or directory, or {@code null}
 *         when nothing was selected
 */
private String getGenomes() {
    final JFileChooser chooser = new JFileChooser();
    chooser.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
    chooser.setDialogTitle("Select Annotated Genomes Directory or Genome Working Set File");

    // start where the previous dialog left off, else in the working directory
    if (this.ReferenceDirectory != null) {
        chooser.setCurrentDirectory(ReferenceDirectory);
    } else {
        chooser.setCurrentDirectory(new File("."));
    }

    // Only an approved dialog counts: after a cancel getSelectedFile() may be
    // null, or may still name an entry the user merely highlighted.
    final int outcome = chooser.showOpenDialog(chooser);
    final File selection = chooser.getSelectedFile();
    if (outcome != JFileChooser.APPROVE_OPTION || selection == null) {
        return null;
    }

    this.GenomeWorkingSetFile_NoPath = selection.getName();

    // note current directory for next time
    final File shownDirectory = chooser.getCurrentDirectory();
    if (shownDirectory != null) {
        this.ReferenceDirectory = shownDirectory;
    }

    // determine if file or directory loaded
    if (selection.isDirectory()) {
        this.GenomesAsSingleFile = false;
        // listFiles() yields null if the directory cannot be read
        final File[] listing = selection.listFiles();
        this.GenomeFiles = (listing != null) ? listing : new File[0];
    } else {
        // all information stored in a single genome working set file
        this.GenomesAsSingleFile = true;
    }

    return selection.getAbsolutePath();
}
// retrieve clusters file
/**
 * Asks the user for a homology-cluster file through an AWT
 * {@link FileDialog} owned by the start frame, starting in the directory
 * last used for the genome selection when one is known.
 *
 * <p>The bare file name (or {@code null} on cancel) is stored in
 * {@code ClustersFile_NoPath}. Cancelling no longer clears the field
 * {@code GenomeWorkingSetFile}: the earlier code nulled it by mistake, which
 * made the panel forget which working set was loaded.
 *
 * @return directory plus file name of the chosen file, or {@code null} when
 *         the dialog was dismissed without a choice
 */
private String getClustersFile() {
    // TODO finish this: switch to a JFileChooser in FILES_AND_DIRECTORIES
    // mode titled "Select Directory Or File Containing Homology Clusters"

    // use pre-existing 'FileDialog' GUI window to retrieve file
    final FileDialog dialog = new FileDialog(sf, "English", FileDialog.LOAD);

    // set reference directory to match annotated genomes directory
    if (ReferenceDirectory != null) {
        dialog.setDirectory(this.ReferenceDirectory.getAbsolutePath());
    }
    dialog.setVisible(true);

    final String chosenName = dialog.getFile();
    this.ClustersFile_NoPath = chosenName;

    // getFile() is null when the dialog was dismissed without a choice
    if (chosenName == null) {
        return null;
    }
    return dialog.getDirectory() + chosenName;
}
// ----- SwingWorker-related ----------------------------------//
// Perform File Loading + Operon computation tasks
/**
 * Background task that builds a fresh {@code OrganismSet} (stored in
 * {@code OS}) from annotated genome files.
 * <p>
 * Two input modes are supported, selected by {@code GenomesAsSingleFile}:
 * <ul>
 * <li>a single tab-delimited working-set file, one genome per line;</li>
 * <li>the collection {@code GenomeFiles}, of which every file whose name
 * contains ".gff" is imported.</li>
 * </ul>
 * Every imported genome receives a "SingleGene" context set, and a matching
 * {@code ContextSetDescription} is registered on the organism set.
 * <p>
 * NOTE(review): {@code doInBackground()} runs on a SwingWorker worker
 * thread, yet it enables/disables buttons, toggles progress bars and opens a
 * {@code JOptionPane} directly. Swing components are normally only touched
 * on the event dispatch thread; this is kept as-is because moving the calls
 * would change the ordering relative to {@code done()}.
 */
class LoadGenomesWorker extends SwingWorker<Void, Void> {

    /**
     * @param filename path of the genome working-set file; stored in the
     *                 panel's {@code GenomeWorkingSetFile} field
     */
    public LoadGenomesWorker(String filename) {
        GenomeWorkingSetFile = filename;
    }

    /**
     * Imports the genomes and publishes progress through
     * {@code setProgress}.
     *
     * @return always {@code null}
     * @throws Exception if counting the species of a single working-set
     *                   file fails (that call sits outside the guarded
     *                   import section); import failures themselves are
     *                   reported through a dialog instead
     */
    @Override
    protected Void doInBackground() throws Exception {
        // disable all buttons while loading
        btnLoad.setEnabled(false);
        btnClusterLoad.setEnabled(false);
        btnSubmit.setEnabled(false);

        // swap the file-name field for the progress bar
        GenomeWorkingSetFileName.setVisible(false);
        progressBar.setVisible(true);
        setProgress(0);
        progressBar.setStringPainted(true);

        // fresh organism set for this import
        OS = new OrganismSet();
        OS.setIncludeTypes(IncludeTypes);
        OS.setDisplayOnlyTypes(DisplayOnlyTypes);

        // genomes keyed by species name (insertion order kept) + name list
        LinkedHashMap<String, AnnotatedGenome> Species = new LinkedHashMap<String, AnnotatedGenome>();
        LinkedList<String> SpeciesNames = new LinkedList<String>();

        if (GenomesAsSingleFile) {
            // total number of organisms, used as the progress denominator
            TotalOrganisms = OS.determineNumberOfSpecies(GenomeWorkingSetFile);
            try {
                importWorkingSetFile(Species, SpeciesNames);
                storeResults(Species, SpeciesNames);
            } catch (Exception ex) {
                reportLoadFailure();
            }
        } else {
            try {
                importGenomeFiles(Species, SpeciesNames);
                storeResults(Species, SpeciesNames);
            } catch (Exception ex) {
                reportLoadFailure();
            }
        }
        return null;
    }

    /**
     * Reads the working-set file line by line, importing one genome per
     * line. The reader is closed even when reading fails part-way.
     */
    private void importWorkingSetFile(
            LinkedHashMap<String, AnnotatedGenome> species,
            LinkedList<String> speciesNames) throws Exception {
        BufferedReader br = new BufferedReader(new FileReader(
                GenomeWorkingSetFile));
        try {
            int completed = 0;
            String line;
            while ((line = br.readLine()) != null) {
                try {
                    importWorkingSetLine(line, species, speciesNames);
                } catch (Exception ignored) {
                    // deliberate best effort: a badly formatted line is
                    // skipped so the remaining genomes still load
                }
                completed++;
                reportCompleted(completed);
            }
        } finally {
            br.close();
        }
    }

    /**
     * Imports the genome described by one tab-delimited working-set line.
     * <ul>
     * <li>3 fields: annotation file, genome (sequence) file, species name</li>
     * <li>2 or 4+ fields: first = annotation file, last = species name</li>
     * <li>1 field: annotation file; the species name is the last
     * "/"-separated path element up to its ".gff" extension</li>
     * </ul>
     */
    private void importWorkingSetLine(String line,
            LinkedHashMap<String, AnnotatedGenome> species,
            LinkedList<String> speciesNames) throws Exception {
        String[] fields = line.split("\t");

        AnnotatedGenome AG = new AnnotatedGenome();
        AG.setIncludeTypes(IncludeTypes);
        AG.setDisplayOnlyTypes(DisplayOnlyTypes);

        String speciesName;
        if (fields.length == 3) {
            AG.importFromGFFFile(fields[0]);
            AG.setGenomeFile(new File(fields[1]));
            speciesName = fields[2];
        } else if (fields.length > 1) {
            AG.importFromGFFFile(fields[0]);
            speciesName = fields[fields.length - 1];
            AG.setGenomeFile(new File(""));
        } else {
            AG.importFromGFFFile(fields[0]);
            String[] pathElements = fields[0].split("/");
            // the dot is escaped: an unescaped "." is a regex wildcard and
            // would also cut names that merely contain "<any char>gff"
            speciesName = pathElements[pathElements.length - 1]
                    .split("\\.gff")[0];
            AG.setGenomeFile(new File(""));
        }

        AG.setSpecies(speciesName);
        // genus = text before the first underscore of the species name
        AG.setGenus(speciesName.split("_")[0]);
        AG.MakeSingleGeneContextSet("SingleGene");

        species.put(speciesName, AG);
        speciesNames.add(speciesName);
    }

    /**
     * Imports every entry of {@code GenomeFiles} whose name contains
     * ".gff"; the file name up to ".gff" becomes the species name.
     */
    private void importGenomeFiles(
            LinkedHashMap<String, AnnotatedGenome> species,
            LinkedList<String> speciesNames) throws Exception {
        // first pass: count the files that will be imported
        TotalOrganisms = 0;
        for (File f : GenomeFiles) {
            if (f.getName().contains(".gff")) {
                TotalOrganisms++;
            }
        }

        // second pass: import them
        int completed = 0;
        for (File f : GenomeFiles) {
            if (!f.getName().contains(".gff")) {
                continue;
            }
            AnnotatedGenome AG = new AnnotatedGenome();
            AG.setIncludeTypes(IncludeTypes);
            AG.setDisplayOnlyTypes(DisplayOnlyTypes);
            AG.importFromGFFFile(f.getAbsolutePath());
            AG.setGenomeFile(f);

            // escaped dot, see importWorkingSetLine
            String TheName = f.getName().split("\\.gff")[0];
            AG.setSpecies(TheName);
            AG.setGenus(TheName.split("_")[0]);
            AG.MakeSingleGeneContextSet("SingleGene");

            species.put(TheName, AG);
            speciesNames.add(TheName);

            completed++;
            reportCompleted(completed);
        }
    }

    /**
     * Stores the imported genomes and the initial "SingleGene" context-set
     * description in {@code OS}, then swaps the progress bar back for the
     * file-name field.
     */
    private void storeResults(LinkedHashMap<String, AnnotatedGenome> species,
            LinkedList<String> speciesNames) throws Exception {
        OS.setSpecies(species);
        OS.setSpeciesNames(speciesNames);

        LinkedList<ContextSetDescription> CSD = new LinkedList<ContextSetDescription>();
        ContextSetDescription Initial = new ContextSetDescription();
        Initial.setName("SingleGene");
        Initial.setPreprocessed(true);
        Initial.setType("IntergenicDist");
        CSD.add(Initial);
        OS.setCSDs(CSD);

        progressBar.setValue(100);
        progressBar.setVisible(false);
        GenomeWorkingSetFileName.setVisible(true);
        GenomeWorkingSetFileName.setText(GenomeWorkingSetFile_NoPath);
    }

    /** Resets the progress bar, flags the failed load and tells the user. */
    private void reportLoadFailure() {
        progressBar.setStringPainted(false);
        progressBar.setValue(0);
        GenomeWorkingSetFileImproperlyLoaded = true;
        JOptionPane.showMessageDialog(null,
                "The file could not be loaded or was improperly formatted.",
                "Invalid File Format", JOptionPane.ERROR_MESSAGE);
        GenomeWorkingSetFileName.setText(strCancelled);
    }

    /**
     * Publishes {@code completed / TotalOrganisms} as a percentage.
     * The value is clamped to 0..100 because {@code setProgress} rejects
     * anything else with an IllegalArgumentException, which would otherwise
     * abort an import whose file has more lines than counted organisms (or
     * a zero organism count).
     */
    private void reportCompleted(int completed) {
        long percent = Math.round(100 * ((double) completed / (double) TotalOrganisms));
        setProgress((int) Math.max(0L, Math.min(100L, percent)));
    }

    /**
     * Runs after {@code doInBackground()} finishes: derives the
     * "gene clusters loaded" status of {@code OS}, resets the panel's
     * switches and re-enables the buttons.
     * <p>
     * NOTE(review): {@code GenomeWorkingSetLoaded} and {@code ReadyToSubmit}
     * are set to true even when the import failed -- confirm this is
     * intended.
     */
    @Override
    public void done() {
        // clusters count as loaded when at least 80% of the genomes carry
        // them. Guarded against a missing species map so that a failed
        // import cannot leave the buttons disabled (whether getSpecies()
        // can actually be null depends on OrganismSet -- TODO confirm).
        boolean mostHaveClusters = false;
        if (OS.getSpecies() != null) {
            double NumWithClusters = 0.0;
            for (AnnotatedGenome AG : OS.getSpecies().values()) {
                if (AG.isAGClustersLoaded()) {
                    NumWithClusters++;
                }
            }
            mostHaveClusters = NumWithClusters >= 0.8 * (double) OS
                    .getSpecies().values().size();
        }
        OS.setGeneClustersLoaded(mostHaveClusters);

        // adjust switches
        LoadingGenomeFiles = false;
        GenomeWorkingSetLoaded = true;
        LoadingGeneClusters = false;
        GeneClustersLoaded = false;
        ReadyToSubmit = true;

        // adjust buttons
        btnLoad.setEnabled(true);
        btnClusterLoad.setEnabled(true);
        btnSubmit.setEnabled(true);

        // restore the "no file" captions after a failed load
        if (ClusterFileImproperlyLoaded == true) {
            ClusterFileName.setVisible(true);
            ClusterFileName.setText(strNoFileLoaded);
        }
        ClusterFileImproperlyLoaded = false;
        if (GenomeWorkingSetFileImproperlyLoaded == true) {
            GenomeWorkingSetFileName.setVisible(true);
            GenomeWorkingSetFileName.setText(strNoFileLoaded);
        }
        GenomeWorkingSetFileImproperlyLoaded = false;

        // all progress bars are made invisible
        progressBar.setVisible(false);
        progressBarClusters.setVisible(false);
    }
}
// Load homology clusters
/**
 * Background task that reads a tab-delimited homology-cluster file and
 * assigns cluster numbers to the genes of the genomes held in {@code OS}.
 * <p>
 * The file is read twice: once to count its lines (progress denominator)
 * and once to import them. Both readers are closed when done, including on
 * failure.
 * <p>
 * NOTE(review): as in the genome loader, Swing components and a
 * {@code JOptionPane} are used directly from {@code doInBackground()},
 * i.e. off the event dispatch thread; kept unchanged.
 */
class LoadClustersWorker extends SwingWorker<Void, Void> {

    /**
     * @param filename path of the cluster file; stored in the panel's
     *                 {@code ClustersFile} field
     */
    public LoadClustersWorker(String filename) {
        ClustersFile = filename;
    }

    /**
     * Imports the cluster file when a genomic working set is loaded;
     * otherwise only shows an error dialog.
     *
     * @return always {@code null}
     */
    @Override
    protected Void doInBackground() throws Exception {
        // disable all buttons while loading
        btnLoad.setEnabled(false);
        btnClusterLoad.setEnabled(false);
        btnSubmit.setEnabled(false);

        if (GenomeWorkingSetLoaded == true) {
            // swap the file-name field for the progress bar
            ClusterFileName.setVisible(false);
            progressBarClusters.setVisible(true);
            progressBarClusters.setStringPainted(true);
            progressBarClusters.setValue(0);
            setProgress(0);

            try {
                // First: count lines in the file
                int TotalLines = countLines();

                // Second: import/process lines in the file
                BufferedReader br = new BufferedReader(new FileReader(
                        ClustersFile));
                try {
                    int LineCounter = 0;
                    String Line;
                    while ((Line = br.readLine()) != null) {
                        // 1-based line number; doubles as the cluster
                        // number for single-column files
                        LineCounter++;
                        try {
                            assignClusters(Line.split("\t"), LineCounter,
                                    TotalLines);
                        } catch (Exception ignored) {
                            // deliberate best effort: unparsable lines or
                            // unknown organisms are skipped
                        }
                        reportProgress(LineCounter, TotalLines);
                    }
                } finally {
                    br.close();
                }

                // set status of 'gene clusters loaded' to true
                OS.setGeneClustersLoaded(true);
                ClusterFileImproperlyLoaded = false;
                progressBarClusters.setVisible(false);
                ClusterFileName.setVisible(true);
                ClusterFileName.setText(ClustersFile_NoPath);
            } catch (Exception ex) {
                progressBarClusters.setStringPainted(false);
                JOptionPane.showMessageDialog(null,
                        "The file could not be loaded or was improperly formatted.",
                        "Invalid File Format", JOptionPane.ERROR_MESSAGE);
                ClusterFileImproperlyLoaded = true;
                LoadingGeneClusters = false;
                OS.setGeneClustersLoaded(false);
                ClusterFileName.setText(strCancelled);
            }
        } else {
            JOptionPane.showMessageDialog(null,
                    "You must load a genomic working set before loading homologous gene clusters.",
                    "No Valid Genomic Working Set Loaded",
                    JOptionPane.ERROR_MESSAGE);
        }
        return null;
    }

    /** Counts the lines of {@code ClustersFile}, closing the reader afterwards. */
    private int countLines() throws Exception {
        BufferedReader counter = new BufferedReader(new FileReader(
                ClustersFile));
        try {
            int total = 0;
            while (counter.readLine() != null) {
                total++;
            }
            return total;
        } finally {
            counter.close();
        }
    }

    /**
     * Applies one cluster-file line. The layout is chosen by column count:
     * <ul>
     * <li>1: gene name (cluster number = line number)</li>
     * <li>2: gene name, cluster number</li>
     * <li>3: organism, gene name, cluster number</li>
     * <li>4: organism, contig, gene name, cluster number</li>
     * <li>5: organism, contig, gene start, gene stop, cluster number</li>
     * </ul>
     * Lines with any other column count are ignored.
     *
     * @param fields     the tab-separated columns of the line
     * @param lineNumber 1-based number of the line within the file
     * @param totalLines total number of lines in the file
     */
    private void assignClusters(String[] fields, int lineNumber,
            int totalLines) throws Exception {
        switch (fields.length) {
        case 1: {
            // NOTE(review): this replaces "_ " (underscore + space) while
            // every other layout replaces "_" -- possibly a typo; kept
            // byte-for-byte pending confirmation.
            String geneName = fields[0].replace("_ ", " ");
            for (AnnotatedGenome AG : OS.getSpecies().values()) {
                AG.addClusterNumber(geneName, lineNumber);
            }
            // largest cluster designation is always the last line
            OS.LargestCluster = totalLines;
            break;
        }
        case 2: {
            int GeneClusterNum = Integer.parseInt(fields[1]);
            raiseLargestCluster(GeneClusterNum);
            String geneName = fields[0].replace("_", " ");
            for (AnnotatedGenome AG : OS.getSpecies().values()) {
                AG.addClusterNumber(geneName, GeneClusterNum);
            }
            break;
        }
        case 3: {
            int GeneClusterNum = Integer.parseInt(fields[2]);
            raiseLargestCluster(GeneClusterNum);
            OS.getSpecies().get(fields[0])
                    .addClusterNumber(fields[1].replace("_", " "),
                            GeneClusterNum);
            break;
        }
        case 4: {
            int GeneClusterNum = Integer.parseInt(fields[3]);
            raiseLargestCluster(GeneClusterNum);
            OS.getSpecies().get(fields[0])
                    .addClusterNumber(fields[1],
                            fields[2].replace("_", " "), GeneClusterNum);
            break;
        }
        case 5: {
            int GeneStart = Integer.parseInt(fields[2]);
            int GeneStop = Integer.parseInt(fields[3]);
            int GeneClusterNum = Integer.parseInt(fields[4]);
            raiseLargestCluster(GeneClusterNum);
            OS.getSpecies().get(fields[0])
                    .addClusterNumber(fields[1], GeneStart, GeneStop,
                            GeneClusterNum);
            break;
        }
        default:
            // unsupported column count: nothing to import from this line
            break;
        }
    }

    /** Raises {@code OS.LargestCluster} to {@code clusterNum} if it is larger. */
    private void raiseLargestCluster(int clusterNum) {
        if (OS.LargestCluster < clusterNum) {
            OS.LargestCluster = clusterNum;
        }
    }

    /**
     * Publishes {@code linesDone / totalLines} as a percentage, clamped to
     * the 0..100 range that {@code setProgress} accepts (the file could
     * change between the counting pass and the import pass).
     */
    private void reportProgress(int linesDone, int totalLines) {
        long percent = Math.round(100 * ((double) linesDone / (double) totalLines));
        setProgress((int) Math.max(0L, Math.min(100L, percent)));
    }

    /**
     * Runs after {@code doInBackground()} finishes: resets the panel's
     * switches, re-enables the buttons and hides the progress bars.
     * <p>
     * NOTE(review): {@code GenomeWorkingSetLoaded} and
     * {@code GeneClustersLoaded} are set to true unconditionally, even
     * after a failed import or when no working set was loaded -- confirm
     * this is intended.
     */
    @Override
    public void done() {
        // adjust switches
        LoadingGenomeFiles = false;
        GenomeWorkingSetLoaded = true;
        LoadingGeneClusters = false;
        GeneClustersLoaded = true;
        ReadyToSubmit = true;

        // adjust buttons
        btnLoad.setEnabled(true);
        btnClusterLoad.setEnabled(true);
        btnSubmit.setEnabled(true);

        // all progress bars are made invisible, all files visible
        progressBar.setVisible(false);
        progressBarClusters.setVisible(false);
        ClusterFileName.setVisible(true);
    }
}
// progress bar signaling: forwards worker "progress" events to the progress bars
/**
 * Forwards "progress" property changes to a progress bar.
 * <p>
 * While no genomic working set is loaded the value drives
 * {@code progressBar} (genome loading); afterwards it drives
 * {@code progressBarClusters} (cluster loading). Events for any other
 * property are ignored.
 *
 * @param evt the property change event; for "progress" events its new
 *            value is expected to be an {@code Integer}
 */
@Override
public void propertyChange(PropertyChangeEvent evt) {
    // compare by content: == on strings only succeeds when both operands
    // happen to be the same interned instance
    if (!"progress".equals(evt.getPropertyName())) {
        return;
    }
    int progress = (Integer) evt.getNewValue();
    if (GenomeWorkingSetLoaded == false) {
        progressBar.setValue(progress);
    } else {
        progressBarClusters.setValue(progress);
    }
}
// ----- Getters + Setters ----------------------------------//
/** @return the button currently held in {@code btnSubmit} */
public JButton getBtnSubmit() {
    return this.btnSubmit;
}
/** @param btnSubmit the button to store in the {@code btnSubmit} field */
public void setBtnSubmit(JButton btnSubmit) {
    this.btnSubmit = btnSubmit;
}
/** @return the list held in {@code IncludeTypes} (the same instance, not a copy) */
public LinkedList<String> getIncludeTypes() {
    return this.IncludeTypes;
}
/** @param includeTypes the list to store in {@code IncludeTypes} (stored by reference) */
public void setIncludeTypes(LinkedList<String> includeTypes) {
    this.IncludeTypes = includeTypes;
}
/** @return the list held in {@code DisplayOnlyTypes} (the same instance, not a copy) */
public LinkedList<String> getDisplayOnlyTypes() {
    return this.DisplayOnlyTypes;
}
/** @param displayOnlyTypes the list to store in {@code DisplayOnlyTypes} (stored by reference) */
public void setDisplayOnlyTypes(LinkedList<String> displayOnlyTypes) {
    this.DisplayOnlyTypes = displayOnlyTypes;
}
// ----- Launch main frame ----------------------------------//
// create a new dendrogram window, with the loaded OS
/**
 * Launches the main application by constructing a
 * {@code JContextExplorer} around the currently loaded organism set
 * ({@code OS}). The created instance is not retained here.
 */
public void invokeDendrograma() {
    // Disabled debugging aids that used to live here:
    //   OS.ExportExtendedGFFFile();
    //   System.out.println(OS.getSpeciesNames());
    //   System.out.println(OS.getSpecies().get("Haloarcula_amylolytica").getGroupings().get(0).getName());
    new JContextExplorer(OS);
}
}