/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.clothocore.api.data;
import java.io.InputStream;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.ImageIcon;
import javax.swing.JOptionPane;
import javax.swing.SwingUtilities;
import org.clothocore.api.core.Collator;
import org.clothocore.api.core.Collector;
import org.clothocore.api.dnd.RefreshEvent;
import org.clothocore.api.dnd.RefreshEvent.Condition;
import org.clothocore.api.plugin.ClothoConnection;
import org.clothocore.core.Hub;
import org.clothocore.util.dialog.WaitingDialog;
import org.clothocore.util.xml.XMLParser;
import org.openide.util.ImageUtilities;
public class NucSeq extends ObjBase implements Serializable {
/**
* Constructor from database
* @param uuid
* @param sequence
* @param dateCreated
* @param lastmodified
* @param annots
*/
public NucSeq( NucSeqDatum d ) {
super( d );
_seqDatum = d;
if ( !initiateNucSeq( _seqDatum.theSequence ) ) {
return;
}
}
/**
* Constructor for a NucSeq (dna sequence) object. The sequence ends up in the
* private variable 'theSequence'. In constructing it, it checks whether it is
* a legitimate RNA or DNA molecule and whether it contains degenerate codons,
* which is stored as a boolean 'isDegenerate'.
*
* @param inputSeq
* @param strandedness
* @param circularity
*/
public NucSeq( String inputSeq, boolean strandedness, boolean circularity ) {
super();
_seqDatum = new NucSeqDatum();
_datum = _seqDatum;
_datum.name = "nucseq";
_datum.dateCreated = new Date();
_datum.lastModified = new Date();
_seqDatum.uuid = _uuid;
_seqDatum.isSingleStranded = strandedness;
_seqDatum.isCircular = circularity;
lowerArray = new boolean[ inputSeq.length() ];
if ( !initiateNucSeq( inputSeq ) ) {
return;
}
createStartStopCodons();
//NEED TO USE A DIFFERENT OBJBASE CONSTRUCTOR TO SET HASH AS UUID
//_myUUID = generateUUIDAsHash(getSeq());
if ( translation == null ) {
makeTranslationHash();
}
}
//alternate constuctor if circularity and strandedness isn't specified
public NucSeq( String inputSeq ) {
this( inputSeq, false, false );
}
@Override
public ObjType getType() {
return ObjType.NUCSEQ;
}
/*
* Initialize values for start and stop codons
*/
private void createStartStopCodons()
{
_startCodons = new ArrayList<String>();
_startCodons.add("ATG");
_startCodons.add("GTG");
_startCodons.add("TGG");
_startCodons.add("RTG");
_stopCodons = new ArrayList<String>();
_stopCodons.add("TAA");
_stopCodons.add("TAG");
_stopCodons.add("TGA");
_stopCodons.add("TRA");
_stopCodons.add("TAR");
}
protected static ObjBase importFromHashMap( String uuid, HashMap<String, Object> objHash ) {
System.out.println( "working on " + uuid );
String name = (String) objHash.get( "name" );
String sequence = (String) objHash.get( "sequence" );
boolean locked = Boolean.parseBoolean( (String) objHash.get( "isLocked" ) );
//Pull all the annotations and add them to the list
@SuppressWarnings (value="unchecked")
ArrayList<String> arrannot = (ArrayList<String>) objHash.get( "annotations" );
HashSet<String> annots = new HashSet<String>();
if ( arrannot != null ) {
for ( String s : arrannot ) {
annots.add( s );
}
}
Date dateCreated = getDateFromString( (String) objHash.get( "_dateCreated" ) );
Date lastModified = getDateFromString( (String) objHash.get( "_lastModified" ) );
NucSeqDatum d = new NucSeqDatum();
d.uuid = uuid;
d.name = "sequence";
d.dateCreated = dateCreated;
d.lastModified = lastModified;
d.isLocked = locked;
d.annotations = annots;
return new NucSeq( d );
}
@Override
protected HashMap<String, HashMap<String, Object>> generateXml( HashMap<String, HashMap<String, Object>> allObjects ) {
//If the hash already has the object, skip adding anything
if ( allObjects.containsKey( getUUID() ) ) {
return allObjects;
}
//Fill in the individual fields
HashMap<String, Object> datahash = new HashMap<String, Object>();
datahash.put( "objType", getType().toString() );
datahash.put( "uuid", _seqDatum.uuid );
datahash.put( "name", _seqDatum.name );
datahash.put( "_dateCreated", getDateCreatedAsString() );
datahash.put( "_lastModified", getLastModifiedAsString() );
datahash.put( "sequence", _seqDatum.theSequence );
datahash.put( "annotations", _seqDatum.annotations );
for ( String s : _seqDatum.annotations ) {
allObjects = Collector.getAnnotation( s ).generateXml( allObjects );
}
//Add the HashMap to the list and return
allObjects.put( getUUID(), datahash );
return allObjects;
}
public boolean initiateNucSeq( String inputSeq ) {
if(inputSeq == null) {
_seqDatum.theSequence = "!";
setTransient();
return false;
}
char currentchar;
StringBuffer seq = new StringBuffer();
lowerArray = new boolean[ inputSeq.length() ];
//Check whether this is an RNA, DNA, or a bad seq, and if is degenerate:
loopy:
for ( int i = 0; i < inputSeq.length(); i++ ) {
currentchar = inputSeq.charAt( i );
char upperChar = Character.toUpperCase( currentchar );
//Put the case in a format array
if ( currentchar == upperChar ) {
lowerArray[i] = false;
} else {
lowerArray[i] = true;
}
//Build up the new format-free sequence
seq.append( upperChar );
switcheroo:
switch ( upperChar ) {
case '.':
_seqDatum.isLinear = true;
break;
case 'A':
break;
case 'C':
break;
case 'G':
break;
case 'T':
break;
case 'B':
_seqDatum.isDegenerate = true;
break;
case 'D':
_seqDatum.isDegenerate = true;
break;
case 'H':
_seqDatum.isDegenerate = true;
break;
case 'K':
_seqDatum.isDegenerate = true;
break;
case 'M':
_seqDatum.isDegenerate = true;
break;
case 'N':
_seqDatum.isDegenerate = true;
break;
case 'R':
_seqDatum.isDegenerate = true;
break;
case 'S':
_seqDatum.isDegenerate = true;
break;
case 'V':
_seqDatum.isDegenerate = true;
break;
case 'W':
_seqDatum.isDegenerate = true;
break;
case 'Y':
_seqDatum.isDegenerate = true;
break;
case 'U':
_seqDatum.isRNA = true;
break;
default:
System.out.println( "Nucseq had to break on " + upperChar );
_seqDatum.theSequence = "!";
setTransient();
return false;
}
}
_seqDatum.theSequence = seq.toString();
return true;
}
/**
* Finds indices of Open Reading Frames in a given nucleotide sequence and
* returns them as HashMap with start indices for keys and end indices for
* values.
*
* @param s String to check for ORFs
* @param forward Boolean, set to 'true' for finding forward reading frames
* or to 'false' to find ORFs in the reverse complement
*/
@SuppressWarnings (value="unchecked")
public HashMap findORFs(boolean forward, boolean multipleStartCodons) {
String seq = _seqDatum.theSequence;
int len = seqLength();
HashMap orfs = new HashMap();
if (isCircular()) {
seq = seq.concat(seq);
}
Pattern pattern = Pattern.compile(makeORFRegExp(multipleStartCodons, isDegenerate()), Pattern.CASE_INSENSITIVE);
Matcher matcher;
if (forward) {
matcher = pattern.matcher(seq);
}
else {
matcher = pattern.matcher(revComp());
}
int end;
int start;
int pos = 0;
while (matcher.find(pos)) {
start = matcher.start();
end = matcher.end();
if (end > len) {
end = end - len;
}
if (!(start >= len || matcher.group().length() > len)) {
if (forward) {
orfs.put(start, end);
}
else {
orfs.put(len - start, len - end);
}
}
pos = matcher.start() + 3;
}
return orfs;
}
/**
* Returns a regular expression for finding Open Reading Frames using
* data from the codon table
*/
private String makeORFRegExp(boolean msc, boolean allowDegen) {
String regexp = "(";
if (msc) {
for (int i = 0; i < _startCodons.size(); i++) {
if (i + 1 < _startCodons.size())
regexp = regexp + seqToRegExp(_startCodons.get(i), allowDegen) + "|";
else
regexp = regexp + seqToRegExp(_startCodons.get(i), allowDegen) + ")";
}
}
else {
regexp = regexp + seqToRegExp(_startCodons.get(0), allowDegen) + ")";
}
regexp = regexp + "(...)*?(";
for (int i = 0; i < _stopCodons.size(); i++) {
if (i + 1 < _stopCodons.size())
regexp = regexp + seqToRegExp(_stopCodons.get(i), allowDegen) + "|";
else
regexp = regexp + seqToRegExp(_stopCodons.get(i), allowDegen) + ")";
}
return regexp;
}
/**
* Takes a sequence and transforms it into a regular expression for
* searches.
*
* @param s String containing the sequence
*/
private String seqToRegExp(String seq, boolean degen) {
String regexp = "";
String c;
String prefix = "";
String suffix = "";
int rootstart = 0;
int rootend = seq.length();
if (seq.indexOf("<") != -1 || seq.indexOf(">") != -1) {
for (int i = 0; i < seq.length(); i++) {
c = seq.substring(i,i+1);
if (c.equalsIgnoreCase("<")) {
Pattern pattern = Pattern.compile(".", Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(regexp);
while (matcher.find()) {
prefix = "(" + prefix + matcher.group() + ")?";
}
rootstart = i+1;
}
else if (c.equalsIgnoreCase(">")) {
Pattern pattern = Pattern.compile(".", Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(seq.replaceFirst(regexp,""));
int counter = 0;
matcher.find();
while (matcher.find()) {
suffix = suffix + "(" + matcher.group();
counter++;
}
for (int j=0; j<counter; j++) {
suffix = suffix + ")?";
}
rootend = i;
}
else {
regexp = regexp + c;
}
}
}
//System.out.println("Is");
regexp = prefix + seq.substring(rootstart, rootend) + suffix;
if (degen) {
//The following block strings the regex and protects with @ symbol
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[aA][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[aA]", "[A@D@H@M@N@R@V@W]");
//System.out.println("here?C " + regexp.length());
// (regexp.matches("[a-z[A-Z]\\[\\]]*\\]?(?<!@)[cC]\\[?[a-z[A-Z]\\[\\]]*"))
regexp = regexp.replaceAll("(?<!@)[cC]", "[@BC@H@M@N@S@V@Y]");
//System.out.println("here?G " + regexp.length());
//if (regexp.matches("[a-z[A-Z]\\]]*(?<!@)[gG][a-z[A-Z]\\[]*"))
regexp = regexp.replaceAll("(?<!@)[gG]", "[@B@DG@K@N@R@S@V]");
//System.out.println("here?T " + regexp.length());
//if (regexp.matches("[a-z[A-Z]\\]]*(?<!@)[tT][a-z[A-Z]\\[]*"))
regexp = regexp.replaceAll("(?<!@)[tT]", "[@B@D@H@K@NT@W@Y]");
//System.out.println("here?U " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[uU][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[uU]", "[@B@D@H@K@NU@W@Y]");
//System.out.println("here?B " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[bB][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[bB]", "[BC@DG@H@K@M@N@R@STU@V@W@Y]");
//System.out.println("here?D " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[dD][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[dD]", "[ABDG@H@K@M@N@R@STU@V@W@Y]");
//System.out.println("here?H " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[hH][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[hH]", "[ABCDH@K@M@N@R@STU@V@W@Y]");
//System.out.println("here?K " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[kK][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[kK]", "[BDGHK@N@R@STU@V@W@Y]");
//System.out.println("here?M " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[mM][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[mM]", "[ABCDHM@N@R@S@V@W@Y]");
//System.out.println("here?N " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[nN][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[nN]", "[ABCDGHKMN@R@STU@V@W@Y]");
//System.out.println("here?R " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[rR][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[rR]", "[ABDGHKMNR@S@V@W]");
//System.out.println("here?S " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[sS][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[sS]", "[BCDGHKMNRS@V@Y]");
//System.out.println("here?V " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[vV][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[vV]", "[ABCDGHKMNRSV@W@Y]");
//System.out.println("here?W " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[wW][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[wW]", "[ABDHKMNRTUVW@Y]");
//System.out.println("here?Y " + regexp.length());
//if (regexp.matches("[a-z[A-Z]]*(?<!@)[yY][a-z[A-Z]]*"))
regexp = regexp.replaceAll("(?<!@)[yY]", "[BCDHKMNSTUVWY]");
}
else {
regexp = regexp.replaceAll("(?<!@)[bB]", "[CGTU]");
regexp = regexp.replaceAll("(?<!@)[dD]", "[AGTU]");
regexp = regexp.replaceAll("(?<!@)[hH]", "[ACTU]");
regexp = regexp.replaceAll("(?<!@)[kK]", "[GTU]");
regexp = regexp.replaceAll("(?<!@)[mM]", "[AC]");
regexp = regexp.replaceAll("(?<!@)[nN]", "[ACGTU]");
regexp = regexp.replaceAll("(?<!@)[rR]", "[AG]");
regexp = regexp.replaceAll("(?<!@)[sS]", "[CG]");
regexp = regexp.replaceAll("(?<!@)[vV]", "[ACG]");
regexp = regexp.replaceAll("(?<!@)[wW]", "[ATU]");
regexp = regexp.replaceAll("(?<!@)[yY]", "[CTU]");
degen = true;
}
//System.out.println("here?# " + regexp.length());
if (regexp.indexOf("#") != -1)
regexp = regexp.replaceAll("#", "[ABCDGHKMNRSTUVWY]*?");
//Unprotect
regexp = regexp.replaceAll("@", "");
//System.out.println("This Heap Space?");
return regexp;
}
/**
* Determines the GC content of a string of nucleotides, returns a double
* in the range from 0 to 1
*
*/
public double[] gcContent() {
String seq = _seqDatum.theSequence;
seq = seq.toUpperCase();
int len = seqLength();
double gcMin = 0;
double gcMax = 0;
double[] gc = new double[2];
for (int i = 0; i < len; i++) {
String n = seq.substring(i, i + 1);
if (n.matches("[CGS]")) {
gcMin++;
gcMax++;
}
else if (n.matches("[RYMKBDHVN]")) {
gcMax++;
}
}
gcMin = gcMin / len;
gcMax = gcMax / len;
gc[0] = gcMin;
gc[1] = gcMax;
return gc;
}
/**
* Determines the approximate melting point (Celsius) of a sequence of DNA
* using a Nearest-Neighbor method, assuming 1.0 M [NaCl] and
* 50 nM [primer].
*
*/
public double meltingTemp () {
/* Resources:
* http://en.wikipedia.org/wiki/DNA_melting#Nearest-neighbor_method
* http://www.basic.northwestern.edu/biotools/oligocalc.html
* http://dna.bio.puc.cl/cardex/servers/dnaMATE/tm-pred.html
*/
String seq = _seqDatum.theSequence;
int len = seqLength();
double concP = 50 * java.lang.Math.pow(10, -9);
double dH = 0;
double dS = 0;
double logCt = 0;
double R = 1.987;
double temp;
String pair;
seq = seq.toUpperCase();
// Checks terminal base pairs
char init = seq.charAt(0);
if (init == 'G' || init == 'C') {
dH += 0.1;
dS += -2.8;
}
else if (init == 'A' || init == 'T') {
dH += 2.3;
dS += 4.1;
}
init = seq.charAt(len - 1);
if (init == 'G' || init == 'C') {
dH += 0.1;
dS += -2.8;
}
else if (init == 'A' || init == 'T') {
dH += 2.3;
dS += 4.1;
}
// Checks nearest neighbor pairs
for (int i = 0; i < len - 1; i++) {
pair = seq.substring(i,i+2);
if (pair.equals("AA") || pair.equals("TT")) {
dH += -7.9;
dS += -22.2;
}
else if (pair.equals("AG") || pair.equals("CT")) {
dH += -7.8;
dS += -21.0;
}
else if (pair.equals("AT")) {
dH += -7.2;
dS += -20.4;
}
else if (pair.equals("AC") || pair.equals("GT") ) {
dH += -8.4;
dS += -22.4;
}
else if (pair.equals("GA") || pair.equals("TC")) {
dH += -8.2;
dS += -22.2;
}
else if (pair.equals("GG") || pair.equals("CC")) {
dH += -8.0;
dS += -19.9;
}
else if (pair.equals("GC")) {
dH += -9.8;
dS += -24.4;
}
else if (pair.equals("TA")) {
dH += -7.2;
dS += -21.3;
}
else if (pair.equals("TG") || pair.equals("CA")) {
dH += -8.5;
dS += -22.7;
}
else if (pair.equals("CG") ) {
dH += -10.6;
dS += -27.2;
}
}
// Checks for self-complementarity
int mid;
if (len % 2 == 0) {
mid = len / 2;
if (seq.substring(0, mid).equals(new NucSeq(seq.substring(mid,len)).revComp())) {
dS += -1.4;
}
}
else {
mid = (len - 1) / 2;
if (seq.substring(0, mid).equals(new NucSeq(seq.substring(mid + 1,len)).revComp())) {
dS += -1.4;
}
}
// dH is in kCal, dS is in Cal - equilibrating units
dH = dH * 1000;
// logCt = java.lang.Math.log(1 / concP);
logCt = java.lang.Math.log(concP);
temp = (dH / (dS + (R * logCt))) - 273.15;
//return temp;
return temp;
}
/**
* Reverse complement this NucSeq. The case will be saved with this
* operation, and the annotations will be repositioned.
*/
public void revCompThis() {
char currentchar;
StringBuffer seq = new StringBuffer();
boolean[] newLower = new boolean[ _seqDatum.theSequence.length() ];
for ( int x = (_seqDatum.theSequence.length() - 1); x >= 0; x-- ) {
currentchar = _seqDatum.theSequence.charAt( x );
char appendChar = ' ';
switch ( currentchar ) { // (Assume N is an integer variable.)
case 'A':
if ( _seqDatum.isRNA ) {
appendChar = 'U';
} else {
appendChar = 'T';
}
break;
case 'T':
appendChar = 'A';
break;
case 'C':
appendChar = 'G';
break;
case 'G':
appendChar = 'C';
break;
case '&':
appendChar = '&';
break;
case 'R':
appendChar = 'Y';
break;
case 'Y':
appendChar = 'R';
break;
case 'M':
appendChar = 'K';
break;
case 'K':
appendChar = 'M';
break;
case 'W':
appendChar = 'W';
break;
case 'S':
appendChar = 'S';
break;
case 'B':
appendChar = 'V';
break;
case 'D':
appendChar = 'H';
break;
case 'H':
appendChar = 'D';
break;
case 'V':
appendChar = 'B';
break;
case 'N':
appendChar = 'N';
break;
case 'U':
appendChar = 'A';
break;
default:
break;
}
seq.append( appendChar );
if ( lowerArray[x] ) {
appendChar = Character.toLowerCase( appendChar );
newLower[_seqDatum.theSequence.length() - x - 1] = true;
} else {
newLower[_seqDatum.theSequence.length() - x - 1] = false;
}
}
//update the sequence
lowerArray = newLower;
if(changeSeq(seq.toString())) {
//Invert all annotations
for ( String s : _seqDatum.annotations ) {
Annotation an = Collector.getAnnotation( s );
an.invert( seq.length() );
}
}
}
/**
* Subroutine revComp returns the reverse complement of theSequence,
* in all uppercase as a String. To actually reverse complement this
* NucSeq, and also invert its annotations, use revCompThis instead.
*
* @return a String that is the reverse complement
*/
public String revComp() {
StringBuffer seq = new StringBuffer();
char currentchar;
for ( int x = (_seqDatum.theSequence.length() - 1); x >= 0; x-- ) {
currentchar = _seqDatum.theSequence.charAt( x );
char outchar = ' ';
switch ( currentchar ) { // (Assume N is an integer variable.)
case 'A':
if ( _seqDatum.isRNA ) {
outchar = 'U';
} else {
outchar = 'T';
}
break;
case 'T':
outchar = 'A';
break;
case 'C':
outchar = 'G';
break;
case 'G':
outchar = 'C';
break;
case '&':
outchar = '&';
break;
case 'R':
outchar = 'Y';
break;
case 'Y':
outchar = 'R';
break;
case 'M':
outchar = 'K';
break;
case 'K':
outchar = 'M';
break;
case 'W':
outchar = 'W';
break;
case 'S':
outchar = 'S';
break;
case 'B':
outchar = 'V';
break;
case 'D':
outchar = 'H';
break;
case 'H':
outchar = 'D';
break;
case 'V':
outchar = 'B';
break;
case 'N':
outchar = 'N';
break;
case 'U':
outchar = 'A';
break;
default:
break;
}
seq.append( outchar );
} // end for loop
return seq.toString();
}
/**
* Get the NucSeq in Genbank format from the user currently logged in
* @return a String in Genbank format
*/
public String getGenbank() {
return getGenbank( new Person[]{ Collector.getCurrentUser() } );
}
/**
* Get the NucSeq in Genbank format with annotations from a specific
* list of users
* @param users
* @return a String in Genbank format
*/
public String getGenbank( Person[] users ) {
return "not yet implemented";
}
/**
* Annotate the NucSeq from the complete database worth of features
* If Person is null, uses the current user from Collector
*
* The Person is used as the author of the Annotation
*/
public void autoAnnotate( Person user) {
System.out.println( "I'm autoannotating your nucSeq from all database features" );
autoAnnotate( null, user, false );
}
/**
* Annotate the NucSeq from the features in a particular Collection
* If Person is null, uses the current user from Collector
*
* The Person is used as the author of the Annotation
* @param col
*/
public void autoAnnotate( Collection col, Person user ) {
HashSet<String> allfeatures = col.getAllLinksOf( ObjType.FEATURE );
System.out.println( "Autoannotating with all features from a particular collection:" );
for ( String s : allfeatures ) {
System.out.println( "autoannotate with " + s );
}
autoAnnotate( allfeatures, user, true );
}
/**
* Relay for other two autoAnnotate calls, but can also use this in Apps directly.
* For using all features in the database, set constrainTo == false.
*
* To search a particularly Collection,
* call autoAnnotate ( Collection col, Person user ). To search all collections of
* a particular user call autoAnnotate ( Person user )
*
* The Person is used as the author of the Annotation
*
* @param onlyFeatures the list of Feature UUIDs requested for autoannotation
* @param user the Person to be set as author of the Annotation
* @param constrainTo true if should constrain annotations to supplied list, otherwise false
*/
public void autoAnnotate( HashSet<String> onlyFeatures, Person user, Boolean constrainTo ) {
if ( !featuresInitiated ) {
initiateFeatureTable();
}
String revcomp = this.revComp();
for ( int i = 0; i < featureTable.length; i++ ) {
if(constrainTo) {
if ( onlyFeatures != null ) {
if ( !onlyFeatures.contains( featureTable[i][0] ) ) {
System.out.println( featureTable[i][0] + " is not requested" );
continue;
}
}
}
try {
testFeature(featureTable[i][2], featureTable[i][0], user, revcomp );
} catch (Exception e) {
e.printStackTrace();
}
}
}
/**
* Iterated method that compares one feature sequence to the target
*
* @param teststring
* @param uuid
* @param user
* @param revcomp
*/
private void testFeature(String teststring, String uuid, Person user, String revcomp ) {
Pattern p = Pattern.compile(teststring);
//Check Feature exact matches in forward orientation
String[] text = {_seqDatum.theSequence};
for (int i = 0; i < text.length; i++) {
Matcher matcher = p.matcher(text[i]);
while (matcher.find()) {
System.out.println( "start=" + matcher.start() + " end = " + matcher.end());
Feature f = Collector.getFeature( uuid );
if(f==null || f.isDeleted()) {
return;
}
int start = matcher.start();
int end = matcher.end();
if(f.isCDS()) {
try {
//For CDS features, if the 5' sequences is a start codon, include that in annotation
String fiveprime = text[i].substring(start-3, start);
System.out.println("fiveprime is " + fiveprime);
if(fiveprime.equals("ATG") || fiveprime.equals("TTG") || fiveprime.equals("GTG")) {
start = start-3;
}
} catch(Exception e) {
}
try {
//For CDS features, if the 3' sequences is a stop codon, include that in annotation
String threeprime = text[i].substring(end, end+3);
System.out.println("threeprime is " + threeprime);
if(threeprime.equals("TAA") || threeprime.equals("TGA") || threeprime.equals("TAG")) {
end = end+3;
}
} catch(Exception e) {
}
}
Annotation annot = new Annotation( f, this, null, null, start, end, user, true, null );
System.out.println( "I found a forward annotation at " + start );
setChanged(org.clothocore.api.dnd.RefreshEvent.Condition.ANNOTATION_TO_NUCSEQ);
}
}
//Check it as reverse complement
String[] text2 = {revcomp};
for (int i = 0; i < text.length; i++) {
Matcher matcher = p.matcher(text2[i]);
while (matcher.find()) {
System.out.println( "start=" + matcher.start() + " end = " + matcher.end());
Feature f = Collector.getFeature( uuid );
if(f==null || f.isDeleted()) {
return;
}
int index = _seqDatum.theSequence.length() - matcher.start();
int start = index - teststring.length();
int end = index;
if(f.isCDS()) {
try {
//For CDS features, if the 5' sequences is a an RC stop codon, include it
String fiveprime = text[i].substring(start-3, start);
System.out.println("fiveprime is " + fiveprime);
if(fiveprime.equals("TTA") || fiveprime.equals("TCA") || fiveprime.equals("CTA")) {
start = start-3;
}
} catch(Exception e) {
}
try {
//For CDS features, if the 3' sequences is a an RC start codon, include that in annotation
String threeprime = text[i].substring(end, end+3);
System.out.println("threeprime is " + threeprime);
if(threeprime.equals("CAT") || threeprime.equals("CAA") || threeprime.equals("CAC")) {
end = end+3;
}
} catch(Exception e) {
}
}
Annotation annot = new Annotation( f, this, null, null, start, end, user, false, null );
System.out.println( "I found a reverse annotation at " + start );
setChanged(org.clothocore.api.dnd.RefreshEvent.Condition.ANNOTATION_TO_NUCSEQ);
}
}
}
public void removeAnnotations() {
for ( String s : _seqDatum.annotations ) {
_seqDatum.removeAnnotations.add( s );
}
_seqDatum.annotations = new HashSet<String>();
setChanged(org.clothocore.api.dnd.RefreshEvent.Condition.ANNOTATION_TO_NUCSEQ);
}
/**
* Recursively save all child elements and then call ObjBase to save itself.
*/
@Override
public synchronized boolean save( ClothoConnection conn ) {
System.out.println( "============ Starting nucseq save of " + getUUID() + " size " + _seqDatum.annotations.size() );
if ( !isChanged() ) {
System.out.println( "nucseq didn't require saving" );
return true;
}
//Temporarily hide the annotations
HashSet<String> tempannots = _seqDatum.annotations;
_seqDatum.annotations = new HashSet<String>();
//Save the sequences without the annotations
if(!super.save( conn )) {
return false;
}
//Re-add the annotations and redo the saves
_seqDatum.annotations = tempannots;
for ( String s : _seqDatum.annotations ) {
Annotation att = Collector.getAnnotation( s );
System.out.println( "nucseq save saving annotation " + att.getUUID() );
if ( att == null ) {
return false;
}
if ( !att.save( conn ) ) {
return false;
}
}
for ( String s : _seqDatum.removeAnnotations ) {
Annotation att = Collector.getAnnotation( s );
System.out.println( "nucseq save deleting annotation " + att.getUUID() );
if ( att != null ) {
if ( !att.deleteFromDatabase() ) {
return false;
}
}
_seqDatum.removeAnnotations = new HashSet<String>();
}
return super.save( conn );
}
@Override
public boolean addObject( ObjBase dropObject ) {
return false;
}
void setLocked( boolean isit ) {
_seqDatum.isLocked = isit;
}
/**
* Add a user-defined non-Feature Annotation
* @param uuid
*/
void addAnnotationLink( String uuid ) {
_seqDatum.annotations.add( uuid );
System.out.println("NucSeq " + this.getUUID() + " added: " + uuid);
setChanged(Condition.ANNOTATION_TO_NUCSEQ);
}
public ArrayList<Integer> find( NucSeq seq ) {
ArrayList<Integer> out = new ArrayList<Integer>();
String testSeq = seq._seqDatum.theSequence;
int start = 0;
searchforward:
while ( true ) {
String test = _seqDatum.theSequence.substring( start );
System.out.println( test.substring( 0, 100 ) );
int b = test.indexOf( testSeq );
if ( b > 0 ) {
out.add( b + start );
System.out.println( "for adding: " + b );
start += b;
start += 1;
System.out.println( "New start: " + start );
} else {
break searchforward;
}
}
testSeq = seq.revComp();
start = 0;
searchreverse:
while ( true ) {
String test = _seqDatum.theSequence.substring( start );
System.out.println( test.substring( 0, 100 ) );
int b = test.indexOf( testSeq );
if ( b > 0 ) {
out.add( b + start );
System.out.println( "for adding: " + b );
start += b;
start += 1;
System.out.println( "New start: " + start );
} else {
break searchreverse;
}
}
if ( out.size() == 0 ) {
System.out.println( "I didn't find any" );
}
return out;
}
public String translate( int frame ) {
return translate( frame, _seqDatum.theSequence.length() );
}
public String translate( int start, int end ) {
int extra = (end - start) % 3;
if ( extra % 3 > 0 ) {
System.out.println( "You gave me an invalid translation query: " + _seqDatum.theSequence.substring( start, end ) );
return "*";
}
String seq = _seqDatum.theSequence.substring( start, end );
int value;
int i = 0;
String acodon = "";
String outSeq = "";
while ( i < seq.length() ) {
acodon = seq.substring( i, i + 3 );
if ( translation.containsKey( acodon ) ) {
int anum = translation.get( acodon );
outSeq += (char) anum;
} else {
outSeq += "?";
}
i = i + 3;
}
return outSeq;
}
public char getCharAt( int i ) {
return _seqDatum.theSequence.charAt( i );
}
public int seqLength() {
return _seqDatum.theSequence.length();
}
public boolean isDegenerate() {
return _seqDatum.isDegenerate;
}
public boolean isRNA() {
return _seqDatum.isRNA;
}
public boolean isLocked() {
return _seqDatum.isLocked;
}
public boolean isCircular() {
return _seqDatum.isCircular;
}
public boolean isSingleStranded() {
return _seqDatum.isSingleStranded;
}
public HashSet<Annotation> getAnnotations() {
HashSet<Annotation> out = new HashSet<Annotation>();
for ( String s : _seqDatum.annotations ) {
System.out.println("retrieving an annoation link: " + s);
out.add( Collector.getAnnotation( s ) );
}
return out;
}
public HashSet<String> getAnnotationLinks() {
return this._seqDatum.annotations;
}
/**
* Returns the user-formatted version of the String
* @return
*/
@Override
public String toString() {
StringBuffer seq = new StringBuffer();
for ( int i = 0; i < _seqDatum.theSequence.length(); i++ ) {
char letter = _seqDatum.theSequence.charAt( i );
if ( lowerArray[i] ) {
letter = Character.toLowerCase( letter );
}
seq.append( letter );
}
return seq.toString();
}
/**
* Returns the unformatted all-caps string, used
* for bioinformaticcy treatment
*/
public String getSeq() {
return _seqDatum.theSequence;
}
/**
* Returns the unformatted all-caps string with all
* degeneracy positions replaced by regex
* @return
*/
public String getMatcher() {
String out = _seqDatum.theSequence;
out.replaceAll("N", ".");
out.replaceAll("R", ".");
out.replaceAll("K", ".");
out.replaceAll("S", ".");
return out;
}
/**
* Change the sequence of this NucSeq. Parts, Vectors,
* features, and oligos "lock" their NucSeq...you must
* call changeSeq from the part, vector, Feature, or oligo
* to change their sequence.
*
* @param newseq
*/
public boolean changeSeq( String newseq ) {
if ( _seqDatum.isLocked ) {
return false;
}
//ADD UNDO HERE FOR THESEQUENCE AND ANNOTATIONS, THEN CLEAR ANNOTATIONS
return APIchangeSeq( newseq );
}
boolean APIchangeSeq( String newseq ) {
if(initiateNucSeq( newseq )) {
setChanged(Condition.SEQUENCE_CHANGED);
return true;
} else {
fireData(new RefreshEvent(this, Condition.SEQUENCE_CHANGED));
return false;
}
}
static {
makeTranslationHash();
}
private static void makeTranslationHash() {
translation = new Hashtable<String, Integer>();
translation.put( "TTT", new Integer( 70 ) );
translation.put( "TTC", new Integer( 70 ) );
translation.put( "TTA", new Integer( 76 ) );
translation.put( "TTG", new Integer( 76 ) );
translation.put( "CTT", new Integer( 76 ) );
translation.put( "CTC", new Integer( 76 ) );
translation.put( "CTA", new Integer( 76 ) );
translation.put( "CTG", new Integer( 76 ) );
translation.put( "ATT", new Integer( 73 ) );
translation.put( "ATC", new Integer( 73 ) );
translation.put( "ATA", new Integer( 73 ) );
translation.put( "ATG", new Integer( 77 ) );
translation.put( "GTT", new Integer( 86 ) );
translation.put( "GTC", new Integer( 86 ) );
translation.put( "GTA", new Integer( 86 ) );
translation.put( "GTG", new Integer( 86 ) );
translation.put( "TCT", new Integer( 83 ) );
translation.put( "TCC", new Integer( 83 ) );
translation.put( "TCA", new Integer( 83 ) );
translation.put( "TCG", new Integer( 83 ) );
translation.put( "CCT", new Integer( 80 ) );
translation.put( "CCC", new Integer( 80 ) );
translation.put( "CCA", new Integer( 80 ) );
translation.put( "CCG", new Integer( 80 ) );
translation.put( "ACT", new Integer( 84 ) );
translation.put( "ACC", new Integer( 84 ) );
translation.put( "ACA", new Integer( 84 ) );
translation.put( "ACG", new Integer( 84 ) );
translation.put( "GCT", new Integer( 65 ) );
translation.put( "GCC", new Integer( 65 ) );
translation.put( "GCA", new Integer( 65 ) );
translation.put( "GCG", new Integer( 65 ) );
translation.put( "TAT", new Integer( 89 ) );
translation.put( "TAC", new Integer( 89 ) );
translation.put( "TAA", new Integer( 42 ) );
translation.put( "TAG", new Integer( 42 ) );
translation.put( "CAT", new Integer( 72 ) );
translation.put( "CAC", new Integer( 72 ) );
translation.put( "CAA", new Integer( 81 ) );
translation.put( "CAG", new Integer( 81 ) );
translation.put( "AAT", new Integer( 78 ) );
translation.put( "AAC", new Integer( 78 ) );
translation.put( "AAA", new Integer( 75 ) );
translation.put( "AAG", new Integer( 75 ) );
translation.put( "GAT", new Integer( 68 ) );
translation.put( "GAC", new Integer( 68 ) );
translation.put( "GAA", new Integer( 69 ) );
translation.put( "GAG", new Integer( 69 ) );
translation.put( "TGT", new Integer( 67 ) );
translation.put( "TGC", new Integer( 67 ) );
translation.put( "TGA", new Integer( 42 ) );
translation.put( "TGG", new Integer( 87 ) );
translation.put( "CGT", new Integer( 82 ) );
translation.put( "CGC", new Integer( 82 ) );
translation.put( "CGA", new Integer( 82 ) );
translation.put( "CGG", new Integer( 82 ) );
translation.put( "AGT", new Integer( 83 ) );
translation.put( "AGC", new Integer( 83 ) );
translation.put( "AGA", new Integer( 82 ) );
translation.put( "AGG", new Integer( 82 ) );
translation.put( "GGT", new Integer( 71 ) );
translation.put( "GGC", new Integer( 71 ) );
translation.put( "GGA", new Integer( 71 ) );
translation.put( "GGG", new Integer( 71 ) );
}
public static void refreshFeatureTable() {
generateFeatureTable( false, true );
}
public static void initiateFeatureTable() {
generateFeatureTable( true, false );
}
private static void generateFeatureTable( boolean init, boolean backgroundMode ) {
if ( initiating ) {
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
dialog = new WaitingDialog( new javax.swing.JFrame(), "Clotho is pulling down all features in preparation for autoannotation, this will take a moment. Hold on.", true );
}
});
}
initiating = true;
if ( init ) {
if ( featuresInitiated ) {
return;
}
}
if ( !backgroundMode ) {
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
dialog = new WaitingDialog( new javax.swing.JFrame(), "Clotho is pulling down all features in preparation for autoannotation, this will take a moment. Hold on.", true );
}
});
}
featureTable = Hub.defaultConnection.getTableAsArray( ObjType.FEATURE );
for ( int i = 0; i < featureTable.length; i++ ) {
try {
String astring = featureTable[i][2];
String string2 = astring.toUpperCase();
featureTable[i][2] = string2.replaceAll("N", ".");
} catch(Exception e) {
featureTable[i][2] = "XXXXXXXXXXXXXXXXXX";
}
}
featuresInitiated = true;
if ( dialog != null ) {
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
dialog.dispose();
}
});
}
initiating = false;
}
/**
* Relayed from feature constructors to add the local memory feature to the table of autoannotations
* @param afeature
*/
static void addFeatureToTable(Feature afeature) {
if(afeature==null) {
return;
}
//Transfer old data into new table with one more row
int newsize = featureTable.length +1;
String[][] newtable = new String[newsize][5];
for ( int i = 0; i < featureTable.length; i++ ) {
newtable[i][2] = featureTable[i][2];
}
//Add the data to the new row
String seq = afeature.getSeq().getSeq();
featureTable[featureTable.length][2] = seq.replaceAll("N", ".");
}
/**
* This is the general method called to perform all biofafety checks
* on a DNA sequence. Called from part, vector, and Feature factory
* methods.
*
* @return the biosafety level of this NucSeq
*/
Short performBiosafetyCheck() {
System.out.println( "performBiosafetyCheck triggered" );
short rg = -1;
rg = getBSLfromServer();
//If it's RG3+, show a special message
if ( rg == 4 ) {
ImageIcon bslicon = ImageUtilities.loadImageIcon( "org/clothocore/images/BIOHAZARD.png", false );
JOptionPane.showMessageDialog( null,
"You have executed a risk group check on a sequence that came back\n"
+ "as Risk Group 4.\nSuch materials could be highly dangerous!\n"
+ "You should examine your design closer before proceeding.",
"Risk Group 4 material!",
JOptionPane.INFORMATION_MESSAGE,
bslicon );
}
if ( rg == 5 ) {
ImageIcon bslicon = ImageUtilities.loadImageIcon( "org/clothocore/images/BIOHAZARD.png", false );
JOptionPane.showMessageDialog( null,
"You have executed a risk group check on a sequence that came back\n"
+ "as being highly similar to a select agent.\nSuch materials could be highly dangerous and potential illegal!\n"
+ "You should examine your design closer before proceeding.",
"Select Agent detected!",
JOptionPane.INFORMATION_MESSAGE,
bslicon );
}
return rg;
}
/**
* This queries the bsl server and parses the xml to
* get the biosafety level of the NucSeq
*
* @return the biosafety level of this NucSeq
*/
private short getBSLfromServer(){
//If it's already failed 3 times, then don't bother anymore
if(failCount>3) {
return -1;
}
//Form the URL query
String seq = getSeq();
URL urlRobot;
try {
String urlstr = _BSLServerURL + "\"" + seq + "\"";
urlRobot = new URL(urlstr);
} catch (Exception e) {
e.printStackTrace();
return -1;
}
XMLParser myParser = null;
try {
//Starts reading the URL
InputStream urlRobotStream = urlRobot.openStream();
//Read the file and access it in an xmlParser, then close it
try {
myParser = new XMLParser(urlRobotStream, "output" );
} catch(Exception e) {
System.out.println("Biosafety server data could not be parsed");
failCount++;
updateBSLServer(_updateBSLURL1);
return -1;
}
urlRobotStream.close();
} catch (java.net.ConnectException e) {
System.out.println("Biosafety server not available");
//Keeps count of failures. Once pass 3 give up.
failCount++;
updateBSLServer(_updateBSLURL1);
return -1;
} catch (java.io.IOException ex) {
System.out.println("Biosafety server not available");
failCount++;
updateBSLServer(_updateBSLURL1);
return -1;
}
if(myParser==null) {
System.out.println("Biosafety server information could not be parsed");
failCount++;
updateBSLServer(_updateBSLURL1);
return-1;
}
try {
String bslvalue = myParser.getFirstTag("bsl");
short bsl = Short.parseShort(bslvalue);
System.out.println("Biosafety server returning risk group " + bsl);
if(bsl == (short) 0) {
bsl=1;
}
return bsl;
} catch(Exception e) {
failCount++;
updateBSLServer(_updateBSLURL1);
return -1;
}
}
/**
* The first time this class is called, set the biosafety BLAST
* server from preferences. If no preference is set, try retrieving
* it from an XML file online
*/
static {
String tempurl = Collator.getPreference("NucSeqBSLServerAddress");
try {
URL url = new URL(tempurl);
} catch (Exception ex) {
updateBSLServer("http://www.bu.edu/ece-clotho/xmlfes/updatebsl.xml");
}
}
/**
* Request that NucSeq update its biosafety server.
*/
public static void updateBSLServer(String url) {
//Form URL of the file
URL urlRobot = null;
try {
urlRobot = new URL(url);
} catch (Exception e) {
e.printStackTrace();
if(!url.equals(_updateBSLURL2)) {
updateBSLServer(_updateBSLURL2);
return;
}
}
XMLParser myParser = null;
try {
//Starts reading the URL
InputStream urlRobotStream = urlRobot.openStream();
//Read the file and access it in an xmlParser, then close it
myParser = new XMLParser(urlRobotStream, "update" );
urlRobotStream.close();
} catch (Exception e) {
e.printStackTrace();
if(!url.equals(_updateBSLURL2)) {
updateBSLServer(_updateBSLURL2);
return;
}
}
if(myParser==null) {
return;
}
String newurl = myParser.getFirstTag("url");
try {
URL urly = new URL(newurl);
} catch (MalformedURLException ex) {
return;
}
Collator.putPreference("NucSeqBSLServerAddress", newurl);
_BSLServerURL = newurl;
System.out.println("The new BSL server from " + urlRobot.getPath() + " is " + newurl);
}
/*-----------------
variables
-----------------*/
private static final String _updateBSLURL1 = "http://www.bu.edu/ece-clotho/xmlfiles/updatebsl.xml";
private static final String _updateBSLURL2 = "http://andersonlab.qb3.berkeley.edu/Software/updatebsl.xml";
private static short failCount = 0;
private static String _BSLServerURL = "http://cidar1.bu.edu/cgi-bin/tst.pl?";
private static Hashtable<String, Integer> translation;
protected boolean[] lowerArray;
private NucSeqDatum _seqDatum;
public static class NucSeqDatum extends ObjBaseDatum {
public boolean isDegenerate = false; // if it has N's or R's and so on
public boolean isRNA = false; // if it has U's
public boolean isLinear = false; // if it '.''s
public boolean isSingleStranded = false; // if its an oligo
public boolean isCircular = false; // if its a plasmid
public String theSequence;
public HashSet<String> annotations = new HashSet<String>(); //The list of annoations
public HashSet<String> removeAnnotations = new HashSet<String>(); // ? What is is this?
public boolean isLocked = false;
@Override
public ObjType getType() {
return ObjType.NUCSEQ;
}
}
private static boolean featuresInitiated = false;
private static boolean initiating = false;
private static String[][] featureTable;
private static WaitingDialog dialog;
private ArrayList<String> _stopCodons;
private ArrayList<String> _startCodons;
/******* FIELDS *******/
public static enum Fields {
NAME,
DATE_CREATED,
LAST_MODIFIED,
SEQUENCE,
VECTORS,
PARTS,
ANNOTATIONS,
FEATURES,
OLIGOS
}
}