/*
* Copyright (C) Lennart Martens
*
* Contact: lennart.martens AT UGent.be (' AT ' to be replaced with '@')
*/
/*
* Created by IntelliJ IDEA.
* User: Lennart
* Date: 7-okt-02
* Time: 10:26:23
*/
package com.compomics.util.protein;
import org.apache.log4j.Logger;
import java.io.BufferedReader;
import java.io.StringReader;
import java.io.IOException;
import java.io.PrintWriter;
/*
* CVS information:
*
* $Revision: 1.3 $
* $Date: 2007/07/06 09:41:53 $
*/
/**
* This class implements the behaviour for a Protein instance.
* A lot of functionality is borrowed from the AASequenceImpl class.
* The internal representation corresponds most closely to a FASTA
* entry.
*
* @author Lennart Martens
* @see com.compomics.util.protein.AASequenceImpl
*/
public class Protein {
// Class specific log4j logger for Protein instances.
Logger logger = Logger.getLogger(Protein.class);
/**
* The sequence is a very important element in a Protein instance.
*/
private AASequenceImpl iSequence = null;
/**
* The header for this Protein.
*/
private Header iHeader = null;
/**
* This flag indicates whether the protein has been truncated at
* any point. This is important when considering enzymatic cleavage
* of these proteins, since their C-terminal part no longer
* makes sense - it has been blunted by the truncation process!
*/
private boolean iTruncated = false;
/**
* This int will indicate at which position a protein has been truncated.
* This code is only meaningful when the 'isTruncated()' method returns 'true'.
*/
private int iTruncationPosition = 0;
/**
* The code for an N-terminal truncation.
* These variables can be compared to the result of the
* 'getTruncationPosition()' method, BUT ONLY when the
* 'isTruncated()' method returns true.
*/
public static final int NTERMTRUNC = 1;
/**
* The code for an C-terminal truncation.
* These variables can be compared to the result of the
* 'getTruncationPosition()' method, BUT ONLY when the
* 'isTruncated()' method returns true.
*/
public static final int CTERMTRUNC = 2;
/**
* This constructor requires an AASequenceImpl as argument.
* This will be the sequence around which the protein will be built.
*
* @param aSequence AASequenceImpl around which this protein will be built.
*/
public Protein(AASequenceImpl aSequence) {
this(null, aSequence);
}
/**
* This constructor allows the passing of a Header, as well as an
* AASequenceImpl for this Protein.
*
* @param aHeader Header with the header information for this Protein.
* @param aSequence AASequenceImpl with the sequence for this Protein.
*/
public Protein(Header aHeader, AASequenceImpl aSequence) {
this(aHeader, aSequence, false, 0);
}
/**
* This constructor allows the passing of a Header, as well as an
* AASequenceImpl for this Protein. It also allows for the specification
* of the 'truncatedness' of the Protein.
*
* @param aHeader Header with the header information for this Protein.
* @param aSequence AASequenceImpl with the sequence for this Protein.
* @param aTruncated boolean that indicates whether this Protein has been truncated.
* @param aTruncationPosition int with the coded position for the truncation (N-Term or C-Term).
*/
public Protein(Header aHeader, AASequenceImpl aSequence, boolean aTruncated, int aTruncationPosition) {
this.iSequence = aSequence;
this.iHeader = aHeader;
this.iTruncated = aTruncated;
this.iTruncationPosition = aTruncationPosition;
}
/**
* This constructor allows for the construction of a Protein instance
* by passing a FASTA entry.
*
* @param aFASTAString String with the FASTA representation of the Protein.
*/
public Protein(String aFASTAString) {
this(aFASTAString, false, 0);
}
/**
* This constructor allows for the construction of a Protein instance
* by passing a FASTA entry and boolean flag for truncation.
*
* @param aFASTAString String with the FASTA representation of the Protein.
* @param aTruncated boolean that indicates whether this Protein has been truncated.
* @param aTruncationPosition int with the coded position for the truncation (N-Term or C-Term).
*/
public Protein(String aFASTAString, boolean aTruncated, int aTruncationPosition) {
try {
// Parse the FASTA entry.
BufferedReader br = new BufferedReader(new StringReader(aFASTAString));
// First line is the header.
this.iHeader = Header.parseFromFASTA(br.readLine());
// Next, read the remaining lines that make up the sequence.
StringBuffer lSB = new StringBuffer();
String line = null;
while((line = br.readLine()) != null) {
lSB.append(line);
}
// Initialize the sequence.
this.iSequence = new AASequenceImpl(lSB.toString());
// Initialize truncation.
this. iTruncated = aTruncated;
this.iTruncationPosition = aTruncationPosition;
} catch(IOException ioe) {
// We certainly do NONT expect an IOException...
throw new IllegalArgumentException("Unable to process your FASTA String ('" + aFASTAString + "'). IOException: " + ioe.getMessage() + ".");
}
}
/**
* This constructor allows for the construction of a Protein instance through the
* passing of a Header String and a Sequence String. This is mainly useful to obtain
* a Protein instance without a Header.
*
* @param aHeader String with the header (can be 'null').
* @param aSequence String with the sequence.
*/
public Protein(String aHeader, String aSequence) {
this(aHeader, aSequence, false, 0);
}
/**
* This constructor allows for the construction of a Protein instance through the
* passing of a Header String and a Sequence String. This is mainly useful to obtain
* a Protein instance without a Header.
*
* @param aHeader String with the header (can be 'null').
* @param aSequence String with the sequence.
* @param aTruncated boolean that indicates whether this Protein has been truncated.
* @param aTruncationPosition int with the coded position for the truncation (N-Term or C-Term).
*/
public Protein(String aHeader, String aSequence, boolean aTruncated, int aTruncationPosition) {
this.iHeader = Header.parseFromFASTA(aHeader);
this.iSequence = new AASequenceImpl(aSequence);
this.iTruncated = aTruncated;
this.iTruncationPosition = aTruncationPosition;
}
/**
* This method reports on the header for the current
* protein.
*
* @return Header with the current header for this protein.
*/
public Header getHeader() {
return this.iHeader;
}
/**
* This method reports on the sequence for the current
* protein.
*
* @return AASequenceImpl with the current sequence for this protein.
*/
public AASequenceImpl getSequence() {
return this.iSequence;
}
/**
* This method truncates the sequence for this protein on the N-terminus
* to the requested size.
*
* @param aSize int with the size of the resulting N-terminal sequence
* @return Protein with an N-terminal truncated sequence.
*/
public Protein getNTermTruncatedProtein(int aSize) {
// First get the current sequence.
AASequenceImpl sequence = this.getSequence();
// Extract start and end locations.
// Start is 1 (human readable!), unless a startlocation is specified for
// the current protein, in which case the current startlocation is kept.
int start = this.getHeader().getStartLocation();
if(start < 0) {
start = 1;
}
// The endlocation is (start + truncated length).
// Where truncation length is the
sequence = sequence.getNTermTruncatedSequence(aSize);
int end = start + sequence.getLength() - 1;
// Get a copy of the header and set the location.
Header header = (Header)this.getHeader().clone();
header.setLocation(start, end);
// See if we should flag the truncatedness.
boolean flag = false;
if(this.getLength() > aSize) {
flag = true;
}
// Return the newly constructed Protein.
return new Protein(header, sequence, flag, Protein.NTERMTRUNC);
}
/**
* This method truncates the sequence for this protein on the C-terminus
* to the requested size.
*
* @param aSize int with the size of the resulting C-terminal sequence
* @return Protein with an C-terminal truncated sequence.
*/
public Protein getCTermTruncatedProtein(int aSize) {
// First get the current sequence.
AASequenceImpl sequence = this.getSequence();
// Extract start and end locations.
// End location is the current endlocation, or, if there isn't any,
// the length of the sequence.
int end = this.getHeader().getEndLocation();
if(end < 0) {
end = sequence.getLength();
}
// Start is (last residu minus truncated size).
sequence = sequence.getCTermTruncatedSequence(aSize);
int start = end - sequence.getLength() + 1;
// get a copy of the header and set the location.
Header header = (Header)this.getHeader().clone();
header.setLocation(start, end);
// See if we should flag the truncatedness.
boolean flag = false;
if(this.getLength() > aSize) {
flag = true;
}
// Return the newly constructed Protein.
return new Protein(header, sequence, flag, Protein.CTERMTRUNC);
}
/**
* This method reports on the length of the sequence for the current protein.
*
* @return long with the length of the sequence for the current protein.
*/
public long getLength() {
return this.getSequence().getLength();
}
/**
* This method returns the protein weight in Da.
*
* @return double with the mass of the Protein in Da.
*/
public double getMass() {
return this.getSequence().getMass();
}
/**
* Simple setter for the header.
*
* @param aHeader the Header to set for this protein.
*/
public void setHeader(Header aHeader) {
this.iHeader = aHeader;
}
/**
* Simple setter for the sequence.
*
* @param aSequence the AASequenceImpl with the
* sequence to set for this protein.
*/
public void setSequence(AASequenceImpl aSequence) {
this.iSequence = aSequence;
}
/**
* This method can be used to append this protein to the
* FASTA DB flatfile the PrintWriter points to.
*
* @param aOut PrintWriter to write the file to.
* @exception IOException when the writing failed.
*/
public void writeToFASTAFile(PrintWriter aOut) throws IOException {
aOut.println(this.getHeader().getAbbreviatedFASTAHeaderWithAddenda());
StringBuffer sequence = new StringBuffer(this.getSequence().getSequence());
// Next we want to ensure only 60 characters are present on each line.
// So at every 59th character, insert and endline.
// First of all, see if the sequence is long enough!
if(sequence.length()>59) {
int offset = 58;
while(true) {
// Insert endline.
sequence.insert(offset, "\n");
// See if we're not overextending our reach here.
offset += 59;
if(offset > sequence.length()) {
break;
}
}
}
aOut.println(sequence.toString());
}
/**
* This method reports on the 'truncatedness' of the protein.
*
* @return boolean whether this protein is the result of a truncation.
*/
public boolean isTruncated() {
return this.iTruncated;
}
/**
* This method reports on the position of the truncation.
* Note that the method can only be trusted when the 'isTruncated()'
* method returns 'true'.
*
* @return int with the code for the position (either N-term or C-term).
* This return code can be evaluated against the constants defined on this class.
*/
public int getTruncationPosition() {
return this.iTruncationPosition;
}
/**
* This method will check equality between this object
* and another Protein instance.
*/
public boolean equals(Object o) {
boolean result = false;
if(o instanceof Protein) {
Protein p = (Protein)o;
if((p.iHeader.getFullHeaderWithAddenda().equals(this.iHeader.getFullHeaderWithAddenda())) && (p.iSequence.getModifiedSequence().equals(this.iSequence.getModifiedSequence())) && (p.iTruncated == this.iTruncated) && (p.iTruncationPosition == this.iTruncationPosition)) {
result = true;
}
}
return result;
}
}