/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.bio; import static fr.ens.biologie.genomique.eoulsan.util.StringUtils.trim; import static fr.ens.biologie.genomique.eoulsan.util.Utils.equal; import fr.ens.biologie.genomique.eoulsan.util.Utils; /** * This class define a Sequence. * @since 1.0 * @author Laurent Jourdren */ public class Sequence { protected int id; protected String name; protected String description; protected Alphabet alphabet = Alphabets.AMBIGUOUS_DNA_ALPHABET; protected String sequence; // // Getters // /** * Get the id of the sequence. * @return the id of the sequence */ public final int getId() { return this.id; } /** * Set the name of the sequence. * @return the name of the sequence */ public final String getName() { return this.name; } /** * Get the identifier in the name. * @return the identifier in the name */ public final String getIdentifierInName() { if (this.name==null) { return null; } int pos = this.name.indexOf(' '); if (pos==-1) { return this.name; } return this.name.substring(0, pos); } /** * Get the description in the name. * @return the description in the name */ public final String getDescriptionInName() { if (this.name==null) { return null; } int pos = this.name.indexOf(' '); if (pos==-1) { return ""; } return trim(this.name.substring(pos)); } /** * Get the description of the sequence. * @return a string with the description */ public final String getDescription() { return this.description; } /** * Get the alphabet used for the sequence. * @return the alphabet of the sequence */ public final Alphabet getAlphabet() { return this.alphabet; } /** * Get the sequence of the sequence. * @return a string with the sequence */ public final String getSequence() { return this.sequence; } // // Setters // /** * Set the id of the sequence. * @param id id to set */ public final void setId(final int id) { this.id = id; } /** * Set the name of the sequence. * @param name the name to set */ public final void setName(final String name) { this.name = trim(name); } /** * Set the name of the sequence and validate this name. Even if the name is * not validated, the name parameter will be the name of the object after * execution of this method. * @param name the name to set * @return true if the name is valid. */ public final boolean setNameWithValidation(final String name) { this.name = trim(name); return validateName(); } /** * Set the description of the sequence. * @param description the description to set */ public final void setDescription(final String description) { this.description = trim(description); } /** * Set the alphabet of the sequence. * @param alphabet the alphabet to set */ public final void setAlphabet(final Alphabet alphabet) { if (alphabet == null) { throw new NullPointerException("The alphabet is null"); } this.alphabet = alphabet; } /** * Set the sequence. * @param sequence Sequence to set */ public final void setSequence(final String sequence) { this.sequence = trim(sequence); } /** * Set the sequence and validate this sequence. Even if the sequence is not * validated, the sequence parameter will be the name of the object after * execution of this method. * @param sequence Sequence to set * @return true if the name is valid. */ public final boolean setSequenceWithValidation(final String sequence) { this.sequence = trim(sequence); return validateSequence(); } /** * Set sequence values with the values of another sequence * @param sequence sequence object with values to use to fill current object */ public void set(final Sequence sequence) { if (sequence == null) { throw new NullPointerException("Sequence is null"); } this.id = sequence.id; this.name = sequence.name; this.description = sequence.description; this.alphabet = sequence.alphabet; this.sequence = sequence.sequence; } // // Sequence string management // /** * Get the length of the read. * @return the length of the read */ public int length() { if (this.sequence == null) { return 0; } return this.sequence.length(); } /** * Create a sub-sequence from the current sequence. Note that index start at * 0. * @param beginIndex begin index of the sub-sequence * @param endIndex end index of the sub-sequence * @return a new sequence object with a sub-sequence of the current object */ public Sequence subSequence(final int beginIndex, final int endIndex) { if (this.sequence == null) { return null; } if (beginIndex < 0) { throw new StringIndexOutOfBoundsException(beginIndex); } if (endIndex > length()) { throw new StringIndexOutOfBoundsException(endIndex); } if (beginIndex > endIndex) { throw new StringIndexOutOfBoundsException(endIndex - beginIndex); } return new Sequence(-1, this.name == null ? null : this.name + "[part]", getSequence().substring(beginIndex, endIndex)); } /** * Contact two sequences. * @param sequence sequence to contact * @return a new sequence object with the sequence of the current object and * the sequence of the input sequence */ public Sequence concat(final Sequence sequence) { if (sequence == null || sequence.getSequence() == null) { return new Sequence(this); } final Sequence result = new Sequence(); result.name = this.name + "[merged]"; result.alphabet = this.alphabet; if (this.sequence == null) { result.sequence = sequence.sequence; } else { result.sequence = this.sequence + sequence.sequence; } return result; } /** * Count the number of times of a non overlapping sequence is found in the * current sequence. * @param sequence query sequence * @return the number of time that query sequence was found. */ public int countSequence(final Sequence sequence) { if (sequence == null) { return 0; } return countSequence(sequence.getSequence()); } /** * Count the number of times o fa non overlapping string is found in the * current sequence. * @param s query string * @return the number of time that query sequence was found. */ public int countSequence(final String s) { if (s == null || this.sequence == null || s.length() == 0) { return 0; } int count = 0; int index = 0; while ((index = this.sequence.indexOf(s, index)) != -1) { count++; index += s.length(); } return count; } // // Other methods // /** * Get the tm of the sequence. * @return the tm of the sequence */ public final float getTm() { return getTm(50, 50); } /** * Get the tm of the sequence. * @param dnac DNA concentration [nM] * @param saltc salt concentration [mM * @return the tm temp for the sequence */ public final float getTm(final float dnac, final float saltc) { return MeltingTemp.tmstalucDNA(this.sequence, dnac, saltc); } /** * Get the GC percent for the sequence. * @return the GC percent for the sequence */ public final double getGCPercent() { if (this.sequence == null) { return Double.NaN; } final int len = this.sequence.length(); int count = 0; for (int i = 0; i < len; i++) { if (this.sequence.charAt(i) == 'G' || this.sequence.charAt(i) == 'C') { count++; } } return (double) count / (double) len; } /** * Set the sequence as the reverse. */ public final void reverse() { this.sequence = reverse(this.sequence); } /** * Get the sequence as the reverse. * @param sequence sequence to reverse complement * @return the reverse complement sequence */ public static final String reverse(final String sequence) { if (sequence == null) { return null; } final char[] array = sequence.toCharArray(); final int len = array.length; final StringBuilder sb = new StringBuilder(len); for (int i = len - 1; i >= 0; i--) { sb.append(array[i]); } return sb.toString(); } /** * Set the sequence as the complement. */ public final void complement() { this.sequence = complement(this.sequence, this.alphabet); } /** * Get the sequence as the complement. This method work only with * A,T,G and C bases. * @param sequence sequence to reverse complement * @param alphabet alphabet of the sequence to reverse complement * @return the reverse complement sequence */ public static final String complement(final String sequence, final Alphabet alphabet) { if (sequence == null || alphabet == null) { return null; } final char[] array = sequence.toCharArray(); final int len = array.length; final StringBuilder sb = new StringBuilder(len); for (int i = 0; i < array.length; i++) { sb.append(alphabet.getComplement(array[i])); } return sb.toString(); } /** * Set the sequence as the reverse complement. */ public final void reverseComplement() { this.sequence = reverseComplement(this.sequence, this.alphabet); } /** * Get the sequence as the reverse complement. This method work only with * A,T,G and C bases. * @param sequence sequence to reverse complement * @param alphabet alphabet of the sequence to reverse complement * @return the reverse complement sequence */ public static final String reverseComplement(final String sequence, final Alphabet alphabet) { if (sequence == null || alphabet == null) { return null; } final char[] array = sequence.toCharArray(); final int len = array.length; final StringBuilder sb = new StringBuilder(len); for (int i = len - 1; i >= 0; i--) { sb.append(alphabet.getComplement(array[i])); } return sb.toString(); } // // Output methods // /** * Return the sequence object in string in Fasta format. * @return the sequence in Fasta format */ public String toFasta() { return '>' + (this.name == null ? "" : this.name) + '\n' + (this.sequence == null ? "" : this.sequence + '\n'); } /** * Return the sequence object in string in Fasta format with a maximal width * for lines. * @return the sequence in Fasta format */ public String toFasta(final int width) { if (width < 1) { return toFasta(); } final StringBuilder sb = new StringBuilder(); sb.append('>'); sb.append(this.name); sb.append('\n'); final int len = this.sequence.length(); int pos = 0; while (pos < len) { final int nextPos = pos + width; sb.append(this.sequence.subSequence(pos, nextPos > len ? len : nextPos)); sb.append('\n'); pos = nextPos; } return sb.toString(); } // // Parser methods // /** * Parse one fastq sequence * @param s string to parse */ public void parseFasta(final String s) { if (s == null || s.trim().length() == 0) { setName(null); setSequence(null); return; } final String[] lines = s.split("\n"); String name = null; final StringBuilder seq = new StringBuilder(); boolean first = true; for (String line : lines) { final String trimmed = line.trim(); if (first) { first = false; if (!trimmed.startsWith(">")) { break; } name = trimmed.substring(1).trim(); } else { if (trimmed.startsWith(">")) { break; } seq.append(trimmed); } } if (seq.length() > 0) { setName(name); setSequence(seq.toString()); } else { setName(null); setSequence(null); } } // // Validation methods // /** * Validate the name field of the object. * @return true if the name field of this object is valid */ protected final boolean validateName() { return this.name != null && this.name.length() > 0; } /** * Validate the sequence field of the object. The sequence must be not null, * have a length greater than 0 and all the letter of the sequence must be in * the current alphabet. * @return true if the sequence field of this object is valid */ protected final boolean validateSequence() { final String seq = this.sequence; final int len = seq == null ? 0 : seq.length(); if (len == 0) { return false; } final Alphabet alphabet = this.alphabet; final char[] array = this.sequence.toCharArray(); for (int i = 0; i < len; i++) { if (!alphabet.isLetterValid(array[i])) { return false; } } return true; } /** * Check if the sequence is valid. To be valid a sequence must get a name and * a sequence with a length > 0. Only authorized bases are "ATGCNXatgcnx". * @return true if the sequence is validated */ public boolean validate() { return validateName() && validateSequence(); } // // Object methods // @Override public int hashCode() { return Utils.hashCode(this.id, this.name, this.description, this.alphabet, this.sequence); } @Override public boolean equals(final Object o) { if (o == this) { return true; } if (!(o instanceof Sequence)) { return false; } final Sequence that = (Sequence) o; return this.id == that.id && equal(this.name, that.name) && equal(this.description, that.description) && equal(this.alphabet, that.alphabet) && equal(this.sequence, that.sequence); } @Override public String toString() { return this.getClass().getSimpleName() + "{id=" + this.id + ", name=" + this.name + ", description=" + this.description + ", alphabet=" + this.alphabet.toString() + ", sequence=" + this.sequence + "}"; } // // Constructor // /** * Public constructor. */ public Sequence() { } /** * Public constructor. * @param id identifier of the sequence * @param name Name of the sequence * @param sequence Sequence of the sequence */ public Sequence(final int id, final String name, final String sequence) { this.id = id; this.name = name; this.sequence = sequence; } /** * Public constructor. * @param id identifier * @param name Name of the sequence * @param sequence Sequence of the sequence * @param description Description of the sequence */ public Sequence(final int id, final String name, final String sequence, final String description) { this.id = id; this.name = name; this.sequence = sequence; this.description = description; } /** * Public constructor * @param sequence Sequence object which value will be used in the new object */ public Sequence(final Sequence sequence) { if (sequence == null) { throw new NullPointerException("Sequence is null"); } this.id = sequence.id; this.name = sequence.name; this.alphabet = sequence.alphabet; this.sequence = sequence.sequence; this.description = sequence.description; } }