/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.bio;
import static fr.ens.biologie.genomique.eoulsan.util.StringUtils.trim;
import static fr.ens.biologie.genomique.eoulsan.util.Utils.equal;
import fr.ens.biologie.genomique.eoulsan.util.Utils;
/**
* This class defines a read sequence, i.e. a sequence with a quality string.
* @since 1.0
* @author Laurent Jourdren
*/
public final class ReadSequence extends Sequence {
// Fastq format used to interpret the characters of the quality string;
// defaults to the Sanger format.
private FastqFormat fastqFormat = FastqFormat.FASTQ_SANGER;
// Quality string of the read; may be null (methods of this class test it).
private String quality;
//
// Getters
//
/**
 * Get the fastq format currently associated with this read.
 * @return the fastq format of the read
 */
public final FastqFormat getFastqFormat() {
  return fastqFormat;
}
/**
 * Get the quality string of the read.
 * @return a string with the quality, as currently stored in this object
 */
public final String getQuality() {
  return quality;
}
//
// Setters
//
/**
 * Set the fastq format value.
 * @param fastqFormat the fastq format to set, must not be null
 * @throws NullPointerException if the fastqFormat argument is null
 */
public final void setFastqFormat(final FastqFormat fastqFormat) {
  // requireNonNull throws a NullPointerException carrying the given message
  this.fastqFormat =
      java.util.Objects.requireNonNull(fastqFormat, "The FastqFormat is null");
}
/**
 * Set the quality string of the read. The value is passed through
 * StringUtils.trim() before being stored.
 * @param quality quality string to set
 */
public final void setQuality(final String quality) {
  final String trimmedQuality = trim(quality);
  this.quality = trimmedQuality;
}
/**
 * Copy the id, name, sequence, quality and fastq format of another
 * ReadSequence into this object using the corresponding setters. Nothing is
 * done if the argument is null.
 * @param rs ReadSequence whose values must be copied in this ReadSequence
 */
public final void set(final ReadSequence rs) {
  if (rs != null) {
    setId(rs.getId());
    setName(rs.getName());
    setSequence(rs.getSequence());
    setQuality(rs.getQuality());
    setFastqFormat(rs.getFastqFormat());
  }
}
//
// Quality methods
//
/**
 * Convert each character of the quality string to a score using the
 * getScore() method of the current fastq format.
 * @return an array with one score per character of the quality string, or
 *         null if the quality is null
 */
public int[] qualityScores() {
  final String qualityString = this.quality;
  if (qualityString == null) {
    return null;
  }
  final FastqFormat encoding = this.fastqFormat;
  final int[] scores = new int[qualityString.length()];
  for (int pos = 0; pos < scores.length; pos++) {
    scores[pos] = encoding.getScore(qualityString.charAt(pos));
  }
  return scores;
}
/**
 * Convert each character of the quality string to a probability using the
 * getProbability() method of the current fastq format.
 * @return an array with one probability per character of the quality
 *         string, or null if the quality is null
 */
public double[] errorProbabilities() {
  final String qualityString = this.quality;
  if (qualityString == null) {
    return null;
  }
  final FastqFormat encoding = this.fastqFormat;
  final double[] probabilities = new double[qualityString.length()];
  for (int pos = 0; pos < probabilities.length; pos++) {
    probabilities[pos] = encoding.getProbability(qualityString.charAt(pos));
  }
  return probabilities;
}
//
// Sequence methods
//
/**
 * Create a sub-sequence from the current read. Indexes start at 0; the
 * begin index is inclusive and the end index is exclusive, as for
 * String.substring(). The same range is extracted from the sequence and from
 * the quality string. The new read gets the id -1, the name of this read
 * suffixed with "[part]" (or null if this read has no name) and the fastq
 * format of this read.
 * @param beginIndex begin index of the sub-sequence
 * @param endIndex end index of the sub-sequence
 * @return a new read with the requested part of the current read, or null if
 *         the sequence or the quality is null or if they do not have the
 *         same length
 * @throws StringIndexOutOfBoundsException if beginIndex is negative, if
 *           endIndex is greater than length() or if beginIndex is greater
 *           than endIndex
 */
@Override
public ReadSequence subSequence(final int beginIndex, final int endIndex) {
  final String seq = this.sequence;
  final String qual = this.quality;
  if (seq == null || qual == null || seq.length() != qual.length()) {
    return null;
  }
  if (beginIndex < 0) {
    throw new StringIndexOutOfBoundsException(beginIndex);
  }
  if (endIndex > length()) {
    throw new StringIndexOutOfBoundsException(endIndex);
  }
  if (beginIndex > endIndex) {
    throw new StringIndexOutOfBoundsException(endIndex - beginIndex);
  }
  final String partName = this.name == null ? null : this.name + "[part]";
  return new ReadSequence(-1, partName, seq.substring(beginIndex, endIndex),
      qual.substring(beginIndex, endIndex), this.fastqFormat);
}
/**
 * Concatenate two ReadSequences. The result takes the fastq format and the
 * alphabet of the current object. Its name is the name of the current object
 * suffixed with "[merged]"; when the current object has no name, no name is
 * set on the result.
 * <p>
 * If the argument is null, the result contains the sequence and the quality
 * of the current object only. Otherwise sequences (and, independently,
 * qualities) are appended; when one of the two values is null the other one
 * is used alone.
 * @param sequence sequence to concatenate, may be null
 * @return a new sequence object with the sequence of the current object
 *         followed by the sequence of the input sequence
 */
public ReadSequence concat(final ReadSequence sequence) {
  final ReadSequence result = new ReadSequence();
  // Do not build the literal "null[merged]" name when this read is unnamed
  if (this.name != null) {
    result.setName(this.name + "[merged]");
  }
  result.fastqFormat = this.fastqFormat;
  result.alphabet = this.alphabet;
  if (sequence == null) {
    result.sequence = this.sequence;
    result.quality = this.quality;
    return result;
  }
  if (this.sequence == null) {
    result.sequence = sequence.sequence;
  } else if (sequence.sequence == null) {
    result.sequence = this.sequence;
  } else {
    result.sequence = this.sequence + sequence.sequence;
  }
  if (this.quality == null) {
    result.quality = sequence.quality;
  } else if (sequence.quality == null) {
    result.quality = this.quality;
  } else {
    result.quality = this.quality + sequence.quality;
  }
  return result;
}
//
// Output methods
//
/**
 * Return the read in FastQ format, without repeating the name on the third
 * line of the entry.
 * @return a String with the read in FastQ format, or null if the name, the
 *         sequence or the quality is null
 */
public final String toFastQ() {
  return toFastQ(false);
}
/**
 * Return the read in FastQ format.
 * @param repeatId true to repeat the name of the read after the '+' of the
 *          third line of the fastq entry
 * @return a String with the read in FastQ format, or null if the name, the
 *         sequence or the quality is null
 */
public final String toFastQ(final boolean repeatId) {
  return ReadSequence.toFastQ(this.name, this.sequence, this.quality,
      repeatId);
}
/**
 * Format a read in FastQ format, without repeating the name on the third
 * line of the entry.
 * @param name Name of the read
 * @param sequence Sequence of the read
 * @param quality Quality of the read
 * @return a String with the read in FastQ format, or null if one of the
 *         arguments is null
 */
public static final String toFastQ(final String name, final String sequence,
    final String quality) {
  final boolean repeatId = false;
  return toFastQ(name, sequence, quality, repeatId);
}
/**
 * Format a read in FastQ format. The result contains four lines separated by
 * '\n' with no trailing line break: '@' followed by the name, the sequence,
 * '+' (optionally followed by the name) and the quality.
 * @param name Name of the read
 * @param sequence Sequence of the read
 * @param quality Quality of the read
 * @param repeatId true to repeat the name after the '+' of the third line
 * @return a String with the read in FastQ format, or null if the name, the
 *         sequence or the quality is null
 */
public static final String toFastQ(final String name, final String sequence,
    final String quality, final boolean repeatId) {
  if (name == null || sequence == null || quality == null) {
    return null;
  }
  final StringBuilder sb = new StringBuilder();
  sb.append('@').append(name).append('\n');
  sb.append(sequence).append('\n');
  sb.append('+');
  if (repeatId) {
    sb.append(name);
  }
  sb.append('\n').append(quality);
  return sb.toString();
}
/**
 * Return the read in TFQ format (name, sequence and quality separated by
 * tabulations), with the name included.
 * @return a String with the read in TFQ format, or null if the name, the
 *         sequence or the quality is null
 */
public final String toTFQ() {
  return toTFQ(true, this.name, this.sequence, this.quality);
}
/**
 * Return the read in TFQ format (name, sequence and quality separated by
 * tabulations).
 * @param withId true if the name must be added at the beginning of the result
 * @return a String with the read in TFQ format, or null if the name, the
 *         sequence or the quality is null
 */
public final String toTFQ(final boolean withId) {
  return ReadSequence.toTFQ(withId, this.name, this.sequence, this.quality);
}
/**
 * Format a read in TFQ format (name, sequence and quality separated by
 * tabulations), with the name included.
 * @param name Name of the read
 * @param sequence Sequence of the read
 * @param quality Quality of the read
 * @return a String with the read in TFQ format, or null if one of the
 *         arguments is null
 */
public static final String toTFQ(final String name, final String sequence,
    final String quality) {
  final boolean withId = true;
  return toTFQ(withId, name, sequence, quality);
}
/**
 * Format a read in TFQ format: the name, the sequence and the quality
 * separated by tabulations. When the name is not requested the result still
 * starts with a tabulation (empty first field).
 * @param withId true if the name must be added at the beginning of the result
 * @param name Name of the read
 * @param sequence Sequence of the read
 * @param quality Quality of the read
 * @return a String with the read in TFQ format, or null if the name, the
 *         sequence or the quality is null (the name is checked even when
 *         withId is false)
 */
public static final String toTFQ(final boolean withId, final String name,
    final String sequence, final String quality) {
  if (name == null || sequence == null || quality == null) {
    return null;
  }
  final StringBuilder sb = new StringBuilder();
  if (withId) {
    sb.append(name);
  }
  sb.append('\t').append(sequence);
  sb.append('\t').append(quality);
  return sb.toString();
}
/**
 * Return the key of the read for a key/value output: the name of the read.
 * @return a string with the name of the read as the key
 */
public final String toOutKey() {
  final String key = this.name;
  return key;
}
/**
 * Return the value of the read for a key/value output: the sequence and the
 * quality separated by a tabulation. No null check is done, a null field is
 * written as the text "null".
 * @return a string with the sequence and the quality of the read as the value
 */
public final String toOutValue() {
  final StringBuilder sb = new StringBuilder();
  sb.append(this.sequence);
  sb.append("\t");
  sb.append(this.quality);
  return sb.toString();
}
//
// Parsing methods
//
/**
 * Parse a FastQ entry and store its name, sequence and quality in this
 * object. The entry must contain at least four lines separated by '\n': the
 * header (its first character is skipped, it is not checked to be '@'), the
 * sequence, the separator line and the quality. The content of the separator
 * line is ignored, as well as everything after the fourth line. The fields
 * are assigned directly, without going through the setters.
 * <p>
 * The entry is checked before any field is modified, so this object is left
 * untouched when the entry is malformed.
 * @param fastQ FastQ entry to parse, nothing is done if null
 * @throws StringIndexOutOfBoundsException if the entry starts with a line
 *           break or contains less than three line breaks
 */
public final void parseFastQ(final String fastQ) {
  if (fastQ == null) {
    return;
  }
  // Stop searching as soon as a line break is missing: restarting indexOf()
  // from offset 0 after a -1 would find the first line break again and
  // silently produce wrong fields.
  final int indexCR1 = fastQ.indexOf('\n');
  final int indexCR2 =
      indexCR1 == -1 ? -1 : fastQ.indexOf('\n', indexCR1 + 1);
  final int indexCR3 =
      indexCR2 == -1 ? -1 : fastQ.indexOf('\n', indexCR2 + 1);
  if (indexCR1 < 1 || indexCR3 == -1) {
    throw new StringIndexOutOfBoundsException(
        "Invalid FastQ entry: 4 lines starting with a non empty header line are expected");
  }
  // The 4th line break is optional: the quality may end the string
  final int indexCR4 = fastQ.indexOf('\n', indexCR3 + 1);
  this.name = fastQ.substring(1, indexCR1);
  this.sequence = fastQ.substring(indexCR1 + 1, indexCR2);
  if (indexCR4 == -1) {
    this.quality = fastQ.substring(indexCR3 + 1);
  } else {
    this.quality = fastQ.substring(indexCR3 + 1, indexCR4);
  }
}
/**
 * Parse a read made of three fields separated by tabulations (name, sequence
 * and quality) and store them in this object. Everything after the second
 * tabulation is used as the quality. The fields are assigned directly,
 * without going through the setters.
 * <p>
 * The string is checked before any field is modified, so this object is left
 * untouched when the string is malformed.
 * @param s String to parse, nothing is done if null
 * @throws StringIndexOutOfBoundsException if the string contains less than
 *           two tabulations
 */
public final void parse(final String s) {
  if (s == null) {
    return;
  }
  final int indexTab1 = s.indexOf('\t');
  final int indexTab2 = indexTab1 == -1 ? -1 : s.indexOf('\t', indexTab1 + 1);
  if (indexTab2 == -1) {
    throw new StringIndexOutOfBoundsException(
        "Invalid read entry: 3 fields separated by tabulations are expected");
  }
  this.name = s.substring(0, indexTab1);
  this.sequence = s.substring(indexTab1 + 1, indexTab2);
  this.quality = s.substring(indexTab2 + 1);
}
/**
 * Parse a read in key/value format: the key is the name of the read and the
 * value contains the sequence and the quality separated by a tabulation.
 * Everything after the first tabulation of the value is used as the quality.
 * The fields are assigned directly, without going through the setters.
 * <p>
 * The value is checked before any field is modified, so this object is left
 * untouched when the value is malformed.
 * @param key key to parse, nothing is done if null
 * @param value value to parse, nothing is done if null
 * @throws StringIndexOutOfBoundsException if the value does not contain a
 *           tabulation
 */
public final void parseKeyValue(final String key, final String value) {
  if (key == null || value == null) {
    return;
  }
  final int indexTab = value.indexOf('\t');
  if (indexTab == -1) {
    throw new StringIndexOutOfBoundsException(
        "Invalid read value: a sequence and a quality separated by a tabulation are expected");
  }
  this.name = key;
  this.sequence = value.substring(0, indexTab);
  this.quality = value.substring(indexTab + 1);
}
//
// Validation methods
//
/**
 * Check if the quality string is valid: it must be non null, non empty, have
 * the length returned by length() and contain only characters accepted by
 * the isCharValid() method of the current fastq format.
 * @return true if the quality string is validated
 */
protected boolean validateQuality() {
  final String qualityString = this.quality;
  if (qualityString == null) {
    return false;
  }
  final int qualityLength = qualityString.length();
  if (qualityLength == 0 || qualityLength != length()) {
    return false;
  }
  for (final char c : qualityString.toCharArray()) {
    if (!this.fastqFormat.isCharValid(c)) {
      return false;
    }
  }
  return true;
}
/**
 * Check if the read is valid. The name, the sequence and the quality are
 * checked in that order and the checks stop at the first failure.
 * @return true if the read is validated
 */
@Override
public boolean validate() {
  if (!validateName()) {
    return false;
  }
  if (!validateSequence()) {
    return false;
  }
  return validateQuality();
}
//
// Object methods
//
/**
 * Compute the hash code of the read from its id, name, description,
 * alphabet, sequence, quality and fastq format using Utils.hashCode().
 * @return the hash code of the read
 */
@Override
public int hashCode() {
return Utils.hashCode(this.id, this.name, this.description, this.alphabet,
this.sequence, this.quality, this.fastqFormat);
}
/**
 * Test if an object is equal to this read. The object must be a
 * ReadSequence accepted by the equals() method of the super class that also
 * has the same fastq format and the same quality string. Null fastq formats
 * and null qualities are supported and compared with null.
 * @param o object to compare with this read
 * @return true if the object is equal to this read
 */
@Override
public boolean equals(final Object o) {
  if (o == this) {
    return true;
  }
  if (!(o instanceof ReadSequence) || !super.equals(o)) {
    return false;
  }
  final ReadSequence that = (ReadSequence) o;
  // Null-safe comparison for both fields: the fastq format can be null when
  // the object was built with a null format, equals() must not throw then
  return equal(this.fastqFormat, that.fastqFormat)
      && equal(this.quality, that.quality);
}
/**
 * Return a textual representation of the read with the simple name of the
 * class followed by the value of all its fields, mainly for debugging.
 * @return a string describing the read
 */
@Override
public String toString() {
  final StringBuilder sb = new StringBuilder();
  sb.append(this.getClass().getSimpleName());
  sb.append("{id=").append(this.id);
  sb.append(", name=").append(this.name);
  sb.append(", description=").append(this.description);
  sb.append(", alphabet=").append(this.alphabet);
  sb.append(", sequence=").append(this.sequence);
  sb.append(", fastqFormat=").append(this.fastqFormat);
  sb.append(", quality=").append(this.quality);
  sb.append("}");
  return sb.toString();
}
//
// Constructor
//
/**
 * Public constructor. Create a read with the default state of the super
 * class, the read DNA alphabet and the default (Sanger) fastq format.
 */
public ReadSequence() {
  // The no-argument constructor of the super class is implicitly called
  this.alphabet = Alphabets.READ_DNA_ALPHABET;
}
/**
 * Public constructor. Create a read that uses the Sanger fastq format.
 * @param id identifier of the read
 * @param name name of the read
 * @param sequence sequence of the read
 * @param quality quality string of the read
 */
public ReadSequence(final int id, final String name, final String sequence,
    final String quality) {
  // Delegate to the full constructor with the default quality encoding
  this(id, name, sequence, quality, FastqFormat.FASTQ_SANGER);
}
/**
 * Public constructor. The read uses the read DNA alphabet. The quality and
 * the fastq format are stored as provided: unlike with setQuality() and
 * setFastqFormat(), the quality is not trimmed and a null fastq format is
 * not rejected.
 * @param id identifier of the read
 * @param name name of the read
 * @param sequence sequence of the read
 * @param quality quality string of the read
 * @param fastqFormat fastq format of the quality string
 */
public ReadSequence(final int id, final String name, final String sequence,
    final String quality, final FastqFormat fastqFormat) {
  super(id, name, sequence);
  this.alphabet = Alphabets.READ_DNA_ALPHABET;
  this.fastqFormat = fastqFormat;
  this.quality = quality;
}
}