/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.checkers;
import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.READS_FASTQ;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.bio.BadBioEntryException;
import fr.ens.biologie.genomique.eoulsan.bio.FastqFormat;
import fr.ens.biologie.genomique.eoulsan.bio.IlluminaReadId;
import fr.ens.biologie.genomique.eoulsan.bio.ReadSequence;
import fr.ens.biologie.genomique.eoulsan.bio.io.FastqReader;
import fr.ens.biologie.genomique.eoulsan.core.Parameter;
import fr.ens.biologie.genomique.eoulsan.data.Data;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormat;
/**
* This class define a checker on FASTQ files.
* @since 1.0
* @author Laurent Jourdren
*/
public class ReadsChecker implements Checker {
public static final int MAX_READS_TO_CHECK = 1000;
@Override
public String getName() {
return "reads_checker";
}
@Override
public DataFormat getFormat() {
return READS_FASTQ;
}
@Override
public Set<DataFormat> getCheckersRequired() {
return Collections.emptySet();
}
@Override
public void configure(final Set<Parameter> stepParameters)
throws EoulsanException {
}
@Override
public boolean check(final Data data, final CheckStore checkInfo)
throws EoulsanException {
if (data == null) {
throw new NullPointerException("The sample is null");
}
if (checkInfo == null) {
throw new NullPointerException("The check info info is null");
}
final int inFileCount = data.getDataFileCount();
if (inFileCount < 1) {
throw new EoulsanException("No reads file found.");
}
if (inFileCount > 2) {
throw new EoulsanException(
"Cannot handle more than 2 reads files at the same time.");
}
// Get FASTQ format
final FastqFormat format = data.getMetadata().getFastqFormat();
// Single end mode
if (inFileCount == 1) {
checkReadFile(data.getDataFile(0), format);
}
// Paired end mode
if (inFileCount == 2) {
checkReadFile(data.getDataFile(0), format, true, 1);
checkReadFile(data.getDataFile(1), format, true, 2);
}
return true;
}
private void checkReadFile(final DataFile file, final FastqFormat format)
throws EoulsanException {
checkReadFile(file, format, false, -1);
}
private void checkReadFile(final DataFile file, final FastqFormat format,
final boolean checkPairMember, final int pairMember)
throws EoulsanException {
// If the file does not exists do nothing
if (!file.exists()) {
return;
}
final InputStream is;
try {
is = file.open();
checkReadsFile(is, MAX_READS_TO_CHECK, format, checkPairMember,
pairMember);
} catch (IOException e) {
throw new EoulsanException("Error while reading reads of sample "
+ file.getSource() + " for checking: " + e.getMessage(), e);
} catch (BadBioEntryException e) {
throw new EoulsanException("Found bad read entry in sample "
+ file.getSource() + " (cause: " + e.getMessage()
+ ") when checking: " + e.getEntry(), e);
}
}
private boolean checkReadsFile(final InputStream is, final int maxReadToCheck,
final FastqFormat format, final boolean checkPairMember,
final int pairMember) throws IOException, BadBioEntryException {
final FastqReader reader = new FastqReader(is);
int count = 0;
for (final ReadSequence read : reader) {
if (count > maxReadToCheck) {
break;
}
// For the first read check the id
if (checkPairMember && count == 0) {
final String readId = read.getName();
int readPairMember = -1;
try {
final IlluminaReadId irid = new IlluminaReadId(readId);
readPairMember = irid.getPairMember();
if (readPairMember != pairMember) {
throw new BadBioEntryException(
"Invalid pair member number, " + pairMember + " was excepted",
read.getName());
}
// check the quality string
if (format != null) {
final int invalidChar = format.findInvalidChar(read.getQuality());
if (invalidChar != -1) {
throw new BadBioEntryException(
"Invalid quality character found for "
+ format.getName() + " format: " + (char) invalidChar,
read.getQuality());
}
}
readPairMember = irid.getPairMember();
} catch (EoulsanException e) {
// Not an Illumina id
if (readId.endsWith("/1")) {
readPairMember = 1;
} else if (readId.endsWith("/2")) {
readPairMember = 2;
}
}
if (readPairMember > 0 && readPairMember != pairMember) {
reader.close();
throw new BadBioEntryException(
"Invalid pair member number, " + pairMember + " was excepted",
read.getName());
}
}
// check the quality string
if (format != null) {
final int invalidChar = format.findInvalidChar(read.getQuality());
if (invalidChar != -1) {
reader.close();
throw new BadBioEntryException(
"Invalid quality character found for "
+ format.getName() + " format: " + (char) invalidChar,
read.getQuality());
}
}
count++;
}
reader.throwException();
reader.close();
return true;
}
}