/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.bio.readsfilters; import java.util.regex.Pattern; import fr.ens.biologie.genomique.eoulsan.EoulsanException; import fr.ens.biologie.genomique.eoulsan.bio.ReadSequence; /** * Define a filter that remove terminal polyN sequences of a read and check the * length of the resulting read. * @since 1.0 * @author Maria Bernard * @author Laurent Jourdren */ public class TrimReadFilter extends AbstractReadFilter { public static final String FILTER_NAME = "trim"; private static final Pattern PATTERN = Pattern.compile("NN+$"); private int lengthThreshold; /** * Trim the read sequence and quality if ends with polyN. * @param read Read to trim */ public static final void trim(final ReadSequence read) { if (read == null || read.getSequence() == null || read.getQuality() == null || read.getSequence().length() != read.getQuality().length() || read.getSequence().length() == 0) { return; } final String[] splitResult = PATTERN.split(read.getSequence()); // Test if the sequence contains only N nucleotides if (splitResult == null || splitResult.length == 0) { read.setSequence(""); read.setQuality(""); return; } final ReadSequence tmp = read.subSequence(0, splitResult[0].length()); read.setSequence(tmp.getSequence()); read.setQuality(tmp.getQuality()); } @Override public boolean accept(final ReadSequence read) { if (read == null) { return false; } trim(read); return read.length() > this.lengthThreshold; } @Override public String getName() { return FILTER_NAME; } @Override public String getDescription() { return "Trim ReadFilter"; } @Override public void setParameter(final String key, final String value) throws EoulsanException { if (key == null || value == null) { return; } if ("length.threshold".equals(key.trim())) { try { this.lengthThreshold = Integer.parseInt(value.trim()); } catch (NumberFormatException e) { return; } if (this.lengthThreshold < 1) { throw new EoulsanException( "Invalid length threshold: " + this.lengthThreshold); } } else { throw new EoulsanException( "Unknown parameter for " + getName() + " read filter: " + key); } } @Override public void init() { if (this.lengthThreshold < 1) { throw new IllegalArgumentException( "Length threshold is not set for " + getName() + " read filter."); } } @Override public String toString() { return this.getClass().getSimpleName() + "{lengthThreshold=" + this.lengthThreshold + "}"; } // // Constructor // /** * Public constructor. */ public TrimReadFilter() { } }