/*
 *                  Eoulsan development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public License version 2.1 or
 * later and CeCILL-C. This should be distributed with the code.
 * If you do not have a copy, see:
 *
 *      http://www.gnu.org/licenses/lgpl-2.1.txt
 *      http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
 *
 * Copyright for this code is held jointly by the Genomic platform
 * of the Institut de Biologie de l'École normale supérieure and
 * the individual authors. These should be listed in @author doc
 * comments.
 *
 * For more information on the Eoulsan project and its aims,
 * or to join the Eoulsan Google group, visit the home page
 * at:
 *
 *      http://outils.genomique.biologie.ens.fr/eoulsan
 *
 */

package fr.ens.biologie.genomique.eoulsan.bio.io;

import static fr.ens.biologie.genomique.eoulsan.bio.io.BioCharsets.FASTA_CHARSET;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import fr.ens.biologie.genomique.eoulsan.util.FileUtils;

/**
 * This class allows parsing FASTA files line by line without storing the
 * whole sequence in memory. Each call to
 * {@link #parseNextLineAndGetSequenceName()} consumes the input up to the next
 * line of sequence data; the content of that line is then available through
 * {@link #getSequence()}.
 * <p>
 * The parser owns the reader it creates: call {@link #close()} (or use a
 * try-with-resources statement) once parsing is finished to release the
 * underlying stream or file.
 * </p>
 * @since 1.1
 * @author Laurent Jourdren
 */
public class FastaLineParser implements Closeable {

  // Kept package-private as in the original declaration, other classes of the
  // package may rely on it
  final BufferedReader reader;

  private String seqName;
  private String sequence;

  // Always true for plain FASTA input. For GFF input it only becomes true once
  // the "##FASTA" line has been read
  private boolean fastaSectionFound;

  /**
   * Parse the next sequence line of the FASTA file and return the current
   * sequence name.
   * <p>
   * Empty lines (after trimming) are skipped. When the parser has been created
   * for a GFF file, every line is skipped until a line starting with
   * <code>##FASTA</code> has been read. A line whose first non blank character
   * is <code>&gt;</code> defines the current sequence name (the remainder of
   * the trimmed line) and is not returned as sequence data.
   * </p>
   * @return the name of the sequence that the line just read belongs to, or
   *         <code>null</code> if the end of the input has been reached
   * @throws IOException if an error occurs while reading the FASTA file or if
   *           sequence data is found before any FASTA header
   */
  public String parseNextLineAndGetSequenceName() throws IOException {

    String line;

    while ((line = this.reader.readLine()) != null) {

      // Trim the line
      final String trim = line.trim();

      // Discard empty lines
      if (trim.isEmpty()) {
        continue;
      }

      // GFF input: ignore everything up to and including the "##FASTA" line.
      // The test is done on the raw line, so the marker must start at the
      // first column
      if (!this.fastaSectionFound) {

        if (line.startsWith("##FASTA")) {
          this.fastaSectionFound = true;
        }
        continue;
      }

      // Header line: remember the name and go on to the next line
      if (trim.charAt(0) == '>') {
        this.seqName = trim.substring(1);
        continue;
      }

      // Sequence data cannot appear before the first header
      if (this.seqName == null) {
        throw new IOException(
            "No fasta header found at the beginning of the fasta file: "
                + line);
      }

      this.sequence = trim;

      return this.seqName;
    }

    // End of input
    return null;
  }

  /**
   * Get the sequence of the last read line.
   * @return a String with the sequence trimmed, or <code>null</code> if no
   *         sequence line has been read yet
   */
  public String getSequence() {

    return this.sequence;
  }

  /**
   * Close the reader used by the parser. Closing a parser that has already
   * been closed has no effect.
   * @throws IOException if an error occurs while closing the reader
   */
  @Override
  public void close() throws IOException {

    this.reader.close();
  }

  //
  // Constructors
  //

  /**
   * Public constructor. The input is handled as a plain FASTA file.
   * @param is InputStream to use
   */
  public FastaLineParser(final InputStream is) {

    this(is, false);
  }

  /**
   * Public constructor.
   * @param is InputStream to use
   * @param gffFile the input file is a GFF file
   */
  public FastaLineParser(final InputStream is, final boolean gffFile) {

    if (is == null) {
      throw new NullPointerException("InputStream is null");
    }

    this.reader = new BufferedReader(new InputStreamReader(is, FASTA_CHARSET));

    // In a plain FASTA file there is no "##FASTA" line to wait for
    this.fastaSectionFound = !gffFile;
  }

  /**
   * Public constructor. The input is handled as a plain FASTA file.
   * @param file File to use
   * @throws FileNotFoundException if the file cannot be opened
   */
  public FastaLineParser(final File file) throws FileNotFoundException {

    this(file, false);
  }

  /**
   * Public constructor.
   * @param file File to use
   * @param gffFile the input file is a GFF file
   * @throws FileNotFoundException if the file cannot be opened
   */
  public FastaLineParser(final File file, final boolean gffFile)
      throws FileNotFoundException {

    if (file == null) {
      throw new NullPointerException("File is null");
    }

    this.reader = FileUtils.createBufferedReader(file, FASTA_CHARSET);

    // In a plain FASTA file there is no "##FASTA" line to wait for
    this.fastaSectionFound = !gffFile;
  }

  /**
   * Public constructor. The input is handled as a plain FASTA file.
   * @param filename File to use
   * @throws FileNotFoundException if the file cannot be opened
   */
  public FastaLineParser(final String filename) throws FileNotFoundException {

    this(filename, false);
  }

  /**
   * Public constructor.
   * @param filename File to use
   * @param gffFile the input file is a GFF file
   * @throws FileNotFoundException if the file cannot be opened
   */
  public FastaLineParser(final String filename, final boolean gffFile)
      throws FileNotFoundException {

    // Same explicit argument check as the InputStream and File constructors
    if (filename == null) {
      throw new NullPointerException("Filename is null");
    }

    this.reader = FileUtils.createBufferedReader(filename, FASTA_CHARSET);

    // In a plain FASTA file there is no "##FASTA" line to wait for
    this.fastaSectionFound = !gffFile;
  }

}