/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.bio; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SAMTextHeaderCodec; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.util.ArrayList; import java.util.List; import fr.ens.biologie.genomique.eoulsan.data.DataFile; /** * This class define utility methods for SAM objects * @since 1.2 * @author Laurent Jourdren */ public class SAMUtils { /** * Read the SAM header of a SAM file. * @param file file to read * @return a String with the SAM header * @throws FileNotFoundException if the file cannot be found */ public static String readSAMHeader(final File file) throws FileNotFoundException { if (file == null) { throw new NullPointerException("The file is null"); } return readSAMHeader(new FileInputStream(file)); } /** * Read the SAM header of a SAM file. * @param dataFile file to read * @return a String with the SAM header * @throws IOException if an error occurs while reading the file */ public static String readSAMHeader(final DataFile dataFile) throws IOException { if (dataFile == null) { throw new NullPointerException("The data file is null"); } return readSAMHeader(dataFile.open()); } /** * Read the SAM header of a SAM file. * @param is input stream * @return a String with the SAM header */ public static String readSAMHeader(final InputStream is) { if (is == null) { throw new NullPointerException("The input stream is null."); } // Read SAM file header final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(is)); final SAMFileHeader header = reader.getFileHeader(); // Close reader // reader.close(); final StringWriter headerTextBuffer = new StringWriter(); new SAMTextHeaderCodec().encode(headerTextBuffer, header); return headerTextBuffer.toString(); } /** * Create a GenomeDescription object from a SAM header. * @param file SAM file witch header must be read * @return a new GenomeDescription object with the name and chromosomes length * defined in the SAM header * @throws FileNotFoundException if the file cannot be found */ public static GenomeDescription createGenomeDescriptionFromSAM( final File file) throws FileNotFoundException { if (file == null) { throw new NullPointerException("The file is null"); } return createGenomeDescriptionFromSAM(new FileInputStream(file)); } /** * Create a GenomeDescription object from a SAM header. * @param dataFile SAM file witch header must be read * @return a new GenomeDescription object with the name and chromosomes length * defined in the SAM header * @throws IOException if an error occurs while reading the file */ public static GenomeDescription createGenomeDescriptionFromSAM( final DataFile dataFile) throws IOException { if (dataFile == null) { throw new NullPointerException("The data file is null"); } return createGenomeDescriptionFromSAM(dataFile.open()); } /** * Create a GenomeDescription object from a SAM header. * @param is InputStream of the SAM file witch header must be read * @return a new GenomeDescription object with the name and chromosomes length * defined in the SAM header */ public static GenomeDescription createGenomeDescriptionFromSAM( final InputStream is) { return createGenomeDescriptionFromSAM(readSAMHeader(is)); } /** * Create a GenomeDescription object from a SAM header. * @param header SAM header in a String * @return a new GenomeDescription object with the name and chromosomes length * defined in the SAM header */ public static GenomeDescription createGenomeDescriptionFromSAM( final String header) { if (header == null) { return null; } final GenomeDescription desc = new GenomeDescription(); final String prefix = "@SQ\tSN:"; for (String line : header.split("\n")) { if (!line.startsWith(prefix)) { continue; } final String[] fields = line.substring(prefix.length()).split("\tLN:"); if (fields.length == 2) { desc.addSequence(fields[0], Integer.parseInt(fields[1])); } } return desc; } /** * Create a GenomeDescription object from a SAMFileHeader object. * @param header SAM header object * @return a new GenomeDescription object with the name and chromosomes length * defined in the SAM header */ public static GenomeDescription createGenomeDescriptionFromSAM( final SAMFileHeader header) { if (header == null) { return null; } final GenomeDescription desc = new GenomeDescription(); if (header.getSequenceDictionary() == null) { return desc; } for (SAMSequenceRecord seq : header.getSequenceDictionary() .getSequences()) { desc.addSequence(seq.getSequenceName(), seq.getSequenceLength()); } return desc; } /** * Create a GenomeDescription object from a SAMFileHeader object. * @param samRecord header SAM header object * @return a new GenomeDescription object with the name and chromosomes length * defined in the SAM header */ public static GenomeDescription createGenomeDescriptionFromSAM( final SAMRecord samRecord) { if (samRecord == null) { return null; } return createGenomeDescriptionFromSAM(samRecord.getHeader()); } /** * Convert a GenomeDescription object to a SAMSequenceDictionary object. * @param genomeDescription genomeDescription object to convert * @return a new SAMSequenceDictionary object with chromosomes name and size * from the GenomeDescription object */ public static SAMSequenceDictionary newSAMSequenceDictionary( final GenomeDescription genomeDescription) { if (genomeDescription == null) { throw new NullPointerException("The genome description is null."); } final List<SAMSequenceRecord> sequences = new ArrayList<>(); for (String sequenceName : genomeDescription.getSequencesNames()) { final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord(sequenceName, (int) genomeDescription.getSequenceLength(sequenceName)); sequences.add(sequenceRecord); } return new SAMSequenceDictionary(sequences); } /** * Convert a GenomeDescription object to a SAMFileHeader object. * @param genomeDescription genomeDescription object to convert * @return a new SAMFileHeader object with chromosomes name and size from the * GenomeDescription object */ public static SAMFileHeader newSAMFileHeader( final GenomeDescription genomeDescription) { final SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(newSAMSequenceDictionary(genomeDescription)); return header; } }