/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.io.comparators; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import java.io.IOException; import java.io.InputStream; import java.util.Collection; import java.util.HashSet; import java.util.Set; import com.google.common.collect.Sets; import fr.ens.biologie.genomique.eoulsan.util.EnhancedBloomFilter; /** * This class allow compare two BAM file with use BloomFilter. * @since 2.0 * @author Sandrine Perrin */ public class BAMComparator extends AbstractComparatorWithBloomFilter { public static final String COMPARATOR_NAME = "BAMComparator"; private static final Collection<String> EXTENSIONS = Sets.newHashSet(".bam"); final Set<String> tagsToNotCompare; private int numberElementsCompared; @Override public boolean compareFiles(final EnhancedBloomFilter filter, final InputStream in) throws IOException { String line = null; this.numberElementsCompared = 0; // Create Bam reader final SamReader bamReader = SamReaderFactory.makeDefault().open(SamInputResource.of(in)); // Get iterator on file // Parse file for (SAMRecord r : bamReader) { // Convert in SAM line = r.getSAMString(); this.numberElementsCompared++; // Header if (line.charAt(0) == '@') { // Skip specified tag in header sam file if (!this.tagsToNotCompare.contains(getTag(line))) { if (!filter.mightContain(line)) { // Save line occurs fail comparison setCauseFailComparison(line); // Close reader bamReader.close(); return false; } } } else { // Line if (!filter.mightContain(line)) { // Save line occurs fail comparison setCauseFailComparison(line); // Close reader bamReader.close(); return false; } } } // Close reader bamReader.close(); // Check count element is the same between two files if (this.numberElementsCompared != filter.getAddedNumberOfElements()) { setCauseFailComparison("Different count elements " + this.numberElementsCompared + " was " + filter.getAddedNumberOfElements() + " expected."); return false; } return true; } @Override protected EnhancedBloomFilter buildBloomFilter(final InputStream is) throws IOException { // Create filter final EnhancedBloomFilter filter = initBloomFilter(getExpectedNumberOfElements()); // Parse BAM file try (final SamReader bamReader = SamReaderFactory.makeDefault().open(SamInputResource.of(is))) { for (SAMRecord aBamReader : bamReader) { // Convert in line in SAM and save in filter filter.put(aBamReader.getSAMString()); } } catch (final Exception e) { throw new IOException("Fail read BAM file exception: " + e.getMessage()); } return filter; } // // Other methods // private static String getTag(final String samHeaderLine) { if (samHeaderLine.length() == 0) { return ""; } final int pos = samHeaderLine.indexOf('\t'); if (pos == -1) { return samHeaderLine.substring(1); } return samHeaderLine.substring(1, pos); } @Override public String getName() { return COMPARATOR_NAME; } @Override public Collection<String> getExtensions() { return EXTENSIONS; } @Override public int getNumberElementsCompared() { return this.numberElementsCompared; } // // Constructor // /** * Public constructor * @param useSerializeFile true if it needed to save BloomFilter in file with * extension '.ser' */ public BAMComparator(final boolean useSerializeFile) { super(useSerializeFile); this.tagsToNotCompare = new HashSet<>(); } /** * Public constructor, specify all headers tags not used to compare. * @param useSerializeFile true if it needed to save BloomFilter in file with * extension '.ser' * @param headersTags all headers tags */ public BAMComparator(final boolean useSerializeFile, final String... headersTags) { super(useSerializeFile); if (headersTags == null) { throw new NullPointerException("headersTags is null"); } this.tagsToNotCompare = Sets.newHashSet(headersTags); } }