/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.analysis;
import picard.metrics.MultilevelMetrics;
/**
* High level metrics about the alignment of reads within a SAM file, produced by
* the CollectAlignmentSummaryMetrics program and usually stored in a file with
* the extension ".alignment_summary_metrics".
*/
public class AlignmentSummaryMetrics extends MultilevelMetrics {
public enum Category { UNPAIRED, FIRST_OF_PAIR, SECOND_OF_PAIR, PAIR }
/**
* One of either UNPAIRED (for a fragment run), FIRST_OF_PAIR when metrics are for only the
* first read in a paired run, SECOND_OF_PAIR when the metrics are for only the second read
* in a paired run or PAIR when the metrics are aggregated for both first and second reads
* in a pair.
*/
public Category CATEGORY;
/**
* The total number of reads including all PF and non-PF reads. When CATEGORY equals PAIR
* this value will be 2x the number of clusters.
*/
public long TOTAL_READS;
/** The number of PF reads where PF is defined as passing Illumina's filter. */
public long PF_READS;
/** The fraction of reads that are PF (PF_READS / TOTAL_READS) */
public double PCT_PF_READS;
/**
* The number of PF reads that are marked as noise reads. A noise read is one which is composed
* entirely of A bases and/or N bases. These reads are marked as they are usually artifactual and
* are of no use in downstream analysis.
*/
public long PF_NOISE_READS;
/**
* The number of PF reads that were aligned to the reference sequence. This includes reads that
* aligned with low quality (i.e. their alignments are ambiguous).
*/
public long PF_READS_ALIGNED;
/**
* The percentage of PF reads that aligned to the reference sequence. PF_READS_ALIGNED / PF_READS
*/
public double PCT_PF_READS_ALIGNED;
/**
* The total number of aligned bases, in all mapped PF reads, that are aligned to the reference sequence.
*/
public long PF_ALIGNED_BASES;
/**
* The number of PF reads that were aligned to the reference sequence with a mapping quality of
* Q20 or higher signifying that the aligner estimates a 1/100 (or smaller) chance that the
* alignment is wrong.
*/
public long PF_HQ_ALIGNED_READS;
/**
* The number of bases aligned to the reference sequence in reads that were mapped at high
* quality. Will usually approximate PF_HQ_ALIGNED_READS * READ_LENGTH but may differ when
* either mixed read lengths are present or many reads are aligned with gaps.
*/
public long PF_HQ_ALIGNED_BASES;
/**
* The subset of PF_HQ_ALIGNED_BASES where the base call quality was Q20 or higher.
*/
public long PF_HQ_ALIGNED_Q20_BASES;
/**
* The median number of mismatches versus the reference sequence in reads that were aligned
* to the reference at high quality (i.e. PF_HQ_ALIGNED READS).
*/
public double PF_HQ_MEDIAN_MISMATCHES;
/**
* The rate of bases mismatching the reference for all bases aligned to the reference sequence.
*/
public double PF_MISMATCH_RATE;
/**
* The fraction of bases that mismatch the reference in PF HQ aligned reads.
*/
public double PF_HQ_ERROR_RATE;
/**
* The number of insertion and deletion events per 100 aligned bases. Uses the number of events
* as the numerator, not the number of inserted or deleted bases.
*/
public double PF_INDEL_RATE;
/**
* The mean read length of the set of reads examined. When looking at the data for a single lane with
* equal length reads this number is just the read length. When looking at data for merged lanes with
* differing read lengths this is the mean read length of all reads.
*/
public double MEAN_READ_LENGTH;
/**
* The number of aligned reads whose mate pair was also aligned to the reference.
*/
public long READS_ALIGNED_IN_PAIRS;
/**
* The fraction of reads whose mate pair was also aligned to the reference.
* READS_ALIGNED_IN_PAIRS / PF_READS_ALIGNED
*/
public double PCT_READS_ALIGNED_IN_PAIRS;
/**
* The number of (primary) aligned reads that are **not** "properly" aligned in pairs (as per SAM flag 0x2).
*/
public long PF_READS_IMPROPER_PAIRS;
/**
* The fraction of (primary) reads that are *not* "properly" aligned in pairs (as per SAM flag 0x2).
* PF_READS_IMPROPER_PAIRS / PF_READS_ALIGNED
*/
public double PCT_PF_READS_IMPROPER_PAIRS;
/**
* The number of instrument cycles in which 80% or more of base calls were no-calls.
*/
public long BAD_CYCLES;
/**
* The number of PF reads aligned to the positive strand of the genome divided by the number of
* PF reads aligned to the genome.
*/
public double STRAND_BALANCE;
/**
* The fraction of reads that map outside of a maximum insert size (usually 100kb) or that have
* the two ends mapping to different chromosomes.
*/
public double PCT_CHIMERAS;
/**
* The fraction of PF reads that are unaligned and match to a known adapter sequence right from the
* start of the read.
*/
public double PCT_ADAPTER;
}