/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package htsjdk.samtools; import java.util.ArrayList; import java.util.Collections; import java.util.List; /** * A list of CigarElements, which describes how a read aligns with the reference. * E.g. the Cigar string 10M1D25M means * * match or mismatch for 10 bases * * deletion of 1 base * * match or mismatch for 25 bases * * c.f. http://samtools.sourceforge.net/SAM1.pdf for complete CIGAR specification. */ public class Cigar { private final List<CigarElement> cigarElements = new ArrayList<CigarElement>(); public Cigar() { } public Cigar(final List<CigarElement> cigarElements) { this.cigarElements.addAll(cigarElements); } public List<CigarElement> getCigarElements() { return Collections.unmodifiableList(cigarElements); } public CigarElement getCigarElement(final int i) { return cigarElements.get(i); } public void add(final CigarElement cigarElement) { cigarElements.add(cigarElement); } public int numCigarElements() { return cigarElements.size(); } public boolean isEmpty() { return cigarElements.isEmpty(); } /** * @return The number of reference bases that the read covers, excluding padding. */ public int getReferenceLength() { int length = 0; for (final CigarElement element : cigarElements) { switch (element.getOperator()) { case M: case D: case N: case EQ: case X: length += element.getLength(); } } return length; } /** * @return The number of reference bases that the read covers, including padding. */ public int getPaddedReferenceLength() { int length = 0; for (final CigarElement element : cigarElements) { switch (element.getOperator()) { case M: case D: case N: case EQ: case X: case P: length += element.getLength(); } } return length; } /** * @return The number of read bases that the read covers. */ public int getReadLength() { return getReadLength(cigarElements); } /** * @return The number of read bases that the read covers. */ public static int getReadLength(final List<CigarElement> cigarElements) { int length = 0; for (final CigarElement element : cigarElements) { if (element.getOperator().consumesReadBases()){ length += element.getLength(); } } return length; } /** * Exhaustive validation of CIGAR. * Note that this method deliberately returns null rather than Collections.emptyList() if there * are no validation errors, because callers tend to assume that if a non-null list is returned, it is modifiable. * @param readName For error reporting only. May be null if not known. * @param recordNumber For error reporting only. May be -1 if not known. * @return List of validation errors, or null if no errors. */ public List<SAMValidationError> isValid(final String readName, final long recordNumber) { if (this.isEmpty()) { return null; } List<SAMValidationError> ret = null; boolean seenRealOperator = false; for (int i = 0; i < cigarElements.size(); ++i) { final CigarElement element = cigarElements.get(i); if (element.getLength() == 0) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "CIGAR element with zero length", readName, recordNumber)); } // clipping operator can only be at start or end of CIGAR final CigarOperator op = element.getOperator(); if (isClippingOperator(op)) { if (op == CigarOperator.H) { if (i != 0 && i != cigarElements.size() - 1) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "Hard clipping operator not at start or end of CIGAR", readName, recordNumber)); } } else { if (op != CigarOperator.S) throw new IllegalStateException("Should never happen: " + op.name()); if (i == 0 || i == cigarElements.size() - 1) { // Soft clip at either end is fine } else if (i == 1) { if (cigarElements.size() == 3 && cigarElements.get(2).getOperator() == CigarOperator.H) { // Handle funky special case in which S operator is both one from the beginning and one // from the end. } else if (cigarElements.get(0).getOperator() != CigarOperator.H) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "Soft clipping CIGAR operator can only be inside of hard clipping operator", readName, recordNumber)); } } else if (i == cigarElements.size() - 2) { if (cigarElements.get(cigarElements.size() - 1).getOperator() != CigarOperator.H) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "Soft clipping CIGAR operator can only be inside of hard clipping operator", readName, recordNumber)); } } else { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "Soft clipping CIGAR operator can at start or end of read, or be inside of hard clipping operator", readName, recordNumber)); } } } else if (isRealOperator(op)) { // Must be at least one real operator (MIDN) seenRealOperator = true; // There should be an M or P operator between any pair of IDN operators if (isInDelOperator(op)) { for (int j = i+1; j < cigarElements.size(); ++j) { final CigarOperator nextOperator = cigarElements.get(j).getOperator(); // Allow if ((isRealOperator(nextOperator) && !isInDelOperator(nextOperator)) || isPaddingOperator(nextOperator)) { break; } if (isInDelOperator(nextOperator) && op == nextOperator) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR, "No M or N operator between pair of " + op.name() + " operators in CIGAR", readName, recordNumber)); } } } } else if (isPaddingOperator(op)) { if (i == 0) { /* * Removed restriction that padding not be the first operator because if a read starts in the middle of a pad * in a padded reference, it is necessary to precede the read with padding so that alignment start refers to a * position on the unpadded reference. */ } else if (i == cigarElements.size() - 1) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "Padding operator not valid at end of CIGAR", readName, recordNumber)); } else if (!isRealOperator(cigarElements.get(i-1).getOperator()) || !isRealOperator(cigarElements.get(i+1).getOperator())) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "Padding operator not between real operators in CIGAR", readName, recordNumber)); } } } if (!seenRealOperator) { if (ret == null) ret = new ArrayList<SAMValidationError>(); ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, "No real operator (M|I|D|N) in CIGAR", readName, recordNumber)); } return ret; } private static boolean isRealOperator(final CigarOperator op) { return op == CigarOperator.M || op == CigarOperator.EQ || op == CigarOperator.X || op == CigarOperator.I || op == CigarOperator.D || op == CigarOperator.N; } private static boolean isInDelOperator(final CigarOperator op) { return op == CigarOperator.I || op == CigarOperator.D; } private static boolean isClippingOperator(final CigarOperator op) { return op == CigarOperator.S || op == CigarOperator.H; } private static boolean isPaddingOperator(final CigarOperator op) { return op == CigarOperator.P; } @Override public boolean equals(final Object o) { if (this == o) return true; if (!(o instanceof Cigar)) return false; final Cigar cigar = (Cigar) o; if (cigarElements != null ? !cigarElements.equals(cigar.cigarElements) : cigar.cigarElements != null) return false; return true; } @Override public int hashCode() { return cigarElements != null ? cigarElements.hashCode() : 0; } public String toString() { return TextCigarCodec.getSingleton().encode(this); } }