/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package htsjdk.samtools.liftover; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.OverlapDetector; import java.io.File; import java.util.ArrayList; import java.util.List; /** * Java port of UCSC liftOver. Only the most basic liftOver functionality is implemented. * Internally coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive. * * @author alecw@broadinstitute.org */ public class LiftOver { private static final Log LOG = Log.getInstance(LiftOver.class); public static final double DEFAULT_LIFTOVER_MINMATCH = 0.95; private double liftOverMinMatch = DEFAULT_LIFTOVER_MINMATCH; private final OverlapDetector<Chain> chains; /** * Load UCSC chain file in order to lift over Intervals. */ public LiftOver(File chainFile) { IOUtil.assertFileIsReadable(chainFile); chains = Chain.loadChains(chainFile); } /** * Throw an exception if all the "to" sequence names in the chains are not found in the given sequence dictionary. */ public void validateToSequences(final SAMSequenceDictionary sequenceDictionary) { for (final Chain chain : chains.getAll()) { if (sequenceDictionary.getSequence(chain.toSequenceName) == null) { throw new SAMException("Sequence " + chain.toSequenceName + " from chain file is not found in sequence dictionary."); } } } /** * Lift over the given interval to the new genome build using the liftOverMinMatch set for this * LiftOver object. * @param interval Interval to be lifted over. * @return Interval in the output build coordinates, or null if it cannot be lifted over. */ public Interval liftOver(final Interval interval) { return liftOver(interval, liftOverMinMatch); } /** * Lift over the given interval to the new genome build. * @param interval Interval to be lifted over. * @param liftOverMinMatch Minimum fraction of bases that must remap. * @return Interval in the output build coordinates, or null if it cannot be lifted over. */ public Interval liftOver(final Interval interval, final double liftOverMinMatch) { if (interval.length() == 0) { throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " + interval.getName()); } Chain chainHit = null; TargetIntersection targetIntersection = null; // Number of bases in interval that can be lifted over must be >= this. double minMatchSize = liftOverMinMatch * interval.length(); // Find the appropriate Chain, and the part of the chain corresponding to the interval to be lifted over. for (final Chain chain : chains.getOverlaps(interval)) { final TargetIntersection candidateIntersection = targetIntersection(chain, interval); if (candidateIntersection != null && candidateIntersection.intersectionLength >= minMatchSize) { if (chainHit != null) { // In basic liftOver, multiple hits are not allowed. return null; } chainHit = chain; targetIntersection = candidateIntersection; } else if (candidateIntersection != null) { LOG.info("Interval " + interval.getName() + " failed to match chain " + chain.id + " because intersection length " + candidateIntersection.intersectionLength + " < minMatchSize " + minMatchSize + " (" + (candidateIntersection.intersectionLength/(float)interval.length()) + " < " + liftOverMinMatch + ")"); } } if (chainHit == null) { // Can't be lifted over. return null; } return createToInterval(interval.getName(), targetIntersection); } public List<PartialLiftover> diagnosticLiftover(final Interval interval) { final List<PartialLiftover> ret = new ArrayList<PartialLiftover>(); if (interval.length() == 0) { throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " + interval.getName()); } for (final Chain chain : chains.getOverlaps(interval)) { Interval intersectingChain = interval.intersect(chain.interval); final TargetIntersection targetIntersection = targetIntersection(chain, intersectingChain); if (targetIntersection == null) { ret.add(new PartialLiftover(intersectingChain, chain.id)); } else { Interval toInterval = createToInterval(interval.getName(), targetIntersection); float percentLiftedOver = targetIntersection.intersectionLength/(float)interval.length(); ret.add(new PartialLiftover(intersectingChain, toInterval, targetIntersection.chain.id, percentLiftedOver)); } } return ret; } private static Interval createToInterval(final String intervalName, final TargetIntersection targetIntersection) { // Compute the query interval given the offsets of the target interval start and end into the first and // last ContinuousBlocks. int toStart = targetIntersection.chain.getBlock(targetIntersection.firstBlockIndex).toStart + targetIntersection.startOffset; int toEnd = targetIntersection.chain.getBlock(targetIntersection.lastBlockIndex).getToEnd() - targetIntersection.offsetFromEnd; if (toEnd <= toStart || toStart < 0) { throw new SAMException("Something strange lifting over interval " + intervalName); } if (targetIntersection.chain.toNegativeStrand) { // Flip if query is negative. int negativeStart = targetIntersection.chain.toSequenceSize - toEnd; int negativeEnd = targetIntersection.chain.toSequenceSize - toStart; toStart = negativeStart; toEnd = negativeEnd; } // Convert to 1-based, inclusive. return new Interval(targetIntersection.chain.toSequenceName, toStart+1, toEnd, targetIntersection.chain.toNegativeStrand, intervalName); } /** * Add up overlap btw the blocks in this chain and the given interval. * @return Length of overlap, offsets into first and last ContinuousBlocks, and indices of first and * last ContinuousBlocks. */ private static TargetIntersection targetIntersection(final Chain chain, final Interval interval) { int intersectionLength = 0; // Convert interval to 0-based, half-open int start = interval.getStart() - 1; int end = interval.getEnd(); int firstBlockIndex = -1; int lastBlockIndex = -1; int startOffset = -1; int offsetFromEnd = -1; List<Chain.ContinuousBlock> blockList = chain.getBlocks(); for (int i = 0; i < blockList.size(); ++i) { final Chain.ContinuousBlock block = blockList.get(i); if (block.fromStart >= end) { break; } else if (block.getFromEnd() <= start) { continue; } if (firstBlockIndex == -1) { firstBlockIndex = i; if (start > block.fromStart) { startOffset = start - block.fromStart; } else { startOffset = 0; } } lastBlockIndex = i; if (block.getFromEnd() > end) { offsetFromEnd = block.getFromEnd() - end; } else { offsetFromEnd = 0; } int thisIntersection = Math.min(end, block.getFromEnd()) - Math.max(start, block.fromStart); if (thisIntersection <= 0) { throw new SAMException("Should have been some intersection."); } intersectionLength += thisIntersection; } if (intersectionLength == 0) { return null; } return new TargetIntersection(chain, intersectionLength, startOffset, offsetFromEnd, firstBlockIndex, lastBlockIndex); } /** * Get minimum fraction of bases that must remap. */ public double getLiftOverMinMatch() { return liftOverMinMatch; } /** * Set minimum fraction of bases that must remap. */ public void setLiftOverMinMatch(final double liftOverMinMatch) { this.liftOverMinMatch = liftOverMinMatch; } /** * Value class returned by targetIntersection() */ private static class TargetIntersection { /** Chain used for this intersection */ final Chain chain; /** Total intersectionLength length */ final int intersectionLength; /** Offset of target interval start in first block. */ final int startOffset; /** Distance from target interval end to end of last block. */ final int offsetFromEnd; /** Index of first ContinuousBlock matching interval. */ final int firstBlockIndex; /** Index of last ContinuousBlock matching interval. */ final int lastBlockIndex; TargetIntersection(final Chain chain,final int intersectionLength, final int startOffset, final int offsetFromEnd, final int firstBlockIndex, final int lastBlockIndex) { this.chain = chain; this.intersectionLength = intersectionLength; this.startOffset = startOffset; this.offsetFromEnd = offsetFromEnd; this.firstBlockIndex = firstBlockIndex; this.lastBlockIndex = lastBlockIndex; } } /** * Represents a portion of a liftover operation, for use in diagnosing liftover failures. */ public static class PartialLiftover { /** Intersection between "from" interval and "from" region of a chain. */ final Interval fromInterval; /** * Result of lifting over fromInterval (with no percentage mapped requirement). This is null * if fromInterval falls entirely with a gap of the chain. */ final Interval toInterval; /** id of chain used for this liftover */ final int chainId; /** Percentage of bases in fromInterval that lifted over. 0 if fromInterval is not covered by any chain. */ final float percentLiftedOver; PartialLiftover(final Interval fromInterval, final Interval toInterval, final int chainId, final float percentLiftedOver) { this.fromInterval = fromInterval; this.toInterval = toInterval; this.chainId = chainId; this.percentLiftedOver = percentLiftedOver; } PartialLiftover(final Interval fromInterval, final int chainId) { this.fromInterval = fromInterval; this.toInterval = null; this.chainId = chainId; this.percentLiftedOver = 0.0f; } public String toString() { if (toInterval == null) { // Matched a chain, but entirely within a gap. return fromInterval.toString() + " (len " + fromInterval.length() + ")=>null using chain " + chainId; } final String strand = toInterval.isNegativeStrand()? "-": "+"; return fromInterval.toString() + " (len " + fromInterval.length() + ")=>" + toInterval + "(" + strand + ") using chain " + chainId + " ; pct matched " + percentLiftedOver; } } }