/**
 * Copyright Copyright 2007-13 Simon Andrews
 *
 *    This file is part of BamQC.
 *
 *    BamQC is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    BamQC is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with BamQC; if not, write to the Free Software
 *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*
 * Changelog:
 * - Simon Andrews: Class Creation.
 */
package uk.ac.babraham.BamQC.DataTypes.Genome;

import java.util.Arrays;

import uk.ac.babraham.BamQC.BamQCException;

/**
 * SplitLocation can be used to represent complex genomic
 * positions built up from several sublocations.
 *
 * The overall start/end/strand of the feature are stored through the
 * superclass (via setPosition); the individual pieces are kept in a
 * sorted array. When a location string describes only a single piece
 * the array is dropped and the object behaves like a plain Location.
 *
 * @author Simon Andrews
 */
public class SplitLocation extends Location {

	private static final long serialVersionUID = 4275718914518130070L;

	/**
	 * The sorted sublocations, or null when the string constructor found
	 * only a single sublocation (in which case this object is its own
	 * only sublocation).
	 */
	private Location [] subLocations;

	/**
	 * Instantiates a new split location.
	 *
	 * The overall position spans from the smallest sublocation start to
	 * the largest sublocation end. The strand is taken from the first
	 * sublocation after sorting.
	 *
	 * @param subLocations The set of sublocations from which the whole feature will be built.
	 *                     The supplied array is copied and is not modified.
	 * @throws IllegalArgumentException if subLocations is null or empty
	 */
	public SplitLocation (Location [] subLocations) {
		super(0,0,UNKNOWN);

		if (subLocations == null || subLocations.length == 0) {
			throw new IllegalArgumentException("There must be at least one sublocation to define a feature");
		}

		// Sort a private copy so that we don't reorder the caller's array
		// as a side effect of building the feature.
		this.subLocations = subLocations.clone();
		Arrays.sort(this.subLocations);

		// Work out the full extent explicitly rather than trusting the last
		// sorted element to hold the furthest end - sublocations which overlap
		// or nest would otherwise give an end which is too small.
		int start = this.subLocations[0].start();
		int end = this.subLocations[0].end();

		for (int i=1;i<this.subLocations.length;i++) {
			start = Math.min(start, this.subLocations[i].start());
			end = Math.max(end, this.subLocations[i].end());
		}

		setPosition(start,end,this.subLocations[0].strand());
	}

	/**
	 * Instantiates a new split location from an EMBL format location string
	 *
	 * @param EMBLString An EMBL format location string
	 * @throws BamQCException
	 */
	public SplitLocation (String EMBLString) throws BamQCException {
		this(EMBLString,0);
	}

	/**
	 * Instantiates a new split location from an EMBL string with an
	 * arbitrary offset value applied.  This is useful because older
	 * genome files gave coordinates within a BAC clone and then had
	 * an overall offset for that BAC in the genome assembly.
	 *
	 * The strand is REVERSE if the string contains "complement", UNKNOWN
	 * if it contains "unknown" and FORWARD otherwise. The join(),
	 * complement() and unknown() wrappers and any &lt; or &gt; ambiguity
	 * markers are stripped before the comma separated ranges are parsed.
	 *
	 * @param EMBLString An EMBL format location string
	 * @param offset The offset to apply (in bp)
	 * @throws BamQCException
	 * @throws NumberFormatException if a position in the string is not a valid integer
	 */
	public SplitLocation (String EMBLString, int offset) throws BamQCException {
		super(0,0,FORWARD);

		int strand;

		if (EMBLString.contains("complement")) {
			strand = REVERSE;
		}
		else if (EMBLString.contains("unknown")) {
			strand = UNKNOWN;
		}
		else {
			strand = FORWARD;
		}

		// Strip the structural wrappers so we're left with just the ranges
		String ranges = EMBLString.replace("join(","");
		ranges = ranges.replace("complement(","");
		ranges = ranges.replace("unknown(","");
		ranges = ranges.replace(")","");

		// We need to remove any position ambiguities too
		ranges = ranges.replaceAll("[<>]","");

		String [] subLocationStrings = ranges.split(",");
		subLocations = new Location[subLocationStrings.length];

		int start = 0;
		int end = 0;

		for (int i=0;i<subLocationStrings.length;i++) {

			String [] positions = subLocationStrings[i].split("\\.\\.");

			// It's possible that only a single base is found in which case
			// some EMBL writers will include only a single position rather
			// than a range.  We'll convert this to a 2 number form for consistency
			if (positions.length == 1) {
				positions = new String[] {positions[0],positions[0]};
			}

			// Trim so that whitespace around the separators doesn't break parsing
			subLocations[i] = new Location(Integer.parseInt(positions[0].trim())+offset,Integer.parseInt(positions[1].trim())+offset,strand);

			// The first sublocation seeds the extent. We key this off the loop
			// index rather than testing for start==0, since 0 can be a genuine
			// start position once an offset has been applied.
			if (i == 0) {
				start = subLocations[i].start();
				end = subLocations[i].end();
			}
			else {
				start = Math.min(start, subLocations[i].start());
				end = Math.max(end, subLocations[i].end());
			}
		}

		Arrays.sort(subLocations);
		setPosition(start, end, strand);

		// Don't store more than we have to
		if (subLocations.length == 1) {
			subLocations = null;
		}
	}

	/* (non-Javadoc)
	 * @see uk.ac.babraham.BamQC.DataTypes.Genome.Location#subLocations()
	 */
	/**
	 * Provides the pieces which make up this location.
	 *
	 * @return The stored array of sublocations (not a copy), or a new single
	 *         element array containing this object if no sublocations are stored.
	 */
	public Location [] subLocations () {
		if (subLocations == null) {
			return new Location[] {this};
		}
		return subLocations;
	}

	/* (non-Javadoc)
	 * @see uk.ac.babraham.BamQC.DataTypes.Genome.Location#locationString()
	 */
	/**
	 * Builds a location string listing each sublocation as start..end,
	 * separated by commas, wrapped in complement() for REVERSE strand
	 * features or unknown() for UNKNOWN strand features. No join()
	 * wrapper is written.
	 *
	 * @return The location string for this feature
	 */
	@Override
	public String locationString () {

		// We optimise by not storing subLocations if there is only one location
		// in the string.  In this case we use the simpler method from the superclass.
		if (subLocations == null) {
			return super.locationString();
		}

		StringBuilder b = new StringBuilder();

		if (strand() == REVERSE) {
			b.append("complement(");
		}
		if (strand() == UNKNOWN) {
			b.append("unknown(");
		}

		for (int i=0;i<subLocations.length;i++) {
			if (i > 0) {
				b.append(",");
			}
			b.append(subLocations[i].start());
			b.append("..");
			b.append(subLocations[i].end());
		}

		// Close whichever wrapper we opened above
		if (strand() == REVERSE || strand() == UNKNOWN) {
			b.append(")");
		}

		return b.toString();
	}
}