/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.bio; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.OutputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import fr.ens.biologie.genomique.eoulsan.util.FileUtils; import fr.ens.biologie.genomique.eoulsan.util.Utils; /** * This class define a genomic array. TODO more doc and rename attributes and * field of the inner classes * @since 1.2 * @author Laurent Jourdren */ public class GenomicArray<T> { private Map<String, ChromosomeZones<T>> chromosomes = new HashMap<>(); /** * This class define a zone in a ChromosomeZone object. * @author Laurent Jourdren */ private static final class Zone<T> implements Serializable { private static final long serialVersionUID = 3581472137861260840L; private final int start; private int end; private final char strand; private Set<T> _values; private T _value; private int valueCount; /** * Add a value to the zone. * @param value Exon to add */ public void addExon(final T value) { if (value == null) { throw new NullPointerException("value argument cannot be null"); } if (this.valueCount == 0) { this._value = value; this.valueCount = 1; } else { if (this.valueCount == 1) { if (value == this._value || this._value.hashCode() == value.hashCode()) { return; } this._values = new HashSet<>(); this._values.add(this._value); this._value = null; } this._values.add(value); this.valueCount = this._values.size(); } } /** * Add values to the zone. * @param values values to add */ private void addExons(final Set<T> values) { if (values == null) { return; } final int len = values.size(); if (len == 0) { return; } if (len == 1) { this._value = values.iterator().next(); this.valueCount = this._value == null ? 0 : 1; } else { this._values = new HashSet<>(values); this.valueCount = len; } } /** * Get the values of the zone. * @return a set with the values of the zone */ public Set<T> getValues() { if (this.valueCount == 0) { return null; } if (this.valueCount == 1) { return Collections.singleton(this._value); } return this._values; } /** * Test if a position is before, in or after the zone. * @param position to test * @return -1 if position is before the zone, 0 if the position is in the * zone and 1 of the position is after the zone */ public int compareTo(final int position) { if (position >= this.start && position <= this.end) { return 0; } return position < this.start ? -1 : 1; } @Override public String toString() { Set<String> r = new HashSet<>(); if (getValues() != null) { for (T e : getValues()) { r.add(e.toString()); } } return this.getClass().getSimpleName() + "{" + this.start + "," + this.end + "," + r + "}"; } @Override public boolean equals(final Object o) { if (o == this) { return true; } if (!(o instanceof Zone<?>)) { return false; } final Zone<?> that = (Zone<?>) o; if (!(Utils.equal(this.valueCount, that.valueCount) && Utils.equal(this.start, that.start) && Utils.equal(this.end, that.end) && Utils.equal(this.strand, that.strand))) { return false; } switch (this.valueCount) { case 0: return true; case 1: return Utils.equal(this._value, that._value); default: return Utils.equal(this._values, that._values); } } @Override public int hashCode() { return Utils.hashCode(this._value, this._values, this.start, this.end, this.strand, this.valueCount); } // // Constructor // /** * Constructor that create a zone * @param start start position of the zone * @param end end position of the zone * @param strand strand of the zone */ public Zone(final int start, final int end, final char strand) { this.start = start; this.end = end; this.strand = strand; } /** * Constructor that create a zone * @param start start position of the zone * @param end end position of the zone * @param strand strand of the zone * @param exons of the zone */ public Zone(final int start, final int end, final char strand, final Set<T> exons) { this(start, end, strand); addExons(exons); } } /** * This class define an object that contains all the stranded zones of a * chromosome. * @author Laurent Jourdren */ private static final class ChromosomeStrandedZones<T> implements Serializable { private static final long serialVersionUID = 8073207058699194059L; private final String chromosomeName; private int length = 0; private final List<Zone<T>> zones = new ArrayList<>(); private Zone<T> get(final int index) { return this.zones.get(index); } /** * Add a zone. * @param zone zone to add */ private void add(final Zone<T> zone) { this.zones.add(zone); } /** * Add a zone. * @param index index where add the zone * @param zone the zone to add */ private void add(final int index, final Zone<T> zone) { this.zones.add(index, zone); } /** * Find the zone index for a position. * @param pos the position on the chromosome * @return the index of the zone or -1 if the position if lower than 1 or * greater than the length of the chromosome */ private int findIndexPos(final int pos) { if (pos < 1 || pos > this.length) { return -1; } int minIndex = 0; int maxIndex = this.zones.size() - 1; int index = 0; while (true) { final int diff = maxIndex - minIndex; index = minIndex + diff / 2; if (diff == 1) { if (get(minIndex).compareTo(pos) == 0) { return minIndex; } if (get(maxIndex).compareTo(pos) == 0) { return maxIndex; } assert (false); } final Zone<T> z = get(index); final int comp = z.compareTo(pos); if (comp == 0) { return index; } if (comp < 0) { maxIndex = index; } else { minIndex = index; } } } /** * Split a zone in two zone. * @param zone zone to split * @param pos position of the split * @return a new zone object */ private Zone<T> splitZone(final Zone<T> zone, final int pos) { final Zone<T> result = new Zone<>(pos, zone.end, zone.strand, zone.getValues()); zone.end = pos - 1; return result; } /** * Add an entry. * @param interval interval of the entry * @param value value to add */ public void addEntry(final GenomicInterval interval, final T value) { if (interval == null) { throw new NullPointerException("interval argument cannot be null"); } if (value == null) { throw new NullPointerException("value argument cannot be null"); } final int intervalStart = interval.getStart(); final int intervalEnd = interval.getEnd(); // Create an empty zone if the interval is after the end of the // last chromosome zone if (intervalEnd > this.length) { add(new Zone<T>(this.length + 1, intervalEnd, interval.getStrand())); this.length = intervalEnd; } final int indexStart = findIndexPos(intervalStart); final int indexEnd = findIndexPos(intervalEnd); final Zone<T> z1 = get(indexStart); final Zone<T> z1b; final int count1b; if (z1.start == intervalStart) { z1b = z1; count1b = 0; } else { z1b = splitZone(z1, intervalStart); count1b = 1; } // Same index if (indexStart == indexEnd) { if (z1b.end == intervalEnd) { z1b.addExon(value); } else { final Zone<T> z1c = splitZone(z1b, intervalEnd + 1); add(indexStart + 1, z1c); } if (z1 != z1b) { z1b.addExon(value); add(indexStart + 1, z1b); } else { z1.addExon(value); } } else { final Zone<T> z2 = get(indexEnd); final Zone<T> z2b; if (z2.end != intervalEnd) { z2b = splitZone(z2, intervalEnd + 1); } else { z2b = z2; } if (z1 != z1b) { add(indexStart + 1, z1b); } if (z2 != z2b) { add(indexEnd + 1 + count1b, z2b); } for (int i = indexStart + count1b; i <= indexEnd + count1b; i++) { get(i).addExon(value); } } } /** * Get entries. * @param start start of the interval * @param stop end of the interval * @return a map with the values */ public Map<GenomicInterval, Set<T>> getEntries(final int start, final int stop) { final int indexStart = findIndexPos(start); final int indexEnd = findIndexPos(stop); if (indexStart == -1) { return null; } final int from = indexStart; final int to = indexEnd == -1 ? this.zones.size() - 1 : indexEnd; Map<GenomicInterval, Set<T>> result = null; for (int i = from; i <= to; i++) { final Zone<T> zone = get(i); // Really needed ? if (intersect(start, stop, zone.start, zone.end)) { final GenomicInterval iv = new GenomicInterval(this.chromosomeName, zone.start, zone.end, zone.strand); final Set<T> r = zone.getValues(); if (result == null) { result = new HashMap<>(); } if (r != null) { result.put(iv, Collections.unmodifiableSet(r)); } else { result.put(iv, new HashSet<T>()); } } } if (stop > get(to).end && start > get(to).start) { result.put(new GenomicInterval(this.chromosomeName, start, stop, get(to).strand), new HashSet<T>()); } else if (stop > get(to).end) { result.put(new GenomicInterval(this.chromosomeName, get(to).end + 1, stop, get(to).strand), new HashSet<T>()); } return result; } /** * Test if an interval intersect a zone. * @param start start of the interval * @param end end of the interval * @param startZone start of the zone * @param endZone end of the zone * @return true if the interval intersect a zone */ private static boolean intersect(final int start, final int end, final int startZone, final int endZone) { return (start >= startZone && start <= endZone) || (end >= startZone && end <= endZone) || (start < startZone && end > endZone); } @Override public boolean equals(final Object o) { if (o == this) { return true; } if (!(o instanceof ChromosomeStrandedZones<?>)) { return false; } final ChromosomeStrandedZones<?> that = (ChromosomeStrandedZones<?>) o; return Utils.equal(this.chromosomeName, that.chromosomeName) && Utils.equal(this.length, that.length) && Utils.equal(this.zones, that.zones); } @Override public int hashCode() { return Utils.hashCode(this.chromosomeName, this.length, this.zones); } @Override public String toString() { return this.getClass().getSimpleName() + "{chromosomeName=" + this.chromosomeName + ", length=" + this.length + ", zones=" + this.zones + "}"; } // // Constructor // /** * Public constructor. * @param chromosomeName name of the chromosome */ public ChromosomeStrandedZones(final String chromosomeName) { if (chromosomeName == null) { throw new NullPointerException( "chromosomeName argument cannot be null"); } this.chromosomeName = chromosomeName; } } /** * This class define an object that contains all the zones of a chromosome. * These zones are stranded if "yes" or "reverse". * @author Claire Wallon */ private static final class ChromosomeZones<T> implements Serializable { private static final long serialVersionUID = -6312870823086177216L; private final ChromosomeStrandedZones<T> plus; private final ChromosomeStrandedZones<T> minus; /** * Add a stranded entry. * @param interval interval of the entry * @param value value to add */ public void addEntry(final GenomicInterval interval, final T value) { if (interval == null) { throw new NullPointerException("interval argument cannot be null"); } if (value == null) { throw new NullPointerException("value argument cannot be null"); } if (interval.getStrand() == '+' || interval.getStrand() == '.') { this.plus.addEntry(interval, value); } else if (interval.getStrand() == '-') { this.minus.addEntry(interval, value); } } /** * Get stranded entries. * @param start start of the interval * @param stop end of the interval * @return a map with the values */ public Map<GenomicInterval, Set<T>> getEntries(final int start, final int stop) { final Map<GenomicInterval, Set<T>> result = new HashMap<>(); final Map<GenomicInterval, Set<T>> interPlus = this.plus.getEntries(start, stop); if (interPlus != null) { result.putAll(interPlus); } final Map<GenomicInterval, Set<T>> interMinus = this.minus.getEntries(start, stop); if (interMinus != null) { result.putAll(interMinus); } return result; } @Override public boolean equals(final Object o) { if (o == this) { return true; } if (!(o instanceof ChromosomeZones<?>)) { return false; } final ChromosomeZones<?> that = (ChromosomeZones<?>) o; return Utils.equal(this.minus, that.minus) && Utils.equal(this.plus, that.plus); } @Override public int hashCode() { return Utils.hashCode(this.minus, this.plus); } @Override public String toString() { return this.getClass().getSimpleName() + "{minus=" + this.minus + ", plus=" + this.plus + "}"; } // // Constructor // /** * Public constructor. * @param chromosomeName name of the chromosome */ public ChromosomeZones(final String chromosomeName) { if (chromosomeName == null) { throw new NullPointerException( "chromosomeName argument cannot be null"); } this.plus = new ChromosomeStrandedZones<>(chromosomeName); this.minus = new ChromosomeStrandedZones<>(chromosomeName); } } /** * Add an entry on the genomic array. * @param interval genomic interval * @param value value to add */ public void addEntry(final GenomicInterval interval, final T value) { if (interval == null) { throw new NullPointerException("interval argument cannot be null"); } if (value == null) { throw new NullPointerException("value argument cannot be null"); } final String chromosomeName = interval.getChromosome(); // Create a ChromosomeZones if it does not exist yet if (!this.chromosomes.containsKey(chromosomeName)) { addChromosome(chromosomeName); } // Add the GenomicInterval to the ChromosomeZones this.chromosomes.get(chromosomeName).addEntry(interval, value); } /** * Add a chromosome. * @param chromosomeName name of the chromosome to add */ public void addChromosome(final String chromosomeName) { if (chromosomeName == null) { throw new NullPointerException("chromosomeName argument cannot be null"); } if (containsChromosome(chromosomeName)) { return; } this.chromosomes.put(chromosomeName, new ChromosomeZones<T>(chromosomeName)); } /** * Add chromosomes from the list of sequence in a GenomeDescription object. * @param gd genome description */ public void addChromosomes(final GenomeDescription gd) { if (gd == null) { throw new NullPointerException("gd argument cannot be null"); } for (String chromosomeName : gd.getSequencesNames()) { addChromosome(chromosomeName); } } /** * Get entries in an interval. * @param interval the genomic interval * @return a map with the values */ public Map<GenomicInterval, Set<T>> getEntries( final GenomicInterval interval) { if (interval == null) { throw new NullPointerException("interval argument cannot be null"); } return getEntries(interval.getChromosome(), interval.getStart(), interval.getEnd()); } /** * Get entries in an interval * @param chromosome chromosome of the interval * @param start start of the interval * @param end end of the interval * @return a map with the values */ public Map<GenomicInterval, Set<T>> getEntries(final String chromosome, final int start, final int end) { if (chromosome == null) { throw new NullPointerException("chromosome argument cannot be null"); } final ChromosomeZones<T> chr = this.chromosomes.get(chromosome); if (chr == null) { return null; } return chr.getEntries(start, end); } /** * Test if the GenomicArray contains a chromosome. * @param chromosomeName name of the chromosome to test * @return true if the GenomicArray contains the chromosome */ public boolean containsChromosome(final String chromosomeName) { if (chromosomeName == null) { return false; } return this.chromosomes.containsKey(chromosomeName); } /** * Get a set with zone identifiers. * @return a set of strings with identifiers */ public Set<String> getFeaturesIds() { Set<String> results = new TreeSet<>(); for (Map.Entry<String, ChromosomeZones<T>> strandedZone : this.chromosomes .entrySet()) { // Process plus zones for (Zone<T> zone : strandedZone.getValue().plus.zones) { if (zone.valueCount != 0) { for (T value : zone.getValues()) { results.add(String.valueOf(value)); } } } // Process minus zones for (Zone<T> zone : strandedZone.getValue().minus.zones) { if (zone.valueCount != 0) { for (T value : zone.getValues()) { results.add(String.valueOf(value)); } } } } return results; } /** * Get the names of the chromosomes that contains the GenomicArray. * @return a set with the name of the chromosomes */ public Set<String> getChromosomesNames() { return Collections.unmodifiableSet(this.chromosomes.keySet()); } // // Save // /** * Save the annotation. * @param os Output stream */ public void save(final OutputStream os) throws IOException { if (os == null) { throw new NullPointerException("os argument cannot be null"); } final ObjectOutputStream oos = new ObjectOutputStream(os); oos.writeObject(this.chromosomes); oos.close(); } /** * Save the annotation. * @param outputFile Output file */ public void save(final File outputFile) throws FileNotFoundException, IOException { if (outputFile == null) { throw new NullPointerException("outputFile argument cannot be null"); } save(FileUtils.createOutputStream(outputFile)); } // // Load // /** * Load the annotation. * @param is InputStream input stream */ @SuppressWarnings(value = "unchecked") public void load(final InputStream is) throws IOException { if (is == null) { throw new NullPointerException("is argument cannot be null"); } final ObjectInputStream ois = new ObjectInputStream(is); try { this.chromosomes = (Map<String, ChromosomeZones<T>>) ois.readObject(); } catch (ClassNotFoundException e) { throw new IOException("Unable to load data."); } ois.close(); } /** * Load the annotation. * @param inputFile input file */ public void load(final File inputFile) throws FileNotFoundException, IOException { if (inputFile == null) { throw new NullPointerException("inputFile argument cannot be null"); } load(FileUtils.createInputStream(inputFile)); } // // Other // public void clear() { this.chromosomes.clear(); } // // Object methods // @Override public boolean equals(final Object o) { if (o == this) { return true; } if (!(o instanceof GenomicArray)) { return false; } final GenomicArray<?> that = (GenomicArray<?>) o; return Utils.equal(this.chromosomes, that.chromosomes); } @Override public int hashCode() { return Utils.hashCode(this.chromosomes); } @Override public String toString() { return this.getClass().getSimpleName() + "{chromosomes=" + this.chromosomes + "}"; } // // Constructors // /** * Public constructor. */ public GenomicArray() { } /** * Public constructor. * @param gd The genome description. */ public GenomicArray(final GenomeDescription gd) { this(); addChromosomes(gd); } }