/* * The MIT License (MIT) * * Copyright (c) 2007-2015 Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /* * To change this template, choose Tools | Templates * and open the template in the editor. */ package org.broad.igv.tdf; import org.apache.log4j.Logger; import org.broad.igv.Globals; import org.broad.igv.data.BasicScore; import org.broad.igv.data.CompositeScore; import org.broad.igv.data.CoverageDataSource; import org.broad.igv.data.NamedScore; import org.broad.igv.feature.Chromosome; import org.broad.igv.feature.LocusScore; import org.broad.igv.feature.genome.Genome; import org.broad.igv.prefs.Constants; import org.broad.igv.prefs.PreferencesManager; import org.broad.igv.track.TrackType; import org.broad.igv.track.WindowFunction; import java.util.*; /** * @author jrobinso */ public class TDFDataSource implements CoverageDataSource { private static Logger log = Logger.getLogger(TDFDataSource.class); TDFReader reader; int maxPrecomputedZoom = 6; private int trackNumber = 0; String trackName; Map<String, List<LocusScore>> summaryScoreCache = Collections.synchronizedMap(new HashMap<>()); Genome genome; WindowFunction windowFunction = WindowFunction.mean; List<WindowFunction> availableFunctions; boolean normalizeCounts = false; int totalCount = 0; float normalizationFactor = 1.0f; private Map<String, String> chrNameMap = new HashMap(); public TDFDataSource(TDFReader reader, int trackNumber, String trackName, Genome genome) { this.genome = genome; this.trackNumber = trackNumber; this.trackName = trackName; this.reader = reader; init(); } private void init() { this.availableFunctions = reader.getWindowFunctions(); if(this.availableFunctions != null) { this.availableFunctions.add(WindowFunction.none); // Always available => raw data } TDFGroup rootGroup = reader.getGroup("/"); try { maxPrecomputedZoom = Integer.parseInt(rootGroup.getAttribute("maxZoom")); } catch (Exception e) { log.error("Error reading attribute 'maxZoom'", e); } try { String dataGenome = rootGroup.getAttribute("genome"); // TODO -- throw exception if data genome != current genome } catch (Exception e) { log.error("Unknown genome " + rootGroup.getAttribute("genome")); throw new RuntimeException("Unknown genome " + rootGroup.getAttribute("genome")); } try { String totalCountString = rootGroup.getAttribute("totalCount"); if (totalCountString != null) { totalCount = Integer.parseInt(totalCountString); } } catch (Exception e) { log.error("Error reading attribute 'totalCount'", e); } intChrMap(); boolean normalizeCounts = PreferencesManager.getPreferences().getAsBoolean(Constants.NORMALIZE_COVERAGE); setNormalize(normalizeCounts); } public void updateGenome(Genome genome) { this.genome = genome; chrNameMap.clear(); intChrMap(); } private void intChrMap() { // If we have a genome, build a reverse-lookup table for queries if (genome != null) { Set<String> chrNames = reader.getChromosomeNames(); for (String chr : chrNames) { String igvChr = genome.getCanonicalChrName(chr); if (igvChr != null && !igvChr.equals(chr)) { chrNameMap.put(igvChr, chr); } } } } public void setNormalize(boolean normalizeCounts) { setNormalizeCounts(normalizeCounts, 1.0e6f); } public boolean getNormalize() { return this.normalizeCounts; } public void setNormalizeCounts(boolean normalizeCounts, float scalingFactor) { this.normalizeCounts = normalizeCounts; if (normalizeCounts && totalCount > 0) { normalizationFactor = scalingFactor / totalCount; } else { normalizationFactor = 1; } } public String getPath() { return reader == null ? null : reader.getPath(); } public double getDataMax() { return reader.getUpperLimit() * normalizationFactor; } public double getDataMin() { return reader.getLowerLimit() * normalizationFactor; } private List<LocusScore> getCachedSummaryScores(String querySeq, int zoom, int tileNumber, double tileWidth) { String key = querySeq + "_" + zoom + "_" + tileNumber + "_" + windowFunction; List<LocusScore> scores = summaryScoreCache.get(key); if (scores == null) { int startLocation = (int) (tileNumber * tileWidth); int endLocation = (int) ((tileNumber + 1) * tileWidth); scores = getSummaryScores(querySeq, startLocation, endLocation, zoom); summaryScoreCache.put(key, scores); } return scores; } protected List<LocusScore> getSummaryScores(String querySeq, int startLocation, int endLocation, int zoom) { if (this.availableFunctions == null) { init(); } List<LocusScore> scores; if (zoom <= this.maxPrecomputedZoom && windowFunction != WindowFunction.none) { // Window function == none => no windowing, so its not clear what to do. For now use mean // WindowFunction wf = (windowFunction == WindowFunction.none ? WindowFunction.mean : windowFunction); List<TDFTile> tiles = null; if (querySeq.equals(Globals.CHR_ALL) && !isChrOrderValid()) { TDFTile wgTile = reader.getWholeGenomeTile(genome, windowFunction); tiles = Arrays.asList(wgTile); } else { TDFDataset ds = reader.getDataset(querySeq, zoom, windowFunction); if (ds != null) { tiles = ds.getTiles(startLocation, endLocation); } } scores = new ArrayList(1000); if (tiles != null && tiles.size() > 0) { for (TDFTile tile : tiles) { if (tile.getSize() > 0) { for (int i = 0; i < tile.getSize(); i++) { float v = tile.getValue(trackNumber, i); if (!Float.isNaN(v)) { v *= normalizationFactor; scores.add(new BasicScore(tile.getStartPosition(i), tile.getEndPosition(i), v)); } } } } } } else { if (querySeq.equals(Globals.CHR_ALL)) { scores = getWGRawScores(); } else { scores = getLocusScoresForChr(querySeq, startLocation, endLocation, zoom); } } return scores; } private List<LocusScore> getLocusScoresForChr(String chr, int startLocation, int endLocation, int zoom) { List<LocusScore> scores; int chrLength = getChrLength(chr); if (chrLength == 0) { scores = Collections.emptyList(); } else { endLocation = Math.min(endLocation, chrLength); // By definition there are 2^z tiles per chromosome, and 700 bins per tile, where z is the zoom level. // By definition there are 2^z tiles per chromosome, and 700 bins per tile, where z is the zoom level. //int maxZoom = (int) (Math.log(chrLength / 700) / Globals.log2) + 1; //int z = Math.min(zReq, maxZoom); int nTiles = (int) Math.pow(2, zoom); double binSize = Math.max(1, (((double) chrLength) / nTiles) / 700); scores = computeSummaryScores(chr, startLocation, endLocation, binSize); } return scores; } public int getChrLength(String chr) { if (chr.equals(Globals.CHR_ALL)) { return (int) (genome.getNominalLength() / 1000); } else { Chromosome c = genome.getChromosome(chr); return c == null ? 0 : c.getLength(); } } private List<LocusScore> getWGRawScores() { List<LocusScore> scores = new ArrayList(10000); for (String chr : genome.getAllChromosomeNames()) { Chromosome c = genome.getChromosome(chr); String dsName = "/" + chr + "/raw"; TDFDataset rawDataset = reader.getDataset(dsName); if (rawDataset != null) { List<TDFTile> rawTiles = rawDataset.getTiles(0, c.getLength()); if (rawTiles.size() > 0) { for (TDFTile rawTile : rawTiles) { // Tile of raw data if (rawTile != null && rawTile.getSize() > 0) { for (int i = 0; i < rawTile.getSize(); i++) { int s = genome.getGenomeCoordinate(chr, rawTile.getStartPosition(i)); int e = genome.getGenomeCoordinate(chr, Math.max(s, rawTile.getEndPosition(i) - 1)); float v = rawTile.getValue(trackNumber, i); if (!Float.isNaN(v)) { v *= normalizationFactor; } scores.add(new BasicScore(s, e, v)); } } } } } } return scores; } private List<LocusScore> computeSummaryScores(String chr, int startLocation, int endLocation, double scale) { List<LocusScore> scores = new ArrayList(1000); String dsName = "/" + chr + "/raw"; TDFDataset rawDataset = reader.getDataset(dsName); if (rawDataset != null) { List<TDFTile> rawTiles = rawDataset.getTiles(startLocation, endLocation); if (rawTiles.size() > 0) { if (windowFunction == WindowFunction.none) { for (TDFTile rawTile : rawTiles) { // Tile of raw data if (rawTile != null && rawTile.getSize() > 0) { for (int i = 0; i < rawTile.getSize(); i++) { int s = rawTile.getStartPosition(i); int e = Math.max(s, rawTile.getEndPosition(i) - 1); if (e < startLocation) { continue; } else if (s > endLocation) { break; } float v = rawTile.getValue(trackNumber, i); if (!Float.isNaN(v)) { v *= normalizationFactor; } scores.add(new BasicScore(s, e, v)); } } } } else { Accumulator accumulator = new Accumulator(windowFunction, 5); int accumulatedStart = -1; int accumulatedEnd = -1; int lastEndBin = 0; for (TDFTile rawTile : rawTiles) { // Tile of raw data int size = rawTile.getSize(); if (rawTile != null && size > 0) { int[] starts = rawTile.getStart(); int[] ends = rawTile.getEnd(); String[] features = rawTile.getNames(); float[] values = rawTile.getData(trackNumber); // Loop through and bin scores for this interval. for (int i = 0; i < size; i++) { if (starts[i] >= endLocation) { break; // We're beyond the end of the requested interval } int s = Math.max(startLocation, starts[i]); int e = ends == null ? s + 1 : Math.min(endLocation, ends[i]); float v = values[i] * normalizationFactor; if (e < startLocation || Float.isNaN(v)) { continue; } String probeName = features == null ? null : features[i]; // Compute bin numbers, relative to start of this tile int endBin = (int) ((e - startLocation) / scale); int startBin = (int) ((s - startLocation) / scale); // If this feature spans multiple bins, or extends beyond last end bin, record if (endBin > lastEndBin || endBin > startBin) { if (accumulator.hasData()) { scores.add(getCompositeScore(accumulator, accumulatedStart, accumulatedEnd)); accumulator = new Accumulator(windowFunction, 5); } } if (endBin > startBin) { scores.add(new NamedScore(s, e, v, probeName)); } else { if (!accumulator.hasData()) { accumulatedStart = s; accumulatedEnd = e; accumulator.add(e - s, v, probeName); } } lastEndBin = endBin; } // End of loop cleanup if (accumulator.hasData()) { scores.add(getCompositeScore(accumulator, accumulatedStart, accumulatedEnd)); } } } } } } return scores; } private LocusScore getCompositeScore(Accumulator accumulator, int accumulatedStart, int accumulatedEnd) { LocusScore ls; if (accumulator.getNpts() == 1) { ls = new NamedScore(accumulatedStart, accumulatedEnd, accumulator.getRepData()[0], accumulator.getRepProbes()[0]); } else { float value = accumulator.getValue(); ls = new CompositeScore(accumulatedStart, accumulatedEnd, value, accumulator.getRepData(), accumulator.getRepProbes(), windowFunction); } return ls; } public List<LocusScore> getSummaryScoresForRange(String chr, int startLocation, int endLocation, int zoom) { Chromosome chromosome = genome.getChromosome(chr); if (chromosome != null) { endLocation = Math.min(chromosome.getLength(), endLocation); } String tmp = chrNameMap.get(chr); String querySeq = tmp == null ? chr : tmp; ArrayList scores = new ArrayList(); // TODO -- this whole section could be computed once and stored, it is only a function of the genome, chr, and zoom level. int tileWidth = 0; if (chr.equals(Globals.CHR_ALL)) { tileWidth = (int) Math.ceil(genome.getNominalLength() / 1000.0); } else { if (chromosome != null) { tileWidth = chromosome.getLength() / ((int) Math.pow(2.0, zoom)); } } if (tileWidth == 0) { return null; } int startTile = (startLocation / tileWidth); int endTile = ((endLocation - 1) / tileWidth); for (int t = startTile; t <= endTile; t++) { List<LocusScore> cachedScores = getCachedSummaryScores(querySeq, zoom, t, tileWidth); if (cachedScores != null) { for (LocusScore s : cachedScores) { if (s.getEnd() >= startLocation) { scores.add(s); } else if (s.getStart() > endLocation) { break; } } } } return scores; } public TrackType getTrackType() { return reader.getTrackType(); } public void setWindowFunction(WindowFunction wf) { this.windowFunction = wf; } public boolean isLogNormalized() { //return false; return getDataMin() < 0; } public void refreshData(long timestamp) { // ignored } public WindowFunction getWindowFunction() { return windowFunction; } public Collection<WindowFunction> getAvailableWindowFunctions() { return availableFunctions; } @Override public void dispose() { } /** * We changed how chromosomes were sorted in v2.2 * This had the unintended side effect of introducing * backwards incompatibility in CHR_ALL. Version 4+ should include * the chromosome names in Genome order (rather than file order); which we check against. * * @return Whether we believe the data stored for whole genome view is valid or not */ boolean isChrOrderValid() { if (reader.getVersion() >= 4) { String chromosomeNames; List<String> fileChromos = null; //Extract the chromosome names. Not sure when that attribute was put in //If it's not in the file, give up try { TDFGroup rootGroup = reader.getGroup(TDFWriter.ROOT_GROUP); chromosomeNames = rootGroup.getAttribute(TDFWriter.CHROMOSOMES); fileChromos = new ArrayList<String>(Arrays.asList(chromosomeNames.split(","))); } catch (Exception e) { return false; } return checkChromoNameOrder(fileChromos, genome.getLongChromosomeNames()); } else if (genome != null) { // The WELL_KNOWN_GENOMES have had stable chromosome order since initial release String genomeId = genome.getId(); return WELL_KNOWN_GENOMES.contains(genomeId); } else { // Can't be sure return false; } } /** * Check the sort. It is acceptable to have different #s of chromosomes in the two lists, but they must * start with the same value and be in the same order to the extent they overlap. Less genome chrs than file * chrs => extra data at the end. Less file chrs than genome chrs => we'll have white space on the end. In * both cases the data that is shown will be valid. * * @param fileChromos * @param genomeChromos * @return * @see #isChrOrderValid() */ static boolean checkChromoNameOrder(List<String> fileChromos, List<String> genomeChromos) { int chrCount = Math.min(fileChromos.size(), genomeChromos.size()); for (int i = 0; i < chrCount; i++) { if (!fileChromos.get(i).equals(genomeChromos.get(i))) return false; } //If we get this far, the chromo order is good as far as we know return true; } private static HashSet<String> WELL_KNOWN_GENOMES = new HashSet<String>(Arrays.asList("hg18", "hg19", "mm8", "mm9")); }