/* * The MIT License (MIT) * * Copyright (c) 2007-2015 Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.broad.igv.data; //~--- non-JDK imports -------------------------------------------------------- import org.apache.log4j.Logger; import org.broad.igv.Globals; import org.broad.igv.feature.Chromosome; import org.broad.igv.feature.LocusScore; import org.broad.igv.feature.genome.Genome; import org.broad.igv.tdf.Accumulator; import org.broad.igv.track.WindowFunction; import org.broad.igv.ui.panel.FrameManager; import org.broad.igv.util.collections.LRUCache; import java.util.*; /** * @author jrobinso */ public abstract class AbstractDataSource implements DataSource { public static List<WindowFunction> ORDERED_WINDOW_FUNCTIONS = Arrays.asList(new WindowFunction[]{ WindowFunction.min, WindowFunction.percentile2, WindowFunction.percentile10, WindowFunction.median, WindowFunction.mean, WindowFunction.percentile90, WindowFunction.percentile98, WindowFunction.max, WindowFunction.none }); private static Logger log = Logger.getLogger(AbstractDataSource.class); // DataManager dataManager; boolean cacheSummaryTiles = true; protected WindowFunction windowFunction = WindowFunction.mean; LRUCache<String, SummaryTile> summaryTileCache = new LRUCache(10); protected Genome genome; public AbstractDataSource(Genome genome) { this.genome = genome; } // abstract protected TrackType getTrackType(); /** * Return "raw" (i.e. not summarized) data for the specified interval. * * @param chr * @param startLocation * @param endLocation * @return */ abstract protected DataTile getRawData(String chr, int startLocation, int endLocation); /** * Return the precomputed summary tiles for the given locus and zoom level. If * there are none return null. * * @param chr * @param startLocation * @param endLocation * @param zoom * @return */ abstract protected List<LocusScore> getPrecomputedSummaryScores(String chr, int startLocation, int endLocation, int zoom); public int getChrLength(String chr) { if (chr.equals(Globals.CHR_ALL)) { return (int) (genome.getNominalLength() / 1000); } else { Chromosome c = genome.getChromosome(chr); return c == null ? 0 : c.getLength(); } } /** * Refresh the underlying data. Default implementation does nothing, subclasses * can override * * @param timestamp */ public void refreshData(long timestamp) { // ignore -- } /** * Return the longest feature in the dataset for the given chromosome. This * is needed when computing summary data for a region. * <p/> * * @param chr * @return */ public abstract int getLongestFeature(String chr); //{ // // if (getTrackType() == TrackType.GENE_EXPRESSION) { // String genomeId = GenomeManager.getInstance().getGenomeId(); // GeneManager gm = GeneManager.getGeneManager(genomeId); // return (gm == null) ? 1000000 : gm.getLongestGeneLength(chr); // } else { // return 1000; // } //} public List<LocusScore> getSummaryScoresForRange(String chr, int startLocation, int endLocation, int zoom) { List<LocusScore> scores = null; if (windowFunction != WindowFunction.none) { scores = getPrecomputedSummaryScores(chr, startLocation, endLocation, zoom); if (scores != null) { return scores; } } List<SummaryTile> tiles = getSummaryTilesForRange(chr, startLocation, endLocation, zoom); scores = new ArrayList(tiles.size() * 700); for (SummaryTile tile : tiles) { scores.addAll(tile.getScores()); } //FeatureUtils.sortFeatureList(summaryScores); return scores; } private List<SummaryTile> getSummaryTilesForRange(String chr, int startLocation, int endLocation, int zReq) { int chrLength = getChrLength(chr); if (chrLength == 0) { return Collections.emptyList(); } endLocation = Math.min(endLocation, chrLength); int adjustedStart = Math.max(0, startLocation); int adjustedEnd = Math.min(chrLength, endLocation); if (cacheSummaryTiles && !FrameManager.isGeneListMode()) { // By definition there are 2^z tiles per chromosome, and 700 bins per tile, where z is the zoom level. //int maxZoom = (int) (Math.log(chrLength/700) / Globals.log2) + 1; //int z = Math.min(zReq, maxZoom); int z = zReq; int virtualTileCount = (int) Math.pow(2, z); double tileWidth = ((double) chrLength) / virtualTileCount; int startTile = (int) (adjustedStart / tileWidth); int endTile = (int) (Math.min(chrLength, adjustedEnd) / tileWidth) + 1; List<SummaryTile> tiles = null; tiles = new ArrayList(endTile - startTile + 1); for (int t = startTile; t <= endTile; t++) { int tileStart = (int) (t * tileWidth); int tileEnd = Math.min(chrLength, (int) ((t + 1) * tileWidth)); String key = chr + "_" + z + "_" + t + getWindowFunction(); SummaryTile summaryTile = summaryTileCache.get(key); if (summaryTile == null) { summaryTile = computeSummaryTile(chr, tileStart, tileEnd, 700); if (cacheSummaryTiles && !FrameManager.isGeneListMode()) { synchronized (summaryTileCache) { summaryTileCache.put(key, summaryTile); } } } if (summaryTile != null) { tiles.add(summaryTile); } } return tiles; } else { SummaryTile summaryTile = computeSummaryTile(chr, startLocation, endLocation, 700); return Arrays.asList(summaryTile); } } /** * Note: Package scope used so this method can be unit tested * * @param chr * @param startLocation * @param endLocation * @param nBins * @return */ SummaryTile computeSummaryTile(String chr, int startLocation, int endLocation, int nBins) { DataTile rawTile = getRawData(chr, startLocation, endLocation); SummaryTile tile = new SummaryTile(); if (rawTile != null && !rawTile.isEmpty() && nBins > 0) { int[] starts = rawTile.getStartLocations(); int[] ends = rawTile.getEndLocations(); float[] values = rawTile.getValues(); String[] features = rawTile.getFeatureNames(); if (windowFunction == WindowFunction.none) { for (int i = 0; i < starts.length; i++) { int s = starts[i]; int e = ends == null ? s + 1 : Math.max(s + 1, ends[i]); if (e < startLocation) { continue; } else if (s >= endLocation) { break; } String probeName = features == null ? null : features[i]; float v = values[i]; BasicScore score = new NamedScore(s, e, v, probeName); tile.addScore(score); } } else { float normalizationFactor = 1.0f; List<LocusScore> scores = new ArrayList(nBins); double scale = (double) (endLocation - startLocation) / nBins; Accumulator accumulator = new Accumulator(windowFunction, 5); int accumulatedStart = -1; int accumulatedEnd = -1; int lastEndBin = 0; int size = starts.length; // Loop through and bin scores for this interval. for (int i = 0; i < size; i++) { int true_end = ends == null ? starts[i] + 1 : ends[i]; float v = values[i] * normalizationFactor; if (starts[i] >= endLocation) { break; // We're beyond the end of the requested interval } else if (true_end <= startLocation || Float.isNaN(v)) { //Not yet to interval, or not a number continue; } // Bound feature at interval, other "piece" will be in another tile. int s = Math.max(startLocation, starts[i]); int e = Math.min(endLocation, true_end); String probeName = features == null ? null : features[i]; // Compute bin numbers, relative to start of this tile int endBin = (int) ((e - startLocation) / scale); int startBin = (int) ((s - startLocation) / scale); // If this feature spans multiple bins, or extends beyond last end bin, record if (endBin > lastEndBin || endBin > startBin) { if (accumulator.hasData()) { scores.add(getCompositeScore(accumulator, accumulatedStart, accumulatedEnd)); accumulator = new Accumulator(windowFunction, 5); } } if (endBin > startBin) { scores.add(new NamedScore(s, e, v, probeName)); } else { if (!accumulator.hasData()) accumulatedStart = s; accumulatedEnd = e; accumulator.add(e - s, v, probeName); } lastEndBin = endBin; } // Cleanup if (accumulator.hasData()) { scores.add(getCompositeScore(accumulator, accumulatedStart, accumulatedEnd)); } tile.addAllScores(scores); } } return tile; } private LocusScore getCompositeScore(Accumulator accumulator, int accumulatedStart, int accumulatedEnd) { LocusScore ls; if (accumulator.getNpts() == 1) { ls = new NamedScore(accumulatedStart, accumulatedEnd, accumulator.getRepData()[0], accumulator.getRepProbes()[0]); } else { float value = accumulator.getValue(); ls = new CompositeScore(accumulatedStart, accumulatedEnd, value, accumulator.getRepData(), accumulator.getRepProbes(), windowFunction); } return ls; } /** * Return true if the data has been log normalized. * * @return */ public boolean isLogNormalized() { return true; } public void setWindowFunction(WindowFunction statType) { this.windowFunction = statType; this.summaryTileCache.clear(); } public WindowFunction getWindowFunction() { return windowFunction; } /** * Default * * @return */ public Collection<WindowFunction> getAvailableWindowFunctions() { return ORDERED_WINDOW_FUNCTIONS; } @Override public void dispose() { // default is to do nothing } }