/*
* The MIT License (MIT)
* Copyright (c) 2007-2015 by Institute for Computational Biomedicine,
* Weill Medical College of Cornell University.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.goby;
import edu.cornell.med.icb.goby.counts.CachingCountsArchiveReader;
import edu.cornell.med.icb.goby.counts.CountBinningAdapterI;
import edu.cornell.med.icb.goby.counts.CountBinningAdaptor;
import edu.cornell.med.icb.goby.counts.CountsReader;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectSet;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Logger;
import org.broad.igv.data.BasicScore;
import org.broad.igv.data.CoverageDataSource;
import org.broad.igv.exceptions.DataLoadException;
import org.broad.igv.feature.LocusScore;
import org.broad.igv.prefs.Constants;
import org.broad.igv.prefs.PreferencesManager;
import org.broad.igv.tdf.TDFReader;
import org.broad.igv.track.TrackType;
import org.broad.igv.track.WindowFunction;
import org.broad.igv.util.ResourceLocator;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Vector;
/**
* A data source for <a href="http://goby.campagnelab.org">Goby</a> compressed base-level histograms (.counts files).
*
*
* @author Fabien Campagne
* Date: 6/10/11
* Time: 3:18 PM
*/
public class GobyCountArchiveDataSource implements CoverageDataSource {
static final Logger LOG = Logger.getLogger(TDFReader.class);
CachingCountsArchiveReader counts;
private double currentMax = 4;
private String filename;
private boolean someIdsStartWithChr;
private ObjectSet<String> ids;
private WindowFunction selectedWindowFunction;
long numBasesSeen;
long numSitesSeen;
private boolean hasPrecomputedStats;
private boolean doNormalize;
private double normalizationFactor;
public GobyCountArchiveDataSource(ResourceLocator locator) {
init(locator.getPath());
}
static List<WindowFunction> availableFunctions = Arrays.asList(WindowFunction.mean, WindowFunction.max);
private void init(String filename) {
try {
this.filename = FilenameUtils.removeExtension(filename);
counts = new CachingCountsArchiveReader(this.filename);
ids = counts.getIdentifiers();
for (String id : ids) {
if (id.startsWith("chr")) {
someIdsStartWithChr = true;
break;
}
}
if (counts.isStatsParsed()) {
this.numBasesSeen = counts.getTotalBasesSeen();
this.numSitesSeen = counts.getTotalSitesSeen();
hasPrecomputedStats = true;
}
boolean normalizeCounts = PreferencesManager.getPreferences().getAsBoolean(Constants.NORMALIZE_COVERAGE);
setNormalize(normalizeCounts);
} catch (IOException ex) {
LOG.error("Error loading file: " + filename, ex);
throw new DataLoadException("Error loading goby counts archive file: " + ex.toString(), filename);
}
}
public GobyCountArchiveDataSource(File file) {
init(file.getPath());
}
public double getDataMax() {
// LOG.info("Call in getDataMax currentMax= " + currentMax);
return currentMax;
}
public double getDataMin() {
// LOG.info("Call in getDataMin ");
boolean normalizeCounts = PreferencesManager.getPreferences().getAsBoolean(Constants.NORMALIZE_COVERAGE);
setNormalize(normalizeCounts);
return 0.0;
}
public List<LocusScore> getSummaryScoresForRange(String chr, int startLocation, int endLocation, int zoom) {
if ("All".equals(chr)) return new Vector<LocusScore>();
try {
currentMax = 0;
CountsReader reader = getCountsReader(chr);
int initialStartPosition = startLocation;
int binSize = getBinSize(startLocation, endLocation);
/* LOG.info(String.format("Call in getSummaryScoresForRange %d-%d zoom %d binSize=%d ", startLocation, endLocation,
zoom, binSize));
*/
if (reader == null) {
return null;
}
updateNormalizationFactor();
CountBinningAdapterI binAdaptor = new CountBinningAdaptor(reader, binSize);
if (counts.hasIndex()) {
// we can only reposition if the countsreader has an index. Otherwise, we trust the
// caching counts archive to have created a new reader positioned at the start of the counts sequence.
binAdaptor.reposition(startLocation);
}
binAdaptor.skipTo(startLocation);
int position = binAdaptor.getPosition();
ObjectArrayList<LocusScore> result = new ObjectArrayList<LocusScore>();
result.add(new BasicScore(initialStartPosition, position, 0.0f));
while (binAdaptor.hasNextTransition() && position < endLocation) {
binAdaptor.nextTransition();
// position is the zero-based position before the count is changed by the transition
position = binAdaptor.getPosition();
// count is how many reads cover the length bases that follow position.
final double count = selectedWindowFunction == WindowFunction.mean ? binAdaptor.getAverage() : binAdaptor.getMax();
final double normalizedCount = count / normalizationFactor;
final int length = binAdaptor.getLength();
// System.out.printf("adding results %d-%d count=%g %n", position, position + length , normalizedCount);
BasicScore bs = new BasicScore(position, position + length, (float) normalizedCount);
result.add(bs);
this.currentMax = Math.max(currentMax, normalizedCount);
if (!hasPrecomputedStats) {
// estimate on the fly from the subset of data seen so far:
numBasesSeen += length * count;
if (count != 0) {
numSitesSeen += length;
}
}
}
result.add(new BasicScore(position, endLocation, 0.0f));
return result;
} catch (IOException e) {
LOG.error(e);
throw new DataLoadException(
String.format("Error getting summary scores for range %s:%d-%d in goby counts archive file %s %n",
chr, startLocation, endLocation, filename), filename);
}
}
private int getBinSize(int startLocation, int endLocation) {
final int regionLength = endLocation - startLocation;
return Math.max(1, ((regionLength / 2000)));
}
private void updateNormalizationFactor() {
if (numSitesSeen == 0) {
normalizationFactor = 1.0;
} else {
// normalization factor is estimated such that the average value will be ~2 for autosomes, corresponding
// to two copies.
normalizationFactor = doNormalize ? ((double)numBasesSeen / (double) numSitesSeen) /2.0:
1.0;
}
// System.out.printf("normalization factor=%g%n", normalizationFactor);
}
private CountsReader getCountsReader(String chr) throws IOException {
if (ids.contains(chr)) {
return counts.getCountReader(chr);
}
if (!someIdsStartWithChr) {
CountsReader reader = counts.getCountReader(chr.replaceFirst("chr", ""));
if (reader != null) return reader;
}
return counts.getCountReader(chr);
}
public TrackType getTrackType() {
return TrackType.COVERAGE;
}
public void setWindowFunction(WindowFunction statType) {
selectedWindowFunction = statType;
// LOG.info("Call in setWindowFunction ");
}
public boolean isLogNormalized() {
// LOG.info("Call in isLogNormalized ");
return false;
}
public void refreshData(long timestamp) {
// LOG.info("Call in refreshData ");
}
public WindowFunction getWindowFunction() {
// LOG.info("Call in getWindowFunction ");
return selectedWindowFunction;
}
public Collection<WindowFunction> getAvailableWindowFunctions() {
// LOG.info("Call in getAvailableWindowFunctions ");
return availableFunctions;
}
@Override
public void dispose() {
}
public String getPath() {
return filename;
}
public void setNormalize(boolean normalize) {
this.doNormalize = normalize;
}
public boolean getNormalize(){
return this.doNormalize;
}
}