package com.illumina.basespace.igv.vcf; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.logging.Logger; import net.sf.samtools.util.BlockCompressedInputStream; import org.broad.igv.data.DataSource; import org.broad.igv.feature.genome.Genome; import org.broad.igv.feature.tribble.CachingFeatureReader; import org.broad.igv.feature.tribble.VCFWrapperCodec; import org.broad.igv.track.TribbleFeatureSource; import org.broad.igv.ui.event.ViewChange; import org.broad.igv.ui.panel.FrameManager; import org.broad.igv.util.RuntimeUtils; import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.CloseableTribbleIterator; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureReader; import org.broadinstitute.variant.vcf.VCF3Codec; import org.broadinstitute.variant.vcf.VCFCodec; import com.google.common.eventbus.Subscribe; import com.illumina.basespace.igv.BaseSpaceMain; import com.illumina.basespace.igv.BaseSpaceUtil; import com.illumina.basespace.igv.vcf.VCFLocatorFactory.VCFTrackLoader; /** * @author jrobinso * @date Jun 27, 2010 */ public class BaseSpaceFeatureSource extends TribbleFeatureSource { CachingFeatureReader cachingReader; DataSource coverageSource; boolean isVCF; Genome genome; private VCFTrackLoader locator; private static Logger logger = Logger.getLogger(BaseSpaceFeatureSource.class.getPackage().getName()); private FeatureCodec<?> codec; /** * Map of IGV chromosome name -> source name */ Map<String, String> chrNameMap = new HashMap(); private int featureWindowSize; Object header; Class featureClass; public BaseSpaceFeatureSource(VCFTrackLoader locator, Genome genome) throws IOException { super(locator.getFileName(), genome,false); this.locator = locator; this.genome = genome; codec = getVCFCodec(); isVCF = codec.getClass() == VCFWrapperCodec.class; featureClass = codec.getFeatureType(); AbstractFeatureReader<?> basicReader = locator.getIndexFile() == null?new BaseSpaceVariantSetApiFeatureReader( this.locator ,codec): new BaseSpaceTabixFeatureReader(this.locator,codec); //AbstractFeatureReader<?> basicReader = locator.getIndexFile() == null?new BaseSpaceTribbleFeatureReader( this.locator ,codec): // new BaseSpaceTabixFeatureReader(this.locator,codec); header = basicReader.getHeader(); initFeatureWindowSize(basicReader); cachingReader = new CachingFeatureReader(basicReader, 5, getFeatureWindowSize()); if (genome != null) { Collection<String> seqNames = cachingReader.getSequenceNames(); if (seqNames != null) for (String seqName : seqNames) { String igvChr = genome.getChromosomeAlias(seqName); if (igvChr != null && !igvChr.equals(seqName)) { chrNameMap.put(igvChr, seqName); } } } //FrameManager.getDefaultFrame().getEventBus().register(this); } @Subscribe public void receiveZoomChange(ViewChange.ZoomCause e) { BaseSpaceVariantSetApiFeatureReader.clearVcfErrors(); } public Object getHeader() { return header; } public void setFeatureWindowSize(int size) { this.featureWindowSize = size; cachingReader.setBinSize(size); } public Iterator<Feature> getFeatures(String chr, int start, int end) throws IOException { String seqName = chrNameMap.get(chr); if (seqName == null) seqName = chr; return cachingReader.query(seqName, start, end); } private AsciiFeatureCodec getVCFCodec() { BufferedReader reader = null; try { if (locator.getPath().endsWith(".gz")) { reader = new BufferedReader(new InputStreamReader(new BlockCompressedInputStream( BaseSpaceMain.instance().getApiClient(locator.getClientId()).getFileInputStream(locator.getFile())))); } else { reader = new BufferedReader(new InputStreamReader(BaseSpaceMain.instance().getApiClient(locator.getClientId()).getFileInputStream(locator.getFile()))); } // Look for fileformat directive. This should be the first line, but // just in case check the first 20 int lineCount = 0; String formatLine; while ((formatLine = reader.readLine()) != null && lineCount < 20) { if (formatLine.toLowerCase().startsWith("##fileformat")) { String[] tmp = formatLine.split("="); if (tmp.length > 1) { String version = tmp[1].toLowerCase(); logger.info("VCF Version=" + version); if (version.startsWith("vcfv3")) { return new VCF3Codec(); } else { return new VCFWrapperCodec(new VCFCodec(),genome); } } } lineCount++; } } catch (IOException e) { logger.severe("Error checking VCF Version"); } finally { BaseSpaceUtil.dispose(reader); } // Should never get here, but as a last resort assume this is a VCF 4.x // file. return new VCFCodec(); } protected void init(String path) { } public VCFTrackLoader getLocator() { return locator; } private void initFeatureWindowSize(FeatureReader reader) { CloseableTribbleIterator<org.broad.tribble.Feature> iter = null; try { double mem = RuntimeUtils.getAvailableMemory(); iter = reader.iterator(); if (iter.hasNext()) { int nSamples = isVCF ? 100 : 1000; org.broad.tribble.Feature firstFeature = iter.next(); org.broad.tribble.Feature lastFeature = firstFeature; String chr = firstFeature.getChr(); int n = 1; long len = 0; while (iter.hasNext() && n < nSamples) { org.broad.tribble.Feature f = iter.next(); if (f != null) { n++; if (f.getChr().equals(chr)) { lastFeature = f; } else { len += lastFeature.getEnd() - firstFeature.getStart() + 1; firstFeature = f; lastFeature = f; chr = f.getChr(); } } } double dMem = mem - RuntimeUtils.getAvailableMemory(); double bytesPerFeature = Math.max(100, dMem / n); len += lastFeature.getEnd() - firstFeature.getStart() + 1; double featuresPerBase = ((double) n) / len; double targetBinMemory = 20000000; // 20 mega bytes int maxBinSize = isVCF ? 1000000 : Integer.MAX_VALUE; int bs = Math.min(maxBinSize, (int) (targetBinMemory / (bytesPerFeature * featuresPerBase))); featureWindowSize = Math.max(1000000, bs); } else { featureWindowSize = Integer.MAX_VALUE; } } catch (IOException e) { featureWindowSize = 1000000; } } }