package com.github.lindenb.jvarkit.tools.misc;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.AbstractVCFCodec;
import htsjdk.variant.vcf.VCFHeader;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.List;
import com.beust.jcommander.Parameter;
import com.github.lindenb.jvarkit.io.IOUtils;
import com.github.lindenb.jvarkit.util.jcommander.Launcher;
import com.github.lindenb.jvarkit.util.jcommander.Program;
import com.github.lindenb.jvarkit.util.log.Logger;
import com.github.lindenb.jvarkit.util.vcf.VCFUtils;
@Program(name="vcfgetvariantbyIndex",description="Access a Plain or BGZF-compressed VCF file by index" )
public class VcfGetVariantByIndex extends Launcher
{
/** Logger shared by this tool and its nested index classes; never reassigned, hence final. */
private static final Logger LOG=Logger.build(VcfGetVariantByIndex.class).make();
/** Destination of the selected variants; when left null the output goes to stdout (see the option description). */
@Parameter(names="-o",description=" (out) output file. default stdout")
private File outputFile=null;
/** Text file listing the 1-based indexes of the variants to extract; required by doWork. */
@Parameter(names="-i",description=" (file) list of 1-based indexes")
private File fileListOfIndexes=null;
/** Suffix appended to the VCF file name to build the name of its companion index file. */
private static final String STANDARD_EXTENSION=".ith";
private static abstract class IndexFile
implements Closeable
{
protected File vcfFile;
protected File indexFile;
protected VCFUtils.CodecAndHeader cah;
private RandomAccessFile rafile;
private long count=0L;
public IndexFile(File vcfFile) throws IOException
{
this.vcfFile=vcfFile;
this.indexFile = new File(
vcfFile.getParentFile(),
vcfFile.getName()+STANDARD_EXTENSION
);
}
public VCFHeader getHeader() {
return cah.header;
}
public AbstractVCFCodec getCodec()
{
return cah.codec;
}
public abstract int read() throws IOException;
protected String readLine() throws IOException
{
StringBuffer buf = new StringBuffer();
int c;
while ((c = this.read()) >= 0 && c != '\n')
buf.append((char) c);
if (c < 0) return null;
return buf.toString();
}
public void open() throws IOException
{
if(this.indexFile.exists())
{
LOG.info("Index exists reading "+this.indexFile);
openForReading();
}
else
{
LOG.info("Writing index for "+this.indexFile);
openForBuilding();
}
}
protected abstract void openVcfFile() throws IOException;
protected abstract long getFilePointer() throws IOException;
private void openForBuilding() throws IOException
{
try
{
this.rafile=new RandomAccessFile(this.indexFile,"rw");
openVcfFile();
String line=null;
List<String> headerLines=new ArrayList<>();
while((line=readLine())!=null)
{
headerLines.add(line);
if(line.startsWith("#CHROM")) break;
}
VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(headerLines);
this.cah=cah;
this.count=0L;
long virtualPtr=this.getFilePointer();
while((line=readLine())!=null)
{
this.rafile.writeLong(virtualPtr);
this.count++;
virtualPtr=this.getFilePointer();
}
}
catch(Exception err)
{
throw new IOException(err);
}
}
private void openForReading() throws IOException
{
try
{
this.rafile=new RandomAccessFile(this.indexFile,"r");
openVcfFile();
String line=null;
List<String> headerLines=new ArrayList<>();
while((line=readLine())!=null)
{
headerLines.add(line);
if(line.startsWith("#CHROM")) break;
}
VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(headerLines);
this.cah=cah;
this.count = (this.indexFile.length()/8L);
}
catch(Exception err)
{
throw new IOException(err);
}
}
public long getVirtualPtr(long index) throws IOException
{
if(index<0 || index>=this.count) throw new IndexOutOfBoundsException();
this.rafile.seek(index*8);//8 =sizeof(long)
return this.rafile.readLong();
}
public abstract String getLine(long index) throws IOException;
public long size()
{
return count;
}
@Override
public void close() throws IOException {
CloserUtil.close(this.rafile);
this.rafile=null;
this.cah=null;
this.count=0;
}
}
/**
 * {@link IndexFile} over a VCF read through a {@link BlockCompressedInputStream}.
 * The positions stored in the index are the values returned by that stream's
 * getFilePointer() and are handed back to its seek().
 */
private static class BGZIndexFile extends IndexFile
	{
	/** stream over the VCF; null before openVcfFile() and after close() */
	private BlockCompressedInputStream bgzin=null;

	/** @param f the compressed VCF file */
	BGZIndexFile(File f) throws IOException
		{
		super(f);
		}

	/** Creates the block-compressed stream over the VCF file. */
	@Override
	protected void openVcfFile() throws IOException
		{
		final BlockCompressedInputStream stream = new BlockCompressedInputStream(this.vcfFile);
		this.bgzin = stream;
		}

	/** @return the next byte of the stream, as returned by its read() */
	@Override
	public int read() throws IOException
		{
		final int nextByte = this.bgzin.read();
		return nextByte;
		}

	/** @return the current position of the stream, as returned by its getFilePointer() */
	@Override
	protected long getFilePointer() throws IOException
		{
		final long position = this.bgzin.getFilePointer();
		return position;
		}

	/** Moves the stream to the position recorded for the variant and reads one line. */
	@Override
	public String getLine(long index) throws IOException
		{
		this.bgzin.seek(getVirtualPtr(index));
		return readLine();
		}

	/** Closes the index first, then the stream over the VCF. */
	@Override
	public void close() throws IOException
		{
		super.close();
		CloserUtil.close(this.bgzin);
		this.bgzin=null;
		}
	}
/**
 * {@link IndexFile} over a VCF read through a {@link RandomAccessFile}.
 * The positions stored in the index are plain byte offsets in the file.
 *
 * RandomAccessFile does no buffering of its own, so bytes are served from a
 * read-ahead buffer instead of issuing one file read per byte. Invariant:
 * the file pointer of {@code vcfraf} is always bufferStart + bufferLength.
 */
private static class RandomAccessIndexFile extends IndexFile
	{
	/** size of the read-ahead buffer, in bytes */
	private static final int BUFFER_SIZE = 64 * 1024;
	/** handle on the VCF; null before openVcfFile() and after close() */
	RandomAccessFile vcfraf=null;
	/** read-ahead buffer */
	private final byte[] buffer = new byte[BUFFER_SIZE];
	/** file offset of buffer[0] */
	private long bufferStart = 0L;
	/** number of valid bytes in the buffer */
	private int bufferLength = 0;
	/** index in the buffer of the next byte to return */
	private int bufferPos = 0;

	/** @param f the VCF file */
	RandomAccessIndexFile(File f) throws IOException
		{
		super(f);
		}

	/** Opens the VCF read-only, positioned at offset 0 with an empty buffer. */
	@Override
	protected void openVcfFile() throws IOException {
		this.vcfraf=new RandomAccessFile(this.vcfFile, "r");
		resetBuffer(0L);
		}

	/** Closes the index first, then the handle on the VCF. */
	@Override
	public void close() throws IOException {
		super.close();
		CloserUtil.close(this.vcfraf);
		this.vcfraf=null;
		resetBuffer(0L);
		}

	/**
	 * @return the next byte of the VCF as a value in 0-255, or -1 at the end of the file
	 */
	@Override
	public int read() throws IOException
		{
		if(this.bufferPos >= this.bufferLength)
			{
			// buffer exhausted: the next chunk starts where the previous one ended
			this.bufferStart += this.bufferLength;
			this.bufferLength = 0;
			this.bufferPos = 0;
			final int nRead = this.vcfraf.read(this.buffer, 0, this.buffer.length);
			if(nRead <= 0) return -1;
			this.bufferLength = nRead;
			}
		// mask: bytes are signed, callers expect 0-255
		return this.buffer[this.bufferPos++] & 0xFF;
		}

	/** Moves to the byte offset recorded for the variant and reads one line. */
	@Override
	public String getLine(long index) throws IOException
		{
		long offset = getVirtualPtr(index);
		seek(offset);
		return readLine();
		}

	/** @return the offset in the file of the next byte read() will return */
	@Override
	protected long getFilePointer() throws IOException {
		return this.bufferStart + this.bufferPos;
		}

	/** Positions the next read() at the given file offset, reusing the buffer when it already holds that offset. */
	private void seek(final long offset) throws IOException
		{
		final long inBuffer = offset - this.bufferStart;
		if(inBuffer >= 0L && inBuffer < this.bufferLength)
			{
			this.bufferPos = (int)inBuffer;
			return;
			}
		this.vcfraf.seek(offset);
		resetBuffer(offset);
		}

	/** Empties the buffer and declares that its (future) first byte lives at the given file offset. */
	private void resetBuffer(final long offset)
		{
		this.bufferStart = offset;
		this.bufferLength = 0;
		this.bufferPos = 0;
		}
	}
/**
 * Runs the tool: opens (or builds) the index of the VCF given as the single
 * argument, then writes the header followed by the variants whose 1-based
 * indexes are listed in the file given with -i, in the order they are listed.
 *
 * In the list file, blank lines and lines starting with '#' are skipped;
 * surrounding whitespace is ignored. Unparsable or out-of-range indexes are
 * reported and skipped without aborting the run.
 *
 * @param args the command-line arguments left after option parsing; exactly one VCF path is expected
 * @return 0 on success, -1 on bad arguments or on any error
 */
public int doWork(List<String> args) {
	if(this.fileListOfIndexes==null)
		{
		LOG.error("undefined list of indexes");
		return -1;
		}
	if(args.size()!=1)
		{
		LOG.error("Expected only one vcf file on input");
		return -1;
		}
	File vcfFile=new File(args.get(0));
	VariantContextWriter w=null;
	IndexFile indexFile=null;
	BufferedReader r=null;
	String line;
	try {
		LOG.info("Opening "+vcfFile);
		// the kind of random access depends on the file extension
		if(vcfFile.getName().endsWith(".vcf.gz"))
			{
			indexFile = new BGZIndexFile(vcfFile);
			}
		else if(vcfFile.getName().endsWith(".vcf"))
			{
			indexFile = new RandomAccessIndexFile(vcfFile);
			}
		else
			{
			LOG.error("Not a .vcf or .vcf.gz file: "+vcfFile);
			return -1;
			}
		indexFile.open();
		w = super.openVariantContextWriter(outputFile);
		w.writeHeader(indexFile.getHeader());
		r=IOUtils.openFileForBufferedReading(fileListOfIndexes);
		while((line=r.readLine())!=null)
			{
			// Long.parseLong rejects any whitespace: strip it so that
			// " 12" or "12 " are not reported as bad indexes
			final String token=line.trim();
			if(token.isEmpty() || token.startsWith("#")) continue;
			long ith;
			try {
				ith=Long.parseLong(token);
				}
			catch (NumberFormatException e) {
				LOG.error("Bad index in "+line+" ignoring");
				continue;
				}
			ith--;//0-based index
			if(ith<0 || ith>=indexFile.size())
				{
				LOG.error("Index out of bound in "+line+" ignoring");
				continue;
				}
			String varStr = indexFile.getLine(ith);
			w.add(indexFile.getCodec().decode(varStr));
			}
		}
	catch (Exception e)
		{
		LOG.error(e);
		return -1;
		}
	finally
		{
		CloserUtil.close(indexFile);
		CloserUtil.close(w);
		CloserUtil.close(r);
		}
	return 0;
	}
/**
 * Command-line entry point: creates the tool and hands it the raw arguments.
 *
 * @param args the command-line arguments
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException
	{
	final VcfGetVariantByIndex application = new VcfGetVariantByIndex();
	application.instanceMain(args);
	}
}