/*******************************************************************************
* GenPlay, Einstein Genome Analyzer
* Copyright (C) 2009, 2014 Albert Einstein College of Medicine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
* Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
* Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
*
* Website: <http://genplay.einstein.yu.edu>
******************************************************************************/
package edu.yu.einstein.genplay.core.multiGenome.operation.convert;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import net.sf.samtools.util.BlockCompressedOutputStream;
import edu.yu.einstein.genplay.util.Utils;
/**
 * Compresses a VCF file into a BGZIP file using a {@link BlockCompressedOutputStream}.
 * <p>
 * The compressed file is created next to the input, using the input path with
 * ".gz" appended. The input is copied line by line, so every line of the output
 * is terminated by a single line feed: CR and CRLF terminators are normalized to
 * LF and a final LF is always written, even if the input did not end with one.
 *
 * @author Nicolas Fourel
 * @version 0.1
 */
public class MGOBGZIPCompression {

	/**
	 * Charset used both to decode the input and to re-encode the output.
	 * ISO-8859-1 maps every byte value to exactly one char and back, so the content
	 * of each line is copied byte for byte whatever the real encoding of the VCF
	 * file or the default charset of the platform is.
	 */
	private static final String BYTE_PRESERVING_CHARSET = "ISO-8859-1";

	/** Line terminator written after every line of the output. */
	private static final byte[] LINE_FEED = {'\n'};

	private final File 	vcfFile;	// the vcf file
	private File 		bgzFile;	// the bgzip file


	/**
	 * Constructor of {@link MGOBGZIPCompression}
	 * @param file the VCF file to compress
	 */
	public MGOBGZIPCompression(File file) {
		vcfFile = file;
	}


	/**
	 * Compresses the VCF file into a BGZIP file.
	 * @return true if the file has been compressed, false if the file does not have the "vcf" extension
	 * @throws IOException if the VCF file cannot be read or the BGZIP file cannot be written
	 */
	public Boolean compute() throws IOException {
		return regular();
	}


	/**
	 * Copies the VCF file line by line into a block compressed stream.
	 * Both streams are closed whether the copy succeeds or fails.
	 * @return true if the file has been compressed, false if the file does not have the "vcf" extension
	 * @throws IOException if the VCF file cannot be read or the BGZIP file cannot be written
	 */
	private boolean regular() throws IOException {
		// Constant first so that a missing extension is rejected instead of raising a NullPointerException
		// NOTE(review): assumes Utils.getExtension returns the extension without the dot -- same comparison as before
		if (!"vcf".equals(Utils.getExtension(vcfFile))) {
			return false;
		}

		// Open the VCF input stream
		FileInputStream vcfFIS = new FileInputStream(vcfFile);
		try {
			BufferedReader vcfBR = new BufferedReader(new InputStreamReader(vcfFIS, BYTE_PRESERVING_CHARSET));

			// Get the BGZIP file
			bgzFile = new File(vcfFile.getPath() + ".gz");

			// Open the BGZIP output stream
			BlockCompressedOutputStream bgzipBCOS = new BlockCompressedOutputStream(bgzFile);
			try {
				String vcfLine;
				while ((vcfLine = vcfBR.readLine()) != null) {
					bgzipBCOS.write(vcfLine.getBytes(BYTE_PRESERVING_CHARSET));
					bgzipBCOS.write(LINE_FEED);
				}
			} finally {
				// Close the BGZIP output stream, even when the copy failed, so the file handle is released
				bgzipBCOS.close();
			}
		} finally {
			// Close the VCF input stream; the readers wrapping it hold no other resource
			vcfFIS.close();
		}
		return true;
	}


	/**
	 * @return the BGZIP file, or null if {@link #compute()} has not been called yet, if the VCF file
	 * did not have the "vcf" extension or if it could not be opened. The file is set as soon as the
	 * compression starts, so it is also returned when the compression failed part way.
	 */
	public File getBgzFile() {
		return bgzFile;
	}
}