/*******************************************************************************
* Copyright 2013 EMBL-EBI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package net.sf.cram;
import htsjdk.samtools.CRAMContainerStreamWriter;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.CramLossyOptions;
import htsjdk.samtools.cram.lossy.QualityScorePreservation;
import htsjdk.samtools.util.Log;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import net.sf.cram.ref.ReferenceSource;
import cipheronly.CipherOutputStream_256;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;
public class Bam2Cram {
/** Logger used by the static methods of this class; never reassigned, hence final. */
private static final Log log = Log.getInstance(Bam2Cram.class);
/** Command name of this tool; presumably the sub-command the launcher dispatches on — TODO confirm against the caller. */
public static final String COMMAND = "cram";
/**
 * Prints the implementation version of this package followed by the usage
 * text rendered by JCommander, both to standard output.
 *
 * @param jc the command-line parser whose usage text is rendered
 */
private static void printUsage(JCommander jc) {
    // Seed the buffer with a newline so the usage text is set apart from the version line.
    final StringBuilder usageText = new StringBuilder("\n");
    jc.usage(usageText);

    final String version = Bam2Cram.class.getPackage().getImplementationVersion();
    System.out.println("Version " + version);
    System.out.println(usageText);
}
/**
 * Opens the stream the CRAM output is written to.
 *
 * <p>When {@code outputFile} is given, a buffered file stream is opened;
 * otherwise a warning is logged and {@code System.out} is used. When
 * {@code encrypt} is set, the stream is additionally wrapped via
 * {@link CipherOutputStream_256}.
 *
 * @param outputFile destination file, or {@code null} to write to standard output
 * @param encrypt whether to wrap the stream in the cipher stream
 * @param pass password handed to the cipher stream; only used when {@code encrypt} is true
 * @return the stream to write to
 * @throws FileNotFoundException if {@code outputFile} cannot be opened for writing
 */
private static OutputStream openOutputStream(File outputFile, boolean encrypt, char[] pass)
        throws FileNotFoundException {
    OutputStream os;
    if (outputFile != null) {
        os = new BufferedOutputStream(new FileOutputStream(outputFile));
    } else {
        log.warn("No output file, writing to STDOUT.");
        os = System.out;
    }

    if (!encrypt) {
        return os;
    }

    try {
        // NOTE(review): the meaning of the third argument (128) is defined by CipherOutputStream_256 — confirm there.
        CipherOutputStream_256 cos = new CipherOutputStream_256(os, pass, 128);
        return cos.getCipherOutputStream();
    } catch (RuntimeException e) {
        // Do not leak the file handle opened above when the cipher cannot be set up.
        // System.out is deliberately left open.
        if (outputFile != null) {
            try {
                os.close();
            } catch (IOException ignored) {
                // The cipher failure is the error worth reporting; it is rethrown below.
            }
        }
        throw e;
    }
}
/**
 * Command-line entry point: reads alignment records from a file or standard
 * input and writes them as CRAM to a file or standard output.
 *
 * <p>Steps, in order: parse the arguments (exit code 1 on a parse failure, on
 * no arguments, or on {@code -h}); set the global log level; create the
 * reference source; read the password from the console when encryption is
 * requested; open the input; build the lossy options; open the output; fix the
 * header sequences (exit code 1 on an MD5 mismatch error); write the header;
 * write either all records or those overlapping the given queries; finish the
 * writer.
 *
 * <p>At most {@code --max-records} records are written in total. The input
 * reader and the output stream are closed on every exit path of the conversion
 * except {@code System.exit}. If standard output is the destination and it
 * reports an error, the conversion stops without finishing the writer.
 *
 * @param args command-line arguments, bound to {@link Params}
 * @throws IOException if reading the input or writing the output fails
 * @throws RuntimeException if encryption is requested but no console is available
 */
public static void main(String[] args) throws IOException, IllegalArgumentException, IllegalAccessException,
        NoSuchAlgorithmException {
    Params params = new Params();
    JCommander jc = new JCommander(params);
    try {
        jc.parse(args);
    } catch (Exception e) {
        System.out.println("Failed to parse parameters, detailed message below: ");
        System.out.println(e.getMessage());
        System.out.println();
        System.out.println("See usage: -h");
        System.exit(1);
    }

    if (args.length == 0 || params.help) {
        printUsage(jc);
        System.exit(1);
    }

    Log.setGlobalLogLevel(params.logLevel);

    if (params.referenceFasta == null)
        log.warn("No reference file specified, remote access over internet may be used to download public sequences. ");
    ReferenceSource referenceSource = new ReferenceSource(params.referenceFasta);

    char[] pass = null;
    if (params.encrypt) {
        if (System.console() == null)
            throw new RuntimeException("Cannot access console.");
        pass = System.console().readPassword();
    }

    SamReaderFactory f = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    SamReader samReader;
    if (params.bamFile == null) {
        log.warn("No input file, reading from input...");
        samReader = f.open(SamInputResource.of(System.in));
    } else
        samReader = f.open(params.bamFile);

    final boolean toStdout = params.outputCramFile == null;
    OutputStream os = null;
    try {
        // Work on a copy so that header fixes do not touch the reader's own header object.
        SAMFileHeader samFileHeader = samReader.getFileHeader().clone();

        CramLossyOptions lossyOptions = new CramLossyOptions();
        lossyOptions.setCaptureAllTags(params.captureAllTags);
        lossyOptions.setCaptureTags(params.captureTags);
        lossyOptions.setIgnoreTags(params.ignoreTags);
        lossyOptions.setPreserveReadNames(params.preserveReadNames);
        // The lossless flag takes precedence over any lossy spec; with neither, all scores are dropped.
        if (params.losslessQS) {
            lossyOptions.setPreservation(QualityScorePreservation.lossless());
        } else {
            if (params.qsSpec == null || params.qsSpec.length() == 0)
                lossyOptions.setPreservation(QualityScorePreservation.dropAll());
            else
                lossyOptions.setPreservation(QualityScorePreservation.lossyFromSpec(params.qsSpec));
        }
        log.info("Lossiness: " + lossyOptions);

        os = openOutputStream(params.outputCramFile, params.encrypt, pass);

        FixBAMFileHeader fixBAMFileHeader = new FixBAMFileHeader(referenceSource);
        fixBAMFileHeader.setConfirmMD5(params.confirmMD5);
        fixBAMFileHeader.setInjectURI(params.injectURI);
        fixBAMFileHeader.setIgnoreMD5Mismatch(params.ignoreMD5Mismatch);
        try {
            fixBAMFileHeader.fixSequences(samFileHeader.getSequenceDictionary().getSequences());
        } catch (FixBAMFileHeader.MD5MismatchError e) {
            log.error(e.getMessage());
            System.exit(1);
        }
        fixBAMFileHeader.addCramtoolsPG(samFileHeader);

        CRAMContainerStreamWriter w = new CRAMContainerStreamWriter(os, null, referenceSource, samFileHeader,
                params.bamFile == null ? null : params.bamFile.getName(), lossyOptions);
        w.writeHeader(samFileHeader);

        // Record budget shared by all queries, so --max-records caps the total output.
        long remaining = params.maxRecords;
        boolean stdoutFailed = false;

        if (params.queries == null || params.queries.isEmpty()) {
            stdoutFailed = writeRecords(samReader.iterator(), w, toStdout, remaining) < 0;
        } else {
            // Parse every query up front so a malformed one aborts before any record is written.
            List<AlignmentSliceQuery> queries = new ArrayList<AlignmentSliceQuery>();
            for (String string : params.queries) {
                try {
                    queries.add(new AlignmentSliceQuery(string));
                } catch (Exception e) {
                    log.error("Failed to parse query: " + string);
                    System.exit(1);
                }
            }

            for (AlignmentSliceQuery query : queries) {
                if (remaining <= 0)
                    break;
                long written = writeRecords(samReader.query(query.sequence, query.start, query.end, false), w,
                        toStdout, remaining);
                if (written < 0) {
                    stdoutFailed = true;
                    break;
                }
                remaining -= written;
            }
        }

        // A failed STDOUT cannot take any more data, so the writer is only finished on success.
        if (!stdoutFailed)
            w.finish(true);
    } finally {
        try {
            samReader.close();
        } finally {
            if (os != null)
                os.close();
        }
    }
}

/**
 * Writes records from {@code iterator} to {@code writer} until the iterator is
 * exhausted, {@code limit} records have been written, or standard output
 * reports an error. The iterator is always closed before returning.
 *
 * <p>The throws clause mirrors that of {@code main} because the checked
 * exceptions declared by the writer are not visible from this file.
 *
 * @param iterator source of records; closed by this method
 * @param writer destination CRAM writer
 * @param toStdout whether the output goes to {@code System.out}, in which case its error flag is polled before each record
 * @param limit maximum number of records to write; values below 1 write nothing
 * @return the number of records written, or -1 if standard output reported an error
 */
private static long writeRecords(SAMRecordIterator iterator, CRAMContainerStreamWriter writer, boolean toStdout,
        long limit) throws IOException, IllegalArgumentException, IllegalAccessException,
        NoSuchAlgorithmException {
    long written = 0;
    try {
        while (written < limit && iterator.hasNext()) {
            if (toStdout && System.out.checkError())
                return -1;
            SAMRecord samRecord = iterator.next();
            writer.writeAlignment(samRecord);
            written++;
        }
        return written;
    } finally {
        iterator.close();
    }
}
/**
 * Command-line options of the converter, populated by JCommander from the
 * annotations below. Field initializers are the defaults used when an option
 * is absent.
 */
@Parameters(commandDescription = "BAM to CRAM converter. ")
static class Params {
    /** Global log level applied before any work starts. */
    @Parameter(names = { "-l", "--log-level" }, description = "Change log level: DEBUG, INFO, WARNING, ERROR.", converter = CramTools.LevelConverter.class)
    Log.LogLevel logLevel = Log.LogLevel.ERROR;

    /** Input file; {@code null} means read from standard input. */
    @Parameter(names = { "--input-bam-file", "-I" }, converter = FileConverter.class, description = "Path to a BAM file to be converted to CRAM. Omit if standard input (pipe).")
    File bamFile;

    /** Reference fasta file; may be {@code null}. */
    @Parameter(names = { "--reference-fasta-file", "-R" }, converter = FileConverter.class, description = "The reference fasta file, uncompressed and indexed (.fai file, use 'samtools faidx'). ")
    File referenceFasta;

    /** Output file; {@code null} means write to standard output. */
    @Parameter(names = { "--output-cram-file", "-O" }, converter = FileConverter.class, description = "The path for the output CRAM file. Omit if standard output (pipe).")
    File outputCramFile = null;

    /** Upper bound on the number of records to convert; unlimited by default. */
    @Parameter(names = { "--max-records" }, description = "Stop after compressing this many records. ")
    long maxRecords = Long.MAX_VALUE;

    /** Unnamed (positional) arguments: alignment slice queries restricting which records are converted. */
    @Parameter
    List<String> queries;

    @Parameter(names = { "-h", "--help" }, description = "Print help and quit")
    boolean help = false;

    @Parameter(names = { "--preserve-read-names", "-n" }, description = "Preserve all read names.")
    boolean preserveReadNames = false;

    /** When set, quality scores are kept losslessly and {@link #qsSpec} is not consulted. */
    @Parameter(names = { "--lossless-quality-score", "-Q" }, description = "Preserve all quality scores. Overwrites '--lossy-quality-score-spec'.")
    boolean losslessQS = false;

    /** Lossy quality score specification; an empty or {@code null} value means all scores are dropped. */
    @Parameter(names = { "--lossy-quality-score-spec", "-L" }, description = "A string specifying what quality scores should be preserved.")
    String qsSpec = "";

    /** When set, the password is read from the console and the output is encrypted. */
    @Parameter(names = { "--encrypt" }, description = "Encrypt the CRAM file.")
    boolean encrypt = false;

    @Parameter(names = { "--ignore-tags" }, description = "Ignore the tags listed, for example 'OQ:XA:XB'")
    String ignoreTags = "";

    @Parameter(names = { "--capture-tags" }, description = "Capture the tags listed, for example 'OQ:XA:XB'")
    String captureTags = "";

    @Parameter(names = { "--capture-all-tags" }, description = "Capture all tags.")
    boolean captureAllTags = false;

    // NOTE(review): this flag is not read by main() in this file — confirm whether it is still needed.
    @Parameter(names = { "--input-is-sam" }, description = "Input is in SAM format.")
    boolean inputIsSam = false;

    @Parameter(names = { "--inject-sq-uri" }, description = "Inject or change the @SQ:UR header fields to point to ENA reference service. ")
    public boolean injectURI = false;

    // NOTE(review): the description reads inverted relative to the option name
    // ("ignore" = true is described as "fail") — verify against FixBAMFileHeader.
    @Parameter(names = { "--ignore-md5-mismatch" }, description = "Fail on MD5 mismatch if true, or correct (overwrite) the checksums and continue if false.")
    public boolean ignoreMD5Mismatch = false;

    /** Hidden option that takes an explicit value (arity 1), so the default of {@code true} can be switched off. */
    @Parameter(names = { "--confirm-md5" }, description = "Confirm MD5 checksums of the reference sequences.", hidden = true, arity = 1)
    public boolean confirmMD5 = true;
}
}