/*******************************************************************************
* Copyright 2013 EMBL-EBI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package net.sf.cram;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.util.Log;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
import net.sf.cram.common.Utils;
import net.sf.cram.ref.ReferenceSource;
public class FixBAMFileHeader {
private static Log log = Log.getInstance(FixBAMFileHeader.class);
private boolean confirmMD5 = false;
private String sequenceUrlPattern = "http://www.ebi.ac.uk/ena/cram/md5/%s";
private boolean injectURI = false;
private boolean ignoreMD5Mismatch = false;
private ReferenceSource referenceSource;
public FixBAMFileHeader(ReferenceSource referenceSource) {
this.referenceSource = referenceSource;
}
public void fixSequences(List<SAMSequenceRecord> sequenceRecords) throws MD5MismatchError {
for (SAMSequenceRecord sequenceRecord : sequenceRecords)
fixSequence(sequenceRecord);
}
public void fixSequence(SAMSequenceRecord sequenceRecord) throws MD5MismatchError {
String found_md5 = sequenceRecord.getAttribute(SAMSequenceRecord.MD5_TAG);
if (confirmMD5) {
if (found_md5 != null) {
byte[] bytes = referenceSource.getReferenceBases(sequenceRecord, true);
if (bytes == null) {
String message = String.format("Reference bases not found: name %s, length %d, md5 %s.",
sequenceRecord.getSequenceName(), sequenceRecord.getSequenceLength(), found_md5);
log.error(message);
throw new RuntimeException(message);
}
log.info("Confirming reference sequence md5: " + sequenceRecord.getSequenceName());
String md5 = Utils.calculateMD5String(bytes);
if (!md5.equals(found_md5)) {
if (ignoreMD5Mismatch) {
log.warn(String.format(
"Sequence id=%d, len=%d, name=%s has incorrect md5=%s. Replaced with %s.",
sequenceRecord.getSequenceIndex(), sequenceRecord.getSequenceLength(),
sequenceRecord.getSequenceName(), found_md5, md5));
sequenceRecord.setAttribute(SAMSequenceRecord.MD5_TAG, md5);
} else
throw new MD5MismatchError(sequenceRecord, md5);
}
if (sequenceRecord.getSequenceLength() != bytes.length) {
log.warn(String.format("Sequence id=%d, name=%s has incorrect length=%d. Replaced with %d.",
sequenceRecord.getSequenceIndex(), sequenceRecord.getSequenceName(),
sequenceRecord.getSequenceLength(), bytes.length));
sequenceRecord.setSequenceLength(bytes.length);
}
} else {
log.info("Reference sequence MD5 not found, calculating: " + sequenceRecord.getSequenceName());
byte[] bytes = referenceSource.getReferenceBases(sequenceRecord, true);
if (bytes == null) {
String message = String.format("Reference bases not found: name %s, length %d.",
sequenceRecord.getSequenceName(), sequenceRecord.getSequenceLength());
log.error(message);
throw new RuntimeException(message);
}
String md5 = Utils.calculateMD5String(bytes);
sequenceRecord.setAttribute(SAMSequenceRecord.MD5_TAG, md5);
}
}
if (injectURI) {
sequenceRecord.setAttribute(SAMSequenceRecord.URI_TAG,
String.format(sequenceUrlPattern, sequenceRecord.getAttribute(SAMSequenceRecord.MD5_TAG)));
}
}
public void addPG(SAMFileHeader header, String program, String cmd, String version) {
SAMProgramRecord programRecord = header.createProgramRecord();
programRecord.setCommandLine(cmd);
programRecord.setProgramName(program);
programRecord.setProgramVersion(version);
}
public void addCramtoolsPG(SAMFileHeader header) {
String cmd = "java " + Utils.getJavaCommand();
String version = Utils.CRAM_VERSION.toString();
addPG(header, "cramtools", cmd, version);
}
public boolean isConfirmMD5() {
return confirmMD5;
}
public void setConfirmMD5(boolean confirmMD5) {
this.confirmMD5 = confirmMD5;
}
public String getSequenceUrlPattern() {
return sequenceUrlPattern;
}
public void setSequenceUrlPattern(String sequenceUrlPattern) {
this.sequenceUrlPattern = sequenceUrlPattern;
}
public ReferenceSource getReferenceSource() {
return referenceSource;
}
public boolean isInjectURI() {
return injectURI;
}
public void setInjectURI(boolean injectURI) {
this.injectURI = injectURI;
}
public boolean fixHeaderInFile(File cramFile) throws IOException, MD5MismatchError {
FileInputStream fis = new FileInputStream(cramFile);
CramHeader cramHeader = CramIO.readCramHeader(fis);
fixSequences(cramHeader.getSamFileHeader().getSequenceDictionary().getSequences());
String cmd = "fixheader";
String version = getClass().getPackage().getImplementationVersion();
addPG(cramHeader.getSamFileHeader(), "cramtools", cmd, version);
CramHeader newHeader = cramHeader.clone();
return CramIO.replaceCramHeader(cramFile, newHeader);
}
public static class MD5MismatchError extends RuntimeException {
private SAMSequenceRecord sequenceRecord;
private String actualMD5;
public MD5MismatchError(SAMSequenceRecord sequenceRecord, String md5) {
super(String.format("MD5 mismatch for sequence %s:%s", sequenceRecord.getSequenceName(), md5));
this.sequenceRecord = sequenceRecord;
this.actualMD5 = md5;
}
public String getActualMD5() {
return actualMD5;
}
public void setActualMD5(String actualMD5) {
this.actualMD5 = actualMD5;
}
public SAMSequenceRecord getSequenceRecord() {
return sequenceRecord;
}
public void setSequenceRecord(SAMSequenceRecord sequenceRecord) {
this.sequenceRecord = sequenceRecord;
}
}
public boolean isIgnoreMD5Mismatch() {
return ignoreMD5Mismatch;
}
public void setIgnoreMD5Mismatch(boolean ignoreMD5Mismatch) {
this.ignoreMD5Mismatch = ignoreMD5Mismatch;
}
}