/*
* Copyright 2015 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.hpg.bigdata.core.converters;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecord.SAMTagAndValue;
import htsjdk.samtools.TagValueAndUnsignedArrayFlag;
import htsjdk.samtools.TextTagCodec;
import htsjdk.samtools.util.StringUtil;
import java.util.*;
import org.ga4gh.models.CigarOperation;
import org.ga4gh.models.CigarUnit;
import org.ga4gh.models.LinearAlignment;
import org.ga4gh.models.Position;
import org.ga4gh.models.ReadAlignment;
import org.ga4gh.models.Strand;
import org.opencb.hpg.bigdata.core.utils.ReadAlignmentUtils;
/**
 * Converts between htsjdk {@link SAMRecord} objects and GA4GH {@link ReadAlignment} objects.
 *
 * <p>{@link #forward(SAMRecord)} maps the SAM flags, position, CIGAR, sequence, qualities and
 * optional tags onto a {@link ReadAlignment}. {@link #backward(ReadAlignment)} serializes the
 * {@link ReadAlignment} to a SAM text line through {@link ReadAlignmentUtils#getSamString} and
 * parses that line back into a {@link SAMRecord}.
 */
public class SAMRecord2ReadAlignmentConverter implements Converter<SAMRecord, ReadAlignment> {

    // Column indexes of the mandatory fields of a SAM text line (from the SAM specification)
    private static final int QNAME_COL = 0;
    private static final int FLAG_COL = 1;
    private static final int RNAME_COL = 2;
    private static final int POS_COL = 3;
    private static final int MAPQ_COL = 4;
    private static final int CIGAR_COL = 5;
    private static final int MRNM_COL = 6;
    private static final int MPOS_COL = 7;
    private static final int ISIZE_COL = 8;
    private static final int SEQ_COL = 9;
    private static final int QUAL_COL = 10;
    private static final int NUM_REQUIRED_FIELDS = 11;

    // Size of the buffer used to split a SAM line; a line filling it completely is rejected
    private static final int MAX_FIELDS = 1000;

    // When true, base qualities are passed through ReadAlignmentUtils.adjustQuality in forward()
    private final boolean adjustQuality;

    /**
     * Creates a converter that adjusts base qualities (equivalent to passing {@code true}
     * to {@link #SAMRecord2ReadAlignmentConverter(boolean)}).
     */
    public SAMRecord2ReadAlignmentConverter() {
        this(true);
    }

    /**
     * Creates a converter.
     *
     * @param adjustQuality whether {@link #forward(SAMRecord)} applies
     *                      {@link ReadAlignmentUtils#adjustQuality} to every base quality
     */
    public SAMRecord2ReadAlignmentConverter(boolean adjustQuality) {
        this.adjustQuality = adjustQuality;
    }

    /**
     * Builds a {@link ReadAlignment} from a {@link SAMRecord}.
     *
     * @param in the SAM record to convert
     * @return the equivalent read alignment
     * @throws IllegalArgumentException if the CIGAR contains an operator with no GA4GH counterpart
     */
    @Override
    public ReadAlignment forward(SAMRecord in) {
        // Pair-only getters are guarded by this flag, exactly as each one is queried below
        final boolean paired = in.getReadPairedFlag();

        // id
        String id = in.getReadName();

        // read group id; records without a read group get a fixed placeholder
        String readGroupId = in.getReadGroup() != null ? in.getReadGroup().getId() : "no-group";

        // reference name
        String fragmentName = in.getReferenceName();

        // the read is mapped in a proper pair
        boolean properPlacement = paired && in.getProperPairFlag();

        // the read is either a PCR duplicate or an optical duplicate
        boolean duplicateFragment = in.getDuplicateReadFlag();

        // the number of reads in the fragment (extension to SAM flag 0x1)
        int numberReads = paired ? 2 : 1;

        // the observed length of the fragment, equivalent to TLEN in SAM
        int fragmentLength = paired ? in.getInferredInsertSize() : 0;

        // the read number in sequencing, 0-based and less than numberReads
        // (replaces SAM flags 0x40 and 0x80)
        int readNumber = (paired && in.getSecondOfPairFlag()) ? numberReads - 1 : 0;

        // the read fails platform/vendor quality checks
        boolean failedVendorQualityChecks = in.getReadFailsVendorQualityCheckFlag();

        // alignment
        Position position = new Position();
        position.setPosition((long) in.getAlignmentStart() - 1); // from 1-based to 0-based
        position.setReferenceName(in.getReferenceName());
        position.setStrand(in.getReadNegativeStrandFlag() ? Strand.NEG_STRAND : Strand.POS_STRAND);
        int mappingQuality = in.getMappingQuality();
        LinearAlignment alignment = new LinearAlignment(position, mappingQuality, toCigarUnits(in));

        // the alignment is secondary (SAM flag 0x100); previously this wrongly read the
        // supplementary flag, so secondary alignments were never reported
        boolean secondaryAlignment = in.getNotPrimaryAlignmentFlag();

        // the alignment is supplementary (SAM flag 0x800)
        boolean supplementaryAlignment = in.getSupplementaryAlignmentFlag();

        // read sequence
        String alignedSequence = in.getReadString();

        // aligned quality
        List<Integer> alignedQuality = toAlignedQuality(in.getBaseQualities());

        // next mate position (only for paired reads)
        Position nextMatePosition = paired ? toNextMatePosition(in) : null;

        // additional read alignment information (optional SAM tags)
        Map<String, List<String>> info = toInfo(in.getAttributes());

        return new ReadAlignment(id, readGroupId, fragmentName, properPlacement, duplicateFragment,
                numberReads, fragmentLength, readNumber, failedVendorQualityChecks, alignment, secondaryAlignment,
                supplementaryAlignment, alignedSequence, alignedQuality, nextMatePosition, info);
    }

    /** Converts every CIGAR element of the record into a GA4GH {@link CigarUnit}. */
    private static List<CigarUnit> toCigarUnits(SAMRecord in) {
        List<CigarUnit> cigar = new ArrayList<>();
        for (CigarElement element : in.getCigar().getCigarElements()) {
            cigar.add(new CigarUnit(toCigarOperation(element), (long) element.getLength(), null));
        }
        return cigar;
    }

    /** Maps the operator of a SAM CIGAR element to the corresponding GA4GH {@link CigarOperation}. */
    private static CigarOperation toCigarOperation(CigarElement element) {
        switch (element.getOperator()) {
            case M:
                return CigarOperation.ALIGNMENT_MATCH;
            case I:
                return CigarOperation.INSERT;
            case D:
                return CigarOperation.DELETE;
            case N:
                return CigarOperation.SKIP;
            case S:
                return CigarOperation.CLIP_SOFT;
            case H:
                return CigarOperation.CLIP_HARD;
            case P:
                return CigarOperation.PAD;
            case EQ:
                return CigarOperation.SEQUENCE_MATCH;
            case X:
                return CigarOperation.SEQUENCE_MISMATCH;
            default:
                throw new IllegalArgumentException("Unrecognized CigarOperator: " + element);
        }
    }

    /** Copies the base qualities into a list, adjusting each one when {@code adjustQuality} is set. */
    private List<Integer> toAlignedQuality(byte[] baseQualities) {
        List<Integer> alignedQuality = new ArrayList<>(baseQualities.length);
        if (adjustQuality) {
            for (byte baseQuality : baseQualities) {
                int adjustedQuality = ReadAlignmentUtils.adjustQuality(baseQuality);
                alignedQuality.add(adjustedQuality);
            }
        } else {
            for (byte baseQuality : baseQualities) {
                alignedQuality.add((int) baseQuality);
            }
        }
        return alignedQuality;
    }

    /** Builds the mate {@link Position} of a paired read. */
    private static Position toNextMatePosition(SAMRecord in) {
        Position nextMatePosition = new Position();
        // NOTE(review): unlike the alignment start, the mate start is stored without the
        // 1-based to 0-based shift; kept as-is because ReadAlignmentUtils.getSamString (not
        // visible here) may rely on it for the round trip -- confirm before changing.
        nextMatePosition.setPosition((long) in.getMateAlignmentStart());
        nextMatePosition.setReferenceName(in.getMateReferenceName());
        nextMatePosition.setStrand(in.getMateNegativeStrandFlag() ? Strand.NEG_STRAND : Strand.POS_STRAND);
        return nextMatePosition;
    }

    /**
     * Converts the optional SAM tags into a map from tag name to a two-element list holding
     * the type code ("Z", "f" or "i") and the value rendered as a string.
     */
    private static Map<String, List<String>> toInfo(List<SAMTagAndValue> attributes) {
        Map<String, List<String>> info = new HashMap<>();
        for (SAMTagAndValue attribute : attributes) {
            List<String> typeAndValue = new ArrayList<>(2);
            if (attribute.value instanceof String) {
                typeAndValue.add("Z");
            } else if (attribute.value instanceof Float) {
                typeAndValue.add("f");
            } else {
                // NOTE(review): every other value type (including characters and arrays) is
                // labelled "i" -- confirm this is what ReadAlignmentUtils.getSamString expects.
                typeAndValue.add("i");
            }
            typeAndValue.add(String.valueOf(attribute.value));
            info.put(attribute.tag, typeAndValue);
        }
        return info;
    }

    /**
     * Builds a header-less {@link SAMRecord} from a {@link ReadAlignment} by serializing it with
     * {@link ReadAlignmentUtils#getSamString} and parsing the resulting tab-separated SAM line.
     *
     * @param in the read alignment to convert
     * @return the equivalent SAM record
     * @throws IllegalArgumentException if the SAM line has fewer than the 11 mandatory fields,
     *         fills the whole split buffer, contains an empty field, holds a non-numeric value in
     *         a numeric column, or has an optional tag that cannot be decoded
     */
    @Override
    public SAMRecord backward(ReadAlignment in) {
        final String samLine = ReadAlignmentUtils.getSamString(in);
        final String[] fields = new String[MAX_FIELDS];
        final int numFields = StringUtil.split(samLine, fields, '\t');
        if (numFields < NUM_REQUIRED_FIELDS) {
            throw new IllegalArgumentException("Not enough fields");
        }
        // A completely filled buffer means the line may hold more fields than were split out
        if (numFields == fields.length) {
            throw new IllegalArgumentException("Too many fields in SAM text record.");
        }
        for (int i = 0; i < numFields; ++i) {
            if (fields[i].isEmpty()) {
                throw new IllegalArgumentException("Empty field at position " + i + " (zero-based)");
            }
        }

        SAMRecord out = new SAMRecord(null);
        out.setReadName(fields[QNAME_COL]);
        out.setFlags(Integer.parseInt(fields[FLAG_COL]));
        out.setReferenceName(fields[RNAME_COL]);
        out.setAlignmentStart(Integer.parseInt(fields[POS_COL]));
        out.setMappingQuality(Integer.parseInt(fields[MAPQ_COL]));
        out.setCigarString(fields[CIGAR_COL]);
        // "=" means the mate is on the same reference as the read itself
        out.setMateReferenceName(fields[MRNM_COL].equals("=") ? out.getReferenceName() : fields[MRNM_COL]);
        out.setMateAlignmentStart(Integer.parseInt(fields[MPOS_COL]));
        out.setInferredInsertSize(Integer.parseInt(fields[ISIZE_COL]));

        // "*" marks an absent sequence / quality string
        if (!fields[SEQ_COL].equals("*")) {
            out.setReadString(fields[SEQ_COL]);
        } else {
            out.setReadBases(SAMRecord.NULL_SEQUENCE);
        }
        if (!fields[QUAL_COL].equals("*")) {
            out.setBaseQualityString(fields[QUAL_COL]);
        } else {
            out.setBaseQualities(SAMRecord.NULL_QUALS);
        }

        // Everything after the mandatory columns is an optional TAG:TYPE:VALUE field
        TextTagCodec tagCodec = new TextTagCodec();
        for (int i = NUM_REQUIRED_FIELDS; i < numFields; ++i) {
            Map.Entry<String, Object> entry;
            try {
                entry = tagCodec.decode(fields[i]);
            } catch (SAMFormatException e) {
                throw new IllegalArgumentException("Unable to decode field \"" + fields[i] + "\"", e);
            }
            if (entry == null) {
                continue;
            }
            if (entry.getValue() instanceof TagValueAndUnsignedArrayFlag) {
                final TagValueAndUnsignedArrayFlag valueAndFlag =
                        (TagValueAndUnsignedArrayFlag) entry.getValue();
                if (valueAndFlag.isUnsignedArray) {
                    out.setUnsignedArrayAttribute(entry.getKey(), valueAndFlag.value);
                } else {
                    out.setAttribute(entry.getKey(), valueAndFlag.value);
                }
            } else {
                out.setAttribute(entry.getKey(), entry.getValue());
            }
        }
        return out;
    }
}