/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.alignment.proto;
import com.google.protobuf.ByteString;
import java.util.*;
import org.opencb.biodata.formats.alignment.AlignmentFactory;
import org.opencb.biodata.models.alignment.Alignment;
import org.opencb.biodata.models.alignment.Alignment.AlignmentDifference;
import org.opencb.biodata.models.alignment.AlignmentHeader;
import org.opencb.opencga.storage.alignment.AlignmentSummary;
/**
* Created with IntelliJ IDEA.
* User: jmmut
* Date: 3/7/14
* Time: 11:29 AM
*
* TODO jj: check rnext, pnext, mateAlignmentStart,
* TODO jj: check another sam with CIGAR: hard clipping, padding, and skipped region
*/
public class AlignmentProtoHelper {
public static AlignmentProto.AlignmentBucket toAlignmentBucketProto(List<Alignment> alignments, AlignmentSummary summary, long bucketStart, int overlapped){
if(alignments == null || alignments.isEmpty()){
if(overlapped != 0){
if(alignments == null){
alignments = new LinkedList<>();
}
} else {
return null;
}
}
AlignmentProto.AlignmentBucket.Builder alignmentBucketBuilder = AlignmentProto.AlignmentBucket.newBuilder();
alignmentBucketBuilder.setOverlapped(overlapped);
alignmentBucketBuilder.setSummaryIndex(summary.getIndex());
long prevStart = bucketStart;
for(Alignment alignment : alignments){
alignmentBucketBuilder.addAlignmentRecords(AlignmentProtoHelper.toAlignmentProto(alignment,prevStart, summary));
prevStart = alignment.getStart();
}
return alignmentBucketBuilder.build();
}
public static AlignmentProto.AlignmentRecord toAlignmentProto(Alignment alignment, long prevStart, AlignmentSummary summary){
AlignmentProto.AlignmentRecord.Builder alignmentRecordBuilder = AlignmentProto.AlignmentRecord.newBuilder()
.setName(alignment.getName())
.setPos((int) (alignment.getStart() - prevStart))
.setMapq(alignment.getMappingQuality())
.setRelativePnext((int)(alignment.getMateAlignmentStart() - alignment.getStart()))
.setQualities(alignment.getQualities())
.setInferredInsertSize(alignment.getInferredInsertSize());
if(alignment.getFlags() != summary.getDefaultFlag()){
alignmentRecordBuilder.setFlags(alignment.getFlags());
}
if(alignment.getLength() != summary.getDefaultLen()){
alignmentRecordBuilder.setLen(alignment.getLength());
}
if(!alignment.getMateReferenceName().equals(summary.getDefaultRNext())){
alignmentRecordBuilder.setRnext(alignment.getMateReferenceName());
}
int prevDifferencePos = 0;
for(Alignment.AlignmentDifference alignmentDifference : alignment.getDifferences()){
AlignmentProto.Difference.DifferenceOperator operator = AlignmentProto.Difference.DifferenceOperator.MISMATCH;
switch(alignmentDifference.getOp()){
case Alignment.AlignmentDifference.DELETION:
operator = AlignmentProto.Difference.DifferenceOperator.DELETION;
break;
case Alignment.AlignmentDifference.HARD_CLIPPING:
operator = AlignmentProto.Difference.DifferenceOperator.HARD_CLIPPING;
break;
case Alignment.AlignmentDifference.INSERTION:
operator = AlignmentProto.Difference.DifferenceOperator.INSERTION;
break;
case Alignment.AlignmentDifference.MISMATCH:
operator = AlignmentProto.Difference.DifferenceOperator.MISMATCH;
break;
case Alignment.AlignmentDifference.PADDING:
operator = AlignmentProto.Difference.DifferenceOperator.PADDING;
break;
case Alignment.AlignmentDifference.SKIPPED_REGION:
operator = AlignmentProto.Difference.DifferenceOperator.SKIPPED_REGION;
break;
case Alignment.AlignmentDifference.SOFT_CLIPPING:
operator = AlignmentProto.Difference.DifferenceOperator.SOFT_CLIPPING;
break;
}
AlignmentProto.Difference.Builder differenceBuilder = AlignmentProto.Difference.newBuilder()
.setOperator(operator); //TODO jj: Default operator
if(alignmentDifference.getPos() != prevDifferencePos){
differenceBuilder.setPos(alignmentDifference.getPos());
}
if (alignmentDifference.isSequenceStored()) {
differenceBuilder.setSequence(ByteString.copyFromUtf8(alignmentDifference.getSeq()));
} else {
if(alignmentDifference.getLength() != 1){ //TODO jj: Default length?
differenceBuilder.setLength(alignmentDifference.getLength());
}
}
alignmentRecordBuilder.addDiffs(differenceBuilder.build());
}
alignmentRecordBuilder.addAllTags(summary.getIndexTagList(alignment.getAttributes()));
return alignmentRecordBuilder.build();
}
public static List<Alignment> toAlignmentList(AlignmentProto.AlignmentBucket alignmentBucket, AlignmentSummary summary, String chromosome, long bucketStart){
if(alignmentBucket.getSummaryIndex() != summary.getIndex()){
System.out.println("[ERROR] Summary doesn't match!"); //TODO jj: Throw exception?
}
List<Alignment> alignments = new LinkedList<>();
long prevStart = bucketStart;
Alignment alignment;
for(AlignmentProto.AlignmentRecord alignmentRecord : alignmentBucket.getAlignmentRecordsList()){
alignments.add(alignment = toAlignment(alignmentRecord, summary, chromosome, prevStart));
prevStart = alignment.getStart();
}
return alignments;
}
public static Alignment toAlignment(AlignmentProto.AlignmentRecord alignmentProto, AlignmentSummary summary, String chromosome, long prevStart){
List<Alignment.AlignmentDifference> alignmentDifferences = new LinkedList<>();
int offset = toAlignmentDifference(alignmentProto.getDiffsList(), alignmentDifferences);
int length = alignmentProto.hasLen() ? alignmentProto.getLen() : summary.getDefaultLen();
long start = alignmentProto.getPos() + prevStart;
long end = start + offset + length - 1;
long unclippedStart = start;
long unclippedEnd = end;
if(!alignmentDifferences.isEmpty()){
AlignmentDifference diff;
diff = alignmentDifferences.get(0);
if(diff.getOp() == Alignment.AlignmentDifference.SOFT_CLIPPING && diff.getPos() == 0){
unclippedStart -= diff.getLength();
}
diff = alignmentDifferences.get(alignmentDifferences.size() - 1);
if(diff.getOp() == Alignment.AlignmentDifference.SOFT_CLIPPING
&& diff.getPos() != 0){
unclippedEnd += diff.getLength();
}
}
Alignment alignment = new Alignment();
alignment.setName(alignmentProto.getName());
alignment.setChromosome(chromosome);
alignment.setStart(start);
alignment.setEnd(end);
alignment.setUnclippedStart(unclippedStart);
alignment.setUnclippedEnd(unclippedEnd);
alignment.setLength(length); //Optiona. Get from Summary
alignment.setMappingQuality(alignmentProto.getMapq());
alignment.setQualities(alignmentProto.getQualities());
alignment.setMateAlignmentStart((int) (alignmentProto.getRelativePnext() + start));
alignment.setMateReferenceName(alignmentProto.hasRnext() ? alignmentProto.getRnext() : summary.getDefaultRNext()); //Optiona. Get from Summary
alignment.setInferredInsertSize(alignmentProto.getInferredInsertSize());
alignment.setFlags(alignmentProto.hasFlags() ? alignmentProto.getFlags() : summary.getDefaultFlag()); //Optiona. Get from Summary
alignment.setDifferences(alignmentDifferences);
alignment.setAttributes(summary.getTagsFromList(alignmentProto.getTagsList()));
return alignment;
}
public static int toAlignmentDifference(List<AlignmentProto.Difference> differenceList, List<Alignment.AlignmentDifference> alignmentDifferenceList) {
int offset = 0;
int prevPos = 0;
for (AlignmentProto.Difference difference: differenceList) {
int pos = difference.hasPos() ? difference.getPos() : prevPos; //If miss, prev position.
prevPos = pos; //update prev position.
String seq = difference.hasSequence()? new String(difference.getSequence().toByteArray()) : null; //If miss, null
int len = difference.hasLength()?difference.getLength() : seq!=null ? seq.length() : 1; //If miss, seq.length. If miss too, 1.
char operator = AlignmentProto.Difference.DifferenceOperator.MISMATCH_VALUE;
switch(difference.getOperator().getNumber()) {
case AlignmentProto.Difference.DifferenceOperator.DELETION_VALUE:
operator = Alignment.AlignmentDifference.DELETION;
offset += len;
break;
case AlignmentProto.Difference.DifferenceOperator.HARD_CLIPPING_VALUE:
operator = Alignment.AlignmentDifference.HARD_CLIPPING; // FIXME offset
break;
case AlignmentProto.Difference.DifferenceOperator.INSERTION_VALUE:
operator = Alignment.AlignmentDifference.INSERTION;
offset -= len;
break;
case AlignmentProto.Difference.DifferenceOperator.MISMATCH_VALUE:
operator = Alignment.AlignmentDifference.MISMATCH;
break;
case AlignmentProto.Difference.DifferenceOperator.PADDING_VALUE:
operator = Alignment.AlignmentDifference.PADDING; // FIXME offset
break;
case AlignmentProto.Difference.DifferenceOperator.SKIPPED_REGION_VALUE:
operator = Alignment.AlignmentDifference.SKIPPED_REGION; // FIXME offset
break;
case AlignmentProto.Difference.DifferenceOperator.SOFT_CLIPPING_VALUE:
operator = Alignment.AlignmentDifference.SOFT_CLIPPING;
offset -= len;
break;
}
alignmentDifferenceList.add(new Alignment.AlignmentDifference(pos, operator, seq, len));
}
return offset;
}
/**
* Compression Core. UNIMPLEMENTED!
*
000 A
001 C
010 G
011 T
100 N
101
110
111 END
**/
private static String uncompressSeq(ByteString seq){
//String readSequence = difference.hasSequence()? new String(difference.getSequence().toByteArray()): null;
throw new UnsupportedOperationException();
}
private static ByteString compressSeq(String seq){
throw new UnsupportedOperationException();
}
}