/*******************************************************************************
* Copyright 2013 EMBL-EBI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package net.sf.cram.index;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory;
import htsjdk.samtools.cram.encoding.reader.RefSeqIdReader;
import htsjdk.samtools.cram.io.DefaultBitInputStream;
import htsjdk.samtools.cram.structure.AlignmentSpan;
import htsjdk.samtools.cram.structure.CompressionHeader;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.Slice;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
public class CramIndex {
private List<Entry> entries = new ArrayList<CramIndex.Entry>();
public void addContainer(Container c) throws IOException, IllegalArgumentException, IllegalAccessException {
if (c.isEOF())
return;
for (int i = 0; i < c.slices.length; i++) {
Slice s = c.slices[i];
if (s.sequenceId == -2) {
this.entries.addAll(getMutliRefEntries(c.header, s, c.offset, c.landmarks));
} else {
Entry e = new Entry();
e.sequenceId = c.sequenceId;
e.alignmentStart = s.alignmentStart;
e.alignmentSpan = s.alignmentSpan;
e.containerStartOffset = c.offset;
e.sliceOffset = c.landmarks[i];
e.sliceSize = s.size;
e.sliceIndex = i;
entries.add(e);
}
}
}
public void writeTo(OutputStream os) throws IOException {
Collections.sort(entries, byStartDesc);
for (Entry e : entries) {
String string = e.toString();
os.write(string.getBytes());
os.write('\n');
}
}
private static Collection<Entry> getMutliRefEntries(CompressionHeader header, Slice slice, long containerOffset,
int[] landmarks) throws IllegalArgumentException, IllegalAccessException, IOException {
final DataReaderFactory dataReaderFactory = new DataReaderFactory();
final Map<Integer, InputStream> inputMap = new HashMap<Integer, InputStream>();
for (final Integer exId : slice.external.keySet()) {
inputMap.put(exId, new ByteArrayInputStream(slice.external.get(exId).getRawContent()));
}
final RefSeqIdReader reader = new RefSeqIdReader(slice.sequenceId, slice.alignmentStart,
ValidationStringency.SILENT);
dataReaderFactory.buildReader(reader,
new DefaultBitInputStream(new ByteArrayInputStream(slice.coreBlock.getRawContent())), inputMap, header,
slice.sequenceId);
reader.APDelta = header.APDelta;
for (int i = 0; i < slice.nofRecords; i++) {
final CramCompressionRecord record = new CramCompressionRecord();
record.sliceIndex = slice.index;
record.index = i;
reader.read();
if (record.sequenceId == slice.sequenceId) {
record.sequenceId = slice.sequenceId;
} else {
if (record.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
record.sequenceName = SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
}
}
Map<Integer, AlignmentSpan> spans = reader.getReferenceSpans();
List<Entry> entries = new ArrayList<CramIndex.Entry>(spans.size());
for (int seqId : spans.keySet()) {
Entry e = new Entry();
e.sequenceId = seqId;
AlignmentSpan span = spans.get(seqId);
e.alignmentStart = span.getStart();
e.alignmentSpan = span.getSpan();
e.sliceSize = slice.size;
e.sliceIndex = slice.index;
e.containerStartOffset = containerOffset;
e.sliceOffset = landmarks[slice.index];
entries.add(e);
}
return entries;
}
public static class Entry implements Comparable<Entry>, Cloneable {
public int sequenceId;
public int alignmentStart;
public int alignmentSpan;
public long containerStartOffset;
public int sliceOffset;
public int sliceSize;
public int sliceIndex;
public Entry() {
}
public Entry(String line) {
String[] chunks = line.split("\t");
if (chunks.length != 6)
throw new RuntimeException("Invalid index format: " + line);
sequenceId = Integer.valueOf(chunks[0]);
alignmentStart = Integer.valueOf(chunks[1]);
alignmentSpan = Integer.valueOf(chunks[2]);
containerStartOffset = Long.valueOf(chunks[3]);
sliceOffset = Integer.valueOf(chunks[4]);
sliceSize = Integer.valueOf(chunks[5]);
}
@Override
public String toString() {
return String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan,
containerStartOffset, sliceOffset, sliceSize);
}
@Override
public int compareTo(Entry o) {
if (sequenceId != o.sequenceId)
return o.sequenceId - sequenceId;
if (alignmentStart != o.alignmentStart)
return alignmentStart - o.alignmentStart;
return (int) (containerStartOffset - o.containerStartOffset);
}
@Override
public Entry clone() throws CloneNotSupportedException {
Entry entry = new Entry();
entry.sequenceId = sequenceId;
entry.alignmentStart = alignmentStart;
entry.alignmentSpan = alignmentSpan;
entry.containerStartOffset = containerStartOffset;
entry.sliceOffset = sliceOffset;
entry.sliceSize = sliceSize;
return entry;
}
}
public static List<Entry> readIndex(InputStream is) {
List<Entry> list = new LinkedList<CramIndex.Entry>();
Scanner scanner = new Scanner(is);
try {
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
Entry entry = new Entry(line);
list.add(entry);
}
} finally {
try {
scanner.close();
} catch (Exception e) {
}
}
return list;
}
private static Comparator<Entry> byEnd = new Comparator<Entry>() {
@Override
public int compare(Entry o1, Entry o2) {
if (o1.sequenceId != o2.sequenceId)
return o2.sequenceId - o1.sequenceId;
if (o1.alignmentStart + o1.alignmentSpan != o2.alignmentStart + o2.alignmentSpan)
return o1.alignmentStart + o1.alignmentSpan - o2.alignmentStart - o2.alignmentSpan;
return (int) (o1.containerStartOffset - o2.containerStartOffset);
}
};
private static Comparator<Entry> byStart = new Comparator<Entry>() {
@Override
public int compare(Entry o1, Entry o2) {
if (o1.sequenceId != o2.sequenceId)
return o2.sequenceId - o1.sequenceId;
if (o1.alignmentStart != o2.alignmentStart)
return o1.alignmentStart - o2.alignmentStart;
return (int) (o1.containerStartOffset - o2.containerStartOffset);
}
};
private static Comparator<Entry> byStartDesc = new Comparator<Entry>() {
@Override
public int compare(Entry o1, Entry o2) {
if (o1.sequenceId != o2.sequenceId) {
if (o1.sequenceId == -1)
return 1;
if (o2.sequenceId == -1)
return -1;
return -o2.sequenceId + o1.sequenceId;
}
if (o1.alignmentStart != o2.alignmentStart)
return o1.alignmentStart - o2.alignmentStart;
return (int) (o1.containerStartOffset - o2.containerStartOffset);
}
};
private static boolean intersect(Entry e0, Entry e1) {
if (e0.sequenceId != e1.sequenceId)
return false;
if (e0.sequenceId < 0)
return false;
int a0 = e0.alignmentStart;
int a1 = e1.alignmentStart;
int b0 = a0 + e0.alignmentSpan;
int b1 = a1 + e1.alignmentSpan;
boolean result = Math.abs(a0 + b0 - a1 - b1) < (e0.alignmentSpan + e1.alignmentSpan);
return result;
}
public static List<Entry> find(List<Entry> list, int seqId, int start, int span) {
boolean whole = start < 1 || span < 1;
Entry query = new Entry();
query.sequenceId = seqId;
query.alignmentStart = start < 1 ? 1 : start;
query.alignmentSpan = span < 1 ? Integer.MAX_VALUE : span;
query.containerStartOffset = Long.MAX_VALUE;
query.sliceOffset = Integer.MAX_VALUE;
query.sliceSize = Integer.MAX_VALUE;
List<Entry> l = new ArrayList<Entry>();
for (Entry e : list) {
if (e.sequenceId != seqId) {
continue;
}
if (whole || intersect(e, query))
l.add(e);
}
Collections.sort(l, byStart);
return l;
}
public static Entry getLeftmost(List<Entry> list) {
if (list == null || list.isEmpty())
return null;
Entry left = list.get(0);
for (Entry e : list)
if (e.alignmentStart < left.alignmentStart)
left = e;
return left;
}
private static int findLastAlignedEntry(List<Entry> list) {
int low = 0;
int high = list.size() - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
Entry midVal = list.get(mid);
if (midVal.sequenceId >= 0)
low = mid + 1;
else
high = mid - 1;
}
return low;
}
}