/******************************************************************************* * Copyright 2013 EMBL-EBI * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package htsjdk.samtools.cram.encoding.reader; import java.io.EOFException; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import htsjdk.samtools.cram.structure.ReadTag; import htsjdk.samtools.cram.structure.SubstitutionMatrix; import net.sf.cram.common.Utils; public abstract class AbstractFastqReader extends AbstractReader { private ReadFeatureBuffer rfBuf = new ReadFeatureBuffer(); public boolean reverseNegativeReads = true; public boolean appendSegmentIndexToReadNames = true; public byte[] referenceSequence; public int flags; public int compressionFlags; public int mateFlags; public int readLength; public int prevAlStart; public byte[] readName; public static final int maxReadBufferLength = 1024 * 1024; public byte[] bases = new byte[maxReadBufferLength]; public byte[] scores = new byte[maxReadBufferLength]; private Map<Integer, Integer> nameCache = new HashMap<Integer, Integer>(); public long counterOffset = 0; public int defaultQS = '?'; public int ignoreReadsWithFlags = 256 | 2048; public SubstitutionMatrix substitutionMatrix; public int recordCounter = 0; /** * For now this is to identify the right buffer to use. * * @param flags * read bit flags * @return 0 for non-paired or other rubbish which could not be reliably * paired, 1 for first in pair and 2 for second in pair */ protected int getSegmentIndexInTemplate(int flags) { if ((flags & 1) == 0) return 0; if ((flags & 64) != 0) return 1; else return 2; } protected abstract byte[] refSeqChanged(int seqID); public void read() throws IOException { int seqId = refId; readName = null; try { flags = bitFlagsCodec.readData(); compressionFlags = compressionBitFlagsCodec.readData(); if (refId == -2) { seqId = refIdCodec.readData(); } readLength = readLengthCodec.readData(); if (APDelta) prevAlStart += alignmentStartCodec.readData(); else prevAlStart = alignmentStartCodec.readData(); readGroupCodec.readData(); if (captureReadNames) readName = readNameCodec.readData(); // mate record: if ((compressionFlags & CramFlags.DETACHED_FLAG) != 0) { mateFlags = mateBitFlagCodec.readData(); if (!captureReadNames) readName = readNameCodec.readData(); mateReferenceIdCodec.readData(); mateAlignmentStartCodec.readData(); insertSizeCodec.readData(); detachedCount++; } else if ((compressionFlags & CramFlags.HAS_MATE_DOWNSTREAM_FLAG) != 0) { int distance = distanceToNextFragmentCodec.readData(); nameCache.put(recordCounter + distance + 1, recordCounter); } if (readName == null) { // check cache: if (nameCache.containsKey(recordCounter)) { int order = nameCache.remove(recordCounter); readName = Long.toString(order + counterOffset).getBytes(); } else readName = Long.toString(recordCounter + counterOffset).getBytes(); } Integer tagIdList = tagIdListCodec.readData(); byte[][] ids = tagIdDictionary[tagIdList]; if (ids.length > 0) { for (int i = 0; i < ids.length; i++) { int id = ReadTag.name3BytesToInt(ids[i]); DataReader<byte[]> dataReader = tagValueCodecs.get(id); try { dataReader.readData(); } catch (EOFException e) { throw e; } } } if ((flags & CramFlags.SEGMENT_UNMAPPED_FLAG) == 0) { byte[] refBases = referenceSequence; if (seqId != refId) refBases = refSeqChanged(seqId); rfBuf.readReadFeatures(this); rfBuf.restoreReadBases(readLength, prevAlStart, refBases, substitutionMatrix, bases); mappingScoreCodec.readData(); } else { for (int i=0; i<readLength; i++) bases[i] = baseCodec.readData(); } Arrays.fill(scores, 0, readLength, (byte) (defaultQS - 33)); if ((compressionFlags & CramFlags.FORCE_PRESERVE_QS_FLAG) != 0) { for (int i=0; i<readLength; i++) scores[i] = qualityScoreCodec.readData(); } else { if ((flags & CramFlags.SEGMENT_UNMAPPED_FLAG) == 0) { rfBuf.restoreQualityScores(readLength, prevAlStart, scores); } } if ((flags & ignoreReadsWithFlags) != 0) return; for (int i = 0; i < readLength; i++) if (scores[i] == -1) scores[i] = (byte) defaultQS; else scores[i] += 33; if (reverseNegativeReads && (flags & CramFlags.NEGATIVE_STRAND_FLAG) != 0) { Utils.reverseComplement(bases, 0, readLength); Utils.reverse(scores, 0, readLength); } writeRead(readName, flags, bases, scores); recordCounter++; } catch (Exception e) { System.err.printf("Failed at record %d. \n", recordCounter); if (readName != null) System.err.println("read name: " + new String(readName)); throw new RuntimeException(e); } } /** * Write the read. The read here is basically a fastq read with an addition * of SAM bit flags. Specific implementations should take care of further * cashing/pairing/filtering and actual writing of reads. * <p> * The contract is * <ul> * <li>no supplementary reads will appear in this method. * <li>reads on negative strand will be reverse complimented to appear as if * on positive strand. * </ul> * * @param name * read name * @param flags * SAM bit flags * @param bases * read bases * @param scores * fastq quality scores (phred+33) */ protected abstract void writeRead(byte[] name, int flags, byte[] bases, byte[] scores); public abstract void finish(); }