/* * Copyright 2015 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.hpg.bigdata.core.utils; import org.ga4gh.models.CigarUnit; import org.ga4gh.models.LinearAlignment; import org.ga4gh.models.ReadAlignment; import org.ga4gh.models.Strand; public class ReadAlignmentUtils { private static final String FIELD_SEPARATOR = "\t"; protected ReadAlignmentUtils() { } public static String getSamString(ReadAlignment ra) { StringBuilder res = new StringBuilder(); LinearAlignment la = (LinearAlignment) ra.getAlignment(); // id res.append(ra.getId().toString()).append(FIELD_SEPARATOR); // flags int flags = 0; if (ra.getNumberReads() > 0) { flags |= 0x1; } // TODO Check this line, it was getProperPlacement() before if (!ra.getImproperPlacement()) { flags |= 0x2; } if (la == null) { flags |= 0x4; } else { if (la.getPosition().getStrand() == Strand.NEG_STRAND) { flags |= 0x10; } } if (ra.getNextMatePosition() != null) { if (ra.getNextMatePosition().getStrand() == Strand.NEG_STRAND) { flags |= 0x20; } } else { if (ra.getNumberReads() > 0) { flags |= 0x8; } } if (ra.getReadNumber() == 0) { flags |= 0x40; } if (ra.getNumberReads() > 0 && ra.getReadNumber() == ra.getNumberReads() - 1) { flags |= 0x80; } if (ra.getSecondaryAlignment()) { flags |= 0x100; } if (ra.getFailedVendorQualityChecks()) { flags |= 0x200; } if (ra.getDuplicateFragment()) { flags |= 0x400; } res.append(flags); res.append(FIELD_SEPARATOR); if (la == null) { res.append("*").append(FIELD_SEPARATOR); // chromosome res.append("0").append(FIELD_SEPARATOR); // position res.append("0").append(FIELD_SEPARATOR); // mapping quality res.append(ra.getAlignedSequence().length()).append("M").append(FIELD_SEPARATOR); // cigar } else { // chromosome res.append(la.getPosition().getReferenceName()); res.append(FIELD_SEPARATOR); // position res.append(la.getPosition().getPosition() + 1); //0-based to 1-based res.append(FIELD_SEPARATOR); // mapping quality res.append(la.getMappingQuality()); res.append(FIELD_SEPARATOR); // cigar for (CigarUnit e : la.getCigar()) { res.append(e.getOperationLength()); switch (e.getOperation()) { case ALIGNMENT_MATCH: res.append("M"); break; case INSERT: res.append("I"); break; case DELETE: res.append("D"); break; case SKIP: res.append("N"); break; case CLIP_SOFT: res.append("S"); break; case CLIP_HARD: res.append("H"); break; case PAD: res.append("P"); break; case SEQUENCE_MATCH: res.append("="); break; case SEQUENCE_MISMATCH: res.append("X"); break; default: break; } } res.append(FIELD_SEPARATOR); } // mate chromosome if (ra.getNextMatePosition() != null) { if (la != null && ra.getNextMatePosition().getReferenceName().equals(la.getPosition().getReferenceName())) { res.append("="); } else { res.append(ra.getNextMatePosition().getReferenceName()); } } else { res.append("*"); } res.append(FIELD_SEPARATOR); // mate position if (ra.getNextMatePosition() != null) { res.append(ra.getNextMatePosition().getPosition()); } else { res.append(0); } res.append(FIELD_SEPARATOR); // tlen res.append(ra.getFragmentLength()); res.append(FIELD_SEPARATOR); // sequence res.append(ra.getAlignedSequence().toString()); res.append(FIELD_SEPARATOR); // quality for (int v: ra.getAlignedQuality()) { res.append((char) (v + 33)); // Add ASCII offset } // optional fields for (CharSequence key: ra.getInfo().keySet()) { res.append(FIELD_SEPARATOR); res.append(key.toString()); for (CharSequence val : ra.getInfo().get(key)) { res.append((":" + val.toString())); } } return res.toString(); } /** * Adjusts the quality value for optimized 8-level mapping quality scores. * * Quality range -> Mapped quality * 1 -> 1 * 2-9 -> 6 * 10-19 -> 15 * 20-24 -> 22 * 25-29 -> 27 * 30-34 -> 33 * 35-39 -> 27 * >=40 -> 40 * * Read more: http://www.illumina.com/documents/products/technotes/technote_understanding_quality_scores.pd * * @param quality original quality * @return Adjusted quality */ public static int adjustQuality(int quality) { final int adjustedQuality; if (quality <= 1) { adjustedQuality = quality; } else { int qualRange = quality / 5; switch (qualRange) { case 0: case 1: adjustedQuality = 6; break; case 2: case 3: adjustedQuality = 15; break; case 4: adjustedQuality = 22; break; case 5: adjustedQuality = 27; break; case 6: adjustedQuality = 33; break; case 7: adjustedQuality = 37; break; case 8: default: adjustedQuality = 40; break; } } return adjustedQuality; } }