/* * The MIT License (MIT) * * Copyright (c) 2015 UC San Diego * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.broad.igv.tools; import htsjdk.samtools.*; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloseableIterator; import org.broad.igv.sam.Alignment; import org.broad.igv.sam.PicardAlignment; import org.broad.igv.sam.reader.AlignmentReader; import org.broad.igv.sam.reader.AlignmentReaderFactory; import org.broad.igv.sam.reader.SAMReader; import org.broad.igv.util.stream.IGVSeekableBufferedStream; import org.broad.igv.util.stream.IGVSeekableStreamFactory; import java.io.*; import java.util.*; /** * Created by jrobinso on 10/6/15. */ public class PairedUtils { public static void main(String[] args) throws IOException { // extractInteractions(args[0], args[1], Integer.parseInt(args[2])); extractUnexpectedPairs(args[0], args[1]); } public static void extractInteractions(String alignmentFile, String outputFile, int binSize) { AlignmentReader reader = null; PrintWriter pw = null; CloseableIterator<Alignment> iter = null; Map<String, Integer> counts = new HashMap<String, Integer>(10000); try { reader = AlignmentReaderFactory.getReader(alignmentFile, false); iter = reader.iterator(); while (iter != null && iter.hasNext()) { Alignment alignment = iter.next(); if (alignment.isPaired() && alignment.getMate().isMapped() && alignment.getMappingQuality() > 0 && interactionFilter(alignment)) { String chr1 = alignment.getChr(); int bin1 = alignment.getAlignmentStart() / binSize; String chr2 = alignment.getMate().getChr(); int bin2 = alignment.getMate().getStart() / binSize; int o1 = getOrder(chr1); int o2 = getOrder(chr2); if (o1 > o2 || (o1 < 0 || o2 < 0)) continue; // Only need to record one diagonal, they are symmetrical String cell = chr1 + "_" + bin1 + ":" + chr2 + "_" + bin2; Integer count = counts.get(cell); if (count == null) { counts.put(cell, 1); } else { count += 1; counts.put(cell, count); } } } iter.close(); // Now filter cells with counts < 5 Set<Map.Entry<String, Integer>> entrySet = counts.entrySet(); Iterator<Map.Entry<String, Integer>> iter2 = entrySet.iterator(); while (iter2.hasNext()) { if (iter2.next().getValue() < 5) iter2.remove(); } // Output counts pw = new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); for (Map.Entry<String, Integer> entry : entrySet) { String cell = entry.getKey(); String[] tokens = cell.split(":"); String[] t1 = tokens[0].split("_"); String[] t2 = tokens[1].split("_"); int bin1 = Integer.parseInt(t1[1]); int bin2 = Integer.parseInt(t2[1]); int gPos1 = (int) (bin1 + 0.5) * binSize; int gPos2 = (int) (bin2 + 0.5) * binSize; pw.println(cell + "\t" + t1[0] + "\t" + gPos1 + "\t" + t2[0] + "\t" + gPos2 + "\t" + (entry.getValue() / 2)); } } catch (Exception e) { e.printStackTrace(); } finally { if (pw != null) { pw.close(); } if (reader != null) { try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } } } enum Orientation {FF, RF, FR, INTER} ; public static void extractUnexpectedPairs(String alignmentFile, String outputDir) throws IOException { // Defaults.REFERENCE_FASTA = new File("/User/jrobinso/human_g1k_v37_decoy.fasta"); htsjdk.samtools.SamReader reader = null; SAMFileWriter ffWriter = null, rfWriter = null, interWriter = null, frWriter = null; try { final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); File f = new File("/Users/jrobinso/Downloads/human_g1k_v37_decoy.fasta"); ReferenceSource rs = new ReferenceSource(f); factory.referenceSource(rs); SeekableStream ss = new IGVSeekableBufferedStream(IGVSeekableStreamFactory.getInstance().getStreamFor(alignmentFile), 128000); SamInputResource resource = SamInputResource.of(ss); reader = factory.open(resource); SAMFileHeader header = reader.getFileHeader(); boolean preSorted = true; SAMFileWriterFactory wfactory = new SAMFileWriterFactory(); String pre = (new File(alignmentFile)).getName().replace(".bam", ""); ffWriter = wfactory.makeSAMOrBAMWriter(header, preSorted, new File(outputDir, pre + "_ff.bam")); rfWriter = wfactory.makeSAMOrBAMWriter(header, preSorted, new File(outputDir, pre + "_rf.bam")); frWriter = wfactory.makeSAMOrBAMWriter(header, preSorted, new File(outputDir, pre + "_fr.bam")); interWriter = wfactory.makeSAMOrBAMWriter(header, preSorted, new File(outputDir, pre + "_inter.bam")); SAMRecordIterator iter = reader.iterator(); int count = 0; while (iter != null && iter.hasNext()) { SAMRecord record = iter.next(); Alignment alignment = new PicardAlignment(record); if (alignment.isPaired() && alignment.getMate().isMapped() && !alignment.isProperPair()) { if (funnyPairFilter(alignment)) { Orientation orientation = getOrientation(alignment); if (orientation != null) { switch (orientation) { case FF: ffWriter.addAlignment(record); break; case RF: rfWriter.addAlignment(record); break; case FR: frWriter.addAlignment(record); break; case INTER: interWriter.addAlignment(record); break; } } } } count++; if (count % 1000000 == 0) System.out.println(count); } iter.close(); } catch (Exception e) { e.printStackTrace(); } finally { ffWriter.close(); rfWriter.close(); frWriter.close(); interWriter.close(); reader.close(); } } private static Orientation getOrientation(Alignment alignment) { if (!alignment.getChr().equals(alignment.getMate().getChr())) { return Orientation.INTER; } else { String pairOrientation = alignment.getPairOrientation(); if (frTypes.contains(pairOrientation)) { return Orientation.FR; } else if (rfTypes.contains(pairOrientation)) { return Orientation.RF; } else if (ffTypes.contains(pairOrientation) || rrTypes.contains(pairOrientation)) { return Orientation.FF; } else { System.out.println(pairOrientation); return null; } } } private static int getOrder(String chr) { int order; try { order = Integer.parseInt(chr); } catch (NumberFormatException e) { if (chr.contains("X")) { order = 23; } else if (chr.contains("Y")) { order = 24; } else { order = -1; } } return order; } private static boolean interactionFilter(Alignment alignment) { return alignment.isPaired() && alignment.getMate().isMapped() && (Math.abs(alignment.getInferredInsertSize()) > 100000 || !alignment.getChr().equals(alignment.getMate().getChr())); } private static boolean funnyPairFilter(Alignment alignment) { if (!(alignment.isPaired() && alignment.getMate().isMapped() && alignment.getMappingQuality() > 0)) return false; // Orientation hard-coded for FR if (!(frTypes.contains(alignment.getPairOrientation()))) return true; // Insert size, again hard-coded if (Math.abs(alignment.getInferredInsertSize()) > 1000) return true; // Mate chromosome if (alignment.getMate().isMapped() && !alignment.getChr().equals(alignment.getMate().getChr())) return true; // Mate unmapped // if (!alignment.getMate().isMapped()) return true; return false; } private static HashSet<String> frTypes = new HashSet<String>(); static { frTypes.add("F1R2"); frTypes.add("F2R1"); frTypes.add("FR"); frTypes.add("F R "); } private static HashSet<String> rfTypes = new HashSet<String>(); static { rfTypes.add("R2F1"); rfTypes.add("R1F2"); rfTypes.add("RF"); rfTypes.add("R F "); } private static HashSet<String> ffTypes = new HashSet<String>(); static { ffTypes.add("F1F2"); ffTypes.add("F2F1"); ffTypes.add("FF"); ffTypes.add("F F "); } private static HashSet<String> rrTypes = new HashSet<String>(); static { rrTypes.add("R1R2"); rrTypes.add("R2R1"); rrTypes.add("RR"); rrTypes.add("R R "); } }