/* * The MIT License (MIT) * * Copyright (c) 2007-2015 Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.broad.igv.feature.genome; import org.broad.igv.Globals; import org.broad.igv.feature.Chromosome; import org.broad.igv.util.ParsingUtils; import java.io.*; import java.util.*; /** * Static utility functions for genome data-wrangling. * * @author jrobinso * Date: 4/22/13 * Time: 1:27 PM */ public class GenomeUtils { public static void main(String[] args) throws IOException { String genomeList = args[0]; String jsonFile = args[1]; exportGenomesJson(genomeList, new File(jsonFile)); // mergeINCDCNames( // new File("genomes/alias/hg38_alias.tab"), // new File("/Users/jrobinso/projects/INSDC/GCF_000001405.26.assembly.txt"), // new File("/Users/jrobinso/projects/INSDC")); } public static void exportGenomesJson(String genomeListPath, File jsonFile) throws IOException { BufferedReader br = null; PrintWriter pw = null; try { br = ParsingUtils.openBufferedReader(genomeListPath); pw = new PrintWriter(new BufferedWriter(new FileWriter(jsonFile))); String nextLine; while ((nextLine = br.readLine()) != null) { String[] tokens = nextLine.split("\t"); if (tokens.length > 2) { String genomePath = tokens[1]; try { Genome genome = GenomeManager.getInstance().loadGenome(genomePath, null); writeJson(genome, pw); } catch (Exception e) { System.err.println(e.toString()); } } } } finally { if (pw != null) pw.close(); if (br != null) br.close(); } } /** * Create .chrom.sizes file for each genome found in the {@code genomeListPath}, and write it out to * {@code directory} * * @param directory * @param genomeListPath * @throws IOException */ public static void updateChromSizes(File directory, String genomeListPath) throws IOException { // http://igv.broadinstitute.org/genomes/genomes.txt // <Server-Side Genome List> // Human hg19 http://igv.broadinstitute.org/genomes/hg19.genome hg19 BufferedReader br = null; try { br = ParsingUtils.openBufferedReader(genomeListPath); String nextLine; while ((nextLine = br.readLine()) != null) { String[] tokens = nextLine.split("\t"); if (tokens.length > 2) { String genomeID = tokens[2]; File outputFile = new File(directory, genomeID + ".chrom.sizes"); if (outputFile.exists()) { continue; } System.out.println("Updating " + genomeID); String genomePath = tokens[1]; try { Genome genome = GenomeManager.getInstance().loadGenome(genomePath, null); exportChromSizes(directory, genome); } catch (Exception e) { System.err.println(e.toString()); } } } } finally { if (br != null) br.close(); } } /** * Export a "chrom.sizes" file for the specified genome * * @param directory output directory * @param genome * @throws FileNotFoundException */ public static void exportChromSizes(File directory, Genome genome) throws FileNotFoundException { String fn = genome.getId() + ".chrom.sizes"; File file = new File(directory, fn); PrintWriter pw = null; try { pw = new PrintWriter(file); for (String chr : genome.getAllChromosomeNames()) { Chromosome chromosome = genome.getChromosome(chr); pw.println(chromosome.getName() + "\t" + chromosome.getLength()); } } finally { if (pw != null) pw.close(); } } public static void writeJson(Genome genome, PrintWriter pw) { GenomeDescriptor gd = genome.genomeDescriptor; if (!gd.isFasta()) { System.out.println("Skipping genome " + genome.getId()); return; } System.out.println("Exporting genome " + genome.getId()); pw.println("\"" + genome.getId() + "\": {"); pw.println(" name: \"" + genome.getDisplayName() + "\","); pw.println(" id: \"" + genome.getId() + "\","); pw.println(" ucsdId: \"" + genome.getUCSCId() + "\","); pw.print(" fastaURL: \"" + gd.getSequenceLocation() + "\""); if(gd.cytoBandFileName != null) { pw.println(","); pw.print(" cytobandURL: \"" + gd.cytoBandFileName + "\""); } if(gd.chrAliasFileName != null) { pw.println(","); pw.print(" aliasURL: \"" + gd.chrAliasFileName + "\""); } if(gd.geneFileName != null) { pw.println(","); pw.println(" tracks: [{"); pw.print(" url: \"" + gd.geneFileName + "\""); if(gd.geneTrackName != null) { pw.println(","); pw.println(" name: \"" + gd.geneTrackName + "\""); } pw.println(" }]"); } //protected String geneFileName; // protected String chrAliasFileName; // protected String geneTrackName; pw.println("},"); } /** * Merge chromosome names from an NCBI assembly.txt file with an existing IGV alias file * * @param aliasFile * @param assemblyFile */ public static void mergeINCDCNames(File aliasFile, File assemblyFile, File outputDirectory) throws IOException { Map<String, Set<String>> aliasRows = new LinkedHashMap<String, Set<String>>(); BufferedReader br = null; PrintWriter pw = null; // Build alias dictionary br = new BufferedReader(new FileReader(aliasFile)); String nextLine; while ((nextLine = br.readLine()) != null) { String[] tokens = Globals.whitespacePattern.split(nextLine); HashSet<String> row = new LinkedHashSet<String>(Arrays.asList(tokens)); for (String nm : tokens) { aliasRows.put(nm, row); } } br.close(); // Loop through assembly file int[] chrIndeces = {0, 4, 6, 9}; br = new BufferedReader(new FileReader(assemblyFile)); boolean start = false; List<String> newRows = new ArrayList<String>(); while ((nextLine = br.readLine()) != null) { if (start) { String[] tokens = Globals.tabPattern.split(nextLine); boolean foundRow = false; for (int i : chrIndeces) { Set<String> row = aliasRows.get(tokens[i]); if (row != null) { for (int j : chrIndeces) { if (!"na".equals(tokens[j])) { row.add(tokens[j]); } } foundRow = true; break; } } if (!foundRow) { String newRow = tokens[chrIndeces[0]]; for (int i = 1; i < chrIndeces.length; i++) { String chrNm = tokens[chrIndeces[i]]; if (!"na".equals(chrNm)) { newRow += ("\t" + chrNm); } } newRows.add(newRow); System.out.println("New alias row: " + newRow); } } else if (nextLine.startsWith("# Sequence-Name")) { start = true; } } br.close(); pw = new PrintWriter(new BufferedWriter(new FileWriter(new File(outputDirectory, aliasFile.getName())))); Set<Set<String>> output = new HashSet<Set<String>>(); for (Set<String> row : aliasRows.values()) { if (row.size() == 0) continue; if (!output.contains(row)) { output.add(row); List<String> chrNames = new ArrayList<String>(row); pw.print(chrNames.get(0)); for (int i = 1; i < chrNames.size(); i++) { pw.print("\t" + chrNames.get(i)); } pw.println(); } } for (String row : newRows) { pw.println(row); } pw.close(); } }