/* * The MIT License (MIT) * * Copyright (c) 2007-2015 Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.broad.igv.util.encode; import org.broad.igv.Globals; import org.broad.igv.util.HttpUtils; import org.broad.igv.util.ParsingUtils; import java.awt.*; import java.io.*; import java.net.URL; import java.util.*; import java.util.List; /** * @author jrobinso * Date: 10/31/13 * Time: 12:16 PM */ public class UCSCEncodeUtils { static HashSet<String> labs = new HashSet<String>(); static HashSet<String> dataTypes = new HashSet<String>(); static HashSet<String> cells = new HashSet<String>(); static HashSet<String> antibodies = new HashSet<String>(); static HashSet<String> fileTypes = new HashSet<String>(); static HashSet<String> allHeaders = new LinkedHashSet<String>(); private static List<String> rnaChipQualifiers = Arrays.asList("CellTotal", "Longnonpolya", "Longpolya", "NucleolusTotal", "ChromatinTotal", "ChromatinTotal", "NucleoplasmTotal"); public static void main(String[] args) throws IOException { // List<EncodeFileRecord> records = new ArrayList(); // parseFilesDotTxt(args[0], records); // PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(args[1]))); // // pw.print("path"); // for (String h : EncodeTableModel.columnHeadings) { // pw.print("\t"); // pw.print(h); // } // pw.println(); // // for (EncodeFileRecord rec : records) { // pw.print(rec.getPath()); // for (String h : EncodeTableModel.columnHeadings) { // pw.print("\t"); // String value = rec.getAttributeValue(h); // pw.print(value == null ? "" : value); // } // pw.println(); // } // pw.close(); updateEncodeTableFile(args[0], args[1]); } private static List<EncodeFileRecord> parseTableFile(String url) throws IOException { List<EncodeFileRecord> records = new ArrayList<EncodeFileRecord>(20000); BufferedReader reader = null; reader = ParsingUtils.openBufferedReader(url); String[] headers = Globals.tabPattern.split(reader.readLine()); String nextLine; while ((nextLine = reader.readLine()) != null) { if (!nextLine.startsWith("#")) { String[] tokens = Globals.tabPattern.split(nextLine, -1); String path = tokens[0]; Map<String, String> attributes = new HashMap<String, String>(); for (int i = 0; i < headers.length; i++) { String value = tokens[i]; if (value.length() > 0) { attributes.put(headers[i], value); } } records.add(new EncodeFileRecord(path, attributes)); } } return records; } static String[] columnHeadings = {"cell", "dataType", "antibody", "view", "replicate", "type", "lab"}; private static void updateEncodeTableFile(String inputFile, String outputFile) throws IOException { List<EncodeFileRecord> records = new ArrayList<EncodeFileRecord>(); BufferedReader reader = null; reader = ParsingUtils.openBufferedReader(inputFile); String rootPath = reader.readLine(); String hub = null; String nextLine; while ((nextLine = reader.readLine()) != null) { if (nextLine.startsWith("#")) { if(nextLine.startsWith("#hub=")) { hub = nextLine.substring(5); } } else { String dir = nextLine.equals(".") ? rootPath : rootPath + nextLine; String filesDotTxt = dir + "/files.txt"; try { if (HttpUtils.getInstance().resourceAvailable(new URL(filesDotTxt))) { parseFilesDotTxt(filesDotTxt, records); } } catch (IOException e) { // e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } } } for (String dt : fileTypes) System.out.println(dt); outputRecords(outputFile, records, hub); } private static void outputRecords(String outputFile, List<EncodeFileRecord> records, String hub) throws IOException { PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(outputFile))); pw.print("path"); for (String h : columnHeadings) { pw.print("\t"); pw.print(h); } if(hub != null) { pw.print("\thub"); } pw.println(); for (EncodeFileRecord rec : records) { pw.print(rec.getPath()); for (String h : columnHeadings) { pw.print("\t"); String value = rec.getAttributeValue(h); pw.print(value == null ? "" : value); } if(hub != null) { pw.print("\t" + hub); } pw.println(); } pw.close(); } static HashSet knownFileTypes = new HashSet(Arrays.asList( "bam", "bigBed", "bed", "bb", "bw", "bigWig", "gtf", "broadPeak", "narrowPeak", "gappedPeak", "gff")); public static void parseFilesDotTxt(String url, List<EncodeFileRecord> fileRecords) throws IOException { BufferedReader reader = null; reader = ParsingUtils.openBufferedReader(url); String nextLine; while ((nextLine = reader.readLine()) != null) { String[] tokens = Globals.tabPattern.split(nextLine); if (tokens.length < 2) continue; String fn = tokens[0]; String[] attributes = Globals.semicolonPattern.split(tokens[1]); LinkedHashMap<String, String> kvalues = new LinkedHashMap<String, String>(); for (String tk : attributes) { String[] kv = Globals.equalPattern.split(tk); if (kv.length > 1) { kvalues.put(kv[0].trim(), kv[1].trim()); allHeaders.add(kv[0].trim()); } } // Hack for RnaChip -- need this to disambiguate them if ("RnaChip".equals(kvalues.get("dataType"))) { for (String qual : rnaChipQualifiers) { if (fn.contains(qual)) { kvalues.put("antibody", qual); } } } String path = fn.startsWith("http") ? fn : url.replace("files.txt", fn); EncodeFileRecord df = new EncodeFileRecord(path, kvalues); if (knownFileTypes.contains(df.getFileType())) { fileRecords.add(df); } dataTypes.add(df.getAttributeValue("dataType")); antibodies.add(df.getAttributeValue("antibody")); cells.add(df.getAttributeValue("cell")); labs.add(df.getAttributeValue("lab")); fileTypes.add(df.getFileType()); } reader.close(); } }