package org.seqcode.gsebricks.verbs.chipseq; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.Vector; import org.seqcode.genome.Genome; public class MACSParser { /** * Parses data in the MACS output, e.g. * # d = 124 * chr start end length summit tags -10*log10(pvalue) fold_enrichment FDR(%) * chr1 4481542 4484063 2522 276 272 2629.53 47.51 0.00 * @param filename name of the file containing the data * @return a List of hit objects */ public static List<MACSPeakRegion> parseMACSOutput(String filename, Genome g) { ArrayList<MACSPeakRegion> results = new ArrayList<MACSPeakRegion>(); FileReader in = null; BufferedReader bin = null; try { in = new FileReader(filename); bin = new BufferedReader(in); String line; while((line = bin.readLine()) != null) { line = line.trim(); String[] f=line.split("\t"); if (line.length()==0 || line.charAt(0)=='#'||f[0].equals("chr")){ continue; } MACSPeakRegion hit = MACSParser.parseLine(g, line, 0); if (hit!=null) results.add(hit); } } catch(IOException ioex) { //logger.error("Error parsing file", ioex); } finally { try { if (bin != null) { bin.close(); } } catch(IOException ioex2) { //nothing left to do here, just log the error //logger.error("Error closing buffered reader", ioex2); } } return results; } /** * Parse a single line of text into a hit object * @param macsLine a line of text representing a hit * @return a hit object containing the data from the specified line */ private static MACSPeakRegion parseLine(Genome g, String macsLine, int lineNumber) { MACSPeakRegion macs; String[] t = macsLine.split("\t"); if (t.length == 9) { try { macs = new MACSPeakRegion(g, t[0], Integer.parseInt(t[1]), Integer.parseInt(t[2]), Integer.parseInt(t[4]), Integer.parseInt(t[5]), Double.parseDouble(t[6]), Double.parseDouble(t[7]), Double.parseDouble(t[8])); } catch (Exception ex) { //logger.error("Parse error on line " + lineNumber + ".", ex); return null; } } else { //logger.error("Line " + lineNumber + " has " + tokens.length + " tokens."); return null; } return macs; } }