package org.seqcode.data.seqdata.tools; import java.io.IOException; import java.sql.SQLException; import java.util.*; import org.seqcode.data.connections.DatabaseException; import org.seqcode.data.seqdata.SeqAnalysisResult; import org.seqcode.gsebricks.verbs.chipseq.GPSParser; import org.seqcode.gsebricks.verbs.chipseq.GPSPeak; import org.seqcode.gseutils.NotFoundException; /** * See AnalysisImporter docs. Command line options are the same; the only difference * is that GPSAnalysisImporter parses the GPS native output format. */ public class GPSAnalysisImporter extends AnalysisImporter { /* oracle complains about underflow if we don't limit the pvalues. the actual min value is somewhere between E-100 and E-200, but I didn't bother tracking it down more closely since I don't think the difference really matters */ public final static double minpval = Math.pow(10,-100); private Set<String> seenPositions = new HashSet<String>(); private int lineno = 0; public static void main(String args[]) throws NotFoundException, SQLException, DatabaseException, IOException { GPSAnalysisImporter importer = new GPSAnalysisImporter(); importer.parseArgs(args); importer.run(System.in); importer.close(); } public SeqAnalysisResult parseLine(String line) { if (line.matches("^Position.*")) { return null; } GPSPeak p = GPSParser.parseLine(getGenome(), line, ++lineno); String k = p.getChrom() + p.getLocation(); if (seenPositions.contains(k)) { return null; } seenPositions.add(k); return new SeqAnalysisResult(getGenome(), p.getChrom(), p.getLocation(), p.getLocation()+1, p.getLocation(), p.getStrength(), p.getControlStrength(), p.getStrength(), p.getShape(), Math.max(p.getPvalue(), minpval), p.getStrength()/p.getControlStrength()); } }