package org.seqcode.tools.sequence; import java.util.*; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.IOException; import org.seqcode.data.io.FASTAWriter; import org.seqcode.genome.Genome; import org.seqcode.genome.location.Gene; import org.seqcode.genome.location.NamedStrandedRegion; import org.seqcode.genome.location.Region; import org.seqcode.gsebricks.verbs.location.ChromRegionIterator; import org.seqcode.gsebricks.verbs.location.RefGeneGenerator; import org.seqcode.gseutils.Args; import org.seqcode.gseutils.NotFoundException; /** * Returns the sequences of named gene ORFs * * cat gene_names.txt | java GeneORFs --species "$SC;Sigmav7" --genes s288cMapped * cat gene_names.txt | java GeneORFs --species "$SC;Sigmav7" --genes s288cMapped --fasta * java GeneORFs --species "$SC;Sigmav7" --genes s288cMapped --fasta --allgenes * * */ public class GeneORFs { private int upstream, downstream; private List<RefGeneGenerator> geneGenerators; private Genome genome; private boolean allGenes, toFasta; private FASTAWriter<NamedStrandedRegion> fwriter; public static void main(String args[]) throws Exception { GeneORFs gp = new GeneORFs(); gp.parseArgs(args); gp.run(); } public GeneORFs() {} public void parseArgs(String args[]) throws NotFoundException { geneGenerators = Args.parseGenes(args); for (RefGeneGenerator r : geneGenerators) { r.retrieveExons(false); } genome = Args.parseGenome(args).getLast(); allGenes = Args.parseFlags(args).contains("allgenes"); toFasta = Args.parseFlags(args).contains("fasta"); if (toFasta) { fwriter = new FASTAWriter<NamedStrandedRegion>(System.out); } } public void run() throws IOException { if (allGenes) { ChromRegionIterator chroms = new ChromRegionIterator(genome); while (chroms.hasNext()) { Region chrom = chroms.next(); /* we'll use all the gene generators provided but don't want to output duplicate regions. seen is keyed on 5' and contains a list of 3' ends that have already been output. */ for (RefGeneGenerator refgene : geneGenerators) { Iterator<Gene> iter = refgene.execute(chrom); while (iter.hasNext()) { Gene g = iter.next(); output(g); } } } } else { BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); String line = null; while ((line = reader.readLine()) != null) { Gene g = null; String pieces[] = line.split("\\t"); int i = 0; while (i < pieces.length && g == null) { for (RefGeneGenerator refgene : geneGenerators) { Iterator<Gene> iter = refgene.byName(pieces[i]); while (iter.hasNext()) { if (g == null) { g = iter.next(); } else { iter.next(); } } if (g != null) { break; } } i++; } if (g != null) { g.setName(line); output(g); } else { System.err.println("Couldn't find " + line); } } } } public void output(NamedStrandedRegion r) { if (toFasta) { fwriter.consume(r); } else { System.out.println(String.format("%s\t%s:%d-%d:%s", r.toString(), r.getChrom(), r.getStart(), r.getEnd(), r.getStrand())); } } }