package org.seqcode.tools.location; import java.util.*; import org.seqcode.genome.Genome; import org.seqcode.genome.location.ExonicGene; import org.seqcode.genome.location.Gene; import org.seqcode.genome.location.Region; import org.seqcode.gsebricks.verbs.location.RefGeneGenerator; import org.seqcode.gseutils.Args; /** * Dumps gene annotations in GFF format * */ public class DumpGeneGFF { public static void main(String args[]) throws Exception { Genome genome = Args.parseGenome(args).cdr(); RefGeneGenerator genegen = Args.parseGenes(args).get(0); genegen.retrieveExons(true); genegen.setWantAlias(true); Map<String,Collection<Gene>> genes = new HashMap<String,Collection<Gene>>(); Iterator<Gene> all = genegen.getAll(); while (all.hasNext()) { Gene g = all.next(); String n = g.getName(); if (n.equals(g.getID())) { Collection<String> notid = g.getNonIDNames(); if (notid.size() > 0) { Iterator<String> iter = notid.iterator(); n = iter.next(); } } if (!genes.containsKey(n)) { genes.put(n, new ArrayList<Gene>()); } genes.get(n).add(g); } for (String id : genes.keySet()) { String chrom = null; int minpos = Integer.MAX_VALUE, maxpos = 0; char strand = '+'; boolean mixedchroms = false; for (Gene g : genes.get(id)) { if (chrom == null) { chrom = g.getChrom(); strand = g.getStrand(); } else { if (!g.getChrom().equals(chrom)) { mixedchroms = true; } } minpos = Math.min(minpos, g.getStart()); maxpos = Math.max(maxpos, g.getEnd()); } if (!chrom.matches("^.*")) { chrom = "chr" + chrom; } System.out.println(String.format("%s\tprotein_coding\tgene\t%d\t%d\t.\t%s\t.\tID=%s", chrom,minpos,maxpos,Character.toString(strand),id)); for (Gene g : genes.get(id)) { System.out.println(String.format("%s\tprotein_coding\tmRNA\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s", g.getChrom(),g.getStart(),g.getEnd(),Character.toString(g.getStrand()),g.getID(),id)); if (g instanceof ExonicGene) { ExonicGene exonic = (ExonicGene)g; Iterator<Region> iter = exonic.getExons(); int count = 1; while (iter.hasNext()) { Region e = iter.next(); System.out.println(String.format("%s\tprotein_coding\texon\t%d\t%d\t.\t%s\t.\tID=%s.%d;Parent=%s", chrom,e.getStart(),e.getEnd(),Character.toString(g.getStrand()),g.getID(),count,g.getID())); count++; } } } } } }