package org.genedb.crawl.elasticsearch.index.das; import java.io.FileWriter; import java.io.IOException; import java.math.BigInteger; import java.util.List; import javax.xml.bind.JAXBException; import org.apache.log4j.Logger; import org.genedb.crawl.model.Organism; import org.kohsuke.args4j.Option; import uk.ac.ebi.das.jdas.adapters.features.FeatureAdapter; import uk.ac.ebi.das.jdas.exceptions.ValidationException; import uk.ac.ebi.das.jdas.schema.entryPoints.SEGMENT; /** * A utility to output the contents of a DAS source into a tab delimited text * file. * * @author gv1 * */ public class DASFileBuilder extends DASIndexBuilder { private static Logger logger = Logger.getLogger(DASFileBuilder.class); @Option(name = "-f", aliases = { "--file" }, usage = "The file to save the output to.", required = true) public String file; @Option(name = "-seq", aliases = { "--sequence" }, usage = "Whether to fetch the sequence as well (untested).", required = false) public boolean sequence = false; private DasFetcher fetcher; private FileWriter writer; public void run() throws IOException, JAXBException, SecurityException, IllegalArgumentException, NoSuchFieldException, IllegalAccessException, ValidationException { fetcher = new DasFetcher(url, source); writer = new FileWriter(file); writer.append("##gff-version 3\n"); List<SEGMENT> segments = fetcher.getEntryPoints(); for (SEGMENT segment : segments) { if (region != null) { if (!region.equals(segment.getId())) { continue; } } logger.info("Getting features for " + segment.getId()); List<FeatureAdapter> features = fetcher.getFeatures(segment, segment.getStart(), segment.getStop()); indexFeatures(null, segment, features); } /* * @TODO this has not been tested because the * http://das.sanger.ac.uk/das/pbg does not appear to support this * feature. */ if (sequence) { writer.append("##FASTA"); for (SEGMENT segment : segments) { writer.append("> " + segment.getId()); BigInteger start = segment.getStart(); BigInteger stop = segment.getStop(); // @NOTE this is probably way too small a value for the // increment, causing too many requests // larger values will necessitate splitting the results on // multiple lines (which is not yet // done below). final BigInteger increment = BigInteger.valueOf(60); logger.info(String.format("Getting sequence for %s (%s-%s).", segment.getId(), start, stop)); for (BigInteger x = start; x.compareTo(stop) < 0; x.add(increment)) { BigInteger fmin = x; BigInteger fmax = x.add(increment); logger.info(String.format("%s-%s", fmin, fmax)); if (fmax.compareTo(stop) >= 0) { fmax = stop; } String sequence = fetcher.getSequence(segment, fmin, fmax); writer.append(sequence + "\n"); } } } writer.close(); } @Override protected void indexFeatures(Organism o, SEGMENT segment, List<FeatureAdapter> features) throws ValidationException, IOException { StringBuffer sb = new StringBuffer(); for (FeatureAdapter featureAdapter : features) { // if the DAS source is not interbase, then must subtract one from // its fmin int fmin = interbase ? featureAdapter.getStart() : featureAdapter.getStart() - 1; int fmax = featureAdapter.getEnd(); String id = featureAdapter.getId(); String region = segment.getId(); String type = featureAdapter.getType().getId(); String phase = featureAdapter.getPhase(); String strand = featureAdapter.getOrientation(); String score = featureAdapter.getScore(); sb.append(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tID=%s\n", region, source, type, fmin, fmax, score, strand, phase, id)); } writer.append(String.format("##sequence-region segment.getId() %s %s\n", segment.getStart(), segment.getStop())); writer.append(sb.toString()); } public static void main(String[] args) throws Exception { new DASFileBuilder().prerun(args).closeIndex(); } }