package org.seqcode.genome; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.seqcode.genome.location.Region; import org.seqcode.genome.sequence.SequenceGenerator; import org.seqcode.gseutils.ArgParser; import org.seqcode.gseutils.Args; import org.seqcode.gseutils.NotFoundException; import org.seqcode.gseutils.Pair; /** * GenomeConfig: * A config parser that loads genome objects from the command-line or config files. * You can also use the Args class directly to load Genomes from the command-line. * However, GenomeConfig allows convenient loading of cached sequences as well, * and fits the schema of the other config parser classes. * * @author mahony * */ public class GenomeConfig { private Genome gen=null; private String genomeSequencePath=null; //Path to sequence data file directories private SequenceGenerator<Region> seqgen=null; private boolean sequenceAvailable=false; private boolean printHelp=false; private String[] args; public String getArgs(){ String a=""; for(int i=0; i<args.length; i++) a = a+" "+args[i]; return a; } public GenomeConfig(String [] arguments){ this.args=arguments; ArgParser ap = new ArgParser(args); seqgen = new SequenceGenerator<Region>(); if(args.length==0 || ap.hasKey("h")){ printHelp=true; }else{ try{ //Test for a config file... if there is concatenate the contents into the args if(ap.hasKey("config")){ ArrayList<String> confArgs = new ArrayList<String>(); String confName = ap.getKeyValue("config"); File confFile = new File(confName); if(!confFile.isFile()) System.err.println("\nCannot find configuration file: "+confName); BufferedReader reader = new BufferedReader(new FileReader(confFile)); String line; while ((line = reader.readLine()) != null) { line = line.trim(); String[] words = line.split("\\s+"); if(!words[0].startsWith("--")) words[0] = new String("--"+words[0]); confArgs.add(words[0]); if(words.length>1){ String rest=words[1]; for(int w=2; w<words.length; w++) rest = rest+" "+words[w]; confArgs.add(rest); } } String [] confArgsArr = confArgs.toArray(new String[confArgs.size()]); String [] newargs =new String[args.length + confArgsArr.length]; System.arraycopy(args, 0, newargs, 0, args.length); System.arraycopy(confArgsArr, 0, newargs, args.length, confArgsArr.length); args = newargs; ap = new ArgParser(args); reader.close(); } //Load genome if(ap.hasKey("species") || ap.hasKey("genome") || ap.hasKey("gen")){ Pair<Species, Genome> pair = Args.parseGenome(args); if(pair != null){ gen = pair.cdr(); sequenceAvailable=true; } }else{ if(ap.hasKey("geninfo") || ap.hasKey("g")){ //Make fake genome... chr lengths provided String fName = ap.hasKey("geninfo") ? ap.getKeyValue("geninfo") : ap.getKeyValue("g"); gen = new Genome("Genome", new File(fName), true); }else{ gen = null; } } if(gen==null){ System.err.println("WARNING: please provide chromosome length information in a genome info file (option --geninfo). " + "MultiGPS will attempt to estimate chromosome lengths from data, but this may not work or may not be accurate."); } //Cache genome sequence if(ap.hasKey("seq")){ genomeSequencePath = ap.getKeyValue("seq"); seqgen.setGenomePath(genomeSequencePath); seqgen.useCache(true); seqgen.useLocalFiles(true); sequenceAvailable=true; } } catch (NotFoundException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } /** * Merge a set of estimated genomes * @param estGenomes * @return */ public Genome mergeGenomes(List<Genome> estGenomes){ //Combine the chromosome information HashMap<String, Integer> chrLenMap = new HashMap<String, Integer>(); for(Genome e : estGenomes){ Map<String, Integer> currMap = e.getChromLengthMap(); for(String s: currMap.keySet()){ if(!chrLenMap.containsKey(s) || chrLenMap.get(s)<currMap.get(s)) chrLenMap.put(s, currMap.get(s)); } } gen =new Genome("Genome", chrLenMap); return gen; } //Accessors public Genome getGenome(){return gen;} public SequenceGenerator getSequenceGenerator(){return seqgen;} public String getGenomeSequencePath(){return genomeSequencePath;} public boolean sequenceAvailable(){return sequenceAvailable;} public boolean helpWanted(){return printHelp;} /** * Returns a string describing the arguments handled by this config parser. * @return String */ public static String getArgsList(){ return(new String("" + "Genome:" + "\t--species <Species;Genome>\n" + "\tOR\n" + "\t--geninfo <genome info file>" + "Genome Sequence Caching:" + "\t--seq <fasta seq directory>\n" + "")); } }