package com.github.lindenb.jvarkit.tools.workflow; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.PrintWriter; import java.io.StringWriter; import java.nio.file.Files; import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import com.beust.jcommander.Parameter; import com.github.lindenb.jvarkit.lang.JvarkitException; import com.github.lindenb.jvarkit.util.jcommander.Launcher; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.google.gson.JsonPrimitive; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.RuntimeIOException; @Program( name="ngsworkflow", description="ngs workflow",keywords={"ngs","workflow","pipeline","bam","vcf"}) public class NgsWorkflow extends Launcher { private static final Logger LOG =Logger.build(NgsWorkflow.class).make(); private enum RefSplitType {WHOLE_GENOME,WHOLE_CONTIG,INTERVAL}; @Parameter(names={"-A","--attributes"},description="Dump available attributes and exit") private boolean dumpAttributes = false; @Parameter(names={"--reannotatevcf"},description="This project is just reannotating vcfs") private boolean reannotatevcfs = false; private abstract class RefSplit { abstract RefSplitType getType(); abstract String getToken(); } private class NoSplit extends RefSplit { @Override RefSplitType getType() { return RefSplitType.WHOLE_GENOME;} @Override String getToken() { return "";} } private class ContigSplit extends RefSplit { final String contig; ContigSplit(final String contig) {this.contig=contig;} public String getContig() {return contig;} @Override RefSplitType getType() { return RefSplitType.WHOLE_CONTIG;} @Override String getToken() { return "."+getContig();} } private class IntervalSplit extends RefSplit { final Interval interval; IntervalSplit(final Interval interval) {this.interval=interval; if(interval.getStart()>=interval.getEnd()) throw new IllegalArgumentException(interval.toString()); } IntervalSplit(String c,int S,int E) {this(new Interval(c,S,E));} IntervalSplit(int c,int S,int E) {this(String.valueOf(c),S,E);} public Interval getInterval() {return interval;} @Override RefSplitType getType() { return RefSplitType.INTERVAL;} @Override String getToken() { return "."+getInterval().getContig()+"_"+getInterval().getStart()+"_"+getInterval().getEnd();} } private static interface PropertyKey { public String getKey(); public String getDescription(); public String getDefaultValue(); } private static class PropertyKeyBuilder { private final PropertyKeyImpl prop; PropertyKeyBuilder(final String key) { if(NgsWorkflow.name2propertyKey.containsKey(key)) { throw new IllegalStateException(key); } this.prop = new PropertyKeyImpl(key); } PropertyKeyBuilder description(final String s) { this.prop.description=s;return this;} PropertyKeyBuilder def(final String s) { this.prop.def=s;return this;} PropertyKeyBuilder def(final boolean b) {return def(String.valueOf(b));} PropertyKeyBuilder def(final int b) {return def(String.valueOf(b));} PropertyKey build() { NgsWorkflow.name2propertyKey.put(this.prop.key,this.prop); return this.prop; } private static class PropertyKeyImpl implements PropertyKey { final String key; String description="No Description available"; String def; PropertyKeyImpl(final String key) { this.key=key; } public @Override String getKey() { return this.key;} public @Override String getDescription() { return this.description;} public @Override String getDefaultValue() { return this.def;} @Override public int hashCode() { return key.hashCode(); } @Override public boolean equals(final Object obj) { if(obj==this) return true; if(obj==null || !(obj instanceof PropertyKeyImpl)) return false; return this.key.equals(PropertyKeyImpl.class.cast(obj).key); } @Override public String toString() { return this.key.toString(); } } } private static final Map<String,PropertyKey> name2propertyKey=new HashMap<>(); private static PropertyKeyBuilder key(final String key) { return new PropertyKeyBuilder(key);} private static final PropertyKey PROP_PROJECT_OUTPUT_DIRECTORY = key("output.directory"). description("Project output directory"). build(); private static final PropertyKey PROP_TMP_PREFIX_TOKEN = key("tmp.prefix"). description("Temporary Files prefix"). def("__DELETE__"). build(); private static final PropertyKey PROP_FILES_PREFIX_TOKEN = key("prefix"). description("Files prefix"). build(); private static final PropertyKey PROP_CUTADAPT_ADAPTER5 = key("cutadapt.adapter5"). description("Cutadapt 5' adapter"). def("CTACACTCTTTCCCTACACGACGCTCTTCCGATCT"). build(); private static final PropertyKey PROP_CUTADAPT_ADAPTER3 = key("cutadapt.adapter3"). description("Cutadapt 3' adapter"). def("GATCGGAAGAGCACACGTCTGAACTCCAGTCAC"). build(); private static final PropertyKey PROP_BWA_MEM_NTHREADS = key("bwa.mem.nthreads"). description("Number of threads for bwa mem"). def("1"). build(); private static final PropertyKey PROP_SAMTOOLS_SORT_NTHREADS = key("samtools.sort.nthreads"). description("Number of threads for samtools sort"). def(1). build(); private static final PropertyKey PROP_PICARD_MARKDUP_REMOVES_DUPLICATES = key("markdups.removes.duplicates"). description("picard MarkDuplicate will remove the dups"). def(false). build(); private static final PropertyKey PROP_CAPTURE_EXTEND = key("capture.extend"). description("How to extend the capture bed"). def(1000). build(); private static final PropertyKey PROP_DEFAULT_COMPRESSION_LEVEL = key("compression.level"). description("Compression level"). def(9). build(); private static final PropertyKey PROP_DISABLE_RECALIBRATION = key("disable.recalibration"). description("Disable GATK Recalibration"). def(false). build(); private static final PropertyKey PROP_DISABLE_REALIGN = key("disable.realign"). description("Disable GATK RealignAroundIndel"). def(false). build(); private static final PropertyKey PROP_DISABLE_MARKDUP = key("disable.markdup"). description("Disable picard MarkDups"). def(false). build(); private static final PropertyKey PROP_PROJECT_IS_HALOPLEX = key("haloplex"). description("Project is Haloplex"). def(false). build(); private static final PropertyKey PROP_DISABLE_CUTADAPT = key("disable.cutadapt"). description("Disable cutadapt"). def(false). build(); private static final PropertyKey PROP_GATK_HAPCALLER_NCT = key("gatk.hapcaller.nct"). description("Hapcaller -nct"). def("1"). build(); private static final PropertyKey PROP_MAPPING_REGION = key("mapping.region"). description("When mapping with bwa, use bedtools intersect to restrict the output to the specified bed file"). def(""). build(); private static final PropertyKey PROP_USE_LUMPY_EXPRESS = key("use.lumpyexpress"). description("Use Lumpy Express"). def("false"). build(); private RefSplit parseRefSplitFromStr(final String s) { int colon= s.indexOf(":"); if(colon!=-1) { int hyphen = s.lastIndexOf('-', colon+1); if(hyphen<0) throw new IllegalArgumentException("Illegal array item "+s); final Interval interval = new Interval( s.substring(0,colon), Integer.parseInt(s.substring(colon+1,hyphen)), Integer.parseInt(s.substring(hyphen+1)) ); return new IntervalSplit(interval); } else { return new ContigSplit(s); } } private List<RefSplit> parseRefSplitList(final JsonElement root) throws IOException { final List<RefSplit> splits=new ArrayList<>(); if(root.isJsonObject()) { JsonObject ob = root.getAsJsonObject(); final Interval interval = new Interval( ob.get("chrom").getAsString(), ob.get("start").getAsInt(), ob.get("end").getAsInt() ); return Collections.singletonList(new IntervalSplit(interval)); } else if(root.isJsonArray()) { final JsonArray array=root.getAsJsonArray(); if(array.size()==0) throw new IOException("Empty array"); for(int i=0; i< array.size();++i) { JsonElement item = array.get(i); if(item.isJsonPrimitive()) { splits.add(parseRefSplitFromStr(item.getAsString())); } else if(item.isJsonObject()) { JsonObject ob = item.getAsJsonObject(); final Interval interval = new Interval( ob.get("chrom").getAsString(), ob.get("start").getAsInt(), ob.get("end").getAsInt() ); splits.add(new IntervalSplit(interval)); } else { throw new IOException("Illegal array item "+item); } } return splits; } else if(root.isJsonPrimitive()) { final String str=root.getAsString(); if(str.endsWith(".bed")) { splits.addAll(Files.readAllLines(Paths.get(str)).stream(). filter(L->!L.startsWith("browser")). filter(L->!L.startsWith("track")). filter(L->!L.startsWith("#")). filter(L->!L.trim().isEmpty()). map(L->L.split("[\t]")). map(T->new Interval(T[0],1+Integer.parseInt(T[1]),Integer.parseInt(T[2]))). map(I->new IntervalSplit(I)). collect(Collectors.toList()) ); return splits; } else if(str.endsWith(".json")) { splits.addAll(parseRefSplitList(readJsonFile(new File(str)))); return splits; } else if(str.endsWith(".intervals"))//gatk style { splits.addAll(Files.readAllLines(Paths.get(str)).stream(). filter(L->!L.startsWith("#")). filter(L->!L.trim().isEmpty()). map(L-> parseRefSplitFromStr(L)). collect(Collectors.toList()) ); return splits; } else if(str.equals("genome_wide")) { return Collections.singletonList(new NoSplit()); } else if(str.equals("by_chromosome")) { for(int i=1;i<=22;++i) splits.add(new ContigSplit(String.valueOf(i))); splits.add(new ContigSplit("X")); splits.add(new ContigSplit("Y")); return splits; } else { return Collections.singletonList(parseRefSplitFromStr(str)); } } else { throw new IOException("illegal refSplit "+root); } } private abstract class HasAttributes { private final Map<PropertyKey,String> _att=new HashMap<>(); protected HasAttributes(final JsonElement root) throws IOException { if(root!=null && root.isJsonObject()) { for(final PropertyKey prop:NgsWorkflow.name2propertyKey.values()) { if(!root.getAsJsonObject().has(prop.getKey())) continue; this._att.put(prop, root.getAsJsonObject().get(prop.getKey()).getAsString()); } } } public abstract HasAttributes getParent(); public Map<PropertyKey,String> getAttributes() { return _att;} private boolean booleanValue(final PropertyKey key) { String s=getAttribute(key,"false"); if(s!=null && s.equals("true")) return true; if(s!=null && s.equals("false")) return false; throw new RuntimeException("bad boolean value for "+key+" : \""+s+"\""); } public boolean isTrue(final PropertyKey key) { return booleanValue(key); } public boolean isFalse(final PropertyKey key) { return !booleanValue(key); } public String getAttribute(final PropertyKey key,final String def) { if(key==null) throw new NullPointerException("key is null"); HasAttributes curr=this; while(curr!=null) { if(curr.getAttributes().containsKey(key)) { return curr.getAttributes().get(key); } curr=curr.getParent(); } if(def!=null) return def; if(key.getDefaultValue()!=null) return key.getDefaultValue(); throw new RuntimeException("key "+key+" is not defined for "+this.toString()); } public String getAttribute(final PropertyKey key) { return getAttribute(key,null); } public boolean isAttributeSet(final PropertyKey key,final Boolean def) { final String s= getAttribute(key,(def==null?null:def.booleanValue()?"true":"false")); if(s==null) throw new RuntimeException("key "+key+" is not defined for "+this.toString()); if(s.equals("1") || s.equals("true") || s.equals("yes") || s.equals("T") || s.equals("Y")) return true; if(s.equals("0") || s.equals("false") || s.equals("no") || s.equals("F") || s.equals("N")) return false; throw new RuntimeException("bad boolean value for "+key+" : "+s); } public boolean isAttributeSet(final PropertyKey key) { return isAttributeSet(key,null); } public String getTmpPrefixToken() { return getAttribute(PROP_TMP_PREFIX_TOKEN); } public String getFilePrefix() { return getAttribute(PROP_FILES_PREFIX_TOKEN); } public String getTmpPrefix() { return getTmpPrefixToken()+getFilePrefix(); } @Override public abstract String toString(); protected String rulePrefix() { return "\tmkdir -p $(dir $@) && [[ ! -f \"${OUT}/STOP\" ]] "; } } /** describe a NGS project */ private class Project extends HasAttributes { private final String name; private final String description; private final List<Sample> _samples=new ArrayList<>(); private final Optional<Capture> capture; private final Pedigree pedigree; private final Set<String> vcfToReannotate; Project(final JsonElement root) throws IOException { super(root); if(NgsWorkflow.this.reannotatevcfs) { this.vcfToReannotate=new LinkedHashSet<>(); } else { this.vcfToReannotate=Collections.emptySet(); } if(!root.isJsonObject()) throw new IOException("project json is not object"); final JsonObject json = root.getAsJsonObject(); if(!json.has("name")) { throw new IOException("project name missing"); } this.name = json.get("name").getAsString(); if(!json.has("description")) { this.description=this.name; } else { this.description = json.get("description").getAsString(); } if(json.has("capture")) { final Capture c=new Capture(this,json.get("capture")); capture = Optional.of(c); } else { this.capture =Optional.empty(); } if(json.has("pedigree")) { this.pedigree = new Pedigree(this,json.get("pedigree")); } else { this.pedigree = new Pedigree(this,new JsonObject()); } final Set<String> seen=new HashSet<>(); if(json.has("samples")) { for(final JsonElement samplejson :json.get("samples").getAsJsonArray()) { Sample sample =new Sample(this,samplejson.getAsJsonObject()); this._samples.add(sample); if(seen.contains(sample.getName())) { throw new IOException("Sample "+sample+" defined twice"); } seen.add(sample.getName()); } } else if(NgsWorkflow.this.reannotatevcfs && json.has("vcfs")) { for(final JsonElement vcfjson :json.get("vcfs").getAsJsonArray()) { if(vcfjson.isJsonPrimitive()) { this.vcfToReannotate.add(vcfjson.getAsString()); } else { throw new IOException("Not a string "+vcfjson); } } } else { LOG.warn("No 'samples' under project"); } } public String getName() { return name; } public String getDescription() { return description; } public Pedigree getPedigree() { return pedigree; } public List<Sample> getSamples() { return this._samples;} @Override public HasAttributes getParent() { return null;} public String getOutputDirectory() { return getAttribute(PROP_PROJECT_OUTPUT_DIRECTORY); } public String getSamplesDirectory() { return "${OUT}/Samples"; } public String getBedDirectory() { return "${OUT}/BED"; } public String getVcfDirectory() { return "${OUT}/VCF"; } boolean hasCapture() { return this.capture.isPresent(); } public Capture getCapture() { return this.capture.get(); } /** when reannotation vcf, what will be the output filename */ public String getReannoteVcfFilename( String s) { if(!NgsWorkflow.this.reannotatevcfs) throw new IllegalStateException(); if(s==null || !(s.endsWith(".vcf") || s.endsWith(".vcf.gz"))) { throw new IllegalArgumentException("Bad extension vcf:"+s); } int slash= s.lastIndexOf('/'); if(slash!=-1) s=s.substring(slash+1); if(s.matches("20[0-9][0-9][0-1][0-9][0-3][0-9].*")) { s=s.substring(8); } while(s.startsWith("_") || s.startsWith(".")) { s=s.substring(1); } if(s.endsWith(".vcf")) { s=s.substring(0, s.length()-4); } else if(s.endsWith(".vcf.gz")) { s=s.substring(0, s.length()-7); } s+=".vcf.gz"; return getOutputDirectory()+"/"+getFilePrefix()+s; } public String getHapCallerAnnotationVcf() { return getVcfDirectory()+"/"+getFilePrefix()+"HCAnnotations.vcf.gz"; } public String getLumpyVcf() { return getVcfDirectory()+"/"+getFilePrefix()+"LumpyExpress.vcf.gz"; } public String getHapCallerGenotypedVcf() { return getVcfDirectory()+"/"+getTmpPrefix()+"HCGenotyped.vcf.gz"; } public String getSamtoolsRawVcf() { return getVcfDirectory()+"/"+getTmpPrefix()+"Samtools.vcf.gz"; } public List<RefSplit> getHaplotypeCallerSplits() { final List<RefSplit> chroms=new ArrayList<>(25); for(int i=1;i<=22;++i) chroms.add(new ContigSplit(String.valueOf(i))); chroms.add(new ContigSplit("X")); chroms.add(new ContigSplit("Y")); return chroms; } public List<RefSplit> getSamtoolsCallerSplits() { return getHaplotypeCallerSplits(); } String getFinalBamList() { return getSamplesDirectory()+"/"+getTmpPrefix()+"final.bam.list"; } void bamList(final PrintWriter w) { w.println(getFinalBamList()+":"+this.getSamples().stream().map(S->S.getFinalBamBai()).collect(Collectors.joining(" \\\n\t"))); w.print(rulePrefix()+ " && rm -f $@ $(addsuffix .tmp,$@) "); this.getSamples().stream().forEach(S->{ w.print(" && echo \""+S.getFinalBam()+"\" >> $(addsuffix .tmp,$@) "); }); w.println("&& mv --verbose $(addsuffix .tmp,$@) $@"); } public List<AbstractCaller> getCallers() { final List<AbstractCaller> L = new ArrayList<>(); L.add(new UnifiedGenotyperCaller()); //L.add(new SamtoolsCaller()); return L; } public void callVariants(PrintWriter out) { for(AbstractCaller C:getCallers()) C.print(out); } public void annotateVariants(PrintWriter out) { new VariantAnnotator(this.getHapCallerAnnotationVcf(),this.getHapCallerGenotypedVcf()). setPedigree( getPedigree().getPedFilename()). print(out); } void reannoteVcfs(PrintWriter out) { if(!NgsWorkflow.this.reannotatevcfs) throw new IllegalStateException(); for(final String invcf: this.vcfToReannotate) { new VariantAnnotator(this.getReannoteVcfFilename(invcf),invcf). //setRemoveAnnotations(). print(out); } } @Override public String toString() { return getName(); } public String getNoResultContig() { return "22"; } public class VariantAnnotator { private String target; private String dep; private String pedigree=null; private boolean removeAnnotations=false; public VariantAnnotator(String target,String dep) { this.target=target; this.dep=dep; } public VariantAnnotator setPedigree(final String ped) { if(!(ped==null || ped.trim().isEmpty())) { this.pedigree=ped; } return this; } public VariantAnnotator setRemoveAnnotations(boolean b) { this.removeAnnotations=b; return this; } public VariantAnnotator setRemoveAnnotations() { return setRemoveAnnotations(true); } void print(final PrintWriter w) { w.print(this.target); w.print(":"); w.print(this.dep); if(this.pedigree!=null) { w.print(" "); w.print(this.pedigree); } w.println(); w.print(rulePrefix()); w.print(" && rm -f $(addsuffix .tmp1.vcf,$@) $(addsuffix .tmp1.vcf.idx,$@) $(addsuffix .tmp2.vcf,$@) $(addsuffix .tmp2.vcf.idx,$@) "); w.print(" && "+(this.dep.endsWith(".vcf")?"cat":"gunzip -c")+" $< | "); if( removeAnnotations ) { w.print(" $(call run_jvarkit,vcfstripannot) -x '" + "INFO/DukeMapabilityUniqueness35bp," + "FILTER/DukeMapabilityUniqueness35LT1," + "FILTER/MapabilityConsensusExcludable," + "FILTER/MapabilityRegionsExcludable," + "FILTER/IN_EXAC," + "FILTER/IN_GNOMAD," + "INFO/ANN," + "INFO/CSQ," + "INFO/exac.AC," + "INFO/PossibleDeNovo" + "' - | "); } w.print(" $(call run_jvarkit,vcfbigwig) " + " -t ensembl2ucsc -T DukeMapabilityUniqueness35bp -B /commun/data/pubdb/ucsc/hg19/encodeDCC/wgEncodeDukeMapabilityUniqueness35bp.bigWig |"); w.print(" $(call run_jvarkit,vcffilterjs) -F DukeMapabilityUniqueness35LT1 -e '!variant.hasAttribute(\"DukeMapabilityUniqueness35bp\") || variant.getAttributeAsDouble(\"DukeMapabilityUniqueness35bp\",100.0)>=1.0;' > $(addsuffix .tmp1.vcf,$@) "); //BED w.print(" && ${java.exe} -Djava.io.tmpdir=$(dir $@) -jar ${gatk.jar} -T VariantFiltration -R $(REF) " + " -o $(addsuffix .tmp2.vcf,$@) -L $(addsuffix .tmp1.vcf,$@) --variant $(addsuffix .tmp1.vcf,$@) " +" --maskName MapabilityConsensusExcludable --mask:BED /commun/data/pubdb/ucsc/hg19/encodeDCC/wgEncodeDacMapabilityConsensusExcludable_nochrprefix.bed.gz " + " && mv --verbose $(addsuffix .tmp2.vcf,$@) $(addsuffix .tmp1.vcf,$@)" + " && mv --verbose $(addsuffix .tmp2.vcf.idx,$@) $(addsuffix .tmp1.vcf.idx,$@)" ); w.print(" && ${java.exe} -Djava.io.tmpdir=$(dir $@) -jar ${gatk.jar} -T VariantFiltration -R $(REF) " + " -o $(addsuffix .tmp2.vcf,$@) -L $(addsuffix .tmp1.vcf,$@) --variant $(addsuffix .tmp1.vcf,$@) " +" --maskName MapabilityRegionsExcludable --mask:BED /commun/data/pubdb/ucsc/hg19/encodeDCC/wgEncodeDukeMapabilityRegionsExcludable_nochrprefix.bed.gz " + " && mv --verbose $(addsuffix .tmp2.vcf,$@) $(addsuffix .tmp1.vcf,$@)" + " && mv --verbose $(addsuffix .tmp2.vcf.idx,$@) $(addsuffix .tmp1.vcf.idx,$@)" ); //EXAC w.print(" && ${java.exe} -Djava.io.tmpdir=$(dir $@) -jar ${gatk.jar} -T VariantAnnotator -R $(REF) " + " -o $(addsuffix .tmp2.vcf,$@) -L $(addsuffix .tmp1.vcf,$@) --variant $(addsuffix .tmp1.vcf,$@) " +" --resource:exac /commun/data/pubdb/broadinstitute.org/exac/1.0/ExAC.r1.sites.vcf.gz --resourceAlleleConcordance " +" --expression exac.AC "); if(this.pedigree!=null) { w.print(" --pedigree "+this.pedigree); w.print(" -A PossibleDeNovo "); } w.print( " -A HomopolymerRun -A TandemRepeatAnnotator " + " && mv --verbose $(addsuffix .tmp2.vcf,$@) $(addsuffix .tmp1.vcf,$@)" + " && mv --verbose $(addsuffix .tmp2.vcf.idx,$@) $(addsuffix .tmp1.vcf.idx,$@)" ); // filter exac w.print(" && $(call run_jvarkit,vcffilterjs) -F IN_EXAC -e '!variant.hasAttribute(\"exac.AC\")' $(addsuffix .tmp1.vcf,$@) |"); //gnomad w.print(" $(call run_jvarkit,vcfgnomad) -ac -gf IN_GNOMAD -m '${gnomad.vcf.manifest}' |"); //snpeff w.print(" ${java.exe} -Djava.io.tmpdir=$(dir $@) -jar ${snpeff.jar} ann -c ${snpeff.config} GRCh37.75 -nodownload -noStats | "); //vep w.print(" $(call vep78) |"); w.print(" ${bgzip.exe} > $(addsuffix .tmp2.vcf.gz,$@) "); w.print(" && ${tabix.exe} -p vcf -f $(addsuffix .tmp2.vcf.gz,$@) " + " && mv --verbose $(addsuffix .tmp2.vcf.gz,$@) $@ " + " && mv --verbose $(addsuffix .tmp2.vcf.gz.tbi,$@) $(addsuffix .tbi,$@)" ); w.print(" && rm -f $(addsuffix .tmp1.vcf,$@) $(addsuffix .tmp1.vcf.idx,$@) $(addsuffix .tmp2.vcf,$@) $(addsuffix .tmp2.vcf.idx,$@) "); w.println(); } } private class LumpyExpress { private List<LumpyInput> inputs=new ArrayList<>(); Project getProject() { return Project.this;} LumpyExpress addBam(final String inputBam) { this.inputs.add(new LumpyInput(inputBam)); return this; } private class LumpyInput { final String inputBam; LumpyInput(final String inputBam) { this.inputBam = inputBam; } private String getDiscordantBam() { return "$(addsuffix .tmp.discordant.bam,"+inputBam+")"; } private String getSplitterBam() { return "$(addsuffix .tmp.splitters.bam,"+inputBam+")"; } void print(final PrintWriter w) { // Extract the discordant paired-end alignments. w.print(this.getDiscordantBam()); w.print(":"); w.print(this.inputBam); w.println(); w.print(rulePrefix()); w.print(" && ${samtools.exe} view -F 1294 -b -o $(addsuffix .tmp.bam,$@) $< "); w.print(" && mv --verbose $(addsuffix .tmp.bam,$@) $@ "); w.println(); // Extract the split-read alignments w.print(this.getSplitterBam()); w.print(":"); w.print(this.inputBam); w.println(); w.print(rulePrefix()); w.print(" && ${samtools.exe} view -h $< | "); w.print(" ${lumpy.dir}/scripts/extractSplitReads_BwaMem -i stdin |"); w.print(" ${samtools.exe} view -Sb -o $(addsuffix .tmp.bam,$@) - "); w.print(" && mv --verbose $(addsuffix .tmp.bam,$@) $@ "); w.println(); } } void print(final PrintWriter w) { if(this.inputs.isEmpty()) return; for(LumpyInput input:this.inputs) { input.print(w); } w.print(this.getProject().getLumpyVcf()); w.print(":"); w.println(this.inputs.stream().map(T->T.getDiscordantBam()+" "+T.getSplitterBam()+ " "+T.inputBam).collect(Collectors.joining(" "))); w.println(); w.print(rulePrefix()); w.print("&& ${lumpyexpress.exe} -B "); w.print(this.inputs.stream().map(T->T.inputBam.trim()).collect(Collectors.joining(","))); w.print(" -S "); w.print(this.inputs.stream().map(T->T.getSplitterBam().trim()).collect(Collectors.joining(","))); w.print(" -D "); w.print(this.inputs.stream().map(T->T.getDiscordantBam().trim()).collect(Collectors.joining(","))); w.print(" -o $(addsuffix .tmp.vcf,$@) "); w.print(" && $(java.exe) -Djava.io.tmpdir=$(dir $@) -jar $(picard.jar) UpdateVcfSequenceDictionary I=$(addsuffix .tmp.vcf,$@) O=$(addsuffix .tmp2.vcf,$@) SEQUENCE_DICTIONARY=$(addsuffix .dict,$(basename ${REF})) && mv --verbose $(addsuffix .tmp2.vcf,$@) $(addsuffix .tmp.vcf,$@) "); w.print(" && $(java.exe) -Djava.io.tmpdir=$(dir $@) -jar $(picard.jar) SortVcf I=$(addsuffix .tmp.vcf,$@) O=$(addsuffix .tmp2.vcf,$@) && mv --verbose $(addsuffix .tmp2.vcf,$@) $(addsuffix .tmp.vcf,$@) "); w.print(" && ${bgzip.exe} -f $(addsuffix .tmp.vcf,$@) "); w.print(" && ${tabix.exe} -p vcf -f $(addsuffix .tmp.vcf.gz,$@) " + " && mv --verbose $(addsuffix .tmp.vcf.gz,$@) $@ " + " && mv --verbose $(addsuffix .tmp.vcf.gz.tbi,$@) $(addsuffix .tbi,$@)" ); w.print(" && rm --verbose "+ this.inputs.stream().map(T->T.getDiscordantBam()+" "+T.getSplitterBam()).collect(Collectors.joining(" "))); w.print(" && touch "+ this.inputs.stream().map(T->T.getDiscordantBam()+" "+T.getSplitterBam()).collect(Collectors.joining(" "))); w.print(" && sleep 2 && touch -c $@ $(addsuffix .tbi,$@)"); w.println(); } } void lumpyExpress(final PrintWriter out) { if(this.isFalse(PROP_USE_LUMPY_EXPRESS)) return; final LumpyExpress lumpy=new LumpyExpress(); for(Sample sample:this.getSamples()) { lumpy.addBam(sample.getFinalBam()); } lumpy.print(out); } private abstract class AbstractCaller { Project getProject() { return Project.this;} abstract List<? extends RefSplit> getCallSplits(); abstract String getTargetVcfFilename(); abstract void call(final PrintWriter w,RefSplit split); void print(final PrintWriter w) { final List<String> vcfParts=new ArrayList<>(); final List<? extends RefSplit> refSplits = this.getCallSplits(); if(refSplits.isEmpty()) throw new IllegalArgumentException(); for(final RefSplit split: refSplits) { final String vcfPart; switch(split.getType()) { case WHOLE_GENOME: vcfPart= getTargetVcfFilename();break; default: vcfPart= "$(addsuffix "+split.getToken()+".vcf.gz,"+getTargetVcfFilename()+")"; } vcfParts.add(vcfPart); w.append(vcfPart).append(":").append(getFinalBamList()); w.append(" ").append(getPedigree().getPedFilename()); if( getProject().hasCapture()) { w.append(" ").append(getCapture().getExtendedFilename()); } w.append("\n"); w.append(rulePrefix()+" && "); if( getProject().hasCapture()) { switch(split.getType()) { case WHOLE_GENOME: break; case INTERVAL:{ final IntervalSplit tmp= IntervalSplit.class.cast(split); w.append(" ${bedtools.exe} intersect -a ").append(getCapture().getExtendedFilename()). append(" -b <(echo -e '").append(tmp.getInterval().getContig()).append("\\t"). append(String.valueOf(tmp.getInterval().getStart())).append("\\t"). append(String.valueOf(tmp.getInterval().getEnd())).append("') | "). append(" awk -F '\t' 'BEGIN{N=0;}{print;N++;}END{if(N==0) printf(\""+getNoResultContig()+"\\t0\\t1\\n\");}' > $(addsuffix .bed,$@) && "); break; } case WHOLE_CONTIG: { final ContigSplit tmp= ContigSplit.class.cast(split); w.append(" awk -F '\t' 'BEGIN{N=0;}{if($$1==\""+tmp.getContig()+"\") {print;N++;}}END{if(N==0) printf(\""+getNoResultContig()+"\\t0\\t1\\n\");}' "+getCapture().getExtendedFilename()+" > $(addsuffix .bed,$@) && "); break; } default: throw new IllegalStateException(); } } this.call(w,split); w.append(" && mv --verbose \"$(addsuffix .tmp.vcf.gz,$@)\" \"$@\" "); w.append(" && mv --verbose \"$(addsuffix .tmp.vcf.gz.tbi,$@)\" \"$(addsuffix .tbi,$@)\" "); if( getProject().hasCapture()) { w.append(" && rm --verbose \"$(addsuffix .bed,$@)\" "); } w.append("\n"); } if(vcfParts.size()>1) { w.append(getTargetVcfFilename()).append(":"); for(final String vcfPart:vcfParts) { w.append(" \\\n\t").append(vcfPart); } w.append("\n"); w.append(rulePrefix()+" && rm -f $(addsuffix .list,$@) "); int vcfn=0; for(final String vcfPart:vcfParts) { if(vcfn%30==0) { w.append("\n\t"); } else { w.append(" && "); } w.append("echo '"); w.append(vcfPart); w.append("' >> $(addsuffix .list,$@) "); vcfn++; } w.append("\n\t${java.exe} -Djava.io.tmpdir=$(dir $@) -jar ${gatk.jar} -T CombineVariants -R $(REF) " + " -o $(addsuffix .tmp.vcf.gz,$@) -genotypeMergeOptions UNSORTED " +" --variant $(addsuffix .list,$@) " ); w.append(" && rm --verbose $(addsuffix .list,$@)"); w.append(" && mv --verbose \"$(addsuffix .tmp.vcf.gz,$@)\" \"$@\" "); w.append(" && mv --verbose \"$(addsuffix .tmp.vcf.gz.tbi,$@)\" \"$(addsuffix .tbi,$@)\" "); w.append("\n"); } } } private class UnifiedGenotyperCaller extends AbstractCaller { @Override List<? extends RefSplit> getCallSplits() { return getIntervalSplitListGapForGrch37(); } @Override String getTargetVcfFilename() { return getProject().getHapCallerGenotypedVcf();} @Override void call(final PrintWriter w,final RefSplit split) { w.append(" $(java.exe) -XX:ParallelGCThreads=5 -Xmx2g -Djava.io.tmpdir=$(dir $@) -jar $(gatk.jar) -T HaplotypeCaller "); w.append(" -R $(REF) "); w.append(" --validation_strictness LENIENT "); w.append(" -I $< -o \"$(addsuffix .tmp.vcf.gz,$@)\" "); //w.print(" --num_cpu_threads_per_data_thread "+ this.getIntProperty("base-recalibrator-nct",1)); w.append(" -l INFO "); w.append(" -nct ").append(getAttribute(PROP_GATK_HAPCALLER_NCT)); w.append(" --dbsnp \"$(gatk.bundle.dbsnp.vcf)\" "); w.append(" $(foreach A, StrandAlleleCountsBySample PossibleDeNovo AS_FisherStrand AlleleBalance AlleleBalanceBySample " /* BaseCountsBySample fait planter */ + " GCContent ClippingRankSumTest , --annotation ${A} ) "); w.append(" --pedigree ").append(getProject().getPedigree().getPedFilename()); if( getProject().hasCapture()) { switch(split.getType()) { case WHOLE_GENOME: w.append(" -L:"+getProject().getCapture().getName()+",BED "+getCapture().getExtendedFilename()); case INTERVAL: //through... case WHOLE_CONTIG: w.append(" -L:BED \"$(addsuffix .bed,$@)\" ");break; default: throw new IllegalStateException(); } } else { switch(split.getType()) { case WHOLE_GENOME: /* nothing */ break; case INTERVAL: { IntervalSplit tmp= IntervalSplit.class.cast(split); w.append(" -L \""+tmp.getInterval().getContig()+":"+ tmp.getInterval().getStart()+"-"+tmp.getInterval().getEnd()+"\" "); break; } case WHOLE_CONTIG: w.append(" -L ").append(ContigSplit.class.cast(split).getContig());break; default: throw new IllegalStateException(); } } } } private class SamtoolsCaller extends AbstractCaller { @Override List<RefSplit> getCallSplits() { return getProject().getSamtoolsCallerSplits(); } @Override String getTargetVcfFilename() { return getProject().getSamtoolsRawVcf();} @Override void call(final PrintWriter w,final RefSplit split) { w.append(" ${samtools.exe} mpileup --uncompressed --BCF --fasta-ref $(REF) --bam-list $< "); if( getProject().hasCapture()) { switch(split.getType()) { case WHOLE_GENOME: w.append(" --positions "+getCapture().getExtendedFilename()); case INTERVAL: //through... case WHOLE_CONTIG: w.append(" --positions $(addsuffix .bed,$@) ");break; default: throw new IllegalStateException(); } } else { switch(split.getType()) { case WHOLE_GENOME: /* nothing */ break; case INTERVAL: { final IntervalSplit tmp= IntervalSplit.class.cast(split); w.append(" --region \""+tmp.getInterval().getContig()+":"+ tmp.getInterval().getStart()+"-"+tmp.getInterval().getEnd()+"\" "); break; } case WHOLE_CONTIG: w.append(" --region ").append(ContigSplit.class.cast(split).getContig());break; default: throw new IllegalStateException(); } } w.append(" | ${bcftools.exe} call -vmO z -o \"$(addsuffix .tmp.vcf.gz,$@)\" "); w.append(" --samples-file ").append(getProject().getPedigree().getPedFilename()); w.append(" && ${tabix.exe} -f -p vcf \"$(addsuffix .tmp.vcf.gz,$@)\" "); } } } /** describe a Pedigree capture */ private class Pedigree extends HasAttributes { private final Project project; private final String pedFilename; private final boolean providedByUser; Pedigree( final Project project,final JsonElement root) throws IOException { super(root); this.project=project; if(root!=null && root.isJsonObject() && root.getAsJsonObject().has("ped")) { final JsonObject json=root.getAsJsonObject(); this.pedFilename=json.get("ped").getAsString(); this.providedByUser = true; } else if(root!=null && root.isJsonPrimitive()) { this.pedFilename=root.getAsString(); this.providedByUser = true; } else { this.providedByUser = false; this.pedFilename = project.getOutputDirectory()+"/PED/"+getTmpPrefix()+"pedigree.ped"; } } public boolean isProvidedByUser() { return providedByUser; } public String getPedFilename() { return pedFilename; } public Project getProject() { return project; } @Override public Project getParent() { return getProject(); } void build(final PrintWriter w) { if(isProvidedByUser()) return; w.println(getPedFilename()+":"); w.println(rulePrefix()+" && rm --verbose -f $@ $(addsuffix .tmp.ped,$@)"); for(final Sample sample: getProject().getSamples()) { w.print("\techo '"); w.print(String.join("\t",sample.getFamilyId(),sample.getName(),sample.getFatherId(),sample.getMotherId(),sample.getSex(),sample.getStatus())); w.println("' >> $(addsuffix .tmp.ped,$@)"); } w.println("\tmv --verbose $(addsuffix .tmp.ped,$@) $@"); } @Override public String toString() { return getPedFilename(); } } /** describe a BED capture */ private class Capture extends HasAttributes { private final Project project; private Integer extend=null; private String name="capture"; private final String bed; Capture( final Project project,final JsonElement root) throws IOException { super(root); this.project=project; if(root.isJsonObject()) { final JsonObject json=root.getAsJsonObject(); this.bed=json.get("bed").getAsString(); if(json.has("extend")) { this.extend=json.get("extend").getAsInt(); } if(json.has("name")) { this.name=json.get("name").getAsString(); } } else if(root.isJsonPrimitive()) { bed=root.getAsString(); } else { throw new IOException("bad capture"); } } public String getBedFilename() { return this.bed; } public String getName() { return name; } public String getNormalizedFilename() { return getProject().getBedDirectory()+"/"+getName()+".normalized.bed"; } public String getExtendedFilename() { return getProject().getBedDirectory()+"/"+getName()+".extended"+getExtend()+".bed"; } public int getExtend() { if(this.extend!=null) return this.extend; return Integer.parseInt(getAttribute(PROP_CAPTURE_EXTEND)); } public Project getProject() { return project; } @Override public HasAttributes getParent() { return getProject(); } void prepareCapture(final PrintWriter w) { w.println(getExtendedFilename()+": "+getNormalizedFilename()+" $(addsuffix .fai,$(REF))"); w.print("\tmkdir -p $(dir $@) &&"); w.print(" $(bedtools.exe) slop -b "+getExtend()+" -g $(addsuffix .fai,$(REF)) -i $< |"); w.print(" LC_ALL=C sort -t '\t' -k1,1 -k2,2n -k3,3n |"); w.print(" $(bedtools.exe) merge -d "+getExtend()+" |"); w.print(" $(bedtools.exe) sort -faidx $(addsuffix .fai,$(REF)) > $(addsuffix .tmp.bed,$@) && "); w.print(" mv --verbose $(addsuffix .tmp.bed,$@) $@"); w.println(); w.println(getNormalizedFilename()+": "+getBedFilename()+" $(addsuffix .fai,$(REF))"); w.print("\tmkdir -p $(dir $@) && "); w.print(" tr -d '\\r' < $< | grep -v '^browser' | grep -v '^track' | cut -f1,2,3 | sed 's/^chr//' | LC_ALL=C sort -t '\t' -k1,1 -k2,2n -k3,3n | "); w.print(" $(bedtools.exe) merge |"); w.print(" $(bedtools.exe) sort -faidx $(addsuffix .fai,$(REF)) > $(addsuffix .tmp.bed,$@) && "); w.print(" mv --verbose $(addsuffix .tmp.bed,$@) $@"); w.println(); } @Override public String toString() { return getBedFilename(); } } private class Sample extends HasAttributes { private final Project project; private final String name; private final List<PairedFastq> pairs; private final String finalBam; private final String urer_g_vcf; private boolean isEmpty(File f) throws IOException { InputStream in=null; try { in = new FileInputStream(f); if(f.getName().endsWith(".gz")) in=new GZIPInputStream(in); int c=in.read(); in.close(); in=null; return c==-1; } catch (Exception e) { throw new RuntimeIOException(e); } finally { CloserUtil.close(in); } } Sample( final Project project,final JsonObject root) throws IOException { super(root); if(!root.isJsonObject()) throw new IOException("sample json is not object"); final JsonObject json = root.getAsJsonObject(); this.project = project; if(!json.has("name")) throw new IOException("@name missing in sample"); this.name= json.get("name").getAsString(); if(json.has("bam")) { this.pairs = Collections.emptyList(); this.finalBam=json.get("bam").getAsString(); } else if(json.has("fastqs")) { this.finalBam=null; this.pairs = new ArrayList<>(); final JsonElement fastqs = json.get("fastqs"); if(fastqs.isJsonPrimitive()) { final File dir=new File(fastqs.getAsString()); if(!(dir.exists() && dir.isDirectory())) throw new IOException("not a directory "+dir ); final File inside[]=dir.listFiles(F->F.exists() && F.getName().contains("_R1_") && F.getName().endsWith(".fastq.gz")); if(inside==null || inside.length==0) throw new IOException("No file under "+dir); for(final File insideFileR1:inside) { if(isEmpty(insideFileR1)) { LOG.warn(insideFileR1.getPath()+" is empty!"); continue; } final JsonArray twofastqs=new JsonArray(); twofastqs.add(new JsonPrimitive(insideFileR1.getAbsolutePath())); final File insideFileR2=new File(dir,insideFileR1.getName().replaceAll("_R1_", "_R2_")); if(!insideFileR2.exists()) throw new IOException("Cannot find "+insideFileR2); twofastqs.add(new JsonPrimitive(insideFileR2.getAbsolutePath())); final PairedFastq pair=new PairedFastq(this, this.pairs.size(), twofastqs); this.pairs.add(pair); } } else if(fastqs.isJsonArray()) { for(final JsonElement pairjson : fastqs.getAsJsonArray()) { final PairedFastq pair =new PairedFastq(this,this.pairs.size(),pairjson); this.pairs.add(pair); } } else { throw new IOException("bad fastq"); } } else { this.finalBam=null; this.pairs = Collections.emptyList(); } if(json.has("gvcf")) { this.urer_g_vcf=json.get("gvcf").getAsString(); } else { this.urer_g_vcf=null; } } public String getFamilyId() { return "PEDIGREE";} public String getFatherId() { return "0";} public String getMotherId() { return "0";} public String getSex() { return "0";} public String getStatus() { return "9";} public Project getProject() { return this.project;} public List<PairedFastq> getPairs() { return this.pairs;} public String getName() { return this.name;} public boolean isHaloplex() { return isAttributeSet(PROP_PROJECT_IS_HALOPLEX, false);} public boolean isBamAlreadyProvided() { return this.finalBam!=null;} @Override public Project getParent() { return getProject();} public String getDirectory() { return getProject().getSamplesDirectory()+"/"+getName(); } public String getMergedBam() { return getDirectory()+"/"+getTmpPrefix()+getName()+".merged.bam"; } public String getMergedBamBai() { return "$(call picardbai,"+getMergedBam()+")"; } public boolean isMarkupDisabled() { return this.finalBam!=null || isHaloplex() || isAttributeSet(PROP_DISABLE_MARKDUP, false); } public String getMarkdupBam() { if(isMarkupDisabled()) { return getMergedBam(); } else { return getDirectory()+"/"+getTmpPrefix()+getName()+".markdup.bam"; } } public String getMarkdupBamBai() { return "$(call picardbai,"+getMarkdupBam()+")"; } public boolean isRealignDisabled() { return isBamAlreadyProvided() || isAttributeSet(PROP_DISABLE_REALIGN, false); } public String getRealignBam() { if(isRealignDisabled()) { return getMarkdupBam(); } else { return getDirectory()+"/"+getTmpPrefix()+getName()+".realign.bam"; } } public String getRealignBamBai() { return "$(call picardbai,"+getRealignBam()+")"; } public boolean isRecalibrationDisabled() { return isBamAlreadyProvided() || isAttributeSet(PROP_DISABLE_RECALIBRATION, false); } public String getRecalBam() { if(isRecalibrationDisabled()) { return getRealignBam(); } else { return getDirectory()+"/"+getTmpPrefix()+getName()+".recal.bam"; } } public String getRecalBamBai() { return "$(call picardbai,"+getRecalBam()+")"; } public String getFinalBam() { if(isBamAlreadyProvided()) { return this.finalBam; } else { return getDirectory()+"/"+getFilePrefix()+getName()+".final.bam"; } } public String getFinalBamBai() { return "$(call picardbai,"+getFinalBam()+")"; } public String mergeSortedBams() { if(isBamAlreadyProvided()) return ""; return new StringBuilder(). append(getMergedBamBai()+" : "+getMergedBam()).append("\n\ttouch -c $@\n"). append(getMergedBam()+" : "+getPairs().stream().map(P->P.getSortedFilename()).collect(Collectors.joining(" "))+"\n"). append(rulePrefix()+" && "). append("$(java.exe) -Djava.io.tmpdir=$(dir $@) -jar \"$(picard.jar)\" MergeSamFiles O=$(addsuffix .tmp.bam,$@) "). append(" SO=coordinate AS=true CREATE_INDEX=true COMPRESSION_LEVEL=").append(getAttribute(PROP_DEFAULT_COMPRESSION_LEVEL)). append(" VALIDATION_STRINGENCY=SILENT USE_THREADING=true VERBOSITY=INFO TMP_DIR=$(dir $@) COMMENT=\"Merged from $(words $^) files.\" "). append("$(foreach B,$(filter %.bam,$^), I=$(B) ) && "). append("mv --verbose \"$(addsuffix .tmp.bam,$@)\" \"$@\" "). append("\n"). toString(); } public String markDuplicates() { if(this.isMarkupDisabled()) return ""; return new StringBuilder(). append(getMarkdupBamBai()+" : "+getMarkdupBam()+"\n"). append("\ttouch -c $@"). append("\n"). append(getMarkdupBam()+" : "+getMergedBam()+"\n"). append(rulePrefix()+" && "). append("$(java.exe) -Xmx3g -Djava.io.tmpdir=$(dir $@) -jar \"$(picard.jar)\" MarkDuplicates I=$< O=$(addsuffix .tmp.bam,$@) M=$(addsuffix .metrics,$@) "). append(" REMOVE_DUPLICATES="). append(isAttributeSet(PROP_PICARD_MARKDUP_REMOVES_DUPLICATES)?"true":"false"). append(" AS=true CREATE_INDEX=true COMPRESSION_LEVEL="). append(getAttribute(PROP_DEFAULT_COMPRESSION_LEVEL)). append(" VALIDATION_STRINGENCY=SILENT VERBOSITY=INFO TMP_DIR=$(dir $@) "). append(" && mv --verbose \"$(addsuffix .tmp.bam,$@)\" \"$@\" "). append(" && mv --verbose \"$(addsuffix .tmp.bai,$@)\" \""+getMarkdupBamBai()+"\""). append("\n"). toString(); } public String finalBam() { if(this.isBamAlreadyProvided()) return ""; StringBuilder sb=new StringBuilder(). append(getFinalBamBai()+":"+ this.getFinalBam()). append("\n"). append(rulePrefix()). append(" && ${samtools.exe} index $< $@"). append("\n") ; sb.append(getFinalBam()+":"+ this.getRecalBam()). append("\n"). append(rulePrefix()). append(" && cp --verbose $< $@"). append("\n") ; return sb.toString(); } public String recalibrateBam() { if(this.isRecalibrationDisabled()) return ""; final StringWriter sw=new StringWriter(); final PrintWriter w= new PrintWriter(sw); w.println( getRecalBamBai() + " : " + this.getRecalBam()); w.println("\ttouch -c $@"); w.println(); w.print(this.getRecalBam()+" : "+this.getRealignBam()+" "+this.getRealignBamBai()); if( this.getProject().hasCapture()) { w.print(" " +getProject().getCapture().getExtendedFilename()); } w.println(); /* first call BaseRecalibrator */ w.print("\tmkdir -p $(dir $@) && "); w.print(" $(java.exe) -XX:ParallelGCThreads=5 -Xmx2g -Djava.io.tmpdir=$(dir $@) -jar $(gatk.jar) -T BaseRecalibrator "); w.print(" -R $(REF) "); w.print(" --validation_strictness LENIENT "); w.print(" -I $< "); //w.print(" --num_cpu_threads_per_data_thread "+ this.getIntProperty("base-recalibrator-nct",1)); w.print(" -l INFO "); w.print(" -o $(addsuffix .grp,$@) "); w.print(" -knownSites:dbsnp138,VCF \"$(gatk.bundle.dbsnp.vcf)\" "); if( this.getProject().hasCapture()) { w.print(" -L:"+getProject().getCapture().getName()+",BED "+getProject().getCapture().getExtendedFilename()); } w.print(" -cov QualityScoreCovariate "); w.print(" -cov CycleCovariate "); w.print(" -cov ContextCovariate "); /* then call PrintReads */ w.print(" && $(java.exe) -XX:ParallelGCThreads=5 -Xmx4g -Djava.io.tmpdir=$(dir $@) -jar $(gatk.jar) -T PrintReads "); w.print(" -R $(REF) "); w.print(" --bam_compression "+getAttribute(PROP_DEFAULT_COMPRESSION_LEVEL)); w.print(" -BQSR $(addsuffix .grp,$@) "); w.print(" --disable_indel_quals "); w.print(" -I $< "); w.print(" -o $(addsuffix .tmp.bam,$@) "); w.print(" --validation_strictness LENIENT "); w.print(" -l INFO && "); w.print(" mv --verbose \"$(addsuffix .tmp.bam,$@)\" \"$@\" && "); w.print(" mv --verbose \"$(call picardbai,$(addsuffix .tmp.bam,$@))\" \""+getRecalBamBai()+"\" && "); w.print(" sleep 2 && touch -c \""+getRecalBamBai()+"\" && "); w.print(" rm --verbose -f $(addsuffix .grp,$@)"); w.println(); w.println(); return sw.toString(); } public String realignAroundIndels() { if(isRealignDisabled()) return ""; StringWriter sw=new StringWriter(); PrintWriter w= new PrintWriter(sw); w.append(getRealignBamBai()+" : "+getRealignBam()+"\n"). append("\ttouch -c $@"). append("\n") ; /* first call RealignerTargetCreator */ w.print(this.getRealignBam()); w.print(" : "); w.print(this.getMarkdupBam()); if( this.getProject().hasCapture()) { w.print(" " +getProject().getCapture().getExtendedFilename()); } w.println(); w.print("\tmkdir -p $(dir $@ ) && "); w.print("$(java.exe) -XX:ParallelGCThreads=2 -Xmx2g -Djava.io.tmpdir=$(dir $@) -jar $(gatk.jar) -T RealignerTargetCreator "); w.print(" -R $(REF) "); w.print(" --num_threads 1"); if( this.getProject().hasCapture()) { w.print(" -L:"+getProject().getCapture().getName()+",BED "+getProject().getCapture().getExtendedFilename()); } w.print(" -I $(filter %.bam,$^) "); w.print(" -o $(addsuffix .intervals,$@) "); w.print(" --known:onekindels,VCF \"$(gatk.bundle.onek.indels.vcf)\" "); w.print(" --known:millsindels,VCF \"$(gatk.bundle.mills.indels.vcf)\" "); w.print(" -S SILENT "); /* then call IndelRealigner */ w.print(" && $(java.exe) -XX:ParallelGCThreads=5 -Xmx2g -Djava.io.tmpdir=$(dir $@) -jar $(gatk.jar) -T IndelRealigner "); w.print(" -R $(REF) "); w.print(" --bam_compression "+ getAttribute(PROP_DEFAULT_COMPRESSION_LEVEL)); w.print(" -I $(filter %.bam,$^) "); w.print(" -o $(addsuffix .tmp.bam,$@) "); w.print(" -targetIntervals \"$(addsuffix .intervals,$@)\" "); w.print(" -known:onekindels,VCF \"$(gatk.bundle.onek.indels.vcf)\" "); w.print(" -known:millsindels,VCF \"$(gatk.bundle.mills.indels.vcf)\" "); w.print(" -S SILENT && "); w.print("mv --verbose \"$(addsuffix .tmp.bam,$@)\" \"$@\" && "); w.print("mv --verbose \"$(call picardbai,$(addsuffix .tmp.bam,$@))\" \""+ getRealignBamBai() +"\" && "); w.print("rm --verbose -f \"$(addsuffix .intervals,$@)\" "); w.println(); w.println(); return sw.toString(); } @Override public String toString() { return getName(); } } private class PairedFastq extends HasAttributes { private final List<Fastq> fastqs; private final int indexInSample; private final Sample _sample; PairedFastq( final Sample sample,int indexInSample,final JsonElement root) throws IOException { super(root); this.indexInSample=indexInSample; this.fastqs = new ArrayList<>(); this._sample=sample; if(root.isJsonObject()) { final JsonObject json=root.getAsJsonObject(); if(json.has("fastqs")) { for(final JsonElement fqjson :json.get("fastqs").getAsJsonArray()) { Fastq fq =new Fastq(this,fqjson); this.fastqs.add(fq); } } } else if(root.isJsonArray()) { for(final JsonElement fqjson :root.getAsJsonArray()) { Fastq fq =new Fastq(this,fqjson); this.fastqs.add(fq); } } else if(root.isJsonPrimitive()) { Fastq fq =new Fastq(this,root); this.fastqs.add(fq); } else throw new IOException("cannot decore paired for "+root); } public Sample getSample() { return this._sample;} public Project getProject() { return getSample().getProject();} @Override public final Sample getParent() { return getSample(); } public Fastq get(int i) { return this.fastqs.get(i);} public int getIndex() { return indexInSample;} public String getDirectory() { return getSample().getDirectory()+"/p"+getIndex(); } public String getSortedFilename() { if(getSample().getPairs().size()==1) { return getSample().getMergedBam(); } else { return getDirectory()+"/"+getTmpPrefix()+getSample().getName()+".sorted.bam"; } } private boolean isUsingCutadapt() { return isAttributeSet(PROP_DISABLE_CUTADAPT, false)==false; } private Integer getLane() { for(int i=0;i< this.fastqs.size();++i) { String fname= this.fastqs.get(i).getFilename(); if(!fname.endsWith(".fastq.gz")) continue; String tokens[]=fname.split("[_]"); for(int j=0;j+1< tokens.length;++j) { if(tokens[j].matches("L[0-9]+") && tokens[j+1].equals("R"+(i+1))) { return Integer.parseInt(tokens[j].substring(1).trim()); } } } return null; } public String bwamem() { if(getSample().isBamAlreadyProvided()) return ""; final String inputs[]=new String[2]; final StringBuilder sb= new StringBuilder(). append(this.getSortedFilename()). append(":"). append(this.get(0).getFilename()); if(this.fastqs.size()>1) { sb.append(" "). append(this.get(1).getFilename()); inputs[0]=this.get(0).getFilename(); inputs[1]=this.get(1).getFilename(); } if(!getAttribute(PROP_MAPPING_REGION,"").isEmpty()) { sb.append(" ").append(getAttribute(PROP_MAPPING_REGION,"")); } sb.append("\n"). append(rulePrefix()); if(this.fastqs.size()==1 && this.fastqs.get(0).isBam()) { inputs[0]="$(addsuffix .bam2fq.R1.fq.gz,$@)"; inputs[1]="$(addsuffix .bam2fq.R2.fq.gz,$@)"; sb.append(" && $(java.exe) -Djava.io.tmpdir=$(dir $@) -jar \"$(picard.jar)\" SamToFastq "). append(" I=$< FASTQ=").append(inputs[0]). append(" SECOND_END_FASTQ=").append(inputs[1]). append(" UNPAIRED_FASTQ=$(addsuffix .bam2fq.unpaired.fq.gz,$@) "). append(" VALIDATION_STRINGENCY=SILENT VERBOSITY=INFO TMP_DIR=$(dir $@) "). append(" && rm --verbose $(addsuffix .bam2fq.unpaired.fq.gz,$@) ") ; } String fq1; String fq2; if(isUsingCutadapt()) { fq1="$(addsuffix .tmp.R1.fq,$@)"; fq2="$(addsuffix .tmp.R2.fq,$@)"; for(int side=0;side<2;++side) { sb.append(" && ${cutadapt.exe} -a "). append(getAttribute(side==0?PROP_CUTADAPT_ADAPTER5:PROP_CUTADAPT_ADAPTER3)). append(" "). append(inputs[side]). append(" -o $(addsuffix .tmp.fq,$@) > /dev/null "). append("&& awk '{if(NR%4==2 && length($$0)==0) { printf(\"N\\n\");} else if(NR%4==0 && length($$0)==0) { printf(\"#\\n\");} else {print;}}' "). append("$(addsuffix .tmp.fq,$@) > "). append(side==0?fq1:fq2). append(" && rm --verbose $(addsuffix .tmp.fq,$@) "); } } else { fq1=inputs[0]; fq2=inputs[1]; } final Integer laneIndex= getLane(); final String bwaRef="$(dir $(REF))index-bwa-0.7.12/$(notdir $(REF))"; sb.append(" && $(bwa.exe) mem -t "). append(getAttribute(PROP_BWA_MEM_NTHREADS)). append(" -M -H '@CO\\tDate:" + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").format(new Date())); if(getProject().hasCapture()) { sb.append(" Capture was "). append(getProject().getCapture().getName()). append(" : "). append(getProject().getCapture().getBedFilename()). append(" ") ; } sb.append(" Alignment of $(notdir $^) for project "). append(getProject().getName()+":"+getProject().getDescription()+"'"). append(" -R '@RG\\tID:"+ getSample().getName()). append(laneIndex==null?"":"_L"+laneIndex). append("\\tLB:"+getSample()+"\\tSM:"+getSample() +"\\tPL:illumina\\tCN:Nantes' \""+bwaRef+"\" "+fq1+" "+fq2+" |"); if(!getAttribute(PROP_MAPPING_REGION,"").isEmpty()) { final String rgn=getAttribute(PROP_MAPPING_REGION,""); sb.append("$(samtools.exe) view -b -u -T ${REF} -L \"").append(rgn).append("\" - | "); } sb.append("$(samtools.exe) sort --reference \"$(REF)\" -l "). append(getAttribute(PROP_DEFAULT_COMPRESSION_LEVEL)). append(" -@ "). append(getAttribute(PROP_SAMTOOLS_SORT_NTHREADS)). append(" -O bam -o $(addsuffix .tmp.bam,$@) -T $(dir $@)"+getTmpPrefixToken()+getSample().getName()+".tmp_sort_"+getIndex()+" - && "); sb.append("mv --verbose \"$(addsuffix .tmp.bam,$@)\" \"$@\"") ; if(isUsingCutadapt()) { sb.append(" && rm --verbose -f "+fq1+" "+fq2); } if(this.fastqs.size()==1 && this.fastqs.get(0).isBam()) { sb.append(" && rm --verbose -f "+inputs[0]+" "+inputs[1]); } return sb.append("\n"). toString(); } @Override public String toString() { return getSample().getName()+".fastqs["+this.getIndex()+"]"; } } private class Fastq extends HasAttributes { private final PairedFastq pair; private final String filename; Fastq( final PairedFastq pair,final JsonElement e ) throws IOException { super(e); this.pair=pair; filename= e.getAsString(); } public String getFilename() { return filename; } public boolean isBam() { return getFilename().endsWith(".bam"); } public boolean isFastq() { final String s=getFilename().toLowerCase(); return s.endsWith(".fq") || s.endsWith(".fq.gz") || s.endsWith(".fastq") || s.endsWith(".fastq.gz"); } @Override public String toString() { return getFilename(); } public PairedFastq getPair() { return pair; } @Override public PairedFastq getParent() { return getPair(); } } private PrintWriter out = new PrintWriter(System.out); public void execute(final Project project) { out.println("include ../../config/config.mk"); out.println("OUT="+project.getOutputDirectory()); out.println("# 1 : bam name"); out.println("define bam_and_picardbai"); out.println("$(1) $(call picardbai,$(1))"); out.println("endef"); out.println("#1 : bam filename"); out.println("define picardbai"); out.println("$(patsubst %.bam,%.bai,$(1))"); out.println("endef"); out.println("gatk.bundle.dir=$(dir $(REF))"); out.println("gatk.bundle.dbsnp.vcf=$(gatk.bundle.dir)dbsnp_138.b37.vcf"); out.println("gatk.bundle.onek.indels.vcf=$(gatk.bundle.dir)1000G_phase1.indels.b37.vcf"); out.println("gatk.bundle.onek.snp.vcf=$(gatk.bundle.dir)1000G_phase1.snps.high_confidence.b37.vcf"); out.println("gatk.bundle.mills.indels.vcf=$(gatk.bundle.dir)Mills_and_1000G_gold_standard.indels.b37.vcf"); out.println("gatk.bundle.hapmap.vcf=$(gatk.bundle.dir)hapmap_3.3.b37.vcf"); out.println("exac.vcf?=/commun/data/pubdb/broadinstitute.org/exac/0.3/ExAC.r0.3.sites.vep.vcf.gz"); //out.println("define run_jvarkit\n${java.exe} -jar ${jvarkit.dir}/$(1).jar\nendef"); if(!reannotatevcfs) { out.print("all:"+project.getHapCallerAnnotationVcf()); if(project.isTrue(PROP_USE_LUMPY_EXPRESS)) { out.print(" "); out.print(project.getLumpyVcf()); } out.println(); out.println("all_final_bam:"+project.getSamples().stream(). map(S->S.getFinalBamBai()). collect(Collectors.joining(" \\\n\t")) ); out.println("all_recal_bam:"+project.getSamples().stream(). filter(S->!S.isBamAlreadyProvided()). map(S->S.getRecalBamBai()). collect(Collectors.joining(" \\\n\t")) ); out.println("all_realigned_bam:"+project.getSamples().stream(). filter(S->!S.isBamAlreadyProvided()). map(S->S.getRealignBamBai()). collect(Collectors.joining(" \\\n\t")) ); out.println("all_markdup_bam:"+project.getSamples().stream(). filter(S->!S.isBamAlreadyProvided()). map(S->S.getMarkdupBamBai()). collect(Collectors.joining(" \\\n\t")) ); out.println("all_merged_bam:"+project.getSamples().stream(). filter(S->!S.isBamAlreadyProvided()). map(S->S.getMergedBamBai()). collect(Collectors.joining(" \\\n\t")) ); project.lumpyExpress(out); for(final Sample sample: project.getSamples()) { if(sample.getPairs().size()==1) { out.println(sample.getPairs().get(0).bwamem()); } else if(sample.getPairs().size()>1) { sample.getPairs().forEach(P->{out.println(P.bwamem());}); out.println(sample.mergeSortedBams()); } out.println(sample.markDuplicates()); out.println(sample.realignAroundIndels()); out.println(sample.recalibrateBam()); out.println(sample.finalBam()); } if(project.hasCapture()) { project.getCapture().prepareCapture(out); } project.getPedigree().build(out); project.bamList(out); project.callVariants(out); project.annotateVariants(out); } else /* reannotate vcf */ { out.println("all:"+ project.vcfToReannotate.stream().map(S->project.getReannoteVcfFilename(S)).collect(Collectors.joining(" "))); project.reannoteVcfs(out); } out.println(); out.flush(); } private static JsonElement readJsonFile(final File jsonFile) throws IOException { IOUtil.assertFileIsReadable(jsonFile); FileReader r=new FileReader(jsonFile); JsonParser jsparser=new JsonParser(); JsonElement root=jsparser.parse(r); r.close(); return root; } @Override public int doWork(final List<String> args) { if(this.dumpAttributes) { for(final PropertyKey prop:NgsWorkflow.name2propertyKey.values()) { System.out.print("\""+prop.getKey()+"\"\t"+prop.getDescription()); if(prop.getDefaultValue()!=null && !prop.getDefaultValue().isEmpty()) { System.out.print("\t\""+prop.getDefaultValue()+"\""); } System.out.println(); } return 0; } try { if(args.size()!=1) throw new JvarkitException.CommandLineError("exepcted one and only one json file as input"); final File jsonFile=new File(args.get(0)); final JsonElement root=readJsonFile(jsonFile); final Project proj=new Project(root); execute(proj); out.flush(); return 0; } catch(final Exception err) { LOG.error(err); return -1; } finally { } } /* curl "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/gap.txt.gz" |gunzip -c | cut -f2,3,4 | grep -v _ | grep -v GL | grep -v MT | sed 's/^chr//' | sort -t ' ' -k1,1V -k2,2n -k3,3n | /commun/data/packages/bedtools/bedtools2-2.25.0/bin/complementBed -i - -g <(cut -f 1,2 /commun/data/pubdb/broadinstitute.org/bundle/1.5/b37/human_g1k_v37.fasta.fai) | sort -t ' ' -k1,1V -k2,2n -k3,3n | grep -v GL | awk -F ' ' '($2!=$3)' | awk '{printf("L.add(new IntervalSplit(%s,%s,%s));\n",$1,int($2)-1,int($3)+1);}' */ public List<IntervalSplit> getIntervalSplitListGapForGrch37() { final List<IntervalSplit> L=new ArrayList<>(); L.add(new IntervalSplit(1,10000,177417)); L.add(new IntervalSplit(1,227416,267720)); L.add(new IntervalSplit(1,317718,471369)); L.add(new IntervalSplit(1,521367,2634221)); L.add(new IntervalSplit(1,2684219,3845269)); L.add(new IntervalSplit(1,3995267,13052999)); L.add(new IntervalSplit(1,13102997,13219913)); L.add(new IntervalSplit(1,13319911,13557163)); L.add(new IntervalSplit(1,13607161,17125659)); L.add(new IntervalSplit(1,17175657,29878083)); L.add(new IntervalSplit(1,30028081,103863907)); L.add(new IntervalSplit(1,103913905,120697157)); L.add(new IntervalSplit(1,120747155,120936696)); L.add(new IntervalSplit(1,121086694,121485435)); L.add(new IntervalSplit(1,142535433,142731023)); L.add(new IntervalSplit(1,142781021,142967762)); L.add(new IntervalSplit(1,143117760,143292817)); L.add(new IntervalSplit(1,143342815,143544526)); L.add(new IntervalSplit(1,143644524,143771003)); L.add(new IntervalSplit(1,143871001,144095784)); L.add(new IntervalSplit(1,144145782,144224482)); L.add(new IntervalSplit(1,144274480,144401745)); L.add(new IntervalSplit(1,144451743,144622414)); L.add(new IntervalSplit(1,144672412,144710725)); L.add(new IntervalSplit(1,144810723,145833119)); L.add(new IntervalSplit(1,145883117,146164651)); L.add(new IntervalSplit(1,146214649,146253300)); L.add(new IntervalSplit(1,146303298,148026039)); L.add(new IntervalSplit(1,148176037,148361359)); L.add(new IntervalSplit(1,148511357,148684148)); L.add(new IntervalSplit(1,148734146,148954461)); L.add(new IntervalSplit(1,149004459,149459646)); L.add(new IntervalSplit(1,149509644,205922708)); L.add(new IntervalSplit(1,206072706,206332222)); L.add(new IntervalSplit(1,206482220,223747847)); L.add(new IntervalSplit(1,223797845,235192212)); L.add(new IntervalSplit(1,235242210,248908211)); L.add(new IntervalSplit(1,249058209,249240622)); L.add(new IntervalSplit(2,10000,3529312)); L.add(new IntervalSplit(2,3579311,5018789)); L.add(new IntervalSplit(2,5118787,16279725)); L.add(new IntervalSplit(2,16329723,21153114)); L.add(new IntervalSplit(2,21178112,87668207)); L.add(new IntervalSplit(2,87718205,89630437)); L.add(new IntervalSplit(2,89830435,90321526)); L.add(new IntervalSplit(2,90371524,90545104)); L.add(new IntervalSplit(2,91595102,92326172)); L.add(new IntervalSplit(2,95326170,110109338)); L.add(new IntervalSplit(2,110251336,149690583)); L.add(new IntervalSplit(2,149790581,234003742)); L.add(new IntervalSplit(2,234053740,239801979)); L.add(new IntervalSplit(2,239831977,240784133)); L.add(new IntervalSplit(2,240809131,243102477)); L.add(new IntervalSplit(2,243152475,243189374)); L.add(new IntervalSplit(3,60000,66170270)); L.add(new IntervalSplit(3,66270269,90504855)); L.add(new IntervalSplit(3,93504853,194041962)); L.add(new IntervalSplit(3,194047250,197962431)); L.add(new IntervalSplit(4,10000,1423146)); L.add(new IntervalSplit(4,1478645,8799204)); L.add(new IntervalSplit(4,8818202,9274643)); L.add(new IntervalSplit(4,9324641,31820918)); L.add(new IntervalSplit(4,31837416,32834639)); L.add(new IntervalSplit(4,32840637,40296397)); L.add(new IntervalSplit(4,40297095,49338942)); L.add(new IntervalSplit(4,49488940,49660118)); L.add(new IntervalSplit(4,52660116,59739334)); L.add(new IntervalSplit(4,59789332,75427380)); L.add(new IntervalSplit(4,75452278,191044277)); L.add(new IntervalSplit(5,10000,17530657)); L.add(new IntervalSplit(5,17580656,46405642)); L.add(new IntervalSplit(5,49405640,91636129)); L.add(new IntervalSplit(5,91686127,138787074)); L.add(new IntervalSplit(5,138837072,155138728)); L.add(new IntervalSplit(5,155188726,180905261)); L.add(new IntervalSplit(6,60000,58087659)); L.add(new IntervalSplit(6,58137658,58780167)); L.add(new IntervalSplit(6,61880165,62128590)); L.add(new IntervalSplit(6,62178588,95680544)); L.add(new IntervalSplit(6,95830542,157559468)); L.add(new IntervalSplit(6,157609466,157641301)); L.add(new IntervalSplit(6,157691299,167942074)); L.add(new IntervalSplit(6,168042072,170279973)); L.add(new IntervalSplit(6,170329971,171055068)); L.add(new IntervalSplit(7,10000,232484)); L.add(new IntervalSplit(7,282483,50370632)); L.add(new IntervalSplit(7,50410630,58054332)); L.add(new IntervalSplit(7,61054330,61310514)); L.add(new IntervalSplit(7,61360512,61460466)); L.add(new IntervalSplit(7,61510464,61677021)); L.add(new IntervalSplit(7,61727019,61917158)); L.add(new IntervalSplit(7,61967156,74715725)); L.add(new IntervalSplit(7,74765723,100556044)); L.add(new IntervalSplit(7,100606042,130154524)); L.add(new IntervalSplit(7,130254522,139379378)); L.add(new IntervalSplit(7,139404376,142048196)); L.add(new IntervalSplit(7,142098194,142276198)); L.add(new IntervalSplit(7,142326196,143347898)); L.add(new IntervalSplit(7,143397896,154270635)); L.add(new IntervalSplit(7,154370633,159128664)); L.add(new IntervalSplit(8,10000,7474649)); L.add(new IntervalSplit(8,7524648,12091855)); L.add(new IntervalSplit(8,12141853,43838888)); L.add(new IntervalSplit(8,46838886,48130500)); L.add(new IntervalSplit(8,48135598,86576452)); L.add(new IntervalSplit(8,86726450,142766516)); L.add(new IntervalSplit(8,142816514,145332589)); L.add(new IntervalSplit(8,145432587,146304023)); L.add(new IntervalSplit(9,10000,39663686)); L.add(new IntervalSplit(9,39713685,39974797)); L.add(new IntervalSplit(9,40024795,40233030)); L.add(new IntervalSplit(9,40283028,40425835)); L.add(new IntervalSplit(9,40475833,40940342)); L.add(new IntervalSplit(9,40990340,41143215)); L.add(new IntervalSplit(9,41193213,41365794)); L.add(new IntervalSplit(9,41415792,42613956)); L.add(new IntervalSplit(9,42663954,43213699)); L.add(new IntervalSplit(9,43313697,43946570)); L.add(new IntervalSplit(9,43996568,44676647)); L.add(new IntervalSplit(9,44726645,44908294)); L.add(new IntervalSplit(9,44958292,45250204)); L.add(new IntervalSplit(9,45350202,45815522)); L.add(new IntervalSplit(9,45865520,46216431)); L.add(new IntervalSplit(9,46266429,46461040)); L.add(new IntervalSplit(9,46561038,47060134)); L.add(new IntervalSplit(9,47160132,47317680)); L.add(new IntervalSplit(9,65467678,65918361)); L.add(new IntervalSplit(9,65968359,66192216)); L.add(new IntervalSplit(9,66242214,66404657)); L.add(new IntervalSplit(9,66454655,66614196)); L.add(new IntervalSplit(9,66664194,66863344)); L.add(new IntervalSplit(9,66913342,67107835)); L.add(new IntervalSplit(9,67207833,67366297)); L.add(new IntervalSplit(9,67516295,67987999)); L.add(new IntervalSplit(9,68137997,68514182)); L.add(new IntervalSplit(9,68664180,68838947)); L.add(new IntervalSplit(9,68988945,69278386)); L.add(new IntervalSplit(9,69328384,70010543)); L.add(new IntervalSplit(9,70060541,70218730)); L.add(new IntervalSplit(9,70318728,70506536)); L.add(new IntervalSplit(9,70556534,70735469)); L.add(new IntervalSplit(9,70835467,92343417)); L.add(new IntervalSplit(9,92443415,92528797)); L.add(new IntervalSplit(9,92678795,133073061)); L.add(new IntervalSplit(9,133223059,137041194)); L.add(new IntervalSplit(9,137091192,139166998)); L.add(new IntervalSplit(9,139216996,141153432)); L.add(new IntervalSplit(10,60000,17974675)); L.add(new IntervalSplit(10,18024674,38818836)); L.add(new IntervalSplit(10,38868834,39154936)); L.add(new IntervalSplit(10,42354934,42546688)); L.add(new IntervalSplit(10,42596686,46426965)); L.add(new IntervalSplit(10,46476963,47429170)); L.add(new IntervalSplit(10,47529168,47792477)); L.add(new IntervalSplit(10,47892475,48055708)); L.add(new IntervalSplit(10,48105706,49095537)); L.add(new IntervalSplit(10,49195535,51137411)); L.add(new IntervalSplit(10,51187409,51398846)); L.add(new IntervalSplit(10,51448844,125869473)); L.add(new IntervalSplit(10,125919471,128616070)); L.add(new IntervalSplit(10,128766068,133381405)); L.add(new IntervalSplit(10,133431403,133677528)); L.add(new IntervalSplit(10,133727526,135524748)); L.add(new IntervalSplit(11,60000,1162759)); L.add(new IntervalSplit(11,1212758,50783854)); L.add(new IntervalSplit(11,51090852,51594206)); L.add(new IntervalSplit(11,54694204,69089802)); L.add(new IntervalSplit(11,69139800,69724696)); L.add(new IntervalSplit(11,69774694,87688379)); L.add(new IntervalSplit(11,87738377,96287585)); L.add(new IntervalSplit(11,96437583,134946517)); L.add(new IntervalSplit(12,60000,95739)); L.add(new IntervalSplit(12,145738,7189877)); L.add(new IntervalSplit(12,7239875,34856695)); L.add(new IntervalSplit(12,37856693,109373471)); L.add(new IntervalSplit(12,109423469,122530624)); L.add(new IntervalSplit(12,122580622,132706993)); L.add(new IntervalSplit(12,132806991,133841896)); L.add(new IntervalSplit(13,19020000,86760324)); L.add(new IntervalSplit(13,86910323,112353995)); L.add(new IntervalSplit(13,112503993,114325994)); L.add(new IntervalSplit(13,114425992,114639949)); L.add(new IntervalSplit(13,114739947,115109879)); L.add(new IntervalSplit(14,19000000,107289540)); L.add(new IntervalSplit(15,20000000,20894633)); L.add(new IntervalSplit(15,20935074,21398820)); L.add(new IntervalSplit(15,21884999,22212115)); L.add(new IntervalSplit(15,22262113,22596194)); L.add(new IntervalSplit(15,22646192,23514854)); L.add(new IntervalSplit(15,23564852,29159444)); L.add(new IntervalSplit(15,29209442,82829646)); L.add(new IntervalSplit(15,82879644,84984474)); L.add(new IntervalSplit(15,85034472,102521393)); L.add(new IntervalSplit(16,60000,8636921)); L.add(new IntervalSplit(16,8686920,34023151)); L.add(new IntervalSplit(16,34173149,35285802)); L.add(new IntervalSplit(16,46385800,88389384)); L.add(new IntervalSplit(16,88439382,90294754)); L.add(new IntervalSplit(17,-1,296627)); L.add(new IntervalSplit(17,396625,21566609)); L.add(new IntervalSplit(17,21666607,22263007)); L.add(new IntervalSplit(17,25263005,34675849)); L.add(new IntervalSplit(17,34725847,62410761)); L.add(new IntervalSplit(17,62460759,77546462)); L.add(new IntervalSplit(17,77596460,79709050)); L.add(new IntervalSplit(17,79759048,81195211)); L.add(new IntervalSplit(18,10000,15410898)); L.add(new IntervalSplit(18,18510897,52059137)); L.add(new IntervalSplit(18,52209135,72283354)); L.add(new IntervalSplit(18,72333352,75721821)); L.add(new IntervalSplit(18,75771819,78017249)); L.add(new IntervalSplit(19,60000,7346004)); L.add(new IntervalSplit(19,7396003,8687199)); L.add(new IntervalSplit(19,8737197,20523416)); L.add(new IntervalSplit(19,20573414,24631783)); L.add(new IntervalSplit(19,27731781,59118984)); L.add(new IntervalSplit(20,60000,26319569)); L.add(new IntervalSplit(20,29419568,29653909)); L.add(new IntervalSplit(20,29803907,34897086)); L.add(new IntervalSplit(20,34947084,61091438)); L.add(new IntervalSplit(20,61141436,61213370)); L.add(new IntervalSplit(20,61263368,62965521)); L.add(new IntervalSplit(21,9411193,9595548)); L.add(new IntervalSplit(21,9645547,9775438)); L.add(new IntervalSplit(21,9825436,10034921)); L.add(new IntervalSplit(21,10084919,10215977)); L.add(new IntervalSplit(21,10365975,10647897)); L.add(new IntervalSplit(21,10697895,11188130)); L.add(new IntervalSplit(21,14338128,42955560)); L.add(new IntervalSplit(21,43005558,44632665)); L.add(new IntervalSplit(21,44682663,48119896)); L.add(new IntervalSplit(22,16050000,16697850)); L.add(new IntervalSplit(22,16847849,20509432)); L.add(new IntervalSplit(22,20609430,50364778)); L.add(new IntervalSplit(22,50414776,51244567)); L.add(new IntervalSplit("X",60000,94821)); L.add(new IntervalSplit("X",144820,231385)); L.add(new IntervalSplit("X",281383,1047558)); L.add(new IntervalSplit("X",1097556,1134114)); L.add(new IntervalSplit("X",1184112,1264235)); L.add(new IntervalSplit("X",1314233,2068239)); L.add(new IntervalSplit("X",2118237,7623883)); L.add(new IntervalSplit("X",7673881,10738675)); L.add(new IntervalSplit("X",10788673,37098257)); L.add(new IntervalSplit("X",37148255,49242998)); L.add(new IntervalSplit("X",49292996,49974174)); L.add(new IntervalSplit("X",50024172,52395915)); L.add(new IntervalSplit("X",52445913,58582013)); L.add(new IntervalSplit("X",61682011,76653693)); L.add(new IntervalSplit("X",76703691,113517669)); L.add(new IntervalSplit("X",113567667,115682291)); L.add(new IntervalSplit("X",115732289,120013236)); L.add(new IntervalSplit("X",120063234,143507325)); L.add(new IntervalSplit("X",143557323,148906425)); L.add(new IntervalSplit("X",148956423,149032063)); L.add(new IntervalSplit("X",149082061,152277100)); L.add(new IntervalSplit("X",152327098,155260561)); L.add(new IntervalSplit("Y",10000,44821)); L.add(new IntervalSplit("Y",94820,181385)); L.add(new IntervalSplit("Y",231383,997558)); L.add(new IntervalSplit("Y",1047556,1084114)); L.add(new IntervalSplit("Y",1134112,1214235)); L.add(new IntervalSplit("Y",1264233,2018239)); L.add(new IntervalSplit("Y",2068237,8914956)); L.add(new IntervalSplit("Y",8964954,9241323)); L.add(new IntervalSplit("Y",9291321,10104554)); L.add(new IntervalSplit("Y",13104552,13143955)); L.add(new IntervalSplit("Y",13193953,13748579)); L.add(new IntervalSplit("Y",13798577,20143886)); L.add(new IntervalSplit("Y",20193884,22369680)); L.add(new IntervalSplit("Y",22419678,23901429)); L.add(new IntervalSplit("Y",23951427,28819362)); L.add(new IntervalSplit("Y",58819360,58917657)); L.add(new IntervalSplit("Y",58967655,59363567)); return L; } public static void main(String[] args) { new NgsWorkflow().instanceMainWithExit(args); } }