package uk.ac.sanger.artemis.components.variant;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.log4j.Logger;
import org.genedb.crawl.model.Exon;
import org.genedb.crawl.model.Gene;
import org.genedb.crawl.model.LocatedFeature;
import org.genedb.crawl.model.MappedVCFRecord;
import org.genedb.crawl.model.Sequence;
import org.genedb.crawl.model.Transcript;
import uk.ac.sanger.artemis.io.Range;
import uk.ac.sanger.artemis.io.RangeVector;
import uk.ac.sanger.artemis.sequence.Bases;
import uk.ac.sanger.artemis.util.OutOfRangeException;
public abstract class VariantReaderAdapter {
private static Logger logger = Logger.getLogger(VariantReaderAdapter.class);
protected AbstractVCFReader abstractReader;
public static final VariantReaderAdapter getReader(String url) throws IOException {
if (IOUtils.isBCF(url)) {
logger.info("BCF " + url);
return new BCFReaderAdapter(url);
}
logger.info("Tabix " + url);
return new TabixReaderAdapter(url);
}
public abstract void close() throws IOException;
public List<VCFRecord> unFilteredQuery(String region,
int start,
int end) throws IOException {
logger.info("BEGIN QUERY " + region + ":" + start + "-" + end);
List<VCFRecord> records = new ArrayList<VCFRecord>();
VCFRecord record;
while((record = abstractReader.getNextRecord(region, start, end)) != null) {
records.add(record);
}
return records;
}
@SuppressWarnings("unchecked")
public List<CDSFeature> makeCDSFeatures(List<LocatedFeature> features, Sequence regionSequence) throws OutOfRangeException {
List<CDSFeature> cdsFeatures = new ArrayList<CDSFeature>();
Map<String, List<LocatedFeature>> parented = new HashMap<String,List<LocatedFeature>>();
for (LocatedFeature feature : features) {
if (! feature.type.name.equals("exon"))
continue;
if (feature.parent != null && feature.parentRelationshipType.equals("part_of")) {
if (parented.get(feature.parent) == null)
parented.put(feature.parent, new ArrayList<LocatedFeature>());
parented.get(feature.parent).add(feature);
}
cdsFeatures.add(makeCDSFeature(feature,regionSequence));
}
for (Entry<String,List<LocatedFeature>> parentedFeatures : parented.entrySet()) {
cdsFeatures.add(
makeCDSFeature(parentedFeatures.getValue(), regionSequence));
}
return cdsFeatures;
}
// TODO currently assumes they are in the correct order!
@SuppressWarnings("unchecked")
private CDSFeature makeCDSFeature(List<LocatedFeature> features, Sequence regionSequence) throws OutOfRangeException {
StringBuilder bases = new StringBuilder();
RangeVector rv = new RangeVector();
boolean isFwd = ( features.get(0).strand > 0) ? true : false;
int min = Integer.MAX_VALUE;
int max = 0;
for (LocatedFeature feature : features) {
rv.add(new Range(feature.fmin, feature.fmax));
bases.append(regionSequence.dna.subSequence(feature.fmin, feature.fmax));
if (feature.fmin < min) {
min = feature.fmin;
}
if (feature.fmax > max) {
max = feature.fmax;
}
}
String b = bases.toString();
if (! isFwd)
b = Bases.reverseComplement(b);
CDSFeature cdsFeature = new CDSFeature(isFwd, rv, min + 1, max, b);
return cdsFeature;
}
@SuppressWarnings("unchecked")
private CDSFeature makeCDSFeature(LocatedFeature feature, Sequence regionSequence) throws OutOfRangeException {
boolean isFwd = ( feature.strand > 0) ? true : false;
RangeVector rv = new RangeVector();
rv.add(new Range(feature.fmin, feature.fmax));
String bases = regionSequence.dna.substring(feature.fmin, feature.fmax);
if (! isFwd)
bases = Bases.reverseComplement(bases);
CDSFeature cdsFeature = new CDSFeature(isFwd, rv, feature.fmin + 1, feature.fmax, bases);
return cdsFeature;
}
@SuppressWarnings("unchecked")
@Deprecated
public List<CDSFeature> genesToCDSFeature(
List<Gene> genes,
Sequence regionSequence) {
List<CDSFeature> cdsFeatures = new ArrayList<CDSFeature>();
logger.info("Genes : " + genes.size());
for (Gene gene : genes) {
//logger.info(gene.uniqueName);
for (Transcript t : gene.transcripts) {
boolean isFwd = ( gene.strand > 0) ? true : false;
RangeVector rv = new RangeVector();
int min = Integer.MAX_VALUE;
int max = 0;
StringBuilder bases = new StringBuilder();
for (Exon e : t.exons) {
try {
rv.add(new Range(e.fmin, e.fmax));
bases.append(regionSequence.dna.subSequence(e.fmin, e.fmax));
if (e.fmin < min) {
min = e.fmin;
}
if (e.fmax > max) {
max = e.fmax;
}
} catch (OutOfRangeException e1) {
throw new RuntimeException(e1);
}
}
String b = bases.toString();
if (! isFwd)
b = Bases.reverseComplement(b);
CDSFeature cdsFeature = new CDSFeature(isFwd, rv, min + 1, max, b);
cdsFeatures.add(cdsFeature);
}
}
logger.info("CDSFeatures : " + cdsFeatures.size());
assert(cdsFeatures.size() >= genes.size());
return cdsFeatures;
}
public List<MappedVCFRecord> query(
String region,
int start,
int end,
List<CDSFeature> cdsFeatures,
VariantFilterOptions options) throws IOException {
List<MappedVCFRecord> records = new ArrayList<MappedVCFRecord>();
//logger.info("BEGIN QUERY " + region + ":" + start + "-" + end);
logger.info(
String.format(
"FILTER\t%s-%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
start,
end,
options.isEnabled(VariantFilterOption.SHOW_SYNONYMOUS),
options.isEnabled(VariantFilterOption.SHOW_NON_SYNONYMOUS),
options.isEnabled(VariantFilterOption.SHOW_DELETIONS),
options.isEnabled(VariantFilterOption.SHOW_INSERTIONS),
options.isEnabled(VariantFilterOption.SHOW_MULTI_ALLELES),
options.isEnabled(VariantFilterOption.SHOW_NON_OVERLAPPINGS),
options.isEnabled(VariantFilterOption.SHOW_NON_VARIANTS)));
//logger.info(options);
VCFRecord record;
while((record = abstractReader.getNextRecord(region, start, end)) != null) {
//logger.info(record);
if (showRecord(record, cdsFeatures, options, record.getPos())) {
records.add(processRecord(record));
} else {
logger.warn("not showing " + record.getPos());
}
}
return records;
}
public List<String> getSeqNames() {
return Arrays.asList(abstractReader.getSeqNames());
}
protected boolean showRecord(
VCFRecord record,
List<CDSFeature> cdsFeatures,
VariantFilterOptions options,
int basePosition) {
if (!options.isEnabled(VariantFilterOption.SHOW_DELETIONS) //.showDeletions
&& record.getAlt().isDeletion(isVcf_v4()))
return false;
if (!options.isEnabled(VariantFilterOption.SHOW_INSERTIONS) //.showInsertions
&& record.getAlt().isInsertion(isVcf_v4()))
return false;
if (!options.isEnabled(VariantFilterOption.SHOW_NON_OVERLAPPINGS) //.showNonOverlappings
&& ! VCFview.isOverlappingFeature(cdsFeatures, basePosition))
return false;
if (!options.isEnabled(VariantFilterOption.SHOW_NON_VARIANTS) /*.showNonVariants*/ && record.getAlt().isNonVariant())
return false;
short isSyn = record.getSynFlag(cdsFeatures, basePosition);
logger.info("ISSYNONYMOUS\t"+record.getPos() +"\t" + isSyn);
if (options.isEnabled(VariantFilterOption.MARK_NEW_STOPS) //.markNewStops
&& !record.getAlt().isDeletion(isVcf_v4())
&& !record.getAlt().isInsertion(isVcf_v4())
&& record.getAlt().length() == 1
&& record.getRef().length() == 1) {
if (isSyn == 2)
record.setMarkAsNewStop(true);
}
if ((!options.isEnabled(VariantFilterOption.SHOW_SYNONYMOUS) /*.showSynonymous*/ || !options.isEnabled(VariantFilterOption.SHOW_NON_SYNONYMOUS) /*.showNonSynonymous*/)
&& !record.getAlt().isDeletion(isVcf_v4())
&& !record.getAlt().isInsertion(isVcf_v4())
&& record.getAlt().length() == 1
&& record.getRef().length() == 1) {
if ((!options.isEnabled(VariantFilterOption.SHOW_SYNONYMOUS) /*.showSynonymous*/ && isSyn == 1)
|| (!options.isEnabled(VariantFilterOption.SHOW_NON_SYNONYMOUS) /*.showNonSynonymous*/ && (isSyn == 0 || isSyn == 2)))
return false;
}
if (!options.isEnabled(VariantFilterOption.SHOW_MULTI_ALLELES) //.showMultiAlleles
&& record.getAlt().isMultiAllele())
return false;
return true;
}
protected MappedVCFRecord processRecord (VCFRecord record) {
MappedVCFRecord mappedRecord = new MappedVCFRecord();
mappedRecord.markAsNewStop = record.isMarkAsNewStop();
mappedRecord.chrom = record.getChrom();
mappedRecord.pos = record.getPos();
mappedRecord.quality = record.getQuality();
mappedRecord.ref = record.getRef();
mappedRecord.ref_length = mappedRecord.ref.length();
if (record.getAlt().isMultiAllele()) {
mappedRecord.alt.isMultiAllele = true;
} else if (record.getAlt().isDeletion(isVcf_v4())) {
mappedRecord.alt.isDeletion = true;
} else if (record.getAlt().isInsertion(isVcf_v4())) {
mappedRecord.alt.isInsertion = true;
}
mappedRecord.alt.length = record.getAlt().length();
mappedRecord.alt.alternateBase = record.getAlt().toString();
return mappedRecord;
}
private boolean isVcf_v4()
{
return abstractReader.isVcf_v4();
}
}