package uk.ac.sanger.artemis.components.variant;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.EnumSet;
import java.util.List;
import org.apache.log4j.Logger;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.client.Client;
import org.genedb.crawl.bam.BioDataFileStore;
import org.genedb.crawl.bam.BioDataFileStoreInitializer;
import org.genedb.crawl.controller.VariantController;
import org.genedb.crawl.dao.backend.VariantDAO;
import org.genedb.crawl.elasticsearch.Connection;
import org.genedb.crawl.elasticsearch.index.gff.GFFIndexBuilder;
import org.genedb.crawl.elasticsearch.mappers.ElasticSearchFeatureMapper;
import org.genedb.crawl.elasticsearch.mappers.ElasticSearchRegionsMapper;
import org.genedb.crawl.json.JsonIzer;
import org.genedb.crawl.model.Gene;
import org.genedb.crawl.model.LocatedFeature;
import org.genedb.crawl.model.MappedVCFRecord;
import org.genedb.crawl.model.Sequence;
import org.genedb.crawl.model.Variant;
import uk.ac.sanger.artemis.components.variant.VariantFilterOption;
import uk.ac.sanger.artemis.components.variant.VariantFilterOptions;
import uk.ac.sanger.artemis.components.variant.VariantReaderAdapter;
import uk.ac.sanger.artemis.util.OutOfRangeException;
import junit.framework.TestCase;
public class VariantTest extends TestCase {
private static Logger logger = Logger.getLogger(VariantTest.class);
JsonIzer jsonIzer = new JsonIzer();
MyGFFIndexBuilder builder;
class MyGFFIndexBuilder extends GFFIndexBuilder {
ElasticSearchRegionsMapper getRegionsMapper() {
return regionsMapper;
}
ElasticSearchFeatureMapper getFeatureMapper() {
return featureMapper;
}
public Client getClient() {
return client;
}
}
/*
public void testVCF() throws IOException, SecurityException, IllegalArgumentException, ParseException, NoSuchFieldException, IllegalAccessException {
String gffFile = "./src/test/resources/data/Pf3D7_01.gff.gz";
String variantFile = "./src/test/resources/data/Pf3D7_01.vcf.gz";
String region = "Pf3D7_01";
String sequenceNameInVCF = "MAL1";
String organism = "{ \"ID\":27, \"common_name\":\"Pfalciparum\", \"genus\":\"Plasmodium\", \"species\":\"falciparum\", \"translation_table\":11, \"taxonID\":5833}";
int start = 1;
int end = 100000;
logger.info(String.format("Running %s %s %s %s:%d-%d", gffFile, variantFile, organism, region, start, end));
runQueryDirectly(variantFile,sequenceNameInVCF,start,end);
run(gffFile, variantFile, organism, region, start, end);
}
public void testBCF() throws IOException, SecurityException, IllegalArgumentException, ParseException, NoSuchFieldException, IllegalAccessException {
String gffFile = "./src/test/resources/data/Spn23f.gff";
String variantFile= "http://www.genedb.org/artemis/NAR/Spneumoniae/4882_6_10_variant.bcf";
String region = "Spn23F";
String sequenceNameInVCF = "S_pneumoniae_Spanish_23F.dna";
String organism = "{\"ID\":999,\"common_name\":\"Spneumoniae\",\"genus\":\"Streptococcus\",\"species\":\"pneumoniae\",\"translation_table\":1,\"taxonID\":1313}";
int start = 1;
int end = 100000;
logger.info(String.format("Running %s %s %s %s:%d-%d", gffFile, variantFile, organism, region, start, end));
runQueryDirectly(variantFile,sequenceNameInVCF,start,end);
run(gffFile, variantFile, organism, region, start, end);
}
*/
public void testBCFLocal() throws IOException, SecurityException, IllegalArgumentException, ParseException, NoSuchFieldException, IllegalAccessException, OutOfRangeException {
String gffFile = "./src/test/resources/data/Streptococcus_pneumoniae_ATCC_700669_v1.gff";
String variantFile= "./src/test/resources/data/4882_6_10_variant.bcf";
String region = "FM211187";
String sequenceNameInVCF = "S_pneumoniae_Spanish_23F.dna";
String organism = "{\"ID\":999,\"common_name\":\"Spneumoniae\",\"genus\":\"Streptococcus\",\"species\":\"pneumoniae\",\"translation_table\":1,\"taxonID\":1313}";
int start = 1;
int end = 278965;
logger.info(String.format("Running %s %s %s %s:%d-%d", gffFile, variantFile, organism, region, start, end));
runQueryDirectly(variantFile,sequenceNameInVCF,start,end);
startIndex();
List<LocatedFeature> exonFeatures = buildIndex(gffFile, organism, region, start, end);
logger.info("how many gene features? " + exonFeatures.size());
ElasticSearchRegionsMapper regionsMapper = builder.getRegionsMapper();
Sequence regionSequence = regionsMapper.sequence(region);
logger.info(regionSequence.dna);
VariantReaderAdapter vreader = VariantReaderAdapter.getReader(variantFile);
logger.info("generating cds features");
List<CDSFeature> cdsFeatures = vreader.makeCDSFeatures(exonFeatures, regionSequence);
logger.info("how many cds features? " + cdsFeatures.size());
File artemisResults = new File("src/test/resources/data/artemis_vcf_filter_results.txt");
FileInputStream fileStream = new FileInputStream(artemisResults);
BufferedReader reader = new BufferedReader(new InputStreamReader(fileStream));
//Map<Integer,VCFRecord> artemisResultRecords = new HashMap<>
List<MappedVCFRecord> records = null;
String line;
int filter = 0;
logger.info("parsing...");
while ((line = reader.readLine()) != null) {
logger.info(line);
if (line.startsWith("#"))
continue;
if (line.startsWith("HIDE") || line.startsWith("SHOW")) {
boolean show = (line.startsWith("HIDE")) ? false : true;
// SHOW : S_pneumoniae_Spanish_23F.dna 11203 . C T 99.0 . DP=103;AF1=1;CI95=1,1;DP4=0,0,43,60;MQ=60 PL:DP:SP:GT:GQ 255,255,0:103:0:1/1:99
String recordString = line.substring(line.indexOf(":") + 2);
logger.info(recordString);
VCFRecord record = VCFRecord.parse(recordString);
boolean found = false;
for (MappedVCFRecord mappedRecord : records) {
logger.info(String.format("%s = %s , %s = %s", record.getPos() , mappedRecord.pos , record.getQuality(), mappedRecord.quality));
if (record.getPos() == mappedRecord.pos && record.getQuality() == mappedRecord.quality) {
logger.info("found!");
found = true;
}
}
if (show) {
assertTrue("line not found when should be : " + line, found);
} else {
assertFalse("line found when shouldn't be : " + line, found);
}
continue;
}
// showSynonymous, selfshowNonSynonymous, selfshowDeletions, selfshowInsertions, selfshowMultiAlleles, selfshowNonOverlappings,selfshowNonVariantsself
String[] columns=line.split("\t");
String[] coords = columns[0].split("-");
int sbeg = Integer.parseInt(coords[0]);
int send = Integer.parseInt(coords[1]);
boolean showSynonymous= Boolean.parseBoolean(columns[1]);
boolean showNonSynonymous= Boolean.parseBoolean(columns[2]);
boolean showDeletions= Boolean.parseBoolean(columns[3]);
boolean showInsertions= Boolean.parseBoolean(columns[4]);
boolean showMultiAlleles= Boolean.parseBoolean(columns[5]);
boolean showNonOverlappings= Boolean.parseBoolean(columns[6]);
boolean showNonVariants= Boolean.parseBoolean(columns[7]);
logger.info(
String.format(
"%s-%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
sbeg,
send,
showSynonymous,
showNonSynonymous,
showDeletions,
showInsertions,
showMultiAlleles,
showNonOverlappings,
showNonVariants));
filter = 0;
if (showSynonymous)
filter += VariantFilterOption.SHOW_SYNONYMOUS.index();
if (showNonSynonymous)
filter += VariantFilterOption.SHOW_NON_SYNONYMOUS.index();
if (showDeletions)
filter += VariantFilterOption.SHOW_DELETIONS.index();
if (showInsertions)
filter += VariantFilterOption.SHOW_INSERTIONS.index();
if (showMultiAlleles)
filter += VariantFilterOption.SHOW_MULTI_ALLELES.index();
if (showNonOverlappings)
filter += VariantFilterOption.SHOW_NON_OVERLAPPINGS.index();
if (showNonVariants)
filter += VariantFilterOption.SHOW_NON_VARIANTS.index();
VariantFilterOptions options = new VariantFilterOptions(filter);
records = vreader.query(sequenceNameInVCF, start, end, cdsFeatures, options);
jsonIzer.setPretty(true);
logger.info(jsonIzer.toJson(records));
logger.info("records length : " + records.size());
}
closeIndex();
}
private void runQueryDirectly(String variantFile, String region, int start, int end) throws IOException {
logger.info("runQueryDirectly");
VariantReaderAdapter reader = VariantReaderAdapter.getReader(variantFile);
logger.info(reader.getSeqNames());
List<?> records = reader.unFilteredQuery(region, start, end);
for (Object record : records) {
logger.info(record);
}
}
private List<LocatedFeature> buildIndex (
String gffFile,
String organism,
String region,
int start,
int end) throws SecurityException, IllegalArgumentException, IOException, ParseException, NoSuchFieldException, IllegalAccessException {
//logger.info("bui");
// builder = new MyGFFIndexBuilder();
// builder.elasticSearchPropertiesFile = new File("resource-elasticsearch-local.properties");
//
builder.gffs = gffFile;
builder.organism = organism;
builder.run();
ElasticSearchRegionsMapper regionsMapper = builder.getRegionsMapper();
ElasticSearchFeatureMapper featureMapper = builder.getFeatureMapper();
Connection.waitForStatus(builder.getClient(), EnumSet.of(ClusterHealthStatus.YELLOW, ClusterHealthStatus.GREEN));;
//String sequence = region;
// Sequence regionSequence = regionsMapper.sequence(sequence);
List<LocatedFeature> features = VariantDAO.getExons(region, start, end, regionsMapper, featureMapper);
//
for (LocatedFeature f : features) {
logger.info("gene : " + f.uniqueName + " / " + f.region+ ":" + f.fmin + "-" + f.fmax);
}
return features;
//
// //VariantFilterOptions opts = new VariantFilterOptions(EnumSet.allOf(VariantFilterOption.class));
// logger.info(opts);
// BioDataFileStoreInitializer initializer = new BioDataFileStoreInitializer();
// initializer.setAlignmentFiles(new File("./etc/alignments.json"));
//
// BioDataFileStore<Variant> variantStore = initializer.getVariants();
//
// String alignmentName = variantStore.getAlignmentFromName(sequence);
//
// VariantReaderAdapter reader = VariantReaderAdapter.getReader(variantFile);
// logger.info(reader.getSeqNames());
//
// List<MappedVCFRecord> records = reader.query(sequenceNameInVCF, start, end, geneFeatures, opts, regionSequence);
////
// jsonIzer.setPretty(true);
// String result = jsonIzer.toJson(records);
////
//// assertTrue(records.size() > 0);
//// assertNotNull(result != null);
////
// logger.info(result);
//List<VCFRecord> recordsUnfiltered = reader.unFilteredQuery(region, start, end);
}
void startIndex() {
builder = new MyGFFIndexBuilder();
builder.elasticSearchPropertiesFile = new File(
"resource-elasticsearch-local.properties");
}
void closeIndex() {
builder.closeIndex();
}
}