/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.app.cli.analysis;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.storage.core.manager.OpenCGATestExternalResource;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.db.api.CohortDBAdaptor;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.db.api.JobDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.models.*;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
/**
* Created on 09/05/16
*
* @author Jacobo Coll <jacobo167@gmail.com>
*/
public class AnalysisMainTest {
public static final String STORAGE_ENGINE = "mongodb";
@Rule
public OpenCGATestExternalResource opencga = new OpenCGATestExternalResource();
private CatalogManager catalogManager;
private final String userId = "user";
private final String dbNameVariants = "opencga_variants_test";
private final String dbNameAlignments = "opencga_alignments_test";
private String sessionId;
private long projectId;
private long studyId;
private long outdirId;
private Logger logger = LoggerFactory.getLogger(AnalysisMainTest.class);
@Before
public void setUp() throws Exception {
catalogManager = opencga.getCatalogManager();
opencga.clearStorageDB(STORAGE_ENGINE, dbNameVariants);
opencga.clearStorageDB(STORAGE_ENGINE, dbNameAlignments);
User user = catalogManager.createUser(userId, "User", "user@email.org", "user", "ACME", null, null).first();
sessionId = catalogManager.login(userId, "user", "localhost").first().getId();
projectId = catalogManager.getProjectManager().create("p1", "p1", "Project 1", "ACME", "Homo sapiens",
null, null, "GRCh38", new QueryOptions(), sessionId).first().getId();
Map<File.Bioformat, DataStore> datastores = new HashMap<>();
datastores.put(File.Bioformat.VARIANT, new DataStore(STORAGE_ENGINE, dbNameVariants));
datastores.put(File.Bioformat.ALIGNMENT, new DataStore(STORAGE_ENGINE, dbNameAlignments));
studyId = catalogManager.createStudy(projectId, "s1", "s1", Study.Type.CASE_CONTROL, null, "Study 1", null,
null, null, null, datastores, null,
Collections.singletonMap(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), VariantSource.Aggregation.NONE),
null, sessionId).first().getId();
outdirId = catalogManager.getFileManager().createFolder(Long.toString(studyId), Paths.get("data", "index").toString(), null,
false, null, QueryOptions.empty(), sessionId).first().getId();
}
@Test
public void testVariantIndex() throws Exception {
Job job;
File file1 = opencga.createFile(studyId, "1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
File file2 = opencga.createFile(studyId, "1000g_batches/501-1000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
File file3 = opencga.createFile(studyId, "1000g_batches/1001-1500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
File file4 = opencga.createFile(studyId, "1000g_batches/1501-2000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
File file5 = opencga.createFile(studyId, "1000g_batches/2001-2504.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
// Index file1
execute(new String[]{"variant", "index", "--session-id", sessionId, "--file-id", "user@p1:s1:" + file1.getPath()});
assertEquals(FileIndex.IndexStatus.READY, catalogManager.getFile(file1.getId(), sessionId).first().getIndex().getStatus().getName());
assertEquals(Cohort.CohortStatus.NONE, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), file1.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
// Annotate variants from chr2 (which is not indexed)
execute(new String[]{"variant", "annotate", "--session-id", sessionId, "--study-id", String.valueOf(studyId), "--filter-chromosome", "2"});
// Annotate all variants
execute(new String[]{"variant", "annotate", "--session-id", sessionId, "--study-id", String.valueOf(studyId), "--output-filename", "myAnnot", "-o", String.valueOf(outdirId)});
File outputFile = catalogManager.getAllFiles(studyId, new Query(FileDBAdaptor.QueryParams.NAME.key(), "~myAnnot"), null, sessionId).first();
assertNotNull(outputFile);
job = catalogManager.getJob(outputFile.getJob().getId(), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
assertEquals(outdirId, job.getOutDirId());
// Index file2
execute(new String[]{"variant", "index", "--session-id", sessionId, "--file-id", String.valueOf(file2.getId()), "--calculate-stats", "--outdir", String.valueOf(outdirId)});
assertEquals(FileIndex.IndexStatus.READY, catalogManager.getFile(file2.getId(), sessionId).first().getIndex().getStatus().getName());
assertEquals(Cohort.CohortStatus.READY, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), file2.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
assertEquals(outdirId, job.getOutDirId());
// Annotate all variants
execute(new String[]{"variant", "annotate", "--session-id", sessionId, "--study-id", String.valueOf(studyId), "--outdir-id", String.valueOf(outdirId)});
// Index file3
execute(new String[]{"variant", "index", "--session-id", sessionId, "--file-id", String.valueOf(file3.getId())});
assertEquals(FileIndex.IndexStatus.READY, catalogManager.getFile(file3.getId(), sessionId).first().getIndex().getStatus().getName());
assertEquals(Cohort.CohortStatus.INVALID, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), file3.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
Assert.assertNotEquals(outdirId, job.getOutDirId());
// Index file4 and stats
execute(new String[]{"variant", "index", "--session-id", sessionId, "--file-id", String.valueOf(file4.getId()), "--calculate-stats", "--queue"});
assertEquals(FileIndex.IndexStatus.INDEXING, catalogManager.getFile(file4.getId(), sessionId).first().getIndex().getStatus().getName());
assertEquals(Cohort.CohortStatus.CALCULATING, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), file4.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.PREPARED, job.getStatus().getName());
opencga.runStorageJob(job, sessionId);
assertEquals(FileIndex.IndexStatus.READY, catalogManager.getFile(file4.getId(), sessionId).first().getIndex().getStatus().getName());
assertEquals(Cohort.CohortStatus.READY, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), file4.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
// Index file5 and annotation
execute(new String[]{"variant", "index", "--session-id", sessionId, "--file-id", String.valueOf(file5.getId()), "--annotate"});
assertEquals(FileIndex.IndexStatus.READY, catalogManager.getFile(file5.getId(), sessionId).first().getIndex().getStatus().getName());
assertEquals(Cohort.CohortStatus.INVALID, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), file5.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
execute(new String[]{"variant", "stats", "--session-id", sessionId, "--study-id", String.valueOf(studyId), "--cohort-ids", "ALL", "--outdir-id", String.valueOf(outdirId)});
assertEquals(Cohort.CohortStatus.READY, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
catalogManager.createCohort(studyId, "coh1", Study.Type.CONTROL_SET, "", file1.getSampleIds(), null, sessionId);
catalogManager.createCohort(studyId, "coh2", Study.Type.CONTROL_SET, "", file2.getSampleIds(), null, sessionId);
execute(new String[]{"variant", "stats", "--session-id", sessionId, "--study-id", String.valueOf(studyId), "--cohort-ids", "coh1", "--outdir-id", String.valueOf(outdirId)});
assertEquals(Cohort.CohortStatus.READY, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "coh1"), null, sessionId).first().getStatus().getName());
assertEquals(Cohort.CohortStatus.NONE, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "coh2"), null, sessionId).first().getStatus().getName());
// execute(new String[]{"variant", "query", "--session-id", sessionId, "--study", String.valueOf(studyId), "--limit", "10"});
}
@Test
public void testVariantIndexAndQuery() throws CatalogException, IOException {
Job job;
File file1 = opencga.createFile(studyId, "1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
// File file1 = opencga.createFile(studyId, "variant-test-file.vcf.gz", sessionId);
// File file1 = opencga.createFile(studyId, "100k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
// File file1 = opencga.createFile(studyId, "10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);
QueryResult<Sample> allSamples = catalogManager.getAllSamples(studyId, new Query(), new QueryOptions(), sessionId);
List<Long> sampleIds = allSamples.getResult().stream().map(Sample::getId).collect(Collectors.toList());
Long c1 = catalogManager.createCohort(studyId, "C1", Study.Type.CONTROL_SET, "", sampleIds.subList(0, sampleIds.size() / 2), null, sessionId).first().getId();
Long c2 = catalogManager.createCohort(studyId, "C2", Study.Type.CONTROL_SET, "", sampleIds.subList(sampleIds.size() / 2 + 1, sampleIds.size()), null, sessionId).first().getId();
Long c3 = catalogManager.createCohort(studyId, "C3", Study.Type.CONTROL_SET, "", sampleIds.subList(0, 1), null, sessionId).first().getId();
Sample sample = catalogManager.createSample(studyId, "Sample", "", "", null, null, sessionId).first();
Long c4 = catalogManager.createCohort(studyId, "C4", Study.Type.CONTROL_SET, "", Collections.singletonList(sample.getId()), null, sessionId).first().getId();
// Index file1
execute(new String[]{"variant", "index", "--session-id", sessionId, "--file-id", "user@p1:s1:" + file1.getPath(), "--calculate-stats", "--annotate"});
assertEquals(FileIndex.IndexStatus.READY, catalogManager.getFile(file1.getId(), sessionId).first().getIndex().getStatus().getName());
assertEquals(Cohort.CohortStatus.READY, catalogManager.getAllCohorts(studyId, new Query(CohortDBAdaptor.QueryParams.NAME.key(), "ALL"), null, sessionId).first().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), file1.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
execute(new String[]{"variant", "stats", "--session-id", sessionId, "--study-id", "user@p1:s1", "--cohort-ids", c1 + "," + c2 + "," + c3 + "," + c4});
// execute(new String[]{"variant", "query", "--session-id", sessionId, "--return-sample", "35,36", "--limit", "10"});
System.out.println("------------------------------------------------------");
System.out.println("Export output format: cellbase");
System.out.println("------------------------------------------------------");
execute(new String[]{"variant", "export-frequencies", "--session-id", sessionId, "--limit", "1000", "--output-format", "cellbase"});
// System.out.println("------------------------------------------------------");
// System.out.println("Export output format: avro");
// System.out.println("------------------------------------------------------");
// execute(new String[]{"variant", "query", "--session-id", sessionId, "--output-format", "avro", "--output", "/tmp/100k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz.avro.snappy"});
System.out.println("------------------------------------------------------");
System.out.println("Export output format: avro.snappy");
System.out.println("------------------------------------------------------");
execute(new String[]{"variant", "query", "--session-id", sessionId, "--return-sample", "HG00096,HG00097", "--limit", "10", "--output-format", "avro.snappy"});
System.out.println("------------------------------------------------------");
System.out.println("Export output format: vcf");
System.out.println("------------------------------------------------------");
execute(new String[]{"variant", "query", "--session-id", sessionId, "--return-sample", "HG00096,HG00097", "--limit", "10", "--output-format", "vcf"});
System.out.println("------------------------------------------------------");
System.out.println("Export output format: cellbase (populationFrequencies)");
System.out.println("------------------------------------------------------");
execute(new String[]{"variant", "query", "--session-id", sessionId, "--return-sample", "HG00096,HG00097", "--limit", "10", "--output-format", "cellbase"});
System.out.println("------------------------------------------------------");
System.out.println("Export output format: tsv");
System.out.println("------------------------------------------------------");
execute(new String[]{"variant", "query", "--session-id", sessionId, "--return-sample", "HG00096,HG00097", "--limit", "10", "--output-format", "stats"});
System.out.println("------------------------------------------------------");
System.out.println("Export output format: tsv");
System.out.println("------------------------------------------------------");
execute(new String[]{"variant", "export-frequencies", "--session-id", sessionId, "--limit", "10"});
System.out.println("------------------------------------------------------");
System.out.println("Export output format: vcf");
System.out.println("------------------------------------------------------");
execute(new String[]{"variant", "export-frequencies", "--session-id", sessionId, "--limit", "10", "--output-format", "vcf"});
}
@Test
public void testAlignmentIndex() throws CatalogException, IOException {
Job job;
File bam = opencga.createFile(studyId, "HG00096.chrom20.small.bam", sessionId);
// Index file1
execute(new String[]{"alignment", "index", "--session-id", sessionId, "--file-id", "user@p1:s1:" + bam.getPath()});
assertEquals(FileIndex.IndexStatus.READY, catalogManager.getFile(bam.getId(), sessionId).first().getIndex().getStatus().getName());
job = catalogManager.getAllJobs(studyId, new Query(JobDBAdaptor.QueryParams.INPUT.key(), bam.getId()), null, sessionId).first();
assertEquals(Job.JobStatus.READY, job.getStatus().getName());
execute(new String[]{"alignment", "query", "--session-id", sessionId, "--file-id", "user@p1:s1:" + bam.getPath(), "--region", "20"});
}
public int execute(String[] args) {
int exitValue = AnalysisMain.privateMain(args);
assertEquals(0, exitValue);
return exitValue;
}
}