/*
* Copyright (C) 2014 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.genomics.dataflow.pipelines;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import java.io.BufferedReader;
/**
* This integration test will read and write to Cloud Storage, and call the Genomics API.
*
* The following environment variables are required:
* - a Google Cloud API key in GOOGLE_API_KEY,
* - a Google Cloud project name in TEST_PROJECT,
* - a Cloud Storage folder path in TEST_OUTPUT_GCS_FOLDER to store temporary test outputs,
* - a Cloud Storage folder path in TEST_STAGING_GCS_FOLDER to store temporary files.
*
* Cloud Storage folder paths should be of the form "gs://bucket/folder/"
*
* When doing e.g. mvn install, you can skip integration tests using:
* mvn install -DskipITs
*
* To run one test:
* mvn -Dit.test=CountReadsITCase#testLocal verify
*
* See also http://maven.apache.org/surefire/maven-failsafe-plugin/examples/single-test.html
*/
@RunWith(JUnit4.class)
public class CountReadsITCase {

  // This file shouldn't move.
  static final String TEST_BAM_FNAME = "gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/NA06985/alignment/NA06985.454.MOSAIK.SRP000033.2009_11.bam";
  // This is the Readgroupset ID of the same file, in ReadStore. It also shouldn't move.
  static final String TEST_READGROUPSET = "CMvnhpKTFhDvp9zAvYj66AY";
  // The region where we're counting reads.
  static final String TEST_CONTIG = "1:550000:560000";
  // How many reads are in that region.
  static final long TEST_EXPECTED = 685;
  // In this file there are no unmapped reads, so expecting the same number.
  static final long TEST_EXPECTED_WITH_UNMAPPED = TEST_EXPECTED;

  // Same as the above variables, but for the NA12877_S1 dataset.
  static final String NA12877_S1_BAM_FILENAME = "gs://genomics-public-data/platinum-genomes/bam/NA12877_S1.bam";
  static final String NA12877_S1_READGROUPSET = "CMvnhpKTFhD3he72j4KZuyc";
  static final String NA12877_S1_CONTIG = "chr17:41196311:41277499";
  static final long NA12877_S1_EXPECTED = 45081;
  // How many reads are in that region if we take unmapped ones too.
  static final long NA12877_S1_EXPECTED_WITH_UNMAPPED = 45142;

  // Shared by every test in this class; created once in setUpBeforeClass().
  static IntegrationTestHelper helper;

  /** Creates the shared {@link IntegrationTestHelper} before any test runs. */
  @BeforeClass
  public static void setUpBeforeClass() {
    helper = new IntegrationTestHelper();
  }

  /**
   * Runs {@code CountReads.main} with the given arguments and checks that the first line of the
   * output parses to the expected count.
   *
   * <p>The output is touched before the run and always deleted afterwards, whether or not the
   * run or the assertion succeeded.
   *
   * @param output full path of the output, as also passed to the pipeline via {@code --output}
   * @param args command-line arguments handed to {@code CountReads.main}
   * @param expectedCount the read count the first output line must contain
   * @throws Exception if the pipeline, the helper, or reading the output fails
   */
  private static void runAndCheckCount(String output, String[] args, long expectedCount)
      throws Exception {
    try {
      helper.touchOutput(output);
      CountReads.main(args);
      // try-with-resources so the reader is closed even when an assertion fails.
      try (BufferedReader reader = helper.openOutput(output)) {
        String firstLine = reader.readLine();
        // readLine() yields null on an empty output; fail with a readable message instead of
        // letting Long.parseLong choke on null.
        Assert.assertNotNull("No count found in output " + output, firstLine);
        Assert.assertEquals(expectedCount, Long.parseLong(firstLine));
      }
    } finally {
      helper.deleteOutput(output);
    }
  }

  /**
   * Runs CountReads over a BAM file without specifying a runner and checks the resulting count.
   *
   * @param outputFilename name of the output, relative to the test output GCS folder
   * @param contig value passed as {@code --references}
   * @param bamFilename value passed as {@code --BAMFilePath}
   * @param expectedCount the read count the output must contain
   * @param includeUnmapped value passed as {@code --includeUnmapped}
   * @throws Exception if the run or the verification fails
   */
  private void testLocalBase(String outputFilename, String contig, String bamFilename,
      long expectedCount, boolean includeUnmapped) throws Exception {
    final String output = helper.getTestOutputGcsFolder() + outputFilename;
    String[] args = {
        "--project=" + helper.getTestProject(),
        "--output=" + output,
        "--references=" + contig,
        "--includeUnmapped=" + includeUnmapped,
        "--BAMFilePath=" + bamFilename,
    };
    runAndCheckCount(output, args, expectedCount);
  }

  /**
   * CountReads running on the client's machine.
   */
  @Test
  public void testLocal() throws Exception {
    testLocalBase("CountReadsITCase-testLocal-output.txt",
        TEST_CONTIG, TEST_BAM_FNAME, TEST_EXPECTED, false);
  }

  /**
   * Same as {@link #testLocal()}, but with unmapped reads included.
   */
  @Test
  public void testLocalUnmapped() throws Exception {
    // Distinct output name so this test never shares an output object with testLocal().
    testLocalBase("CountReadsITCase-testLocalUnmapped-output.txt",
        TEST_CONTIG, TEST_BAM_FNAME, TEST_EXPECTED_WITH_UNMAPPED, true);
  }

  /**
   * CountReads running on the client's machine, against the NA12877_S1 dataset.
   */
  @Test
  public void testLocalNA12877_S1() throws Exception {
    testLocalBase("CountReadsITCase-testLocal-NA12877_S1-output.txt",
        NA12877_S1_CONTIG, NA12877_S1_BAM_FILENAME, NA12877_S1_EXPECTED, false);
  }

  /**
   * Same as {@link #testLocalNA12877_S1()}, but with unmapped reads included.
   */
  @Test
  public void testLocalNA12877_S1_UNMAPPED() throws Exception {
    // Distinct output name so this test never shares an output object with
    // testLocalNA12877_S1().
    testLocalBase("CountReadsITCase-testLocalUnmapped-NA12877_S1-output.txt",
        NA12877_S1_CONTIG, NA12877_S1_BAM_FILENAME,
        NA12877_S1_EXPECTED_WITH_UNMAPPED, true);
  }

  /**
   * Runs CountReads over a BAM file with the BlockingDataflowPipelineRunner and two workers,
   * then checks the resulting count.
   *
   * @param outputFilename name of the output, relative to the test output GCS folder
   * @param contig value passed as {@code --references}
   * @param bamFilename value passed as {@code --BAMFilePath}
   * @param expectedCount the read count the output must contain
   * @throws Exception if the run or the verification fails
   */
  private void testCloudBase(String outputFilename, String contig, String bamFilename,
      long expectedCount) throws Exception {
    final String output = helper.getTestOutputGcsFolder() + outputFilename;
    String[] args = {
        "--project=" + helper.getTestProject(),
        "--output=" + output,
        "--numWorkers=2",
        "--runner=BlockingDataflowPipelineRunner",
        "--stagingLocation=" + helper.getTestStagingGcsFolder(),
        "--references=" + contig,
        "--BAMFilePath=" + bamFilename
    };
    runAndCheckCount(output, args, expectedCount);
  }

  /**
   * CountReads running on Dataflow.
   */
  @Test
  public void testCloud() throws Exception {
    testCloudBase("CountReadsITCase-testCloud-output.txt",
        TEST_CONTIG, TEST_BAM_FNAME, TEST_EXPECTED);
  }

  /**
   * CountReads running on Dataflow, against the NA12877_S1 dataset.
   */
  @Test
  public void testCloudNA12877_S1() throws Exception {
    testCloudBase("CountReadsITCase-testCloud-NA12877_S1-output.txt",
        NA12877_S1_CONTIG, NA12877_S1_BAM_FILENAME, NA12877_S1_EXPECTED);
  }

  /**
   * Runs CountReads with the BlockingDataflowPipelineRunner and two workers, taking its input
   * from a read group set ID instead of a BAM file, then checks the resulting count.
   *
   * <p>This is a shared helper rather than a test: it carries no {@code @Test} annotation. It is
   * kept public so the class's existing interface does not change.
   *
   * @param outputFilename name of the output, relative to the test output GCS folder
   * @param contig value passed as {@code --references}
   * @param readGroupSetId value passed as {@code --readGroupSetId}
   * @param expectedCount the read count the output must contain
   * @throws Exception if the run or the verification fails
   */
  public void testCloudWithAPIBase(String outputFilename, String contig, String readGroupSetId,
      long expectedCount) throws Exception {
    final String output = helper.getTestOutputGcsFolder() + outputFilename;
    String[] args = {
        "--project=" + helper.getTestProject(),
        "--output=" + output,
        "--numWorkers=2",
        "--runner=BlockingDataflowPipelineRunner",
        "--stagingLocation=" + helper.getTestStagingGcsFolder(),
        "--references=" + contig,
        "--readGroupSetId=" + readGroupSetId
    };
    runAndCheckCount(output, args, expectedCount);
  }

  /**
   * CountReads running on Dataflow with API input.
   */
  @Test
  public void testCloudWithAPI() throws Exception {
    testCloudWithAPIBase("CountReadsITCase-testCloudWithAPI-output.txt",
        TEST_CONTIG, TEST_READGROUPSET, TEST_EXPECTED);
  }

  /**
   * CountReads running on Dataflow with API input, against the NA12877_S1 dataset.
   */
  @Test
  public void testCloudWithAPI_NA12877_S1() throws Exception {
    testCloudWithAPIBase("CountReadsITCase-testCloudWithAPI-NA12877_S1-output.txt",
        NA12877_S1_CONTIG, NA12877_S1_READGROUPSET, NA12877_S1_EXPECTED);
  }
}