/*
* Copyright (C) 2014 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.genomics.dataflow.pipelines;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import htsjdk.samtools.BAMIndexMetaData;
import htsjdk.samtools.SamReader;
/**
* This integration test will read and write to Cloud Storage, and call the Genomics API.
*
* The following environment variables are required:
* - a Google Cloud API key in GOOGLE_API_KEY,
* - a Google Cloud project name in TEST_PROJECT,
* - a Cloud Storage folder path in TEST_OUTPUT_GCS_FOLDER to store temporary test outputs,
* - a Cloud Storage folder path in TEST_STAGING_GCS_FOLDER to store temporary files,
*
* Cloud Storage folder paths should be of the form "gs://bucket/folder/"
*
* When doing e.g. mvn install, you can skip integration tests using:
* mvn install -DskipITs
*
* To run one test:
* mvn -Dit.test=ShardedBAMWritingITCase#testShardedWriting verify
*
* See also http://maven.apache.org/surefire/maven-failsafe-plugin/examples/single-test.html
*/
@RunWith(JUnit4.class)
public class ShardedBAMWritingITCase {
static final String TEST_CONTIG = "11:0:200000000";
static final String TEST_BAM_FNAME = "gs://genomics-public-data/ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/data/NA12878/exome_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.exome.20121211.bam";
static final String OUTPUT_FNAME = "sharded-output.bam";
static final int EXPECTED_ALL_READS = 10414236;
static final int EXPECTED_UNMAPPED_READS = 108950;
static IntegrationTestHelper helper;
@BeforeClass
public static void setUpBeforeClass() {
helper = new IntegrationTestHelper();
}
@Test
public void testShardedWriting() throws Exception {
final String OUTPUT = helper.getTestOutputGcsFolder() + OUTPUT_FNAME;
String[] ARGS = {
"--project=" + helper.getTestProject(),
"--output=" + OUTPUT,
"--numWorkers=18",
"--runner=BlockingDataflowPipelineRunner",
"--stagingLocation=" + helper.getTestStagingGcsFolder(),
"--references=" + TEST_CONTIG,
"--BAMFilePath=" + TEST_BAM_FNAME,
"--lociPerWritingShard=1000000"
};
SamReader reader = null;
try {
helper.touchOutput(OUTPUT);
ShardedBAMWriting.main(ARGS);
reader = helper.openBAM(OUTPUT);
Assert.assertTrue(reader.hasIndex());
final int sequenceIndex = reader.getFileHeader().getSequenceIndex("11");
BAMIndexMetaData metaData = reader.indexing().getIndex().getMetaData(sequenceIndex);
Assert.assertEquals(EXPECTED_ALL_READS - EXPECTED_UNMAPPED_READS,
metaData.getAlignedRecordCount());
// Not handling unmapped reads yet
// Assert.assertEquals(EXPECTED_UNMAPPED_READS,
// metaData.getUnalignedRecordCount());
} finally {
if (reader != null) {
reader.close();
}
helper.deleteOutput(OUTPUT);
}
}
}