/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.genomics.dataflow.pipelines;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import com.google.api.client.util.BackOff;
import com.google.api.client.util.ExponentialBackOff;
import com.google.api.client.util.Lists;
import com.google.api.services.storage.Storage;
import com.google.cloud.dataflow.sdk.options.GcsOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.util.GcsUtil;
import com.google.cloud.dataflow.sdk.util.Transport;
import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
import com.google.cloud.genomics.dataflow.readers.bam.BAMIO;
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;

import htsjdk.samtools.SamReader;
import htsjdk.samtools.ValidationStringency;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Writer;
import java.nio.channels.Channels;
import java.util.List;

/**
 * Shared plumbing for pipeline integration tests: validates the environment-driven test
 * configuration and provides helpers to create, read, and delete test output stored in
 * Google Cloud Storage.
 *
 * <p>Configuration is read from the {@code TEST_PROJECT}, {@code TEST_OUTPUT_GCS_FOLDER} and
 * {@code TEST_STAGING_GCS_FOLDER} environment variables; the constructor fails with a JUnit
 * assertion error if any of them is missing or malformed.
 */
public class IntegrationTestHelper {

  // Test configuration, taken from the environment. Validated in the constructor.
  private static final String TEST_PROJECT = System.getenv("TEST_PROJECT");
  private static final String TEST_OUTPUT_GCS_FOLDER = System.getenv("TEST_OUTPUT_GCS_FOLDER");
  private static final String TEST_STAGING_GCS_FOLDER = System.getenv("TEST_STAGING_GCS_FOLDER");

  // Variant test configuration constants
  public static final String PLATINUM_GENOMES_DATASET = "3049512673186936334";
  public static final String PLATINUM_GENOMES_BRCA1_REFERENCES = "chr17:41196311:41277499";
  public static final int PLATINUM_GENOMES_NUMBER_OF_SAMPLES = 17;
  public static final String A_FEW_PLATINUM_GENOMES_CALLSET_NAMES = "NA12877,NA12880,NA12890";

  private final GenomicsOptions popts =
      PipelineOptionsFactory.create().as(GenomicsOptions.class);

  // Left package-private and non-final so existing same-package code that touches it still
  // compiles; it is assigned once, at the end of the constructor.
  GcsUtil gcsUtil;

  /**
   * Validates the environment configuration and creates the GCS utility used by the helpers.
   *
   * <p>Fails with an {@link AssertionError} when a required environment variable is unset,
   * when an output/staging folder does not start with {@code gs://}, or when the output folder
   * does not end with {@code /}.
   */
  public IntegrationTestHelper() {
    assertNotNull("You must set the TEST_PROJECT environment variable for this test.",
        TEST_PROJECT);
    assertNotNull("You must set the TEST_OUTPUT_GCS_FOLDER environment variable for this test.",
        TEST_OUTPUT_GCS_FOLDER);
    assertNotNull("You must set the TEST_STAGING_GCS_FOLDER environment variable for this test.",
        TEST_STAGING_GCS_FOLDER);
    assertTrue("TEST_OUTPUT_GCS_FOLDER must end with '/'",
        TEST_OUTPUT_GCS_FOLDER.endsWith("/"));
    assertTrue("TEST_OUTPUT_GCS_FOLDER must start with 'gs://'",
        TEST_OUTPUT_GCS_FOLDER.startsWith("gs://"));
    assertTrue("TEST_STAGING_GCS_FOLDER must start with 'gs://'",
        TEST_STAGING_GCS_FOLDER.startsWith("gs://"));
    // we don't care how TEST_STAGING_GCS_FOLDER ends, so no check for it.

    gcsUtil = new GcsUtil.GcsUtilFactory().create(popts);
  }

  /**
   * @return the value of the TEST_PROJECT environment variable
   */
  public String getTestProject() {
    return TEST_PROJECT;
  }

  /**
   * @return the value of the TEST_OUTPUT_GCS_FOLDER environment variable
   */
  public String getTestOutputGcsFolder() {
    return TEST_OUTPUT_GCS_FOLDER;
  }

  /**
   * @return the value of the TEST_STAGING_GCS_FOLDER environment variable
   */
  public String getTestStagingGcsFolder() {
    return TEST_STAGING_GCS_FOLDER;
  }

  /**
   * Make sure we can get to the output for single-file test results.
   *
   * <p>Also write a sentinel value to the file. This protects against the possibility of prior
   * test output causing a newly failing test to appear to succeed.
   *
   * @param outputPath GCS URI of the output file to create
   * @throws IOException if the object cannot be created or written
   */
  public void touchOutput(String outputPath) throws IOException {
    try (Writer writer =
        Channels.newWriter(gcsUtil.create(GcsPath.fromUri(outputPath), "text/plain"), "UTF-8")) {
      writer.write("output will go here");
    }
  }

  /**
   * Open test output for reading for single file test results.
   *
   * <p>The caller owns the returned reader and is responsible for closing it.
   *
   * @param outputPath GCS URI of the file to read
   * @return a UTF-8 reader over the file contents
   * @throws IOException if the object cannot be opened
   */
  public BufferedReader openOutput(String outputPath) throws IOException {
    return new BufferedReader(
        Channels.newReader(gcsUtil.open(GcsPath.fromUri(outputPath)), "UTF-8"));
  }

  /**
   * Open test output as BAM file - useful if your test writes out a BAM file
   * and you want to validate the contents.
   *
   * @param bamFilePath path of the BAM file, passed through to {@code BAMIO.openBAM}
   * @return a reader over the BAM file, opened with lenient validation
   * @throws IOException if the file cannot be opened
   */
  public SamReader openBAM(String bamFilePath) throws IOException {
    final GcsOptions gcsOptions = popts.as(GcsOptions.class);
    final Storage.Objects storage = Transport.newStorageClient(gcsOptions).build().objects();
    return BAMIO.openBAM(storage, bamFilePath, ValidationStringency.LENIENT, true);
  }

  /**
   * Download multi file test output.
   *
   * <p>Some cloud storage operations are eventually consistent
   * https://cloud.google.com/storage/docs/consistency so
   * be robust for those cases: the listing is re-read with exponential back-off until the
   * expected number of lines is seen or the back-off policy is exhausted.
   *
   * <p>A {@link FileNotFoundException} while reading discards whatever was read in that attempt
   * and triggers a retry, so a returned list always comes from one complete pass over the
   * listing. Once the back-off is exhausted, the last complete result is returned even if its
   * size differs from {@code numLinesExpected} (e.g., due to a real test failure), or the last
   * {@code FileNotFoundException} is rethrown.
   *
   * @param outputPrefix prefix for files to download
   * @param numLinesExpected number of lines expected
   * @return lines from all test output files
   * @throws Exception if reading fails, the retries are exhausted on a missing file, or the
   *     thread is interrupted while waiting
   */
  public List<String> downloadOutputs(String outputPrefix, int numLinesExpected)
      throws Exception {
    ExponentialBackOff backoff = new ExponentialBackOff.Builder()
        .setMaxIntervalMillis(6000)
        .build();

    while (true) {
      List<String> results;
      try {
        results = readAllLines(outputPrefix);
      } catch (FileNotFoundException e) {
        // A listed object was not readable yet; wait and start the whole pass over rather
        // than falling through with a partially filled list.
        long retryDelayMillis = backoff.nextBackOffMillis();
        if (retryDelayMillis == BackOff.STOP) {
          throw e;
        }
        Thread.sleep(retryDelayMillis);
        continue;
      }

      if (numLinesExpected == results.size()) {
        return results;
      }

      long backOffMillis = backoff.nextBackOffMillis();
      if (backOffMillis == BackOff.STOP) {
        // We've used all the retries (e.g., due to a real test failure); let the caller's
        // assertions report the mismatch.
        return results;
      }
      Thread.sleep(backOffMillis);
    }
  }

  /**
   * Reads every line of every file matching {@code outputPrefix + "*"} in one pass, closing
   * each reader even when a read fails.
   */
  private List<String> readAllLines(String outputPrefix) throws IOException {
    List<String> lines = Lists.newArrayList();
    for (GcsPath path : gcsUtil.expand(GcsPath.fromUri(outputPrefix + "*"))) {
      try (BufferedReader reader = openOutput(path.toString())) {
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
          lines.add(line);
        }
      }
    }
    return lines;
  }

  /**
   * Delete single file test output.
   *
   * @param outputPath path to the output file to be deleted.
   * @throws Exception if the delete request fails
   */
  public void deleteOutput(String outputPath) throws Exception {
    // boilerplate
    GcsPath path = GcsPath.fromUri(outputPath);
    GcsOptions gcsOptions = popts.as(GcsOptions.class);
    Storage storage = Transport.newStorageClient(gcsOptions).build();
    // do the actual work
    storage.objects().delete(path.getBucket(), path.getObject()).execute();
  }

  /**
   * Delete multi file test output.
   *
   * @param outputPrefix prefix for files to delete
   * @throws Exception if listing the files or any individual delete fails
   */
  public void deleteOutputs(String outputPrefix) throws Exception {
    for (GcsPath path : gcsUtil.expand(GcsPath.fromUri(outputPrefix + "*"))) {
      deleteOutput(path.toString());
    }
  }
}