/*
* Copyright (C) 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.genomics.dataflow.utils;
import com.google.api.services.genomics.model.CallSet;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.Validation.Required;
import com.google.cloud.genomics.utils.CallSetUtils;
import com.google.cloud.genomics.utils.GenomicsUtils;
import com.google.common.base.CharMatcher;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.BiMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.io.Files;
import com.google.genomics.v1.StreamVariantsRequest;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
/**
* A common options class for variant sets and call sets.
*/
public interface CallSetNamesOptions extends GenomicsOptions {
@Required
@Description("The ID of the Google Genomics variant set this pipeline is accessing.")
String getVariantSetId();
void setVariantSetId(String variantSetId);
@Description("A comma-separated list of callset names. Use this option or "
+ "--callSetNamesFilepath to specify the subset of callsets over which "
+ "this pipeline should compute.")
String getCallSetNames();
void setCallSetNames(String callSetNames);
@Description("A local file path to a list of newline-separated callset names. "
+ "Use this option or --callSetNames to specify the subset of callsets "
+ "over which this pipeline should compute.")
String getCallSetNamesFilepath();
void setCallSetNamesFilepath(String callSetNamesFilepath);
public static class Methods {
/**
* Construct a request prototype with several fields already filled in using option values.
*
* @param options
* @return the request prototype
* @throws IOException
*/
public static StreamVariantsRequest getRequestPrototype(final CallSetNamesOptions options) throws IOException {
StreamVariantsRequest.Builder request = StreamVariantsRequest.newBuilder()
.setVariantSetId(options.getVariantSetId())
.addAllCallSetIds(getCallSetIds(options));
if (null != options.getProject()) {
request.setProjectId(options.getProject());
}
return request.build();
}
/**
* Parse and return the unique call set names specified in the options.
*
* @param options
* @return a list of unique call set names
* @throws IOException
*/
public static List<String> getCallSetNames(final CallSetNamesOptions options) throws IOException {
Preconditions.checkArgument(
null == options.getCallSetNames() || null == options.getCallSetNamesFilepath(),
"Only specify one of --callSetNamesList or --callSetNamesFilepath");
if (null != options.getCallSetNamesFilepath()) {
String fileContents =
Files.toString(new File(options.getCallSetNamesFilepath()), Charset.defaultCharset());
return ImmutableSet
.<String>builder()
.addAll(
Splitter.on(CharMatcher.breakingWhitespace()).omitEmptyStrings().trimResults()
.split(fileContents)).build().asList();
}
if (null != options.getCallSetNames()) {
return ImmutableSet
.<String>builder()
.addAll(
Splitter.on(CharMatcher.is(',')).omitEmptyStrings().trimResults()
.split(options.getCallSetNames())).build().asList();
}
return ImmutableSet.<String>builder().build().asList();
}
/**
* Return the call set ids corresponding to the call set names provided in the options.
*
* This has a side-effect of confirming that the call set names within the variant set are unique.
*
* @param options
* @return a list of unique call set ids
* @throws IOException
*/
public static List<String> getCallSetIds(final CallSetNamesOptions options) throws IOException {
List<String> callSetNames = getCallSetNames(options);
if (callSetNames.isEmpty()) {
return callSetNames; // Return the empty list.
}
ImmutableSet.Builder<String> callSetIds = ImmutableSet.<String>builder();
Iterable<CallSet> callSets = GenomicsUtils.getCallSets(options.getVariantSetId(),
GenomicsOptions.Methods.getGenomicsAuth(options));
BiMap<String,String> nameToId = null;
try {
nameToId = CallSetUtils.getCallSetNameMapping(callSets);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException("VariantSet " + options.getVariantSetId()
+ " contains duplicate callset name(s).", e);
}
for (String callSetName : callSetNames) {
String id = nameToId.get(callSetName);
Preconditions.checkNotNull(id,
"Call set name '%s' does not correspond to a call set id in variant set id %s",
callSetName, options.getVariantSetId());
callSetIds.add(id);
}
return callSetIds.build().asList();
}
}
}