/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.dataflow.options;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.api.services.dataflow.Dataflow;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.beam.runners.dataflow.util.DataflowTransport;
import org.apache.beam.runners.dataflow.util.GcsStager;
import org.apache.beam.runners.dataflow.util.Stager;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.Hidden;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.util.InstanceBuilder;
/**
 * Internal. Options used to control execution of the Dataflow SDK for
 * debugging and testing purposes.
 */
@Description("[Internal] Options used to control execution of the Dataflow SDK for "
    + "debugging and testing purposes.")
@Hidden
public interface DataflowPipelineDebugOptions extends PipelineOptions {

  /**
   * The list of backend experiments to enable.
   *
   * <p>Dataflow provides a number of experimental features that can be enabled
   * with this flag.
   *
   * <p>Please sync with the Dataflow team before enabling any experiments.
   */
  @Description("[Experimental] Dataflow provides a number of experimental features that can "
      + "be enabled with this flag. Please sync with the Dataflow team before enabling any "
      + "experiments.")
  @Experimental
  @Nullable
  List<String> getExperiments();
  void setExperiments(@Nullable List<String> value);

  /**
   * The root URL for the Dataflow API. {@code dataflowEndpoint} can override this value
   * if it contains an absolute URL, otherwise {@code apiRootUrl} will be combined with
   * {@code dataflowEndpoint} to generate the full URL to communicate with the Dataflow API.
   */
  @Description("The root URL for the Dataflow API. dataflowEndpoint can override this "
      + "value if it contains an absolute URL, otherwise apiRootUrl will be combined with "
      + "dataflowEndpoint to generate the full URL to communicate with the Dataflow API.")
  @Default.String(Dataflow.DEFAULT_ROOT_URL)
  String getApiRootUrl();
  void setApiRootUrl(String value);

  /**
   * Dataflow endpoint to use.
   *
   * <p>Defaults to the current version of the Google Cloud Dataflow
   * API, at the time the current SDK version was released.
   *
   * <p>If the string contains "://", then this is treated as a URL,
   * otherwise {@link #getApiRootUrl()} is used as the root
   * URL.
   */
  @Description("The URL for the Dataflow API. If the string contains \"://\", this"
      + " will be treated as the entire URL, otherwise will be treated relative to apiRootUrl.")
  @Default.String(Dataflow.DEFAULT_SERVICE_PATH)
  String getDataflowEndpoint();
  void setDataflowEndpoint(String value);

  /**
   * The path to write the translated Dataflow job specification out to
   * at job submission time. The Dataflow job specification will be represented in JSON
   * format.
   */
  @Description("The path to write the translated Dataflow job specification out to "
      + "at job submission time. The Dataflow job specification will be represented in JSON "
      + "format.")
  String getDataflowJobFile();
  void setDataflowJobFile(String value);

  /**
   * The class responsible for staging resources to be accessible by workers
   * during job execution. If stager has not been set explicitly, an instance of this class
   * will be created and used as the resource stager.
   */
  @Description("The class of the stager that should be created and used to stage resources. "
      + "If stager has not been set explicitly, an instance of this class will be created "
      + "and used as the resource stager.")
  @Default.Class(GcsStager.class)
  Class<? extends Stager> getStagerClass();
  void setStagerClass(Class<? extends Stager> stagerClass);

  /**
   * The resource stager instance that should be used to stage resources.
   * If no stager has been set explicitly, the default is to use the instance factory
   * that constructs a resource stager based upon the currently set stagerClass.
   */
  @JsonIgnore
  @Description("The resource stager instance that should be used to stage resources. "
      + "If no stager has been set explicitly, the default is to use the instance factory "
      + "that constructs a resource stager based upon the currently set stagerClass.")
  @Default.InstanceFactory(StagerFactory.class)
  Stager getStager();
  void setStager(Stager stager);

  /**
   * An instance of the Dataflow client. Defaults to creating a Dataflow client
   * using the current set of options.
   */
  @JsonIgnore
  @Description("An instance of the Dataflow client. Defaults to creating a Dataflow client "
      + "using the current set of options.")
  @Default.InstanceFactory(DataflowClientFactory.class)
  Dataflow getDataflowClient();
  void setDataflowClient(Dataflow value);

  /** Returns the default Dataflow client built from the passed in PipelineOptions. */
  class DataflowClientFactory implements DefaultValueFactory<Dataflow> {
    @Override
    public Dataflow create(PipelineOptions options) {
      return DataflowTransport.newDataflowClient(
          options.as(DataflowPipelineOptions.class)).build();
    }
  }

  /**
   * Mapping of old PTransform names to new ones, specified as JSON
   * <code>{"oldName":"newName",...}</code>. To mark a transform as deleted, make newName the
   * empty string.
   */
  @JsonIgnore
  @Description(
      "Mapping of old PTransform names to new ones, specified as JSON "
      + "{\"oldName\":\"newName\",...}. To mark a transform as deleted, make newName the empty "
      + "string.")
  Map<String, String> getTransformNameMapping();
  void setTransformNameMapping(Map<String, String> value);

  /**
   * Custom windmill_main binary to use with the streaming runner.
   */
  @Description("Custom windmill_main binary to use with the streaming runner")
  String getOverrideWindmillBinary();
  void setOverrideWindmillBinary(String value);

  /**
   * Custom windmill service endpoint.
   */
  @Description("Custom windmill service endpoint.")
  String getWindmillServiceEndpoint();
  void setWindmillServiceEndpoint(String value);

  @Description("Port for communicating with a remote windmill service.")
  @Default.Integer(443)
  int getWindmillServicePort();
  void setWindmillServicePort(int value);

  /**
   * Number of threads to use on the Dataflow worker harness. If left unspecified,
   * the Dataflow service will compute an appropriate number of threads to use.
   */
  @Description("Number of threads to use on the Dataflow worker harness. If left unspecified, "
      + "the Dataflow service will compute an appropriate number of threads to use.")
  int getNumberOfWorkerHarnessThreads();
  void setNumberOfWorkerHarnessThreads(int value);

  /**
   * If {@literal true}, save a heap dump before killing a thread or process which is GC
   * thrashing or out of memory. The location of the heap file will either be echoed back
   * to the user, or the user will be given the opportunity to download the heap file.
   *
   * <p>CAUTION: Heap dumps can be of comparable size to the default boot disk. Consider
   * increasing the boot disk size before setting this flag to true.
   */
  @Description("If true, save a heap dump before killing a thread or process "
      + "which is GC thrashing or out of memory.")
  boolean getDumpHeapOnOOM();
  void setDumpHeapOnOOM(boolean dumpHeapBeforeExit);

  /**
   * Creates a {@link Stager} object using the class specified in
   * {@link #getStagerClass()}.
   */
  class StagerFactory implements DefaultValueFactory<Stager> {
    @Override
    public Stager create(PipelineOptions options) {
      DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
      return InstanceBuilder.ofType(Stager.class)
          .fromClass(debugOptions.getStagerClass())
          .fromFactoryMethod("fromOptions")
          .withArg(PipelineOptions.class, options)
          .build();
    }
  }
}