/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.sdk.extensions.gcp.options; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.isNullOrEmpty; import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.api.client.http.HttpRequestInitializer; import com.google.api.client.util.BackOff; import com.google.api.client.util.Sleeper; import com.google.api.services.cloudresourcemanager.CloudResourceManager; import com.google.api.services.cloudresourcemanager.model.Project; import com.google.api.services.storage.model.Bucket; import com.google.auth.Credentials; import com.google.auth.http.HttpCredentialsAdapter; import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer; import com.google.cloud.hadoop.util.ResilientOperation; import com.google.cloud.hadoop.util.RetryDeterminer; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.io.Files; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.FileAlreadyExistsException; import java.security.GeneralSecurityException; import java.util.Locale; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.annotation.Nullable; import org.apache.beam.sdk.extensions.gcp.auth.CredentialFactory; import org.apache.beam.sdk.extensions.gcp.auth.GcpCredentialFactory; import org.apache.beam.sdk.extensions.gcp.auth.NullCredentialInitializer; import org.apache.beam.sdk.extensions.gcp.storage.PathValidator; import org.apache.beam.sdk.options.Default; import org.apache.beam.sdk.options.DefaultValueFactory; import org.apache.beam.sdk.options.Description; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.util.BackOffAdapter; import org.apache.beam.sdk.util.FluentBackoff; import org.apache.beam.sdk.util.InstanceBuilder; import org.apache.beam.sdk.util.RetryHttpRequestInitializer; import org.apache.beam.sdk.util.Transport; import org.apache.beam.sdk.util.gcsfs.GcsPath; import org.joda.time.Duration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Options used to configure Google Cloud Platform specific options such as the project * and credentials. * * <p>These options defer to the * <a href="https://developers.google.com/accounts/docs/application-default-credentials"> * application default credentials</a> for authentication. See the * <a href="https://github.com/google/google-auth-library-java">Google Auth Library</a> for * alternative mechanisms for creating credentials. */ @Description("Options used to configure Google Cloud Platform project and credentials.") public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { /** * Project id to use when launching jobs. */ @Description("Project id. Required when using Google Cloud Platform services. " + "See https://cloud.google.com/storage/docs/projects for further details.") @Default.InstanceFactory(DefaultProjectFactory.class) String getProject(); void setProject(String value); /** * GCP <a href="https://developers.google.com/compute/docs/zones" * >availability zone</a> for operations. * * <p>Default is set on a per-service basis. */ @Description("GCP availability zone for running GCP operations. " + "Default is up to the individual service.") String getZone(); void setZone(String value); /** * The class of the credential factory that should be created and used to create * credentials. If gcpCredential has not been set explicitly, an instance of this class will * be constructed and used as a credential factory. */ @Description("The class of the credential factory that should be created and used to create " + "credentials. If gcpCredential has not been set explicitly, an instance of this class will " + "be constructed and used as a credential factory.") @Default.Class(GcpCredentialFactory.class) Class<? extends CredentialFactory> getCredentialFactoryClass(); void setCredentialFactoryClass( Class<? extends CredentialFactory> credentialFactoryClass); /** * The credential instance that should be used to authenticate against GCP services. * If no credential has been set explicitly, the default is to use the instance factory * that constructs a credential based upon the currently set credentialFactoryClass. */ @JsonIgnore @Description("The credential instance that should be used to authenticate against GCP services. " + "If no credential has been set explicitly, the default is to use the instance factory " + "that constructs a credential based upon the currently set credentialFactoryClass.") @Default.InstanceFactory(GcpUserCredentialsFactory.class) Credentials getGcpCredential(); void setGcpCredential(Credentials value); /** * Attempts to infer the default project based upon the environment this application * is executing within. Currently this only supports getting the default project from gcloud. */ class DefaultProjectFactory implements DefaultValueFactory<String> { private static final Logger LOG = LoggerFactory.getLogger(DefaultProjectFactory.class); @Override public String create(PipelineOptions options) { try { File configFile; if (getEnvironment().containsKey("CLOUDSDK_CONFIG")) { configFile = new File(getEnvironment().get("CLOUDSDK_CONFIG"), "properties"); } else if (isWindows() && getEnvironment().containsKey("APPDATA")) { configFile = new File(getEnvironment().get("APPDATA"), "gcloud/properties"); } else { // New versions of gcloud use this file configFile = new File( System.getProperty("user.home"), ".config/gcloud/configurations/config_default"); if (!configFile.exists()) { // Old versions of gcloud use this file configFile = new File(System.getProperty("user.home"), ".config/gcloud/properties"); } } String section = null; Pattern projectPattern = Pattern.compile("^project\\s*=\\s*(.*)$"); Pattern sectionPattern = Pattern.compile("^\\[(.*)\\]$"); for (String line : Files.readLines(configFile, StandardCharsets.UTF_8)) { line = line.trim(); if (line.isEmpty() || line.startsWith(";")) { continue; } Matcher matcher = sectionPattern.matcher(line); if (matcher.matches()) { section = matcher.group(1); } else if (section == null || section.equals("core")) { matcher = projectPattern.matcher(line); if (matcher.matches()) { String project = matcher.group(1).trim(); LOG.info("Inferred default GCP project '{}' from gcloud. If this is the incorrect " + "project, please cancel this Pipeline and specify the command-line " + "argument --project.", project); return project; } } } } catch (IOException expected) { LOG.debug("Failed to find default project.", expected); } // return null if can't determine return null; } /** * Returns true if running on the Windows OS. */ private static boolean isWindows() { return System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows"); } /** * Used to mock out getting environment variables. */ @VisibleForTesting Map<String, String> getEnvironment() { return System.getenv(); } } /** * Attempts to load the GCP credentials. See * {@link CredentialFactory#getCredential()} for more details. */ class GcpUserCredentialsFactory implements DefaultValueFactory<Credentials> { @Override public Credentials create(PipelineOptions options) { GcpOptions gcpOptions = options.as(GcpOptions.class); try { CredentialFactory factory = InstanceBuilder.ofType(CredentialFactory.class) .fromClass(gcpOptions.getCredentialFactoryClass()) .fromFactoryMethod("fromOptions") .withArg(PipelineOptions.class, options) .build(); return factory.getCredential(); } catch (IOException | GeneralSecurityException e) { throw new RuntimeException("Unable to obtain credential", e); } } } /** * A GCS path for storing temporary files in GCP. * * <p>Its default to {@link PipelineOptions#getTempLocation}. */ @Description("A GCS path for storing temporary files in GCP.") @Default.InstanceFactory(GcpTempLocationFactory.class) @Nullable String getGcpTempLocation(); void setGcpTempLocation(String value); /** * Returns {@link PipelineOptions#getTempLocation} as the default GCP temp location. */ class GcpTempLocationFactory implements DefaultValueFactory<String> { private static final FluentBackoff BACKOFF_FACTORY = FluentBackoff.DEFAULT.withMaxRetries(3).withInitialBackoff(Duration.millis(200)); static final String DEFAULT_REGION = "us-central1"; static final Logger LOG = LoggerFactory.getLogger(GcpTempLocationFactory.class); @Override @Nullable public String create(PipelineOptions options) { String tempLocation = options.getTempLocation(); if (isNullOrEmpty(tempLocation)) { tempLocation = tryCreateDefaultBucket(options, newCloudResourceManagerClient(options.as(CloudResourceManagerOptions.class)).build()); options.setTempLocation(tempLocation); } else { try { PathValidator validator = options.as(GcsOptions.class).getPathValidator(); validator.validateOutputFilePrefixSupported(tempLocation); } catch (Exception e) { throw new IllegalArgumentException(String.format( "Error constructing default value for gcpTempLocation: tempLocation is not" + " a valid GCS path, %s. ", tempLocation), e); } } return tempLocation; } /** * Creates a default bucket or verifies the existence and proper access control * of an existing default bucket. Returns the location if successful. */ @VisibleForTesting static String tryCreateDefaultBucket( PipelineOptions options, CloudResourceManager crmClient) { GcsOptions gcpOptions = options.as(GcsOptions.class); final String projectId = gcpOptions.getProject(); checkArgument(!isNullOrEmpty(projectId), "--project is a required option."); // Look up the project number, to create a default bucket with a stable // name with no special characters. long projectNumber = 0L; try { projectNumber = getProjectNumber(projectId, crmClient); } catch (IOException e) { throw new RuntimeException("Unable to verify project with ID " + projectId, e); } String region = DEFAULT_REGION; if (!isNullOrEmpty(gcpOptions.getZone())) { region = getRegionFromZone(gcpOptions.getZone()); } final String bucketName = "dataflow-staging-" + region + "-" + projectNumber; LOG.info("No tempLocation specified, attempting to use default bucket: {}", bucketName); Bucket bucket = new Bucket() .setName(bucketName) .setLocation(region); // Always try to create the bucket before checking access, so that we do not // race with other pipelines that may be attempting to do the same thing. try { gcpOptions.getGcsUtil().createBucket(projectId, bucket); } catch (FileAlreadyExistsException e) { LOG.debug("Bucket '{}'' already exists, verifying access.", bucketName); } catch (IOException e) { throw new RuntimeException("Unable create default bucket.", e); } // Once the bucket is expected to exist, verify that it is correctly owned // by the project executing the job. try { long owner = gcpOptions.getGcsUtil().bucketOwner( GcsPath.fromComponents(bucketName, "")); checkArgument( owner == projectNumber, "Bucket owner does not match the project from --project:" + " %s vs. %s", owner, projectNumber); } catch (IOException e) { throw new RuntimeException( "Unable to determine the owner of the default bucket at gs://" + bucketName, e); } return "gs://" + bucketName + "/temp/"; } /** * Returns the project number or throws an exception if the project does not * exist or has other access exceptions. */ private static long getProjectNumber( String projectId, CloudResourceManager crmClient) throws IOException { return getProjectNumber( projectId, crmClient, BackOffAdapter.toGcpBackOff(BACKOFF_FACTORY.backoff()), Sleeper.DEFAULT); } /** * Returns the project number or throws an error if the project does not * exist or has other access errors. */ private static long getProjectNumber( String projectId, CloudResourceManager crmClient, BackOff backoff, Sleeper sleeper) throws IOException { CloudResourceManager.Projects.Get getProject = crmClient.projects().get(projectId); try { Project project = ResilientOperation.retry( ResilientOperation.getGoogleRequestCallable(getProject), backoff, RetryDeterminer.SOCKET_ERRORS, IOException.class, sleeper); return project.getProjectNumber(); } catch (Exception e) { throw new IOException("Unable to get project number", e); } } @VisibleForTesting static String getRegionFromZone(String zone) { String[] zoneParts = zone.split("-"); checkArgument(zoneParts.length >= 2, "Invalid zone provided: %s", zone); return zoneParts[0] + "-" + zoneParts[1]; } /** * Returns a CloudResourceManager client builder using the specified * {@link CloudResourceManagerOptions}. */ @VisibleForTesting static CloudResourceManager.Builder newCloudResourceManagerClient( CloudResourceManagerOptions options) { Credentials credentials = options.getGcpCredential(); if (credentials == null) { NullCredentialInitializer.throwNullCredentialException(); } return new CloudResourceManager.Builder(Transport.getTransport(), Transport.getJsonFactory(), chainHttpRequestInitializer( credentials, // Do not log 404. It clutters the output and is possibly even required by the caller. new RetryHttpRequestInitializer(ImmutableList.of(404)))) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); } private static HttpRequestInitializer chainHttpRequestInitializer( Credentials credential, HttpRequestInitializer httpRequestInitializer) { if (credential == null) { return new ChainingHttpRequestInitializer( new NullCredentialInitializer(), httpRequestInitializer); } else { return new ChainingHttpRequestInitializer( new HttpCredentialsAdapter(credential), httpRequestInitializer); } } } }