/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.config.store.hdfs;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Strings;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import gobblin.config.store.api.ConfigStoreCreationException;
import gobblin.config.store.api.ConfigStoreFactory;
/**
 * An implementation of {@link ConfigStoreFactory} for creating {@link SimpleHDFSConfigStore}s. This class only works
 * with the physical scheme {@link #HDFS_SCHEME_NAME}.
 *
 * @see SimpleHDFSConfigStore
 */
public class SimpleHDFSConfigStoreFactory implements ConfigStoreFactory<SimpleHDFSConfigStore> {

  protected static final String SIMPLE_HDFS_SCHEME_PREFIX = "simple-";
  protected static final String HDFS_SCHEME_NAME = "hdfs";

  /** Global namespace for properties if no scope is used */
  public static final String DEFAULT_CONFIG_NAMESPACE = SimpleHDFSConfigStoreFactory.class.getName();

  /** Scoped configuration key naming the default store URI */
  public static final String DEFAULT_STORE_URI_KEY = "default_store_uri";

  /** Fully qualified URI of the default store root, if one could be determined at construction time. */
  private final Optional<URI> defaultStoreURI;

  /** {@link FileSystem} backing the default store; also supplies the default authority and physical scheme. */
  private final FileSystem defaultStoreFS;

  /** Instantiates a new instance using standard typesafe config defaults:
   * {@link ConfigFactory#load()} */
  public SimpleHDFSConfigStoreFactory() {
    this(ConfigFactory.load().getConfig(DEFAULT_CONFIG_NAMESPACE));
  }

  /**
   * Instantiates a new instance of the factory with the specified config. The configuration is
   * expected to be scoped, i.e. the properties should not be prefixed.
   *
   * @param factoryConfig scoped factory configuration, optionally containing {@link #DEFAULT_STORE_URI_KEY}
   * @throws IllegalArgumentException if the configured default store URI is empty or does not point to a
   *         valid config store root
   * @throws RuntimeException if the Hadoop {@link FileSystem} cannot be initialized
   */
  public SimpleHDFSConfigStoreFactory(Config factoryConfig) {
    try {
      if (factoryConfig.hasPath(DEFAULT_STORE_URI_KEY)) {
        String uri = factoryConfig.getString(DEFAULT_STORE_URI_KEY);
        if (Strings.isNullOrEmpty(uri)) {
          throw new IllegalArgumentException("Default store URI should be non-empty!");
        }
        Path defStorePath = new Path(uri);
        this.defaultStoreFS = defStorePath.getFileSystem(new Configuration());
        this.defaultStoreURI = Optional.of(this.defaultStoreFS.makeQualified(defStorePath).toUri());
        if (!isValidStoreRootPath(this.defaultStoreFS, defStorePath)) {
          // Report the offending path (not the FileSystem object) so the bad setting can be identified.
          throw new IllegalArgumentException("Path does not appear to be a config store root: " + defStorePath);
        }
      } else {
        // No explicit default store URI configured: fall back to the process' default FileSystem and
        // probe a candidate root. If the candidate does not contain a store, leave the default absent.
        this.defaultStoreFS = FileSystem.get(new Configuration());
        Path candidateStorePath = getDefaultRootDir();
        if (candidateStorePath != null && isValidStoreRootPath(this.defaultStoreFS, candidateStorePath)) {
          this.defaultStoreURI = Optional.of(this.defaultStoreFS.makeQualified(candidateStorePath).toUri());
        } else {
          this.defaultStoreURI = Optional.absent();
        }
      }
    } catch (IOException e) {
      throw new RuntimeException("Unable to initialize Hadoop FS store factory:" + e, e);
    }
  }

  /**
   * Returns true if {@code storeRootPath} contains a child named
   * {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}, i.e. it looks like a config store root.
   */
  private static boolean isValidStoreRootPath(FileSystem fs, Path storeRootPath) throws IOException {
    Path storeRoot = new Path(storeRootPath, SimpleHDFSConfigStore.CONFIG_STORE_NAME);
    return fs.exists(storeRoot);
  }

  @Override
  public String getScheme() {
    return getSchemePrefix() + getPhysicalScheme();
  }

  /**
   * Creates a {@link SimpleHDFSConfigStore} for the given {@link URI}. The {@link URI} specified should be the fully
   * qualified path to the dataset in question. For example,
   * {@code simple-hdfs://[authority]:[port][path-to-config-store][path-to-dataset]}. It is important to note that the
   * path to the config store on HDFS must also be specified. The combination
   * {@code [path-to-config-store][path-to-dataset]} need not specify an actual {@link Path} on HDFS.
   *
   * <p>
   * If the {@link URI} does not contain an authority, a default authority and root directory are provided. The
   * default authority is taken from the NameNode {@link URI} the current process is co-located with. The default path
   * is "/user/[current-user]/".
   * </p>
   *
   * @param configKey The URI of the config key that needs to be accessed.
   *
   * @return a {@link SimpleHDFSConfigStore} configured with the the given {@link URI}.
   *
   * @throws ConfigStoreCreationException if the {@link SimpleHDFSConfigStore} could not be created.
   */
  @Override
  public SimpleHDFSConfigStore createConfigStore(URI configKey) throws ConfigStoreCreationException {
    FileSystem fs = createFileSystem(configKey);
    URI physicalStoreRoot = getStoreRoot(fs, configKey);
    // The logical root is the physical root with the factory's scheme prefix prepended.
    URI logicalStoreRoot = URI.create(getSchemePrefix() + physicalStoreRoot);
    return new SimpleHDFSConfigStore(fs, physicalStoreRoot, logicalStoreRoot);
  }

  /** Returns the prefix prepended to the physical scheme to form this factory's logical scheme. */
  protected String getSchemePrefix() {
    return SIMPLE_HDFS_SCHEME_PREFIX;
  }

  /**
   * Returns the physical scheme this {@link ConfigStoreFactory} is responsible for. To support new HDFS
   * {@link FileSystem} implementations, subclasses should override this method.
   */
  protected String getPhysicalScheme() {
    return this.defaultStoreFS.getUri().getScheme();
  }

  /**
   * Gets a default root directory if one is not specified. The default root dir is the home directory of the
   * current user on the default {@link FileSystem} (typically {@code /user/[current-user]/} on HDFS).
   */
  protected Path getDefaultRootDir() throws IOException {
    return this.defaultStoreFS.getHomeDirectory();
  }

  /**
   * Gets a default authority if one is not specified. The default authority is the authority
   * configured as part of the {@link #DEFAULT_STORE_URI_KEY} configuration setting or the {@link FileSystem}
   * the current process is running if the setting is missing. For example, when running on a
   * HDFS node, the authority will be taken from the NameNode {@link URI}.
   */
  private String getDefaultAuthority() throws IOException {
    return this.defaultStoreFS.getUri().getAuthority();
  }

  /**
   * Creates a {@link FileSystem} given a user specified configKey.
   *
   * @throws ConfigStoreCreationException if the {@link FileSystem} could not be obtained.
   */
  private FileSystem createFileSystem(URI configKey) throws ConfigStoreCreationException {
    try {
      return FileSystem.get(createFileSystemURI(configKey), new Configuration());
    } catch (IOException | URISyntaxException e) {
      throw new ConfigStoreCreationException(configKey, e);
    }
  }

  /**
   * Creates a Hadoop FS {@link URI} given a user-specified configKey. If the given configKey does not have an authority,
   * a default one is used instead, provided by the {@link #getDefaultAuthority()} method.
   *
   * @throws IllegalArgumentException if the configKey scheme is missing or does not begin with
   *         {@link #getSchemePrefix()}.
   */
  private URI createFileSystemURI(URI configKey) throws URISyntaxException, IOException {
    // Validate the scheme. Guard against a null scheme (e.g. a relative URI) so the caller gets a
    // descriptive IllegalArgumentException instead of a NullPointerException.
    String configKeyScheme = configKey.getScheme();
    if (Strings.isNullOrEmpty(configKeyScheme) || !configKeyScheme.startsWith(getSchemePrefix())) {
      throw new IllegalArgumentException(
          String.format("Scheme for configKey \"%s\" must begin with \"%s\"!", configKey, getSchemePrefix()));
    }
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
      return new URI(getPhysicalScheme(), getDefaultAuthority(), "", "", "");
    }
    // Strip the logical prefix to recover the physical scheme, e.g. "simple-hdfs" -> "hdfs".
    String uriPhysicalScheme = configKeyScheme.substring(getSchemePrefix().length());
    return new URI(uriPhysicalScheme, configKey.getAuthority(), "", "", "");
  }

  /**
   * This method determines the physical location of the {@link SimpleHDFSConfigStore} root directory on HDFS. It does
   * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory
   * contains the folder {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME}. It then assumes this {@link Path} is the root
   * directory.
   *
   * <p>
   * If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does
   * not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If
   * the default root dir does not contain the {@link SimpleHDFSConfigStore#CONFIG_STORE_NAME} then a
   * {@link ConfigStoreCreationException} is thrown.
   * </p>
   */
  private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
      if (hasDefaultStoreURI()) {
        return this.defaultStoreURI.get();
      } else if (isAuthorityRequired()) {
        throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
      }
    }
    // Walk the path upwards looking for a parent that contains the config store marker directory.
    Path path = new Path(configKey.getPath());
    while (path != null) {
      try {
        // the abs URI may point to an unexist path for
        // 1. phantom node
        // 2. as URI did not specify the version
        if (fs.exists(path)) {
          for (FileStatus fileStatus : fs.listStatus(path)) {
            if (fileStatus.isDirectory()
                && fileStatus.getPath().getName().equals(SimpleHDFSConfigStore.CONFIG_STORE_NAME)) {
              return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
            }
          }
        }
      } catch (IOException e) {
        throw new ConfigStoreCreationException(configKey, e);
      }
      path = path.getParent();
    }
    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
  }

  /** Whether an authority (or a configured default store) is required to resolve a store root. */
  protected boolean isAuthorityRequired() {
    return true;
  }

  @VisibleForTesting
  boolean hasDefaultStoreURI() {
    return this.defaultStoreURI.isPresent();
  }

  @VisibleForTesting
  URI getDefaultStoreURI() {
    return this.defaultStoreURI.get();
  }
}