/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.hadoop.variant;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.opencga.storage.core.StoragePipelineResult;
import org.opencb.opencga.storage.core.config.DatabaseCredentials;
import org.opencb.opencga.storage.core.config.StorageEngineConfiguration;
import org.opencb.opencga.storage.core.config.StorageEtlConfiguration;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.exceptions.StoragePipelineException;
import org.opencb.opencga.storage.core.metadata.BatchFileOperation;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.metadata.StudyConfigurationManager;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.VariantStoragePipeline;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.opencb.opencga.storage.core.variant.stats.VariantStatisticsManager;
import org.opencb.opencga.storage.hadoop.auth.HBaseCredentials;
import org.opencb.opencga.storage.hadoop.utils.HBaseManager;
import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor;
import org.opencb.opencga.storage.hadoop.variant.annotation.HadoopDefaultVariantAnnotationManager;
import org.opencb.opencga.storage.hadoop.variant.archive.ArchiveDriver;
import org.opencb.opencga.storage.hadoop.variant.executors.ExternalMRExecutor;
import org.opencb.opencga.storage.hadoop.variant.executors.MRExecutor;
import org.opencb.opencga.storage.hadoop.variant.index.AbstractVariantTableDriver;
import org.opencb.opencga.storage.hadoop.variant.index.VariantTableDeletionDriver;
import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseStudyConfigurationManager;
import org.opencb.opencga.storage.hadoop.variant.stats.HadoopDefaultVariantStatisticsManager;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.*;
import java.util.zip.GZIPInputStream;
/**
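* Hadoop/HBase implementation of {@link VariantStorageEngine}. Files are first loaded into a
* per-study HBase archive table and later merged into the variants table; the heavier steps run
* as MapReduce jobs submitted through an {@link MRExecutor}.
* <p>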
* Created by mh719 on 16/06/15.
*/
public class HadoopVariantStorageEngine extends VariantStorageEngine {
public static final String STORAGE_ENGINE_ID = "hadoop";
public static final String HADOOP_BIN = "hadoop.bin";
public static final String HADOOP_ENV = "hadoop.env";
public static final String OPENCGA_STORAGE_HADOOP_JAR_WITH_DEPENDENCIES = "opencga.storage.hadoop.jar-with-dependencies";
public static final String HADOOP_LOAD_ARCHIVE = "hadoop.load.archive";
public static final String HADOOP_LOAD_VARIANT = "hadoop.load.variant";
// Resume merge variants if the current status is RUNNING or DONE
/**
* @deprecated use {@link Options#RESUME}
*/
@Deprecated
public static final String HADOOP_LOAD_VARIANT_RESUME = "hadoop.load.variant.resume";
// Merge variants operation status. Skip merge and run post-load/post-merge step if status is DONE
public static final String HADOOP_LOAD_VARIANT_STATUS = "hadoop.load.variant.status";
//Other files to be loaded from Archive to Variant
public static final String HADOOP_LOAD_VARIANT_PENDING_FILES = "opencga.storage.hadoop.load.pending.files";
public static final String OPENCGA_STORAGE_HADOOP_INTERMEDIATE_HDFS_DIRECTORY = "opencga.storage.hadoop.intermediate.hdfs.directory";
public static final String OPENCGA_STORAGE_HADOOP_VARIANT_HBASE_NAMESPACE = "opencga.storage.hadoop.variant.hbase.namespace";
public static final String OPENCGA_STORAGE_HADOOP_VARIANT_ARCHIVE_TABLE_PREFIX = "opencga.storage.hadoop.variant.archive.table.prefix";
public static final String OPENCGA_STORAGE_HADOOP_MAPREDUCE_SCANNER_TIMEOUT = "opencga.storage.hadoop.mapreduce.scanner.timeout";
public static final String HADOOP_LOAD_ARCHIVE_BATCH_SIZE = "hadoop.load.archive.batch.size";
public static final String HADOOP_LOAD_VARIANT_BATCH_SIZE = "hadoop.load.variant.batch.size";
public static final String HADOOP_LOAD_DIRECT = "hadoop.load.direct";
public static final boolean HADOOP_LOAD_DIRECT_DEFAULT = true;
public static final String EXTERNAL_MR_EXECUTOR = "opencga.external.mr.executor";
public static final String ARCHIVE_TABLE_PREFIX = "opencga_study_";
protected Configuration conf = null;
protected MRExecutor mrExecutor;
private HdfsVariantReaderUtils variantReaderUtils;
private HBaseManager hBaseManager;
public HadoopVariantStorageEngine() {
// variantReaderUtils = new HdfsVariantReaderUtils(conf);
}
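/**
* Index a set of files. With a single file (or when load is skipped) the default sequential
* implementation is used; otherwise transform/archive run concurrently for up to
* {@link #HADOOP_LOAD_ARCHIVE_BATCH_SIZE} files at a time, and the merge into the variants table
* is executed afterwards in batches of {@link #HADOOP_LOAD_VARIANT_BATCH_SIZE} files.
*
* @param inputFiles  Files to index
* @param outdirUri   Output directory for transformed and intermediate files
* @param doExtract   Execute the extract step
* @param doTransform Execute the transform step
* @param doLoad      Execute the load step
* @return One {@link StoragePipelineResult} per input file
* @throws StorageEngineException if any of the steps fails
*/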
@Override
public List<StoragePipelineResult> index(List<URI> inputFiles, URI outdirUri, boolean doExtract, boolean doTransform, boolean doLoad)
throws StorageEngineException {
if (inputFiles.size() == 1 || !doLoad) {
return super.index(inputFiles, outdirUri, doExtract, doTransform, doLoad);
}
final boolean doArchive;
final boolean doMerge;
if (!getOptions().containsKey(HADOOP_LOAD_ARCHIVE) && !getOptions().containsKey(HADOOP_LOAD_VARIANT)) {
doArchive = true;
doMerge = true;
} else {
doArchive = getOptions().getBoolean(HADOOP_LOAD_ARCHIVE, false);
doMerge = getOptions().getBoolean(HADOOP_LOAD_VARIANT, false);
}
if (!doArchive && !doMerge) {
return Collections.emptyList();
}
final int nThreadArchive = getOptions().getInt(HADOOP_LOAD_ARCHIVE_BATCH_SIZE, 2);
ObjectMap extraOptions = new ObjectMap()
.append(HADOOP_LOAD_ARCHIVE, true)
.append(HADOOP_LOAD_VARIANT, false);
final List<StoragePipelineResult> concurrResult = new CopyOnWriteArrayList<>();
List<VariantStoragePipeline> etlList = new ArrayList<>();
ExecutorService executorService = Executors.newFixedThreadPool(
nThreadArchive,
r -> {
Thread t = new Thread(r);
t.setDaemon(true);
return t;
}); // Set Daemon for quick shutdown !!!
LinkedList<Future<StoragePipelineResult>> futures = new LinkedList<>();
List<Integer> indexedFiles = new CopyOnWriteArrayList<>();
for (URI inputFile : inputFiles) {
//Provide a connected storageETL if load is required.
VariantStoragePipeline storageETL = newStorageETL(doLoad, new ObjectMap(extraOptions));
futures.add(executorService.submit(() -> {
try {
Thread.currentThread().setName(Paths.get(inputFile).getFileName().toString());
StoragePipelineResult storagePipelineResult = new StoragePipelineResult(inputFile);
URI nextUri = inputFile;
boolean error = false;
if (doTransform) {
try {
nextUri = transformFile(storageETL, storagePipelineResult, concurrResult, nextUri, outdirUri);
} catch (StoragePipelineException ignore) {
//Ignore here. Errors are stored in the ETLResult
error = true;
}
}
if (doLoad && doArchive && !error) {
try {
loadFile(storageETL, storagePipelineResult, concurrResult, nextUri, outdirUri);
} catch (StoragePipelineException ignore) {
//Ignore here. Errors are stored in the ETLResult
error = true;
}
}
if (doLoad && !error) {
// Read the VariantSource to get the original fileName (it may be different from the
// nextUri.getFileName if this is the transformed file)
String fileName = storageETL.readVariantSource(nextUri, null).getFileName();
// Get latest study configuration from DB, might have been changed since
StudyConfiguration studyConfiguration = storageETL.getStudyConfiguration();
// Get file ID for the provided file name
Integer fileId = studyConfiguration.getFileIds().get(fileName);
indexedFiles.add(fileId);
}
return storagePipelineResult;
} finally {
try {
storageETL.close();
} catch (StorageEngineException e) {
logger.error("Issue closing DB connection ", e);
}
}
}));
}
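// Wait for the submitted tasks to finish, collecting results and counting transform/load errors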
executorService.shutdown();
int errors = 0;
try {
while (!futures.isEmpty()) {
executorService.awaitTermination(1, TimeUnit.MINUTES);
// Check values
if (futures.peek().isDone() || futures.peek().isCancelled()) {
Future<StoragePipelineResult> first = futures.pop();
StoragePipelineResult result = first.get(1, TimeUnit.MINUTES);
if (result.getTransformError() != null) {
//TODO: Handle errors. Retry?
errors++;
result.getTransformError().printStackTrace();
} else if (result.getLoadError() != null) {
//TODO: Handle errors. Retry?
errors++;
result.getLoadError().printStackTrace();
}
concurrResult.add(result);
}
}
if (errors > 0) {
throw new StoragePipelineException("Errors found", concurrResult);
}
if (doLoad && doMerge) {
int batchMergeSize = getOptions().getInt(HADOOP_LOAD_VARIANT_BATCH_SIZE, 10);
// Overwrite default ID list with user provided IDs
List<Integer> pendingFiles = indexedFiles;
if (getOptions().containsKey(HADOOP_LOAD_VARIANT_PENDING_FILES)) {
List<Integer> idList = getOptions().getAsIntegerList(HADOOP_LOAD_VARIANT_PENDING_FILES);
if (!idList.isEmpty()) {
// only if the list is not empty
pendingFiles = idList;
}
}
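// Merge the pending files in batches, running pre-load, merge and post-load for each batch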
List<Integer> filesToMerge = new ArrayList<>(batchMergeSize);
int i = 0;
for (Iterator<Integer> iterator = pendingFiles.iterator(); iterator.hasNext(); i++) {
Integer indexedFile = iterator.next();
filesToMerge.add(indexedFile);
if (filesToMerge.size() == batchMergeSize || !iterator.hasNext()) {
extraOptions = new ObjectMap()
.append(HADOOP_LOAD_ARCHIVE, false)
.append(HADOOP_LOAD_VARIANT, true)
.append(HADOOP_LOAD_VARIANT_PENDING_FILES, filesToMerge);
AbstractHadoopVariantStoragePipeline localEtl = newStorageETL(doLoad, extraOptions);
int studyId = getOptions().getInt(Options.STUDY_ID.key());
localEtl.preLoad(inputFiles.get(i), outdirUri);
localEtl.merge(studyId, filesToMerge);
localEtl.postLoad(inputFiles.get(i), outdirUri);
filesToMerge.clear();
}
}
annotateLoadedFiles(outdirUri, inputFiles, concurrResult, getOptions());
calculateStatsForLoadedFiles(outdirUri, inputFiles, concurrResult, getOptions());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt(); // Restore the interrupt flag before propagating
throw new StoragePipelineException("Interrupted!", e, concurrResult);
} catch (ExecutionException e) {
throw new StoragePipelineException("Execution exception!", e, concurrResult);
} catch (TimeoutException e) {
throw new StoragePipelineException("Timeout Exception", e, concurrResult);
} finally {
if (!executorService.isShutdown()) {
try {
executorService.shutdownNow();
} catch (Exception e) {
logger.error("Problems shutting executer service down", e);
}
}
}
return concurrResult;
}
@Override
public AbstractHadoopVariantStoragePipeline newStoragePipeline(boolean connected) throws StorageEngineException {
return newStorageETL(connected, null);
}
@Override
protected VariantAnnotationManager newVariantAnnotationManager(VariantAnnotator annotator, VariantDBAdaptor dbAdaptor) {
return new HadoopDefaultVariantAnnotationManager(annotator, dbAdaptor);
}
@Override
public VariantStatisticsManager newVariantStatisticsManager(VariantDBAdaptor dbAdaptor) {
return new HadoopDefaultVariantStatisticsManager(dbAdaptor);
}
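/**
* Create a new storage pipeline configured with the engine options plus the given extra options.
* Depending on {@link #HADOOP_LOAD_DIRECT}, either a {@link HadoopDirectVariantStoragePipeline}
* or a {@link HadoopVariantStoragePipeline} is created.
*
* @param connected    Provide a connected {@link VariantHadoopDBAdaptor} (required to load)
* @param extraOptions Extra options overriding the configured ones. May be null
* @return A new pipeline instance
* @throws StorageEngineException on configuration or connection problems
*/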
public AbstractHadoopVariantStoragePipeline newStorageETL(boolean connected, Map<? extends String, ?> extraOptions)
throws StorageEngineException {
ObjectMap options = new ObjectMap(configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getOptions());
if (extraOptions != null) {
options.putAll(extraOptions);
}
boolean directLoad = options.getBoolean(HADOOP_LOAD_DIRECT, HADOOP_LOAD_DIRECT_DEFAULT);
VariantHadoopDBAdaptor dbAdaptor = connected ? getDBAdaptor() : null;
Configuration hadoopConfiguration = null == dbAdaptor ? null : dbAdaptor.getConfiguration();
hadoopConfiguration = hadoopConfiguration == null ? getHadoopConfiguration(options) : hadoopConfiguration;
hadoopConfiguration.setIfUnset(ArchiveDriver.CONFIG_ARCHIVE_TABLE_COMPRESSION, Algorithm.SNAPPY.getName());
HBaseCredentials archiveCredentials = buildCredentials(getArchiveTableName(options.getInt(Options.STUDY_ID.key()), options));
AbstractHadoopVariantStoragePipeline storageETL = null;
if (directLoad) {
storageETL = new HadoopDirectVariantStoragePipeline(configuration, storageEngineId, dbAdaptor, getMRExecutor(options),
hadoopConfiguration, archiveCredentials, getVariantReaderUtils(hadoopConfiguration), options);
} else {
storageETL = new HadoopVariantStoragePipeline(configuration, storageEngineId, dbAdaptor, getMRExecutor(options),
hadoopConfiguration, archiveCredentials, getVariantReaderUtils(hadoopConfiguration), options);
}
return storageETL;
}
public HdfsVariantReaderUtils getVariantReaderUtils() {
return getVariantReaderUtils(conf);
}
private HdfsVariantReaderUtils getVariantReaderUtils(Configuration config) {
if (null == variantReaderUtils) {
variantReaderUtils = new HdfsVariantReaderUtils(config);
} else if (this.variantReaderUtils.conf == null && config != null) {
variantReaderUtils = new HdfsVariantReaderUtils(config);
}
return variantReaderUtils;
}
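/**
* Remove a file from the variants table by running the {@link VariantTableDeletionDriver}
* MapReduce job. The operation is registered in the {@link StudyConfiguration}, and on success
* the file is removed from the set of indexed files.
*
* @param study  Study name or numerical study id
* @param fileId Numerical file id to remove
* @throws StorageEngineException if the file is not indexed or the MapReduce job fails
*/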
@Override
public void dropFile(String study, int fileId) throws StorageEngineException {
ObjectMap options = configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant().getOptions();
// Use ETL as helper class
AbstractHadoopVariantStoragePipeline etl = newStoragePipeline(true);
VariantDBAdaptor dbAdaptor = etl.getDBAdaptor();
StudyConfiguration studyConfiguration;
StudyConfigurationManager scm = dbAdaptor.getStudyConfigurationManager();
List<Integer> fileList = Collections.singletonList(fileId);
final int studyId;
if (StringUtils.isNumeric(study)) {
studyId = Integer.parseInt(study);
} else {
studyConfiguration = scm.getStudyConfiguration(study, null).first();
studyId = studyConfiguration.getStudyId();
}
// Pre delete
long lock = scm.lockStudy(studyId);
try {
studyConfiguration = scm.getStudyConfiguration(studyId, null).first();
if (!studyConfiguration.getIndexedFiles().contains(fileId)) {
throw StorageEngineException.unableToExecute("File not indexed.", fileId, studyConfiguration);
}
boolean resume = options.getBoolean(Options.RESUME.key(), Options.RESUME.defaultValue())
|| options.getBoolean(HadoopVariantStorageEngine.HADOOP_LOAD_VARIANT_RESUME, false);
BatchFileOperation operation =
etl.addBatchOperation(studyConfiguration, VariantTableDeletionDriver.JOB_OPERATION_NAME, fileList, resume,
BatchFileOperation.Type.REMOVE);
options.put(AbstractVariantTableDriver.TIMESTAMP, operation.getTimestamp());
scm.updateStudyConfiguration(studyConfiguration, null);
} finally {
scm.unLockStudy(studyId, lock);
}
// Delete
Thread hook = etl.newShutdownHook(VariantTableDeletionDriver.JOB_OPERATION_NAME, fileList);
try {
Runtime.getRuntime().addShutdownHook(hook);
String archiveTable = getArchiveTableName(studyId, options);
HBaseCredentials variantsTable = getDbCredentials();
String hadoopRoute = options.getString(HADOOP_BIN, "hadoop");
String jar = AbstractHadoopVariantStoragePipeline.getJarWithDependencies(options);
Class<?> execClass = VariantTableDeletionDriver.class;
String args = VariantTableDeletionDriver.buildCommandLineArgs(variantsTable.toString(), archiveTable,
variantsTable.getTable(), studyId, fileList, options);
String executable = hadoopRoute + " jar " + jar + ' ' + execClass.getName();
long startTime = System.currentTimeMillis();
logger.info("------------------------------------------------------");
logger.info("Remove file ID {} in archive '{}' and analysis table '{}'", fileId, archiveTable, variantsTable.getTable());
logger.debug(executable + " " + args);
logger.info("------------------------------------------------------");
int exitValue = getMRExecutor(options).run(executable, args);
logger.info("------------------------------------------------------");
logger.info("Exit value: {}", exitValue);
logger.info("Total time: {}s", (System.currentTimeMillis() - startTime) / 1000.0);
if (exitValue != 0) {
throw new StorageEngineException("Error removing fileId " + fileId + " from tables ");
}
// Post Delete
// If everything went fine, mark the operation as READY and remove the file from the study configuration
lock = scm.lockStudy(studyId);
try {
studyConfiguration = scm.getStudyConfiguration(studyId, null).first();
etl.secureSetStatus(studyConfiguration, BatchFileOperation.Status.READY,
VariantTableDeletionDriver.JOB_OPERATION_NAME, fileList);
studyConfiguration.getIndexedFiles().remove(fileId);
scm.updateStudyConfiguration(studyConfiguration, null);
} finally {
scm.unLockStudy(studyId, lock);
}
} catch (Exception e) {
etl.setStatus(BatchFileOperation.Status.ERROR, VariantTableDeletionDriver.JOB_OPERATION_NAME, fileList);
throw e;
} finally {
Runtime.getRuntime().removeShutdownHook(hook);
}
}
@Override
public void dropStudy(String studyName) throws StorageEngineException {
throw new UnsupportedOperationException("Unimplemented");
}
@Override
public VariantHadoopDBAdaptor getDBAdaptor(String tableName) throws StorageEngineException {
tableName = getVariantTableName(tableName);
return getDBAdaptor(buildCredentials(tableName));
}
private HBaseCredentials getDbCredentials() throws StorageEngineException {
String table = getVariantTableName();
return buildCredentials(table);
}
@Override
public VariantHadoopDBAdaptor getDBAdaptor() throws StorageEngineException {
return getDBAdaptor(getDbCredentials());
}
protected VariantHadoopDBAdaptor getDBAdaptor(HBaseCredentials credentials) throws StorageEngineException {
try {
StorageEngineConfiguration storageEngine = this.configuration.getStorageEngine(STORAGE_ENGINE_ID);
Configuration configuration = getHadoopConfiguration(storageEngine.getVariant().getOptions());
configuration = VariantHadoopDBAdaptor.getHbaseConfiguration(configuration, credentials);
return new VariantHadoopDBAdaptor(getHBaseManager(configuration).getConnection(), credentials,
this.configuration, configuration);
} catch (IOException e) {
throw new StorageEngineException("Problems creating DB Adapter", e);
}
}
private synchronized HBaseManager getHBaseManager(Configuration configuration) {
if (hBaseManager == null) {
hBaseManager = new HBaseManager(configuration);
}
return hBaseManager;
}
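/**
* Close the shared {@link HBaseManager} and its underlying HBase connection, if any.
*
* @throws IOException on errors closing the connection
*/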
public void close() throws IOException {
if (hBaseManager != null) {
hBaseManager.close();
hBaseManager = null;
}
}
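/**
* Build the HBase credentials for the given table from the storage engine configuration. A
* configured host is parsed as a URI: for instance (illustrative value, not from a real
* configuration) {@code "hbase://zk-host:60000/hbase"} yields server {@code zk-host}, port
* {@code 60000} and zookeeper path {@code /hbase}; a bare host name is used as the server with
* the default port. If no host is configured, the zookeeper quorum is read from the Hadoop
* configuration.
*
* @param table Table name to access
* @return Credentials for the given table
* @throws StorageEngineException if the Hadoop configuration cannot be obtained
*/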
public HBaseCredentials buildCredentials(String table) throws StorageEngineException {
StorageEtlConfiguration vStore = configuration.getStorageEngine(STORAGE_ENGINE_ID).getVariant();
DatabaseCredentials db = vStore.getDatabase();
String user = db.getUser();
String pass = db.getPassword();
List<String> hostList = db.getHosts();
if (hostList != null && hostList.size() > 1) {
throw new IllegalStateException("Expect only one server name");
}
String target = hostList != null && !hostList.isEmpty() ? hostList.get(0) : null;
try {
String server;
Integer port;
String zookeeperPath;
if (target == null || target.isEmpty()) {
Configuration conf = getHadoopConfiguration(getOptions());
server = conf.get(HConstants.ZOOKEEPER_QUORUM);
port = 60000;
zookeeperPath = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT);
} else {
URI uri;
try {
uri = new URI(target);
} catch (URISyntaxException e) {
try {
uri = new URI("hbase://" + target);
} catch (URISyntaxException e1) {
throw e;
}
}
server = uri.getHost();
port = uri.getPort() > 0 ? uri.getPort() : 60000;
// If just an IP or host name is provided, the URI parser returns a null host and the content as the path. Avoid that
if (server == null) {
server = uri.getPath();
zookeeperPath = null;
} else {
zookeeperPath = uri.getPath();
}
}
HBaseCredentials credentials;
if (!StringUtils.isBlank(zookeeperPath)) {
credentials = new HBaseCredentials(server, table, user, pass, port, zookeeperPath);
} else {
credentials = new HBaseCredentials(server, table, user, pass, port);
}
return credentials;
} catch (URISyntaxException e) {
throw new IllegalStateException(e);
}
}
@Override
protected StudyConfigurationManager buildStudyConfigurationManager(ObjectMap options) throws StorageEngineException {
try {
HBaseCredentials dbCredentials = getDbCredentials();
Configuration configuration = VariantHadoopDBAdaptor.getHbaseConfiguration(getHadoopConfiguration(options), dbCredentials);
return new HBaseStudyConfigurationManager(dbCredentials.getTable(), configuration, options);
} catch (IOException e) {
e.printStackTrace();
return super.buildStudyConfigurationManager(options);
}
}
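/**
* Build the Hadoop configuration to use, starting from the injected configuration (if any) or a
* new {@link HBaseConfiguration}, and copying every non-null option on top of it.
*
* @param options Options to copy into the configuration
* @return Hadoop configuration
* @throws StorageEngineException if {@code fs.defaultFS} is not defined
*/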
private Configuration getHadoopConfiguration(ObjectMap options) throws StorageEngineException {
Configuration conf = this.conf == null ? HBaseConfiguration.create() : this.conf;
// This is the only key needed to connect to HDFS:
// CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY = fs.defaultFS
//
if (conf.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY) == null) {
throw new StorageEngineException("Missing configuration parameter \""
+ CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY + "\"");
}
options.entrySet().stream()
.filter(entry -> entry.getValue() != null)
.forEach(entry -> conf.set(entry.getKey(), options.getString(entry.getKey())));
return conf;
}
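/**
* Get the MapReduce executor. If {@link #EXTERNAL_MR_EXECUTOR} is set, either as a Class or as a
* fully qualified class name, a new instance of that class is created; otherwise the configured
* executor is reused, or a new {@link ExternalMRExecutor} is built from the options.
*
* @param options Options, possibly containing {@link #EXTERNAL_MR_EXECUTOR}
* @return MapReduce executor
*/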
public MRExecutor getMRExecutor(ObjectMap options) {
if (options.containsKey(EXTERNAL_MR_EXECUTOR)) {
Class<? extends MRExecutor> aClass;
if (options.get(EXTERNAL_MR_EXECUTOR) instanceof Class) {
aClass = options.get(EXTERNAL_MR_EXECUTOR, Class.class).asSubclass(MRExecutor.class);
} else {
try {
aClass = Class.forName(options.getString(EXTERNAL_MR_EXECUTOR)).asSubclass(MRExecutor.class);
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
}
try {
return aClass.newInstance();
} catch (InstantiationException | IllegalAccessException e) {
throw new RuntimeException(e);
}
} else if (mrExecutor == null) {
return new ExternalMRExecutor(options);
} else {
return mrExecutor;
}
}
/**
* Get the archive table name given a StudyId.
*
* @param studyId Numerical study identifier
* @return Table name
*/
public String getArchiveTableName(int studyId) {
String prefix = getOptions().getString(OPENCGA_STORAGE_HADOOP_VARIANT_ARCHIVE_TABLE_PREFIX);
if (StringUtils.isEmpty(prefix)) {
prefix = ARCHIVE_TABLE_PREFIX;
}
return buildTableName(getOptions().getString(OPENCGA_STORAGE_HADOOP_VARIANT_HBASE_NAMESPACE, ""),
prefix, studyId);
}
/**
* Get the archive table name given a StudyId.
*
* @param studyId Numerical study identifier
* @param conf Hadoop configuration with the OpenCGA values.
* @return Table name
*/
public static String getArchiveTableName(int studyId, Configuration conf) {
String prefix = conf.get(OPENCGA_STORAGE_HADOOP_VARIANT_ARCHIVE_TABLE_PREFIX);
if (StringUtils.isEmpty(prefix)) {
prefix = ARCHIVE_TABLE_PREFIX;
}
return buildTableName(conf.get(OPENCGA_STORAGE_HADOOP_VARIANT_HBASE_NAMESPACE, ""),
prefix, studyId);
}
/**
* Get the archive table name given a StudyId.
*
* @param studyId Numerical study identifier
* @param options Options
* @return Table name
*/
public static String getArchiveTableName(int studyId, ObjectMap options) {
String prefix = options.getString(OPENCGA_STORAGE_HADOOP_VARIANT_ARCHIVE_TABLE_PREFIX);
if (StringUtils.isEmpty(prefix)) {
prefix = ARCHIVE_TABLE_PREFIX;
}
return buildTableName(options.getString(OPENCGA_STORAGE_HADOOP_VARIANT_HBASE_NAMESPACE, ""),
prefix, studyId);
}
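/**
* Get the variants table name, i.e. the configured database name qualified with the HBase
* namespace from {@link #OPENCGA_STORAGE_HADOOP_VARIANT_HBASE_NAMESPACE}, if any.
*
* @return Variants table name
*/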
public String getVariantTableName() {
return getVariantTableName(getOptions().getString(Options.DB_NAME.key()));
}
public String getVariantTableName(String table) {
return getVariantTableName(table, getOptions());
}
public static String getVariantTableName(String table, ObjectMap options) {
return buildTableName(options.getString(OPENCGA_STORAGE_HADOOP_VARIANT_HBASE_NAMESPACE, ""), "", table);
}
public static String getVariantTableName(String table, Configuration conf) {
return buildTableName(conf.get(OPENCGA_STORAGE_HADOOP_VARIANT_HBASE_NAMESPACE, ""), "", table);
}
protected static String buildTableName(String namespace, String prefix, int studyId) {
return buildTableName(namespace, prefix, String.valueOf(studyId));
}
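/**
* Build a fully qualified HBase table name as {@code [namespace:][prefix_]tableName} and validate
* it against the HBase naming rules. As an illustrative example,
* {@code buildTableName("ns", "opencga_study_", "5")} produces {@code "ns:opencga_study_5"}; a
* trailing '_' is appended to the prefix when missing.
*
* @param namespace HBase namespace. May be empty
* @param prefix    Table name prefix. May be empty
* @param tableName Table name (or study id for archive tables)
* @return Fully qualified table name
* @throws IllegalArgumentException if the resulting name is not a legal HBase table name
*/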
protected static String buildTableName(String namespace, String prefix, String tableName) {
StringBuilder sb = new StringBuilder();
if (StringUtils.isNotEmpty(namespace)) {
if (tableName.contains(":")) {
if (!tableName.startsWith(namespace + ":")) {
throw new IllegalArgumentException("Wrong namespace : '" + tableName + "'."
+ " Namespace mismatches with the read from configuration:" + namespace);
} else {
tableName = tableName.substring(tableName.indexOf(':') + 1); // Remove '<namespace>:'
}
}
sb.append(namespace).append(":");
}
if (StringUtils.isNotEmpty(prefix)) {
sb.append(prefix);
if (!prefix.endsWith("_")) {
sb.append("_");
}
}
sb.append(tableName);
String fullyQualified = sb.toString();
TableName.isLegalFullyQualifiedTableName(fullyQualified.getBytes());
return fullyQualified;
}
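/**
* Read the {@link VariantSource} metadata associated with a transformed file, either from the
* local filesystem or from HDFS.
*
* @param input URI of the transformed file
* @return The VariantSource
* @throws StorageEngineException if the metadata cannot be read
*/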
public VariantSource readVariantSource(URI input) throws StorageEngineException {
return getVariantReaderUtils(null).readVariantSource(input);
}
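/**
* {@link VariantReaderUtils} extension able to read the VariantSource metadata either from local
* files or from a gzip-compressed meta file stored in HDFS next to the transformed file.
*/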
private static class HdfsVariantReaderUtils extends VariantReaderUtils {
private final Configuration conf;
HdfsVariantReaderUtils(Configuration conf) {
this.conf = conf;
}
@Override
public VariantSource readVariantSource(URI input) throws StorageEngineException {
VariantSource source;
if (input.getScheme() == null || input.getScheme().startsWith("file")) {
if (input.getPath().contains("variants.proto")) {
return VariantReaderUtils.readVariantSource(Paths.get(input.getPath().replace("variants.proto", "file.json")), null);
} else {
return VariantReaderUtils.readVariantSource(Paths.get(input.getPath()), null);
}
}
Path metaPath = new Path(VariantReaderUtils.getMetaFromTransformedFile(input.toString()));
FileSystem fs = null;
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
throw new StorageEngineException("Unable to get FileSystem", e);
}
try (
InputStream inputStream = new GZIPInputStream(fs.open(metaPath))
) {
source = VariantReaderUtils.readVariantSource(inputStream);
} catch (IOException e) {
e.printStackTrace();
throw new StorageEngineException("Unable to read VariantSource", e);
}
return source;
}
}
}