/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.hadoop.variant;

import org.apache.hadoop.conf.Configuration;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.opencga.storage.core.config.StorageConfiguration;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.opencb.opencga.storage.hadoop.auth.HBaseCredentials;
import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor;
import org.opencb.opencga.storage.hadoop.variant.archive.ArchiveDriver;
import org.opencb.opencga.storage.hadoop.variant.executors.MRExecutor;
import org.slf4j.LoggerFactory;

import java.net.URI;

import static org.opencb.opencga.storage.core.variant.VariantStorageEngine.Options;
import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.HADOOP_BIN;

/**
 * Created on 31/03/16.
 *
 * @author Jacobo Coll <jacobo167@gmail.com>
 */
public class HadoopVariantStoragePipeline extends AbstractHadoopVariantStoragePipeline {

    public HadoopVariantStoragePipeline(
            StorageConfiguration configuration,
            String storageEngineId,
            VariantHadoopDBAdaptor dbAdaptor,
            MRExecutor mrExecutor,
            Configuration conf,
            HBaseCredentials archiveCredentials,
            VariantReaderUtils variantReaderUtils,
            ObjectMap options) {
        super(configuration, storageEngineId, LoggerFactory.getLogger(HadoopVariantStoragePipeline.class),
                dbAdaptor, variantReaderUtils, options, archiveCredentials, mrExecutor, conf);
    }

    protected void loadArch(URI input) throws StorageEngineException {
        int studyId = getStudyId();
        // The transformed file is accompanied by a metadata file describing the VCF header.
        URI vcfMeta = URI.create(VariantReaderUtils.getMetaFromTransformedFile(input.toString()));
        int fileId = options.getInt(Options.FILE_ID.key());

        // Build the "hadoop jar" command line that launches the ArchiveDriver MapReduce job.
        String hadoopRoute = options.getString(HADOOP_BIN, "hadoop");
        String jar = getJarWithDependencies();
        Class<?> execClass = ArchiveDriver.class;
        String executable = hadoopRoute + " jar " + jar + " " + execClass.getName();
        String args = ArchiveDriver.buildCommandLineArgs(input, vcfMeta, archiveTableCredentials.toString(),
                archiveTableCredentials.getTable(), studyId, fileId, options);

        long startTime = System.currentTimeMillis();
        logger.info("------------------------------------------------------");
        logger.info("Loading file {} into archive table '{}'", fileId, archiveTableCredentials.getTable());
        logger.debug(executable + " " + args);
        logger.info("------------------------------------------------------");
        // Run the MapReduce job through the configured executor and check its exit status.
        int exitValue = mrExecutor.run(executable, args);
        logger.info("------------------------------------------------------");
        logger.info("Exit value: {}", exitValue);
        logger.info("Total time: {}s", (System.currentTimeMillis() - startTime) / 1000.0);

        if (exitValue != 0) {
            throw new StorageEngineException("Error loading file " + input + " into archive table \""
                    + archiveTableCredentials.getTable() + "\"");
        }
    }

    @Override
    protected boolean needLoadFromHdfs() {
        return true;
    }

}