/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.hadoop.variant.adaptors;

import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.biodata.models.variant.protobuf.VcfMeta;
import org.opencb.biodata.models.variant.stats.VariantSourceStats;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.adaptors.VariantSourceDBAdaptor;
import org.opencb.opencga.storage.hadoop.utils.HBaseManager;
import org.opencb.opencga.storage.hadoop.variant.GenomeHelper;
import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine;
import org.opencb.opencga.storage.hadoop.variant.archive.ArchiveDriver;
import org.opencb.opencga.storage.hadoop.variant.archive.ArchiveHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.*;

/**
 * Created on 16/11/15.
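 *
 * <p>VariantSource metadata adaptor backed by the HBase archive table: each file's
 * VariantSource is stored as JSON in a column of the meta row, and the set of loaded
 * file ids is kept as a comma-separated summary in that same row.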
 *
 * @author Jacobo Coll <jacobo167@gmail.com>
 */
public class HadoopVariantSourceDBAdaptor implements VariantSourceDBAdaptor {

    protected static Logger logger = LoggerFactory.getLogger(HadoopVariantSourceDBAdaptor.class);

    private final GenomeHelper genomeHelper;
    private final ObjectMapper objectMapper;

    public HadoopVariantSourceDBAdaptor(Configuration configuration) {
        this(new GenomeHelper(configuration));
    }

    public HadoopVariantSourceDBAdaptor(Connection connection, Configuration configuration) {
        this(new GenomeHelper(configuration, connection));
    }

    public HadoopVariantSourceDBAdaptor(GenomeHelper genomeHelper) {
        this.genomeHelper = genomeHelper;
        this.objectMapper = new ObjectMapper();
        this.objectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
    }

    @Override
    public QueryResult<Long> count() {
        throw new UnsupportedOperationException();
    }

    public VcfMeta getVcfMeta(int studyId, int fileId, QueryOptions options) throws IOException {
        return new VcfMeta(getVariantSource(studyId, fileId, options));
    }

    /**
     * Note: throws NoSuchElementException if the requested file is not present in the archive table.
     */
    public VariantSource getVariantSource(int studyId, int fileId, QueryOptions options) throws IOException {
        return iterator(studyId, Collections.singletonList(fileId), options).next();
    }

    @Override
    public Iterator<VariantSource> iterator(Query query, QueryOptions options) throws IOException {
        int studyId = query.getInt(VariantSourceQueryParam.STUDY_ID.key());
        List<Integer> fileIds = query.getAsIntegerList(VariantSourceQueryParam.FILE_ID.key());
        return iterator(studyId, fileIds, options);
    }

    public Iterator<VariantSource> iterator(int studyId, QueryOptions options) throws IOException {
        return iterator(studyId, Collections.emptyList(), options);
    }

    public Iterator<VariantSource> iterator(int studyId, List<Integer> fileIds, QueryOptions options) throws IOException {
        String tableName = HadoopVariantStorageEngine.getArchiveTableName(studyId, genomeHelper.getConf());
        Get get = new Get(genomeHelper.getMetaRowKey());
        if (fileIds == null || fileIds.isEmpty()) {
            // No files requested: fetch every column of the meta row.
            get.addFamily(genomeHelper.getColumnFamily());
        } else {
            // Fetch only the columns of the requested files.
            for (Integer fileId : fileIds) {
                byte[] columnName = Bytes.toBytes(ArchiveHelper.getColumnName(fileId));
                get.addColumn(genomeHelper.getColumnFamily(), columnName);
            }
        }
        HBaseManager hBaseManager = getHBaseManager();
        if (!hBaseManager.act(tableName, (table, admin) -> admin.tableExists(table.getName()))) {
            return Collections.emptyIterator();
        }
        HBaseManager.HBaseTableFunction<Result> resultHBaseTableFunction = table -> table.get(get);
        Result result = hBaseManager.act(tableName, resultHBaseTableFunction);
        logger.debug("Get VcfMeta from: {}", fileIds);
        if (result.isEmpty()) {
            return Collections.emptyIterator();
        } else {
            return result.getFamilyMap(genomeHelper.getColumnFamily()).entrySet()
                    .stream()
                    // Skip the loaded-files summary column, whose qualifier equals the meta row key.
                    .filter(entry -> !Arrays.equals(entry.getKey(), genomeHelper.getMetaRowKey()))
                    .map(entry -> {
                        try {
                            return objectMapper.readValue(entry.getValue(), VariantSource.class);
                        } catch (IOException e) {
                            throw new UncheckedIOException("Problem with " + Bytes.toString(entry.getKey()), e);
                        }
                    })
                    .iterator();
        }
//        } catch (IOException e) {
//            throw new StorageEngineException("Error fetching VariantSources from study " + studyId
//                    + ", from table \"" + tableName + "\""
//                    + " for files " + fileIds, e);
//        }
    }

    @Override
    public QueryResult updateSourceStats(VariantSourceStats variantSourceStats, StudyConfiguration studyConfiguration,
                                         QueryOptions queryOptions) {
//        String tableName = HadoopVariantStorageEngine.getTableName(Integer.parseInt(variantSource.getStudyId()));
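        // Stats update is not implemented for the Hadoop backend yet: the method only
        // logs a warning and returns null, so callers must not rely on the result.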
        logger.warn("Unimplemented method!");
        return null;
    }

    protected HBaseManager getHBaseManager() {
        return this.genomeHelper.getHBaseManager();
    }

    public void updateVcfMetaData(VcfMeta meta) throws IOException {
        Objects.requireNonNull(meta);
        update(meta.getVariantSource());
    }

    @Override
    public void updateVariantSource(VariantSource variantSource) throws StorageEngineException {
        try {
            update(variantSource);
        } catch (IOException e) {
            throw new StorageEngineException("Unable to update VariantSource " + variantSource, e);
        }
    }

    public void update(VariantSource variantSource) throws IOException {
        Objects.requireNonNull(variantSource);
        String tableName = HadoopVariantStorageEngine.getArchiveTableName(Integer.parseInt(variantSource.getStudyId()),
                genomeHelper.getConf());
        if (ArchiveDriver.createArchiveTableIfNeeded(genomeHelper, tableName, getHBaseManager().getConnection())) {
            logger.info("Created table '{}' in HBase!", tableName);
        }
        Put put = wrapVcfMetaAsPut(variantSource, this.genomeHelper);
        getHBaseManager().act(tableName, table -> {
            table.put(put);
        });
    }

    public static Put wrapVcfMetaAsPut(VariantSource variantSource, GenomeHelper helper) {
        // Serialize the VariantSource as JSON into the meta row, using the file id as column qualifier.
        Put put = new Put(helper.getMetaRowKey());
        put.addColumn(helper.getColumnFamily(), Bytes.toBytes(variantSource.getFileId()),
                variantSource.getImpl().toString().getBytes());
        return put;
    }

    public void updateLoadedFilesSummary(int studyId, List<Integer> newLoadedFiles) throws IOException {
        String tableName = HadoopVariantStorageEngine.getArchiveTableName(studyId, genomeHelper.getConf());
        if (ArchiveDriver.createArchiveTableIfNeeded(genomeHelper, tableName, getHBaseManager().getConnection())) {
            logger.info("Created table '{}' in HBase!", tableName);
        }
        StringBuilder sb = new StringBuilder();
        for (Integer newLoadedFile : newLoadedFiles) {
            sb.append(",").append(newLoadedFile);
        }
        // Append the new file ids to the comma-separated summary stored in the meta row.
        Append append = new Append(genomeHelper.getMetaRowKey());
        append.add(genomeHelper.getColumnFamily(), genomeHelper.getMetaRowKey(), Bytes.toBytes(sb.toString()));
        getHBaseManager().act(tableName, table -> {
            table.append(append);
        });
    }

    @Override
    public void close() throws IOException {
        try {
            this.genomeHelper.close();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    public Set<Integer> getLoadedFiles(int studyId) throws IOException {
        String tableName = HadoopVariantStorageEngine.getArchiveTableName(studyId, genomeHelper.getConf());
        if (!getHBaseManager().tableExists(tableName)) {
            return new HashSet<>();
        } else {
            return getHBaseManager().act(tableName, table -> {
                Get get = new Get(genomeHelper.getMetaRowKey());
                get.addColumn(genomeHelper.getColumnFamily(), genomeHelper.getMetaRowKey());
                byte[] value = table.get(get).getValue(genomeHelper.getColumnFamily(), genomeHelper.getMetaRowKey());
                Set<Integer> set;
                if (value != null) {
                    set = new LinkedHashSet<>();
                    // The summary is a comma-separated list of file ids; tolerate stray
                    // brackets left over from appended list representations.
                    for (String s : Bytes.toString(value).split(",")) {
                        if (!s.isEmpty()) {
                            if (s.startsWith("[")) {
                                s = s.replaceFirst("\\[", "");
                            }
                            if (s.endsWith("]")) {
                                s = s.replaceAll("\\]", "");
                            }
                            set.add(Integer.parseInt(s));
                        }
                    }
                } else {
                    set = new LinkedHashSet<>();
                }
                return set;
            });
        }
    }

}
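// A minimal usage sketch (illustration only, not part of the original class). The study id (1),
// file id (5) and the HBaseConfiguration wiring below are assumptions:
//
//     Configuration conf = HBaseConfiguration.create();
//     HadoopVariantSourceDBAdaptor adaptor = new HadoopVariantSourceDBAdaptor(conf);
//     try {
//         Set<Integer> loaded = adaptor.getLoadedFiles(1);   // file ids recorded by updateLoadedFilesSummary
//         VariantSource source = adaptor.getVariantSource(1, 5, new QueryOptions());
//     } finally {
//         adaptor.close();
//     }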