/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.hadoop.variant.converters.stats; import com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.util.Bytes; import org.opencb.biodata.models.feature.Genotype; import org.opencb.biodata.models.variant.protobuf.VariantProto; import org.opencb.biodata.models.variant.stats.VariantStats; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; import org.opencb.opencga.storage.hadoop.variant.converters.AbstractPhoenixConverter; import org.opencb.opencga.storage.hadoop.variant.index.phoenix.VariantPhoenixHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.HashMap; import java.util.Map; import java.util.NavigableMap; /** * Created on 07/07/16. * * @author Jacobo Coll <jacobo167@gmail.com> */ public class HBaseToVariantStatsConverter extends AbstractPhoenixConverter { private final Logger logger = LoggerFactory.getLogger(HBaseToVariantStatsConverter.class); public HBaseToVariantStatsConverter(GenomeHelper genomeHelper) { super(genomeHelper.getColumnFamily()); } // // public static Converter<Result, List<VariantStats>> fromHbase(GenomeHelper genomeHelper) { // HBaseToVariantStatsConverter converter = new HBaseToVariantStatsConverter(genomeHelper); // return converter::convert; // } // // public static Converter<ResultSet, List<VariantStats>> fromPhoenix(GenomeHelper genomeHelper) { // HBaseToVariantStatsConverter converter = new HBaseToVariantStatsConverter(genomeHelper); // return converter::convert; // } public Map<Integer, Map<Integer, VariantStats>> convert(Result result) { // String studyIdStr = String.valueOf(studyConfiguration.getStudyId()); NavigableMap<byte[], byte[]> map = result.getFamilyMap(columnFamily); Map<Integer, Map<Integer, VariantStats>> studyCohortStatsMap = new HashMap<>(); for (Map.Entry<byte[], byte[]> entry : map.entrySet()) { byte[] columnBytes = entry.getKey(); byte[] value = entry.getValue(); if (value != null && startsWith(columnBytes, VariantPhoenixHelper.STATS_PREFIX_BYTES) && endsWith(columnBytes, VariantPhoenixHelper.STATS_PROTOBUF_SUFIX_BYTES)) { String columnName = Bytes.toString(columnBytes); String[] split = columnName.split("_"); Integer studyId = getStudyId(split); Integer cohortId = getCohortId(split); Map<Integer, VariantStats> statsMap = studyCohortStatsMap.get(studyId); if (statsMap == null) { statsMap = new HashMap<>(); studyCohortStatsMap.put(studyId, statsMap); } statsMap.put(cohortId, convert(value)); } } return studyCohortStatsMap; } public Map<Integer, Map<Integer, VariantStats>> convert(ResultSet resultSet) { // String studyIdStr = String.valueOf(studyConfiguration.getStudyId()); Map<Integer, Map<Integer, VariantStats>> studyCohortStatsMap = new HashMap<>(); try { ResultSetMetaData metaData = resultSet.getMetaData(); for (int i = 1; i <= metaData.getColumnCount(); i++) { String columnName = metaData.getColumnName(i); byte[] value = resultSet.getBytes(i); if (value != null && columnName.startsWith(VariantPhoenixHelper.STATS_PREFIX) && columnName.endsWith(VariantPhoenixHelper.STATS_PROTOBUF_SUFIX)) { String[] split = columnName.split("_"); Integer studyId = getStudyId(split); Integer cohortId = getCohortId(split); Map<Integer, VariantStats> statsMap = studyCohortStatsMap.get(studyId); if (statsMap == null) { statsMap = new HashMap<>(); studyCohortStatsMap.put(studyId, statsMap); } statsMap.put(cohortId, convert(value)); } } } catch (SQLException e) { throw new RuntimeException(e); } return studyCohortStatsMap; } public Integer getStudyId(String[] split) { return Integer.valueOf(split[0]); } public Integer getCohortId(String[] split) { return Integer.valueOf(split[1]); } protected VariantStats convert(byte[] data) { VariantStats stats = new VariantStats(); try { VariantProto.VariantStats protoStats = VariantProto.VariantStats.parseFrom(data); stats.setMgf(protoStats.getMgf()); stats.setMgfGenotype(protoStats.getMgfGenotype()); stats.setMaf(protoStats.getMaf()); stats.setMafAllele(protoStats.getMafAllele()); stats.setAltAlleleCount(protoStats.getAltAlleleCount()); stats.setAltAlleleFreq(protoStats.getAltAlleleFreq()); stats.setRefAlleleCount(protoStats.getRefAlleleCount()); stats.setRefAlleleFreq(protoStats.getRefAlleleFreq()); Map<Genotype, Float> genotypesFreq = new HashMap<>(); for (Map.Entry<String, Integer> entry : protoStats.getGenotypesCount().entrySet()) { Genotype g = new Genotype(entry.getKey()); stats.addGenotype(g, entry.getValue(), false); Float freq = protoStats.getGenotypesFreq().get(entry.getKey()); if (freq != null) { genotypesFreq.put(g, freq); } } stats.setGenotypesFreq(genotypesFreq); stats.setMissingAlleles(protoStats.getMissingAlleles()); stats.setMissingGenotypes(protoStats.getMissingGenotypes()); } catch (InvalidProtocolBufferException e) { throw new RuntimeException(e); } return stats; } public boolean startsWith(byte[] bytes, byte[] startsWith) { if (bytes.length < startsWith.length) { return false; } for (int i = 0; i < startsWith.length; i++) { if (startsWith[i] != bytes[i]) { return false; } } return true; } public boolean endsWith(byte[] bytes, byte[] endsWith) { if (bytes.length < endsWith.length) { return false; } for (int i = endsWith.length - 1, f = bytes.length - 1; i >= 0; i--, f--) { if (endsWith[i] != bytes[f]) { return false; } } return true; } }