/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.hadoop.variant.converters.stats;
import org.apache.hadoop.hbase.client.Put;
import org.opencb.biodata.models.variant.protobuf.VariantProto;
import org.opencb.biodata.models.variant.stats.VariantStats;
import org.opencb.biodata.tools.variant.converters.Converter;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.stats.VariantStatsWrapper;
import org.opencb.opencga.storage.hadoop.variant.GenomeHelper;
import org.opencb.opencga.storage.hadoop.variant.converters.AbstractPhoenixConverter;
import org.opencb.opencga.storage.hadoop.variant.index.phoenix.VariantPhoenixHelper;
import org.opencb.opencga.storage.hadoop.variant.index.phoenix.PhoenixHelper.Column;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * Converts the per-cohort statistics of a variant ({@link VariantStatsWrapper}) into an HBase {@link Put}.
 * <p>
 * The row key is generated by the {@link GenomeHelper} from the wrapper's chromosome and position and the
 * reference/alternate alleles of the first cohort's statistics. For every cohort in the wrapper three cells
 * are added to that row: the MAF value, the MGF value and the statistics serialized as a
 * {@link VariantProto.VariantStats} protobuf message.
 * <p>
 * Created on 07/07/16.
 *
 * @author Jacobo Coll &lt;jacobo167@gmail.com&gt;
 */
public class VariantStatsToHBaseConverter extends AbstractPhoenixConverter implements Converter<VariantStatsWrapper, Put> {

    private final GenomeHelper genomeHelper;
    private final StudyConfiguration studyConfiguration;
    private final int studyId;
    private final Logger logger = LoggerFactory.getLogger(VariantStatsToHBaseConverter.class);

    /**
     * Creates a converter bound to one study.
     *
     * @param genomeHelper       provides the column family and generates the variant row keys
     * @param studyConfiguration supplies the study id and the cohort name to cohort id mapping
     */
    public VariantStatsToHBaseConverter(GenomeHelper genomeHelper, StudyConfiguration studyConfiguration) {
        super(genomeHelper.getColumnFamily());
        this.genomeHelper = genomeHelper;
        this.studyConfiguration = studyConfiguration;
        this.studyId = studyConfiguration.getStudyId();
    }

    /**
     * Builds the {@link Put} holding the statistics of every cohort contained in the given wrapper.
     *
     * @param variantStatsWrapper statistics of a single variant, keyed by cohort name
     * @return the {@link Put} for the variant row, or {@code null} if the wrapper has no cohort statistics
     * @throws IllegalStateException if a cohort name in the wrapper is not registered in the study configuration
     */
    @Override
    public Put convert(VariantStatsWrapper variantStatsWrapper) {
        Map<String, VariantStats> cohortStats = variantStatsWrapper.getCohortStats();
        if (cohortStats == null || cohortStats.isEmpty()) {
            return null;
        }

        // Chromosome and position come from the wrapper; the alleles are taken from the first cohort's stats.
        VariantStats firstStats = cohortStats.values().iterator().next();
        byte[] row = genomeHelper.generateVariantRowKey(
                variantStatsWrapper.getChromosome(), variantStatsWrapper.getPosition(),
                firstStats.getRefAllele(), firstStats.getAltAllele());

        Put put = new Put(row);
        for (Map.Entry<String, VariantStats> entry : cohortStats.entrySet()) {
            String cohortName = entry.getKey();
            Integer cohortId = studyConfiguration.getCohortIds().get(cohortName);
            if (cohortId == null) {
                // Fail with context instead of an anonymous NullPointerException while resolving the columns.
                throw new IllegalStateException("Cohort \"" + cohortName + "\" not found in the configuration of study "
                        + studyId);
            }

            Column mafColumn = VariantPhoenixHelper.getMafColumn(studyId, cohortId);
            Column mgfColumn = VariantPhoenixHelper.getMgfColumn(studyId, cohortId);
            Column statsColumn = VariantPhoenixHelper.getStatsColumn(studyId, cohortId);

            VariantStats stats = entry.getValue();
            add(put, mafColumn, stats.getMaf());
            add(put, mgfColumn, stats.getMgf());
            add(put, statsColumn, buildStatsProto(stats).toByteArray());
        }
        return put;
    }

    /**
     * Copies the statistics of one cohort into a {@link VariantProto.VariantStats} message.
     * <p>
     * MAF and MGF are only set when their allele/genotype is present, and the genotype maps are only
     * copied when they are not {@code null}. Genotype keys are stored using their {@code toString()} form.
     *
     * @param stats statistics of a single cohort
     * @return the protobuf message with the values copied from {@code stats}
     */
    private static VariantProto.VariantStats buildStatsProto(VariantStats stats) {
        VariantProto.VariantStats.Builder builder = VariantProto.VariantStats.newBuilder()
                .setAltAlleleFreq(stats.getAltAlleleFreq())
                .setAltAlleleCount(stats.getAltAlleleCount())
                .setRefAlleleFreq(stats.getRefAlleleFreq())
                .setRefAlleleCount(stats.getRefAlleleCount())
                .setMissingAlleles(stats.getMissingAlleles())
                .setMissingGenotypes(stats.getMissingGenotypes());

        if (stats.getMafAllele() != null) {
            builder.setMaf(stats.getMaf())
                    .setMafAllele(stats.getMafAllele());
        }

        if (stats.getMgfGenotype() != null) {
            builder.setMgf(stats.getMgf())
                    .setMgfGenotype(stats.getMgfGenotype());
        }

        if (stats.getGenotypesCount() != null) {
            Map<String, Integer> counts = new HashMap<>(stats.getGenotypesCount().size());
            stats.getGenotypesCount().forEach((genotype, count) -> counts.put(genotype.toString(), count));
            builder.putAllGenotypesCount(counts);
        }

        if (stats.getGenotypesFreq() != null) {
            Map<String, Float> freqs = new HashMap<>(stats.getGenotypesFreq().size());
            stats.getGenotypesFreq().forEach((genotype, freq) -> freqs.put(genotype.toString(), freq));
            builder.putAllGenotypesFreq(freqs);
        }

        return builder.build();
    }
}