/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.mongodb.variant.converters;
import org.bson.Document;
import org.opencb.biodata.models.feature.Genotype;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.stats.VariantStats;
import org.opencb.commons.datastore.core.ComplexTypeConverter;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.metadata.StudyConfigurationManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
* @author Cristina Yenyxe Gonzalez Garcia <cyenyxe@ebi.ac.uk>
* @author Jose Miguel Mut Lopez <jmmut@ebi.ac.uk>
*/
public class DocumentToVariantStatsConverter implements ComplexTypeConverter<VariantStats, Document> {
public static final QueryOptions STUDY_CONFIGURATION_MANAGER_QUERY_OPTIONS = new QueryOptions()
.append(StudyConfigurationManager.CACHED, true).append(StudyConfigurationManager.READ_ONLY, true);
public DocumentToVariantStatsConverter() {
}
public DocumentToVariantStatsConverter(List<StudyConfiguration> studyConfigurations) {
this.studyConfigurations = studyConfigurations
.stream()
.collect(Collectors.toMap(StudyConfiguration::getStudyId, Function.identity()));
}
public DocumentToVariantStatsConverter(StudyConfigurationManager studyConfigurationManager) {
this.studyConfigurationManager = studyConfigurationManager;
}
public static final String COHORT_ID = "cid";
public static final String STUDY_ID = "sid";
// public static final String FILE_ID = "fid";
public static final String MAF_FIELD = "maf";
public static final String MGF_FIELD = "mgf";
public static final String MAFALLELE_FIELD = "mafAl";
public static final String MGFGENOTYPE_FIELD = "mgfGt";
public static final String MISSALLELE_FIELD = "missAl";
public static final String MISSGENOTYPE_FIELD = "missGt";
public static final String NUMGT_FIELD = "numGt";
protected static Logger logger = LoggerFactory.getLogger(DocumentToVariantStatsConverter.class);
private StudyConfigurationManager studyConfigurationManager = null;
private Map<Integer, StudyConfiguration> studyConfigurations;
private Map<Integer, String> studyIds = new HashMap<>();
private Map<Integer, Map<Integer, String>> studyCohortNames = new HashMap<>();
public void setStudyConfigurationManager(StudyConfigurationManager studyConfigurationManager) {
this.studyConfigurationManager = studyConfigurationManager;
}
@Override
public VariantStats convertToDataModelType(Document object) {
VariantStats stats = new VariantStats();
convertToDataModelType(object, stats);
return stats;
}
public void convertToDataModelType(Document object, VariantStats stats) {
// Basic fields
stats.setMaf(((Double) object.get(MAF_FIELD)).floatValue());
stats.setMgf(((Double) object.get(MGF_FIELD)).floatValue());
stats.setMafAllele((String) object.get(MAFALLELE_FIELD));
stats.setMgfGenotype((String) object.get(MGFGENOTYPE_FIELD));
stats.setMissingAlleles((int) object.get(MISSALLELE_FIELD));
stats.setMissingGenotypes((int) object.get(MISSGENOTYPE_FIELD));
// Genotype counts
int alleleNumber = 0;
int gtNumber = 0;
Document genotypes = (Document) object.get(NUMGT_FIELD);
HashMap<Genotype, Integer> genotypesCount = new HashMap<>();
for (Map.Entry<String, Object> o : genotypes.entrySet()) {
String genotypeStr = o.getKey().replace("-1", ".");
int value = (int) o.getValue();
Genotype g = new Genotype(genotypeStr);
genotypesCount.put(g, value);
alleleNumber += value * g.getAllelesIdx().length;
gtNumber += value;
}
stats.setGenotypesCount(genotypesCount);
HashMap<Genotype, Float> genotypesFreq = new HashMap<>();
for (Map.Entry<Genotype, Integer> entry : genotypesCount.entrySet()) {
genotypesFreq.put(entry.getKey(), entry.getValue().floatValue() / gtNumber);
}
stats.setGenotypesFreq(genotypesFreq);
int[] alleleCounts = {0, 0};
if (stats.getGenotypesCount().isEmpty()) {
if (stats.getRefAllele().equals(stats.getMafAllele())) {
stats.setRefAlleleFreq(stats.getMaf());
stats.setAltAlleleFreq(1 - stats.getMaf());
} else {
stats.setAltAlleleFreq(stats.getMaf());
stats.setRefAlleleFreq(1 - stats.getMaf());
}
} else {
for (Map.Entry<Genotype, Integer> entry : stats.getGenotypesCount().entrySet()) {
for (int i : entry.getKey().getAllelesIdx()) {
if (i == 0 || i == 1) {
alleleCounts[i] += entry.getValue();
}
}
}
stats.setRefAlleleCount(alleleCounts[0]);
stats.setAltAlleleCount(alleleCounts[1]);
if (alleleNumber == 0) {
stats.setRefAlleleFreq(0F);
stats.setAltAlleleFreq(0F);
} else {
stats.setRefAlleleFreq(alleleCounts[0] / ((float) alleleNumber));
stats.setAltAlleleFreq(alleleCounts[1] / ((float) alleleNumber));
}
}
}
@Override
public Document convertToStorageType(VariantStats vs) {
// Basic fields
Document mongoStats = new Document(MAF_FIELD, vs.getMaf());
mongoStats.append(MGF_FIELD, vs.getMgf());
mongoStats.append(MAFALLELE_FIELD, vs.getMafAllele());
mongoStats.append(MGFGENOTYPE_FIELD, vs.getMgfGenotype());
mongoStats.append(MISSALLELE_FIELD, vs.getMissingAlleles());
mongoStats.append(MISSGENOTYPE_FIELD, vs.getMissingGenotypes());
// Genotype counts
Document genotypes = new Document();
for (Map.Entry<Genotype, Integer> g : vs.getGenotypesCount().entrySet()) {
String genotypeStr = g.getKey().toString().replace(".", "-1");
genotypes.append(genotypeStr, g.getValue());
}
mongoStats.append(NUMGT_FIELD, genotypes);
return mongoStats;
}
/**
* As in mongo, a variant is {studies:[],stats:[]} but the data model is {studies:[stats:[]]} this method doesn't
* return anything. Instead, the sourceEntries within the variant is filled.
*
* @param cohortsStats List from mongo containing VariantStats.
* @param variant contains allele info to fill the VariantStats, and it sourceEntries will be filled.
*/
public void convertCohortsToDataModelType(List<Document> cohortsStats, Variant variant) {
for (Document vs : cohortsStats) {
VariantStats variantStats = new VariantStats();
variantStats.setRefAllele(variant.getReference());
variantStats.setAltAllele(variant.getAlternate());
convertToDataModelType(vs, variantStats);
if (variant != null) {
variantStats.setRefAllele(variant.getReference());
variantStats.setAltAllele(variant.getAlternate());
variantStats.setVariantType(variant.getType());
// Integer fid = (Integer) vs.get(FILE_ID);
String sid = getStudyName((Integer) vs.get(STUDY_ID));
String cid = getCohortName((Integer) vs.get(STUDY_ID), (Integer) vs.get(COHORT_ID));
StudyEntry sourceEntry = null;
if (sid != null && cid != null) {
sourceEntry = variant.getStudiesMap().get(sid);
if (sourceEntry != null) {
sourceEntry.setStats(cid, variantStats);
} else {
//This could happen if the study has been excluded
logger.trace("ignoring non present source entry studyId={}", sid);
}
} else {
logger.error("invalid mongo document: all studyId={}, cohortId={} should be present.", sid, cid);
}
}
}
}
/**
* converts all the cohortstats within the sourceEntries.
*
* @param sourceEntries for instance, you can pass in variant.getSourceEntries()
* @return list of VariantStats (as Documents)
*/
public List<Document> convertCohortsToStorageType(Map<String, StudyEntry> sourceEntries) {
List<Document> cohortsStatsList = new LinkedList<>();
for (String studyIdFileId : sourceEntries.keySet()) {
StudyEntry sourceEntry = sourceEntries.get(studyIdFileId);
List<Document> list = convertCohortsToStorageType(sourceEntry.getStats(),
Integer.parseInt(sourceEntry.getStudyId()));
cohortsStatsList.addAll(list);
}
return cohortsStatsList;
}
/**
* converts just some cohorts stats in one VariantSourceEntry.
*
* @param cohortStats for instance, you can pass in sourceEntry.stats()
* @param studyId of the source entry
* @return list of VariantStats (as Documents)
*/
public List<Document> convertCohortsToStorageType(Map<String, VariantStats> cohortStats, int studyId) {
List<Document> cohortsStatsList = new LinkedList<>();
VariantStats variantStats;
for (Map.Entry<String, VariantStats> variantStatsEntry : cohortStats.entrySet()) {
variantStats = variantStatsEntry.getValue();
Document variantStatsDocument = convertToStorageType(variantStats);
Integer cohortId = getCohortId(studyId, variantStatsEntry.getKey());
if (cohortId != null) {
variantStatsDocument.put(DocumentToVariantStatsConverter.COHORT_ID, (int) cohortId);
variantStatsDocument.put(DocumentToVariantStatsConverter.STUDY_ID, studyId);
cohortsStatsList.add(variantStatsDocument);
}
}
return cohortsStatsList;
}
private String getStudyName(int studyId) {
if (!studyIds.containsKey(studyId)) {
if (studyConfigurationManager == null) {
studyIds.put(studyId, Integer.toString(studyId));
} else {
QueryResult<StudyConfiguration> queryResult = studyConfigurationManager.getStudyConfiguration(studyId, null);
if (queryResult.getResult().isEmpty()) {
studyIds.put(studyId, Integer.toString(studyId));
} else {
studyIds.put(studyId, queryResult.first().getStudyName());
}
}
}
return studyIds.get(studyId);
}
private String getCohortName(int studyId, int cohortId) {
if (studyCohortNames.containsKey(studyId)) {
return studyCohortNames.get(studyId).get(cohortId);
} else {
Map<Integer, String> cohortNames = StudyConfiguration.inverseMap(getStudyConfiguration(studyId).getCohortIds());
studyCohortNames.put(studyId, cohortNames);
return cohortNames.get(cohortId);
}
}
private Integer getCohortId(int studyId, String cohortName) {
StudyConfiguration studyConfiguration = getStudyConfiguration(studyId);
Map<String, Integer> cohortIds = studyConfiguration.getCohortIds();
Integer integer = cohortIds.get(cohortName);
return integer;
}
private StudyConfiguration getStudyConfiguration(int studyId) {
if (studyConfigurations != null && studyConfigurations.containsKey(studyId)) {
return studyConfigurations.get(studyId);
} else {
return studyConfigurationManager.getStudyConfiguration(studyId, STUDY_CONFIGURATION_MANAGER_QUERY_OPTIONS).first();
}
}
}