package org.opencb.opencga.storage.hadoop.variant.stats;
import com.google.common.collect.BiMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.opencga.storage.core.variant.stats.VariantStatisticsCalculator;
import org.opencb.opencga.storage.core.variant.stats.VariantStatsWrapper;
import org.opencb.opencga.storage.hadoop.variant.AbstractHBaseMapReduce;
import org.opencb.opencga.storage.hadoop.variant.GenomeHelper;
import org.opencb.opencga.storage.hadoop.variant.converters.stats.VariantStatsToHBaseConverter;
import org.opencb.opencga.storage.hadoop.variant.index.AbstractVariantTableMapReduce;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Mapper that converts each HBase result row into a {@link Variant}, calculates statistics for the
 * configured cohorts and writes the converted statistics back as {@link Put} operations.
 *
 * Created by mh719 on 07/12/2016.
 */
public class AnalysisStatsMapper extends AbstractHBaseMapReduce<ImmutableBytesWritable, Put> {

    /** Calculator producing the statistics of each variant; created in {@link #setup(Context)}. */
    private VariantStatisticsCalculator variantStatisticsCalculator;

    /** Study id of the current study configuration, rendered as a string. */
    private String studyId;

    /** Row key generated from the default metadata row key; rows starting with it are skipped by the mapper. */
    private byte[] studiesRow;

    /** Samples of each cohort, keyed by the cohort key found in the study configuration's cohort ids. */
    private Map<String, Set<String>> samples;

    /** Converter turning calculated statistics into HBase {@link Put}s. */
    private VariantStatsToHBaseConverter variantStatsToHBaseConverter;

    /**
     * Prepares the converters, the statistics calculator and the cohort to samples lookup used by
     * {@link #map(ImmutableBytesWritable, Result, Context)}.
     *
     * @param context mapper context, forwarded to the parent setup
     * @throws IOException          propagated from the parent setup
     * @throws InterruptedException propagated from the parent setup
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        this.getHbaseToVariantConverter().setSimpleGenotypes(true);
        studiesRow = getHelper().generateVariantRowKey(GenomeHelper.DEFAULT_METADATA_ROW_KEY, 0);

        variantStatisticsCalculator = new VariantStatisticsCalculator(true);
        this.variantStatisticsCalculator.setAggregationType(VariantSource.Aggregation.NONE, null);

        this.studyId = Integer.valueOf(this.getStudyConfiguration().getStudyId()).toString();
        variantStatsToHBaseConverter = new VariantStatsToHBaseConverter(this.getHelper(), this.getStudyConfiguration());

        // Inverse view of the study's sample ids: numeric sample id -> sample string.
        BiMap<Integer, String> sampleIds = getStudyConfiguration().getSampleIds().inverse();

        // For each entry of the cohort ids, look up the cohort's sample ids and translate each one
        // through the inverse sample map, ending up with: cohort key -> set of sample strings.
        this.samples = this.getStudyConfiguration().getCohortIds().entrySet().stream()
                .map(cohort -> new MutablePair<>(
                        cohort.getKey(),
                        this.getStudyConfiguration().getCohorts().get(cohort.getValue()).stream()
                                .map(sampleIds::get)
                                .collect(Collectors.toSet())))
                .collect(Collectors.toMap(MutablePair::getKey, MutablePair::getValue));

        // Log the samples that will be used for each cohort.
        for (Map.Entry<String, Set<String>> cohort : this.samples.entrySet()) {
            getLog().info("Calculate {} stats for cohort {} with {}",
                    studyId, cohort.getKey(), StringUtils.join(cohort.getValue(), ","));
        }
    }

    /**
     * Calculates the statistics of the variant stored in one row and writes every resulting non-null
     * {@link Put} under the incoming key. Rows starting with the metadata row key are ignored.
     *
     * @param key     key of the current row, reused as output key
     * @param value   HBase result holding the variant row
     * @param context mapper context receiving the output and the counters
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws IllegalStateException if processing the row raises an {@link IllegalStateException};
     *                               it is rethrown with the row key (in hex) added to the message
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        if (Bytes.startsWith(value.getRow(), this.studiesRow)) {
            return; // ignore _METADATA row
        }
        try {
            Variant variant = this.getHbaseToVariantConverter().convert(value);
            List<VariantStatsWrapper> statsWrappers = this.variantStatisticsCalculator.calculateBatch(
                    Collections.singletonList(variant), this.studyId, "notused", this.samples);

            boolean anyWritten = false;
            for (VariantStatsWrapper statsWrapper : statsWrappers) {
                Put put = this.variantStatsToHBaseConverter.convert(statsWrapper);
                if (put == null) {
                    continue; // nothing to store for this wrapper
                }
                context.write(key, put);
                anyWritten = true;
                context.getCounter(AbstractVariantTableMapReduce.COUNTER_GROUP_NAME, "stats.put").increment(1);
            }

            // Count the variant once, regardless of how many puts were written for it.
            if (anyWritten) {
                context.getCounter(AbstractVariantTableMapReduce.COUNTER_GROUP_NAME, "variants").increment(1);
            }
        } catch (IllegalStateException e) {
            throw new IllegalStateException("Problem with row [hex:" + Bytes.toHex(key.copyBytes()) + "]", e);
        }
    }
}