/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
*/
package org.opencb.opencga.storage.hadoop.variant.index;
import com.google.common.collect.BiMap;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Mapper;
import org.opencb.biodata.models.feature.Genotype;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.opencga.storage.hadoop.variant.GenomeHelper;
import java.io.IOException;
import java.util.*;
/**
* Removes Sample data for a provided file from the Analysis (Variant) and the
* file data from the Archive Table.
*
* @author Matthias Haimel mh719+git@cam.ac.uk
*
*/
public class VariantTableDeletionMapReduce extends AbstractVariantTableMapReduce {
private Table analysisTable;
private Table archiveTable;
@Override
protected void setup(Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Mutation>.Context context) throws IOException,
InterruptedException {
super.setup(context);
Connection connection = getHelper().getHBaseManager().getConnection();
this.analysisTable = connection.getTable(TableName.valueOf(getHelper().getOutputTable()));
this.archiveTable = connection.getTable(TableName.valueOf(getHelper().getIntputTable()));
}
@Override
protected void doMap(VariantMapReduceContext ctx) throws IOException, InterruptedException {
List<Variant> updateLst = new ArrayList<>();
List<Variant> removeLst = new ArrayList<>();
BiMap<Integer, String> sampleIds = getStudyConfiguration().getSampleIds().inverse();
List<Cell> cells = GenomeHelper.getVariantColumns(ctx.getValue().rawCells());
List<Variant> analysisVar = parseCurrentVariantsRegion(cells, ctx.getChromosome());
ctx.getContext().getCounter(COUNTER_GROUP_NAME, "VARIANTS_FROM_ANALYSIS").increment(analysisVar.size());
getLog().info("Loaded {} variants ... ", analysisVar.size());
if (!analysisVar.isEmpty()) {
Variant tmpVar = analysisVar.get(0);
if (getLog().isDebugEnabled()) {
getLog().debug("Loaded variant from analysis table: " + tmpVar.toJson());
}
}
endTime("2 Unpack and convert input ANALYSIS variants (" + GenomeHelper.VARIANT_COLUMN_PREFIX + ")");
for (Variant var : analysisVar) {
// remove values for Sample
for (Integer sample : ctx.sampleIds) {
String sampleName = sampleIds.get(sample);
removeSample(var, sampleName);
}
// check if there are still variants to be committed
boolean hasVariants = containsVariants(var);
// save to commit list
if (hasVariants) {
updateLst.add(var);
} else {
removeLst.add(var);
}
}
List<VariantTableStudyRow> rows = new ArrayList<>();
deleteFromAnalysisTable(ctx.context, removeLst);
updateOutputTable(ctx.context, updateLst, rows, null);
updateArchiveTable(ctx.getCurrRowKey(), ctx.context, rows);
deleteFromArchiveTable(ctx.context, ctx.currRowKey, ctx.fileIds);
}
private void deleteFromArchiveTable(Context context, byte[] rowKey, Set<Integer> fileIds) throws IOException {
byte[] cf = getHelper().getColumnFamily();
Delete del = new Delete(rowKey); // TODO HBase time stamp specific delete -> more efficient
for (Integer fid : fileIds) {
del.addColumn(cf, Bytes.toBytes(fid.toString()));
}
context.getCounter(COUNTER_GROUP_NAME, "ARCHIVE_TABLE_ROW-DELETE_cells").increment(fileIds.size());
context.getCounter(COUNTER_GROUP_NAME, "ARCHIVE_TABLE_ROW-DELETE_commands").increment(1);
this.archiveTable.delete(del);
}
private void deleteFromAnalysisTable(Context context, List<Variant> removeLst) throws IOException, InterruptedException {
int studyId = getStudyConfiguration().getStudyId();
BiMap<String, Integer> idMapping = getStudyConfiguration().getSampleIds();
for (Variant variant : removeLst) {
VariantTableStudyRow row = new VariantTableStudyRow(variant, studyId, idMapping);
Delete delete = row.createDelete(getHelper());
// this.analysisTable.delete(delete);
context.write(new ImmutableBytesWritable(getHelper().getOutputTable()), delete);
context.getCounter(COUNTER_GROUP_NAME, "ANALYSIS_TABLE_ROW-DELETE").increment(1);
}
}
/**
* Remove Sample from Variant object.
* @param var Variant object.
* @param sampleName Sample name.
* @throws IllegalStateException If no Study or the Sample name is not found in the first Study of the Variant.
*/
private void removeSample(Variant var, String sampleName) throws IllegalStateException {
StudyEntry se = var.getStudies().get(0);
if (se == null) {
throw new IllegalStateException(String.format("No study found in variant {0}", var));
}
LinkedHashMap<String, Integer> samplesPos = se.getSamplesPosition();
Integer remPos = samplesPos.get(sampleName);
if (remPos == null) {
throw new IllegalStateException(String.format("Sample {0} not found for variant {1}", sampleName, var));
}
LinkedHashMap<String, Integer> updSamplesPos = new LinkedHashMap<>(samplesPos.size() - 1);
samplesPos.forEach((k, v) -> updSamplesPos.put(k, v < remPos ? v : v - 1)); // update positions
updSamplesPos.remove(sampleName);
List<List<String>> sd = new LinkedList<>(se.getSamplesData());
sd.remove(remPos.intValue());
se.setSamplesData(sd);
se.setSamplesPosition(updSamplesPos);
}
/**
* Checks if a Variant contains individuals with the ALT variant.
* @param var Variant object.
* @return boolean True, if one individual contains one ALT allele. Otherwise False e.g. only nocall, hom_ref or secondary alts.
*/
private boolean containsVariants(Variant var) {
StudyEntry se = var.getStudies().get(0);
Integer gtPos = se.getFormatPositions().get("GT");
List<List<String>> samplesData = se.getSamplesData();
for (List<String> data : samplesData) {
String gts = data.get(gtPos);
if (gts.contains(",")) {
for (String gt : gts.split(",")) {
if (hasAlt(gt)) {
return true; // Found at least one ALT genotype
}
}
} else {
if (hasAlt(gts)) {
return true; // Found at least one ALT genotype
}
}
}
// Only contains secondary alternate, HOM_REF, no-call, etc. -> remove!!!
return false;
}
private boolean hasAlt(String gt) {
int[] idxArr = new Genotype(gt).getAllelesIdx();
for (int anIdxArr : idxArr) {
if (anIdxArr == 1) {
return true;
}
}
return false;
}
}