/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.hadoop.variant.archive.mr;

import org.apache.avro.io.DatumWriter;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.VariantAvro;
import org.opencb.biodata.models.variant.protobuf.VcfSliceProtos.VcfSlice;
import org.opencb.biodata.tools.variant.converters.proto.VariantToProtoVcfRecord;
import org.opencb.biodata.tools.variant.converters.proto.VariantToVcfSliceConverter;
import org.opencb.opencga.storage.hadoop.variant.archive.ArchiveHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Maps {@link VariantAvro} records to {@link VcfSlice} protobuf slices, keyed by the
 * HBase row key of each slice the variant covers.
 *
 * @author Matthias Haimel mh719+git@cam.ac.uk
 */
public class VariantToVcfSliceMapper extends Mapper<AvroKey<VariantAvro>, NullWritable, ImmutableBytesWritable, VcfSliceWritable> {

    private final Logger logger = LoggerFactory.getLogger(VariantToVcfSliceMapper.class);
    // private final VariantToProtoVcfRecord converter = new VariantToProtoVcfRecord();
    private final VariantToVcfSliceConverter converter = new VariantToVcfSliceConverter();
    private final AtomicReference<ArchiveHelper> helper = new AtomicReference<>();
    private DatumWriter<VariantAvro> variantDatumWriter = new SpecificDatumWriter<>(VariantAvro.class);

    public VariantToVcfSliceMapper() {
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // The ArchiveHelper is built from the job Configuration and provides the
        // chunk size, archive column and row-key generation used below.
        this.helper.set(new ArchiveHelper(context.getConfiguration()));
        // converter.updateVcfMeta(getHelper().getMeta());
        super.setup(context);
    }

    public ArchiveHelper getHelper() {
        return helper.get();
    }

    public byte[] getColumn() {
        return getHelper().getColumn();
    }

    /**
     * Converts a {@link Variant} to a list of {@link VcfSlice}, mapping the variant position to the slice position. <br>
     * E.g. with chunk size 100, position 1234 maps to slice position 1200.
     */
    @Override
    protected void map(AvroKey<VariantAvro> key, NullWritable value, Context context) throws IOException, InterruptedException {
        VariantAvro varAvro = key.datum();
        Variant variant = new Variant(varAvro);
        context.getCounter("OPENCGA.HBASE", "VCF_MAP_COUNT").increment(1);

        long[] slicePositionArr = getCoveredSlicePositions(variant);
        for (long slicePos : slicePositionArr) {
            VcfSlice slice = converter.convert(Collections.singletonList(variant), (int) slicePos);
            ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
                    getHelper().generateBlockIdAsBytes(variant.getChromosome(), (int) slicePos));
            context.write(rowKey, new VcfSliceWritable(slice));
        }
    }

    /**
     * Returns the start positions of every slice overlapped by the variant, from the
     * slice containing its start to the slice containing its end. <br>
     * E.g. with chunk size 100, a variant spanning 1234-1334 yields {1200, 1300}.
     */
    private long[] getCoveredSlicePositions(Variant variant) {
        int chSize = getHelper().getChunkSize();
        long startChunk = VariantToProtoVcfRecord.getSlicePosition(variant.getStart(), chSize);
        long endChunk = VariantToProtoVcfRecord.getSlicePosition(variant.getEnd(), chSize);
        if (endChunk == startChunk) {
            return new long[]{startChunk};
        }
        int len = (int) ((endChunk - startChunk) / chSize) + 1;
        long[] ret = new long[len];
        for (int i = 0; i < len; ++i) {
            ret[i] = startChunk + (((long) i) * chSize);
        }
        return ret;
    }

    private String extractChromosome(Variant var) {
        return getHelper().standardChromosome(var.getChromosome());
    }

    public Logger getLogger() {
        return logger;
    }
}
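/*
 * Usage sketch (illustrative, not part of the original class): a minimal map-only
 * driver showing how this mapper might be wired into a MapReduce job reading an Avro
 * container file of VariantAvro records. The class name, job name, argument handling
 * and SequenceFile output here are assumptions for demonstration only; the real
 * OpenCGA pipeline configures the ArchiveHelper through the job Configuration and
 * writes the resulting slices to HBase.
 */
class VariantToVcfSliceJobSketch {

    public static void main(String[] args) throws Exception {
        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
        // ArchiveHelper reads its settings (e.g. chunk size, archive column) from the
        // job Configuration in setup(), so they must be populated before submission.
        org.apache.hadoop.mapreduce.Job job =
                org.apache.hadoop.mapreduce.Job.getInstance(conf, "variant-to-vcf-slice");
        job.setJarByClass(VariantToVcfSliceMapper.class);
        job.setMapperClass(VariantToVcfSliceMapper.class);
        job.setNumReduceTasks(0); // map-only in this sketch

        // Input: an Avro container file keyed by VariantAvro records, matching the
        // mapper's AvroKey<VariantAvro> input type.
        job.setInputFormatClass(org.apache.avro.mapreduce.AvroKeyInputFormat.class);
        org.apache.avro.mapreduce.AvroJob.setInputKeySchema(job, VariantAvro.getClassSchema());
        org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job,
                new org.apache.hadoop.fs.Path(args[0]));

        // Output: slice row key plus the protobuf-backed VcfSliceWritable.
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(VcfSliceWritable.class);
        job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class);
        org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job,
                new org.apache.hadoop.fs.Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}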