/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
*/
package org.opencb.opencga.storage.hadoop.variant.archive;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.biodata.models.variant.protobuf.VcfMeta;
import org.opencb.biodata.models.variant.protobuf.VcfSliceProtos.VcfRecord;
import org.opencb.biodata.models.variant.protobuf.VcfSliceProtos.VcfSlice;
import org.opencb.biodata.models.variant.protobuf.VcfSliceProtos.VcfSlice.Builder;
import org.opencb.opencga.storage.hadoop.variant.GenomeHelper;
import org.opencb.opencga.storage.hadoop.variant.adaptors.HadoopVariantSourceDBAdaptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
/**
* @author Matthias Haimel mh719+git@cam.ac.uk.
*/
/**
 * Helper around {@link GenomeHelper} for reading and writing VCF slices in the HBase
 * archive table. Holds the per-file {@link VcfMeta} and the archive column qualifier
 * derived from the file's {@link VariantSource}.
 */
public class ArchiveHelper extends GenomeHelper {

    private static final Logger logger = LoggerFactory.getLogger(ArchiveHelper.class);

    // File metadata; written once during construction (AtomicReference gives safe publication).
    private final AtomicReference<VcfMeta> meta = new AtomicReference<>();

    // Archive column qualifier: the file id of the variant source, as bytes.
    private byte[] column;

    private final VcfRecordComparator vcfComparator = new VcfRecordComparator();

    /**
     * Builds a helper, loading the file metadata from the metadata store referenced by
     * the configuration.
     *
     * @param conf Hadoop configuration carrying the study id and archive file id
     * @throws IOException if the metadata store cannot be read or closed
     */
    public ArchiveHelper(Configuration conf) throws IOException {
        this(conf, null);
        int fileId = conf.getInt(ArchiveDriver.CONFIG_ARCHIVE_FILE_ID, 0);
        try (HadoopVariantSourceDBAdaptor metadataManager = new HadoopVariantSourceDBAdaptor(conf)) {
            this.meta.set(metadataManager.getVcfMeta(getStudyId(), fileId, null));
        }
        // NOTE(review): throws NPE if no metadata exists for (studyId, fileId) —
        // confirm callers guarantee the metadata has been registered beforehand.
        column = Bytes.toBytes(getColumnName(meta.get().getVariantSource()));
    }

    /**
     * Builds a helper from an existing {@link GenomeHelper} and explicit metadata.
     *
     * @param helper helper to copy genome configuration from
     * @param meta   file metadata (must not be null)
     */
    public ArchiveHelper(GenomeHelper helper, VcfMeta meta) {
        super(helper);
        this.meta.set(meta);
        column = Bytes.toBytes(getColumnName(meta.getVariantSource()));
    }

    /**
     * Builds a helper from a configuration and optional metadata.
     *
     * @param conf Hadoop configuration
     * @param meta file metadata; if null, {@link #getMeta()} and {@link #getColumn()}
     *             stay unset until populated by another constructor path
     */
    public ArchiveHelper(Configuration conf, VcfMeta meta) {
        super(conf);
        if (meta != null) {
            this.meta.set(meta);
            VariantSource variantSource = getMeta().getVariantSource();
            column = Bytes.toBytes(getColumnName(variantSource));
        }
    }

    /**
     * Builds a helper from an existing {@link GenomeHelper} and a variant source.
     *
     * @param helper helper to copy genome configuration from
     * @param source variant source the metadata is derived from
     * @throws IOException declared for API compatibility
     */
    public ArchiveHelper(GenomeHelper helper, VariantSource source) throws IOException {
        super(helper);
        this.meta.set(new VcfMeta(source));
        column = Bytes.toBytes(getColumnName(source));
    }

    /**
     * Get the archive column name for a file given a FileId.
     *
     * @param fileId Numerical file identifier
     * @return Column name or Qualifier
     */
    public static String getColumnName(int fileId) {
        return Integer.toString(fileId);
    }

    /**
     * Get the file id encoded in an archive column name.
     *
     * @param columnName Column name
     * @return Related fileId
     * @throws NumberFormatException if the column name is not a decimal integer
     */
    public static int getFileIdFromColumnName(byte[] columnName) {
        return Integer.parseInt(Bytes.toString(columnName));
    }

    /**
     * Get the archive column name for a file given a VariantSource.
     *
     * @param variantSource VariantSource
     * @return Column name or Qualifier
     */
    public static String getColumnName(VariantSource variantSource) {
        return variantSource.getFileId();
    }

    /** @return the file metadata set at construction (may be null if built without metadata). */
    public VcfMeta getMeta() {
        return meta.get();
    }

    /** @return the archive column qualifier bytes for this file. */
    public byte[] getColumn() {
        return column;
    }

    /**
     * Merges several slices for the same row key into one slice, with records sorted.
     *
     * @param key   expected row key; every input slice must map to it
     * @param input slices to merge
     * @return a single slice containing all records of the inputs, sorted
     * @throws InvalidProtocolBufferException declared for API compatibility
     * @throws IllegalStateException if a slice's chromosome/position does not match {@code key}
     * @deprecated retained for compatibility; no current callers in this class
     */
    @Deprecated
    public VcfSlice join(byte[] key, Iterable<VcfSlice> input) throws InvalidProtocolBufferException {
        Builder sliceBuilder = VcfSlice.newBuilder();
        boolean isFirst = true;
        List<VcfRecord> vcfRecordLst = new ArrayList<>();
        for (VcfSlice slice : input) {
            byte[] skey = generateBlockIdAsBytes(slice.getChromosome(), slice.getPosition());
            // Consistency check: every slice must belong to the target row.
            if (!Bytes.equals(skey, key)) { // Address doesn't match up -> should never happen
                throw new IllegalStateException(String.format("Row keys don't match up!!! %s != %s", Bytes.toString(key),
                        Bytes.toString(skey)));
            }
            if (isFirst) { // init new slice from the first input's coordinates
                sliceBuilder.setChromosome(slice.getChromosome()).setPosition(slice.getPosition());
                isFirst = false;
            }
            vcfRecordLst.addAll(slice.getRecordsList());
        }
        // Sort records; dump the offending records before rethrowing if the comparator
        // detects an inconsistency.
        try {
            Collections.sort(vcfRecordLst, getVcfComparator());
        } catch (IllegalArgumentException e) {
            logger.error("Issue with comparator: ");
            for (VcfRecord r : vcfRecordLst) {
                logger.error(r.toString());
            }
            throw e;
        }
        // Add all
        sliceBuilder.addAllRecords(vcfRecordLst);
        return sliceBuilder.build();
    }

    /**
     * Extracts the single {@link VcfSlice} stored in this helper's column of a {@link Put}.
     *
     * NOTE(review): private and not referenced anywhere in this class — candidate for removal.
     *
     * @param put mutation expected to carry exactly one cell in (columnFamily, column)
     * @return the decoded slice
     * @throws InvalidProtocolBufferException if the cell value is not a valid VcfSlice
     * @throws IllegalStateException if the put holds zero or more than one matching cell
     */
    private VcfSlice extractSlice(Put put) throws InvalidProtocolBufferException {
        List<Cell> cList = put.get(getColumnFamily(), getColumn());
        if (cList.isEmpty()) {
            // Fixed format string: was "row % in", which makes String.format throw
            // UnknownFormatConversionException instead of producing this message.
            throw new IllegalStateException(String.format("No data available for row %s in column %s in family %s!!!",
                    Bytes.toString(put.getRow()), Bytes.toString(getColumn()), Bytes.toString(getColumnFamily())));
        }
        if (cList.size() > 1) {
            throw new IllegalStateException(String.format("One entry instead of %s expected for row %s column %s in family %s!!!",
                    cList.size(), Bytes.toString(put.getRow()), Bytes.toString(getColumn()), Bytes.toString(getColumnFamily())));
        }
        Cell cell = cList.get(0);
        // Copy only the value region of the backing array before parsing.
        byte[] arr = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
        return VcfSlice.parseFrom(arr);
    }

    private VcfRecordComparator getVcfComparator() {
        return vcfComparator;
    }

    /**
     * Serializes a single record to its protobuf wire format.
     *
     * @param record record to serialize
     * @return protobuf-encoded bytes
     */
    public byte[] wrap(VcfRecord record) {
        return record.toByteArray();
    }

    /**
     * Wraps a slice as an HBase {@link Put} keyed by the slice's chromosome and position.
     *
     * @param slice slice to store
     * @return Put targeting this helper's archive column
     */
    public Put wrap(VcfSlice slice) {
        byte[] rowId = generateBlockIdAsBytes(slice.getChromosome(), slice.getPosition());
        return wrapAsPut(getColumn(), rowId, slice);
    }
}