/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.alignment.hbase;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.protobuf.InvalidProtocolBufferException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.opencb.biodata.formats.alignment.io.AlignmentDataReader;
import org.opencb.biodata.models.alignment.Alignment;
import org.opencb.biodata.models.alignment.AlignmentHeader;
import org.opencb.biodata.models.feature.Region;
import org.opencb.commons.containers.map.ObjectMap;
import org.opencb.opencga.storage.alignment.AlignmentSummary;
import org.opencb.opencga.storage.alignment.proto.AlignmentProto;
import org.opencb.opencga.storage.alignment.proto.AlignmentProtoHelper;
import org.xerial.snappy.Snappy;
/**
 * {@link AlignmentDataReader} that reads {@link Alignment}s of one sample from an HBase table.
 *
 * <p>Usage follows the reader life cycle: {@link #open()}, {@link #pre()}, any number of
 * {@code read} calls, {@link #post()} and {@link #close()}. {@link #pre()} loads the stored
 * header and opens the first scanner. Alignments are read bucket by bucket, one scanner per
 * chromosome of the header's sequence dictionary, or a single scanner starting at the region
 * given through {@link #setRegion(Region)}.
 *
 * <p>Created by jcoll on 3/6/14.
 */
public class AlignmentHBaseDataReader implements AlignmentDataReader<Alignment> {

    private static final Logger LOGGER = Logger.getLogger(AlignmentHBaseDataReader.class.getName());

    private final HBaseManager hBaseManager;
    private final String tableName, sampleName;
    private final String columnFamilyName = AlignmentHBase.ALIGNMENT_COLUMN_FAMILY_NAME;

    private HTable table;
    private Region region;
    private boolean regionScannerSet = false;   // If region is set, only one scanner may be created.
    private int bucketSize;                     // Taken from the stored header in pre().
    private boolean snappyCompress;             // Taken from the stored header in pre().

    private AlignmentHBaseHeader hbHeader;
    private AlignmentHeader header;
    private Iterator<AlignmentHeader.SequenceRecord> sequenceDiccionaryIterator;

    /** Alignments decoded from the last row read and not yet handed to the caller. */
    private List<Alignment> readAlignments = new LinkedList<>();
    private String lastRowKey;
    private AlignmentSummary summary;
    private String chromosome = "0";
    /** Set once no further scanner can be opened; avoids polling a dead scanner. */
    private boolean finished = false;

    // Kept public for backward compatibility with existing callers.
    public ResultScanner scanner;

    public AlignmentHBaseDataReader(Configuration config, String tableName, String sampleName) {
        hBaseManager = new HBaseManager(config);
        this.tableName = tableName;
        this.sampleName = sampleName;
    }

    public AlignmentHBaseDataReader(Properties pro, String tableName, String sampleName) {
        hBaseManager = new HBaseManager(pro);
        this.tableName = tableName;
        this.sampleName = sampleName;
    }

    /**
     * Connects the underlying {@code HBaseManager}.
     *
     * @return the value returned by {@code HBaseManager.connect()}
     */
    @Override
    public boolean open() {
        return hBaseManager.connect();
    }

    /**
     * Closes the current scanner, if any, and disconnects the underlying {@code HBaseManager}.
     *
     * @return the value returned by {@code HBaseManager.disconnect()}
     */
    @Override
    public boolean close() {
        // Release the scanner before dropping the connection it depends on.
        if (scanner != null) {
            scanner.close();
        }
        return hBaseManager.disconnect();
    }

    /**
     * Loads the stored header for the sample and opens the first scanner.
     *
     * @return {@code true} if the header was read and decoded and a scanner could be opened;
     *         {@code false} otherwise
     */
    @Override
    public boolean pre() {
        table = hBaseManager.createTable(tableName, columnFamilyName);
        if (table == null) {
            LOGGER.log(Level.SEVERE, "Unable to obtain HBase table {0}", tableName);
            return false;
        }

        Result result;
        try {
            String headerRowKey = AlignmentHBase.getHeaderRowKey();
            Get get = new Get(Bytes.toBytes(headerRowKey));
            get.addColumn(Bytes.toBytes(columnFamilyName), Bytes.toBytes(sampleName));
            result = table.get(get);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Fail to get header from HBase table " + tableName, e);
            return false;
        }

        // Result.listCells() returns null when the row/column does not exist.
        List<Cell> cells = result.listCells();
        if (cells == null || cells.isEmpty()) {
            LOGGER.log(Level.SEVERE, "Missing header for sample {0} in HBase table {1}",
                    new Object[]{sampleName, tableName});
            return false;
        }

        try {
            String json = Bytes.toString(CellUtil.cloneValue(cells.get(0)));
            ObjectMap o = new ObjectMap();
            Map<String, AlignmentHBaseHeader> map = o.getJsonObjectMapper()
                    .readValue(json, new TypeReference<Map<String, AlignmentHBaseHeader>>() { });
            AlignmentHBaseHeader storedHeader = map.get("header");
            if (storedHeader == null) {
                LOGGER.log(Level.SEVERE, "Stored header JSON has no \"header\" entry in HBase table {0}", tableName);
                return false;
            }
            this.hbHeader = storedHeader;
            this.header = storedHeader.getHeader();
            this.bucketSize = storedHeader.getBucketSize();
            this.snappyCompress = storedHeader.isSnappyCompress();
        } catch (IOException ex) {
            // Without a header there is no sequence dictionary to scan; do not go on.
            LOGGER.log(Level.SEVERE, "Fail to decode header from HBase table " + tableName, ex);
            return false;
        }
        return advanceScanner();
    }

    @Override
    public boolean post() {
        return true;
    }

    /**
     * Reads a single alignment.
     *
     * @return a one-element list holding the result of {@link #readElem()}; the element is
     *         {@code null} whenever {@link #readElem()} returns {@code null}
     */
    @Override
    public List<Alignment> read() {
        return Arrays.asList(readElem());
    }

    /**
     * Returns the next alignment, loading and decoding further rows from HBase when the
     * internal buffer is empty.
     *
     * @return the next alignment, or {@code null} when there is nothing left to read, when a
     *         row could not be read or decoded, or when a region is set and the alignment
     *         starts after the end of that region
     * @throws RuntimeException if the summary referenced by a bucket cannot be loaded
     */
    public Alignment readElem() {
        rows:
        while (readAlignments.isEmpty()) {
            if (finished || scanner == null) {
                return null;
            }

            Result result;
            try {
                result = scanner.next();
            } catch (IOException ex) {
                LOGGER.log(Level.SEVERE, "Fail to read next row from HBase table " + tableName, ex);
                scanner.close();
                return null;
            }

            if (result == null) {
                // Current scanner is exhausted: move on to the next chromosome, if any.
                if (!advanceScanner()) {
                    return null;
                }
                continue;
            }

            List<Cell> cells = result.listCells();
            if (cells == null) {
                continue;   // Row without cells: nothing to decode.
            }

            for (Cell cell : cells) {
                lastRowKey = Bytes.toString(CellUtil.cloneRow(cell));
                String newChromosome = AlignmentHBase.getChromosomeFromRowkey(lastRowKey);
                if (!newChromosome.equals(chromosome)) {
                    // The scan ran past the current chromosome; restart on the next one.
                    scanner.close();
                    if (!advanceScanner()) {
                        return null;
                    }
                    continue rows;
                }

                AlignmentProto.AlignmentBucket bucket;
                try {
                    bucket = AlignmentProto.AlignmentBucket.parseFrom(uncompress(CellUtil.cloneValue(cell)));
                } catch (InvalidProtocolBufferException ex) {
                    LOGGER.log(Level.SEVERE, "Fail to decode bucket from proto : " + lastRowKey, ex);
                    return null;
                } catch (IOException ex) {
                    LOGGER.log(Level.SEVERE, "Fail to uncompress bucket : " + lastRowKey, ex);
                    return null;
                }

                int index = bucket.getSummaryIndex();
                if (summary == null || summary.getIndex() != index) {
                    getSummary(index);
                }
                readAlignments.addAll(AlignmentProtoHelper.toAlignmentList(bucket, summary, chromosome,
                        AlignmentHBase.getPositionFromRowkey(lastRowKey, bucketSize)));
            }
        }

        Alignment ret = readAlignments.remove(0);
        // Chromosome needs no test here: it was already checked against the row key.
        if (region != null && ret.getStart() > region.getEnd()) {
            return null;    // Out of the requested region.
        }
        return ret;
    }

    /**
     * Reads up to {@code batchSize} alignments.
     *
     * @param batchSize maximum number of {@link #readElem()} attempts
     * @return the non-null alignments obtained; may hold fewer than {@code batchSize}
     *         elements and is empty when nothing could be read
     */
    @Override
    public List<Alignment> read(int batchSize) { //TODO jj: Check
        List<Alignment> alignmentList = new LinkedList<>();
        for (int i = 0; i < batchSize; i++) {
            // Stop early once the data is exhausted instead of polling again and again.
            if (finished && readAlignments.isEmpty()) {
                break;
            }
            Alignment alignment = readElem();
            if (alignment != null) {
                alignmentList.add(alignment);
            }
        }
        return alignmentList;
    }

    @Override
    public AlignmentHeader getHeader() {
        return header;
    }

    public String getColumnFamilyName() {
        return columnFamilyName;
    }

    public String getTableName() {
        return tableName;
    }

    public void setRegion(Region region) {
        this.region = region;
    }

    /** Opens the next scanner and records in {@code finished} whether that was possible. */
    private boolean advanceScanner() {
        boolean opened = getNewScanner();
        finished = !opened;
        return opened;
    }

    /**
     * Closes the current scanner and opens a new one: at the start of the region when a
     * region is set (only once), otherwise at the start of the next chromosome of the
     * sequence dictionary.
     *
     * @return {@code true} if a new scanner was opened
     */
    private boolean getNewScanner() {
        if (header == null) {
            return false;   // pre() has not loaded the header.
        }
        if (sequenceDiccionaryIterator == null) {
            sequenceDiccionaryIterator = header.getSequenceDiccionary().iterator();
        }
        // Always release the previous scanner, also when no new one will be opened.
        if (scanner != null) {
            scanner.close();
        }
        if (!sequenceDiccionaryIterator.hasNext()) {
            return false;
        }

        int startPosition = 0;
        if (region != null) {
            if (regionScannerSet) {
                return false;
            }
            chromosome = region.getChromosome();
            startPosition = region.getStart();
            regionScannerSet = true;
        } else {
            chromosome = sequenceDiccionaryIterator.next().getSequenceName();
        }
        summary = null;     // Summaries are looked up per chromosome.

        String bucketRowkey = AlignmentHBase.getBucketRowkey(chromosome, startPosition, bucketSize);
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes(bucketRowkey));
        scan.addColumn(Bytes.toBytes(columnFamilyName), Bytes.toBytes(sampleName));
        //scan.setMaxVersions(); test this
        try {
            scanner = table.getScanner(scan);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Fail to open scanner at row " + bucketRowkey, ex);
            return false;
        }
        return true;
    }

    /**
     * Loads the summary with the given index for the current chromosome into {@code summary}.
     *
     * @param summaryIndex index of the summary to load
     * @throws RuntimeException if the summary row cannot be fetched, is missing, or cannot
     *         be uncompressed or decoded
     */
    private void getSummary(int summaryIndex) {
        String rowKey = AlignmentHBase.getSummaryRowkey(chromosome, summaryIndex);
        Result result;
        try {
            Get get = new Get(Bytes.toBytes(rowKey));
            get.addColumn(Bytes.toBytes(columnFamilyName), Bytes.toBytes(sampleName));
            result = table.get(get);
        } catch (IOException e) {
            throw new RuntimeException("[ERROR] Fail to get Summary from HBase : " + rowKey, e);
        }

        // Result.listCells() returns null when the row/column does not exist.
        List<Cell> cells = result.listCells();
        if (cells == null || cells.isEmpty()) {
            throw new RuntimeException("[ERROR] Missing Summary : " + rowKey);
        }

        try {
            AlignmentProto.Summary protoSummary =
                    AlignmentProto.Summary.parseFrom(uncompress(CellUtil.cloneValue(cells.get(0))));
            this.summary = new AlignmentSummary(protoSummary, summaryIndex);
        } catch (InvalidProtocolBufferException ex) {
            throw new RuntimeException("[ERROR] Fail to decode Summary from proto : " + rowKey, ex);
        } catch (IOException ex) {
            throw new RuntimeException("[ERROR] Fail to uncompress Summary : " + rowKey, ex);
        }
    }

    /** Returns {@code value} Snappy-uncompressed when the header says the data is compressed. */
    private byte[] uncompress(byte[] value) throws IOException {
        return snappyCompress ? Snappy.uncompress(value) : value;
    }
}