/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.alignment.hbase;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.apache.hadoop.hbase.util.Bytes;
import org.opencb.biodata.formats.alignment.io.AlignmentDataReader;
import org.opencb.biodata.models.alignment.Alignment;
import org.opencb.biodata.models.alignment.AlignmentHeader;
import org.opencb.biodata.models.alignment.AlignmentRegion;
import org.opencb.commons.containers.map.ObjectMap;
import org.opencb.commons.io.DataWriter;
import org.opencb.opencga.core.auth.MonbaseCredentials;
import org.opencb.opencga.storage.alignment.AlignmentSummary;
import org.opencb.opencga.storage.alignment.AlignmentSummary.AlignmentRegionSummaryBuilder;
import org.opencb.opencga.storage.alignment.proto.AlignmentProto;
import org.opencb.opencga.storage.alignment.proto.AlignmentProtoHelper;
import org.xerial.snappy.Snappy;
/**
 * {@link DataWriter} that stores {@link AlignmentRegion}s in an HBase table.
 *
 * <p>Alignments are grouped into buckets, where the bucket of an alignment is
 * {@code alignment.getStart() / alignmentBucketSize}. For every written region one
 * {@link AlignmentSummary} row and one {@code AlignmentProto.AlignmentBucket} row per
 * bucket are queued as {@link Put}s (column family {@code columnFamilyName}, column
 * qualifier = sample name) and then sent to the table.
 *
 * <p>The alignments that fall in the last bucket of a region are not written right away:
 * they are kept in {@code alignmentsRemain} and merged with the next region, because the
 * next region may contribute more alignments to that same bucket. They are flushed when
 * the chromosome changes and when {@link #post()} is called.
 *
 * <p>Originally created by jcoll on 3/6/14.
 */
public class AlignmentRegionHBaseDataWriter implements DataWriter<AlignmentRegion> {

    private static final Logger logger = Logger.getLogger(AlignmentRegionHBaseDataWriter.class.getName());

    private final HBaseManager hBaseManager;
    private final String tableName;
    private final AlignmentDataReader reader;
    private HTable table;
    private final String sample;
    /** Puts queued by putSummary/putBucket, sent to HBase at the end of write() and post(). */
    private List<Put> puts;
    private AlignmentHeader header;
    private int alignmentBucketSize = AlignmentHBase.ALIGNMENT_BUCKET_SIZE;
    private String columnFamilyName = AlignmentHBase.ALIGNMENT_COLUMN_FAMILY_NAME;

    /** Alignments of the last (possibly incomplete) bucket, carried over to the next write. */
    List<Alignment> alignmentsRemain = new LinkedList<>();
    private int summaryIndex = 0;
    private String chromosome = "";

    // Alignments overlapped along several buckets.
    // Element k holds the overlap value for bucket (bucketsOverlappedStart + k).
    private LinkedList<Integer> numBucketsOverlapped = new LinkedList<>();
    private long bucketsOverlappedStart = 0;

    private boolean snappyCompress = false;

    // Summary fields, only reported by post()
    private long summarySpace = 0;
    private long alignmentsSpace = 0;
    private long bucketsWritten = 0;

    /**
     * @param props      properties handed to the {@code HBaseManager}
     * @param tableName  name of the HBase table to write into
     * @param sampleName sample name, used as column qualifier of every cell written
     * @param reader     reader that provides the {@link AlignmentHeader} in {@link #pre()}
     */
    public AlignmentRegionHBaseDataWriter(Properties props, String tableName, String sampleName, AlignmentDataReader reader) {
        hBaseManager = new HBaseManager(props);
        this.puts = new LinkedList<>();
        this.tableName = tableName;
        this.reader = reader;
        this.sample = sampleName;
    }

    /**
     * @param config     Hadoop configuration handed to the {@code HBaseManager}
     * @param tableName  name of the HBase table to write into
     * @param sampleName sample name, used as column qualifier of every cell written
     * @param reader     reader that provides the {@link AlignmentHeader} in {@link #pre()}
     */
    public AlignmentRegionHBaseDataWriter(Configuration config, String tableName, String sampleName, AlignmentDataReader reader) {
        hBaseManager = new HBaseManager(config);
        this.puts = new LinkedList<>();
        this.tableName = tableName;
        this.reader = reader;
        this.sample = sampleName;
    }

    /** Connects the underlying {@code HBaseManager}; returns its result. */
    @Override
    public boolean open() {
        return hBaseManager.connect();
    }

    /** Disconnects the underlying {@code HBaseManager}; returns its result. */
    @Override
    public boolean close() {
        return hBaseManager.disconnect();
    }

    /**
     * Obtains the table (created if needed by the manager), reads the header from the
     * reader and writes it to HBase.
     *
     * @return always {@code true}
     * @throws RuntimeException if the header could not be written
     */
    @Override
    public boolean pre() {
        table = hBaseManager.createTable(tableName, columnFamilyName); //Creates or get table
        header = reader.getHeader();
        writeGlobalHeader(header);
        return true;
    }

    /**
     * Flushes the alignments still held back from the last written region, sends all
     * pending puts and prints storage statistics to standard output.
     *
     * @return {@code false} if sending the puts failed with an {@link IOException},
     *         {@code true} otherwise
     */
    @Override
    public boolean post() {
        // Without this the last bucket of the last chromosome would never reach HBase:
        // write() always holds the final bucket back in alignmentsRemain.
        flushRemainingAlignments();
        try {
            table.put(puts);
            puts.clear();
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Could not write pending puts to HBase table " + tableName, e);
            return false;
        }
        System.out.println("---------------HBase Storage-----------------");
        System.out.println("Amoung space stored in Summaries : " + summarySpace);
        System.out.println("Amoung space stored in Alignments: " + alignmentsSpace);
        System.out.println("Buckets written : " + this.bucketsWritten);
        System.out.println("AlignmentBucketSize : " + this.alignmentBucketSize);
        System.out.println("Snappy compress : " + this.snappyCompress);
        return true;
    }

    /**
     * Writes one region.
     *
     * <ol>
     *   <li>Add remaining alignments from the previous region.</li>
     *   <li>Cut the alignments of the last bucket from the tail (kept for the next call).</li>
     *   <li>Split the rest into buckets.</li>
     *   <li>Create the summary.</li>
     *   <li>Create one {@code AlignmentProto.AlignmentBucket} per bucket.</li>
     *   <li>Write into HBase.</li>
     * </ol>
     *
     * @param alignmentRegion region to write
     * @return {@code false} if the first alignment of the region is {@code null} or the
     *         puts could not be sent to HBase; {@code true} otherwise
     */
    @Override
    public boolean write(AlignmentRegion alignmentRegion) {
        // Chromosome change (also true for the very first region, as chromosome starts as "").
        if (!chromosome.equals(alignmentRegion.getChromosome())) {
            // Held-back alignments belong to the previous chromosome: write them under it
            // before the chromosome field is updated.
            flushRemainingAlignments();
            chromosome = alignmentRegion.getChromosome();
            summaryIndex = 0; //Set to 0, only if the summary rowkey has the chromosome.
        }

        if (alignmentRegion.getAlignments().get(0) == null) {
            return false;
        }

        long currentBucket;
        if (alignmentsRemain.isEmpty()) { //Only empty when starts new chromosome.
            currentBucket = alignmentRegion.getAlignments().get(0).getStart() / alignmentBucketSize;
            numBucketsOverlapped.clear();
            numBucketsOverlapped.add(0); //First bucket has 0 overlapped;
            bucketsOverlappedStart = currentBucket;
        } else {
            currentBucket = alignmentsRemain.get(0).getStart() / alignmentBucketSize;
        }

        // Steps 1, 2, 3
        List<Alignment>[] alignmentBuckets = splitIntoAlignmentBuckets(alignmentRegion);

        // An empty array means every pending alignment lies in the same (still open) bucket;
        // everything was carried over to alignmentsRemain and there is nothing to store yet.
        if (alignmentBuckets.length > 0) {
            // Step 4
            AlignmentSummary summary = createSummary(alignmentBuckets);
            putSummary(summary);

            // Step 5: create protos
            for (List<Alignment> bucket : alignmentBuckets) {
                int overlapped = numBucketsOverlapped.remove((int) (currentBucket - bucketsOverlappedStart));
                assert((int) currentBucket - bucketsOverlappedStart == 0); //TODO jj: Replace
                bucketsOverlappedStart++;
                putBucket(AlignmentProtoHelper.toAlignmentBucketProto(bucket, summary, currentBucket * alignmentBucketSize, overlapped), currentBucket);
                currentBucket++;
            }
        }

        // Step 6: write into HBase
        try {
            table.put(puts);
            puts.clear();
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Could not write alignment buckets to HBase table " + tableName, e);
            return false;
        }
        return true;
    }

    /**
     * Queues a summary and a bucket for the alignments held in {@code alignmentsRemain}
     * (all of them belong to one bucket) and empties that list. Does nothing if no
     * alignments are pending. The rows are keyed with the current {@code chromosome}.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private void flushRemainingAlignments() {
        if (alignmentsRemain.isEmpty()) {
            return;
        }
        long remainBucket = alignmentsRemain.get(0).getStart() / alignmentBucketSize;
        List<Alignment>[] list = new List[1];
        list[0] = alignmentsRemain;
        AlignmentSummary summary = createSummary(list);
        putSummary(summary);
        int overlapped = numBucketsOverlapped.remove((int) (remainBucket - bucketsOverlappedStart));
        putBucket(AlignmentProtoHelper.toAlignmentBucketProto(alignmentsRemain, summary, remainBucket * alignmentBucketSize, overlapped), remainBucket);
        alignmentsRemain.clear();
    }

    /**
     * Writes the header row (row key from {@code AlignmentHBase.getHeaderRowKey()}) as a
     * JSON string, directly to the table (not through the pending puts list).
     *
     * @param header header to store, together with the bucket size and compression flag
     * @throws RuntimeException if HBase rejects the put; the I/O error is attached as cause
     */
    private void writeGlobalHeader(AlignmentHeader header) {
        String rowKey = AlignmentHBase.getHeaderRowKey();
        Put put = new Put(Bytes.toBytes(rowKey));
        AlignmentHBaseHeader hbHeader = new AlignmentHBaseHeader(header, alignmentBucketSize, snappyCompress);
        ObjectMap h = new ObjectMap("header", hbHeader);
        String headerString = h.toJson();
        put.add(Bytes.toBytes(columnFamilyName), Bytes.toBytes(sample), Bytes.toBytes(headerString));
        try {
            table.put(put);
        } catch (IOException ex) {
            logger.log(Level.SEVERE, "Could not write the alignment header to HBase table " + tableName, ex);
            throw new RuntimeException("[ERROR] while pushing the AlignmentHeader to HBase", ex);
        }
    }

    /**
     * Serializes the summary (Snappy-compressed when {@code snappyCompress} is set) and
     * queues it as a put under the summary row key of the current chromosome.
     *
     * @throws RuntimeException if serialization/compression fails with an {@link IOException}
     */
    private void putSummary(AlignmentSummary summary) {
        String rowKey = AlignmentHBase.getSummaryRowkey(chromosome, summary.getIndex());
        Put put = new Put(Bytes.toBytes(rowKey));
        byte[] compress;
        try {
            AlignmentProto.Summary toProto = summary.toProto();
            compress = toProto.toByteArray();
            if (snappyCompress) {
                compress = Snappy.compress(compress);
            }
            summarySpace += compress.length;
        } catch (IOException e) {
            throw new RuntimeException("[ERROR] this AlignmentProto.Summary " + rowKey + " could not be compressed by snappy", e);
        }
        put.add(Bytes.toBytes(columnFamilyName), Bytes.toBytes(sample), compress);
        puts.add(put);
    }

    /**
     * Serializes the bucket (Snappy-compressed when {@code snappyCompress} is set) and
     * queues it as a put under the bucket row key of the current chromosome.
     *
     * @param alignmentBucket bucket to store; {@code null} is silently ignored
     * @param index           bucket index used to build the row key
     * @throws RuntimeException if compression fails with an {@link IOException}
     */
    private void putBucket(AlignmentProto.AlignmentBucket alignmentBucket, long index) {
        if (alignmentBucket == null) {
            return;
        }
        String rowKey = AlignmentHBase.getBucketRowkey(chromosome, index);
        Put put = new Put(Bytes.toBytes(rowKey));
        byte[] compress = alignmentBucket.toByteArray();
        try {
            if (snappyCompress) {
                compress = Snappy.compress(compress);
            }
            alignmentsSpace += compress.length;
        } catch (IOException e) {
            throw new RuntimeException("[ERROR] this AlignmentProto.AlignmentBucket " + rowKey + " could not be compressed by snappy", e);
        }
        bucketsWritten++;
        put.add(Bytes.toBytes(columnFamilyName), Bytes.toBytes(sample), compress);
        puts.add(put);
    }

    /**
     * Merges the carried-over alignments with the region, holds back the alignments of the
     * last bucket in {@code alignmentsRemain} and distributes the rest into an array where
     * element {@code k} is the bucket {@code firstBucket + k}. Buckets that receive no
     * alignment stay {@code null}.
     *
     * @return the buckets ready to be written; an empty array when every pending alignment
     *         lies in the last bucket (all of them are then kept in {@code alignmentsRemain})
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private List<Alignment>[] splitIntoAlignmentBuckets(AlignmentRegion alignmentRegion) {
        // Step 1: add remaining alignments.
        List<Alignment> alignments = alignmentsRemain;
        alignments.addAll(alignmentRegion.getAlignments());

        // Step 2: cut the alignments of the last bucket from the tail.
        alignmentsRemain = new LinkedList<>();
        long tailBucket = alignments.get(alignments.size() - 1).getStart() / alignmentBucketSize;
        while (!alignments.isEmpty()
                && alignments.get(alignments.size() - 1).getStart() / alignmentBucketSize == tailBucket) {
            alignmentsRemain.add(0, alignments.remove(alignments.size() - 1)); //Remove last
        }
        if (alignments.isEmpty()) {
            // Everything is in one bucket, which may still grow with the next region.
            return new List[0];
        }

        long firstBucket = alignments.get(0).getStart() / alignmentBucketSize;
        long lastBucket = alignments.get(alignments.size() - 1).getStart() / alignmentBucketSize;

        // Step 3: split in AlignmentBuckets.
        List<Alignment>[] alignmentBuckets = new List[(int) (lastBucket - firstBucket + 1)];
        long bucketEnd = (firstBucket + 1) * alignmentBucketSize; // first position of the next bucket
        int i = 0;
        alignmentBuckets[i] = new LinkedList<Alignment>();
        for (Alignment alignment : alignments) {
            // ">=": a start exactly on the boundary already belongs to the next bucket,
            // consistent with the start / alignmentBucketSize rule used everywhere else.
            if (alignment.getStart() >= bucketEnd) {
                // Jump straight to the alignment's bucket; skipped buckets stay null.
                i = (int) (alignment.getStart() / alignmentBucketSize - firstBucket);
                bucketEnd = (1 + i + firstBucket) * alignmentBucketSize;
                alignmentBuckets[i] = new LinkedList<Alignment>();
            }
            alignmentBuckets[i].add(alignment);
        }
        return alignmentBuckets;
    }

    /**
     * Step 4: builds the summary for the given buckets, consuming the next summary index,
     * and extends {@code numBucketsOverlapped} with the overlaps produced by them.
     *
     * @param alignmentBuckets consecutive buckets; {@code null} entries are empty buckets
     * @return the built summary; an empty one if there is no non-null bucket
     */
    private AlignmentSummary createSummary(List<Alignment>[] alignmentBuckets) {
        AlignmentRegionSummaryBuilder builder = new AlignmentRegionSummaryBuilder(summaryIndex++);

        // Find the first bucket that has alignments, staying inside the array.
        int firstNonNull = 0;
        while (firstNonNull < alignmentBuckets.length && alignmentBuckets[firstNonNull] == null) {
            firstNonNull++;
        }
        if (firstNonNull == alignmentBuckets.length) {
            System.out.println("TODO! FIXME! PAINFULL!! AlignmentRegionHBaseDataWriter.createSummary(List<Alignment>[] alignmentBuckets)");
            return builder.build(); //Empty Summary
        }

        // Bucket number of array element 0: the loop below walks the array from index 0,
        // so step back by the position of the bucket the number was derived from.
        long currentBucket = alignmentBuckets[firstNonNull].get(0).getStart() / alignmentBucketSize - firstNonNull;
        long lastOverlappedPosition;
        for (List<Alignment> bucket : alignmentBuckets) {
            builder.addOverlappedBucket(numBucketsOverlapped.get((int) (currentBucket - bucketsOverlappedStart)));
            lastOverlappedPosition = 0;
            if (bucket != null) {
                for (Alignment alignment : bucket) {
                    builder.addAlignment(alignment);
                    lastOverlappedPosition = Math.max(lastOverlappedPosition, alignment.getUnclippedEnd());
                }
            } else {
                lastOverlappedPosition = currentBucket * alignmentBucketSize;
            }
            /*
             * Updates the array of overlaps.
             * An overlap of 2 in bucket 7 means that some alignment in
             * bucket 7-2=5 is long enough to end in bucket 7.
             *
             * NOTE(review): the size() < index guard looks inverted -- when it holds, the
             * get() inside would be out of range, and otherwise a new entry is appended even
             * if one already exists for that bucket. Left as is because write() relies on at
             * least one entry being appended per bucket; confirm the intended behaviour
             * against the reader before changing it.
             */
            for (int i = 0; i <= lastOverlappedPosition / alignmentBucketSize - currentBucket; i++) { // write bucket overlaps
                if (numBucketsOverlapped.size() < (currentBucket + i - bucketsOverlappedStart)) {
                    int previousOverlap = numBucketsOverlapped.get((int) (currentBucket + i - bucketsOverlappedStart)); // get overlap already stored
                    if (previousOverlap < i) {
                        numBucketsOverlapped.set((int) (currentBucket + i - bucketsOverlappedStart), i);
                    }
                } else {
                    numBucketsOverlapped.add(i);
                }
            }
            currentBucket++;
        }
        return builder.build();
    }

    /**
     * Writes the regions in order, stopping at the first one that fails.
     *
     * @return {@code false} as soon as one region could not be written, {@code true} otherwise
     */
    @Override
    public boolean write(List<AlignmentRegion> alignmentRegions) {
        for (AlignmentRegion alignmentRegion : alignmentRegions) {
            if (!write(alignmentRegion)) {
                return false;
            }
        }
        return true;
    }

    public String getSample() {
        return sample;
    }

    public String getTableName() {
        return tableName;
    }

    public String getColumnFamilyName() {
        return columnFamilyName;
    }

    /** Sets the column family; {@link #pre()} uses it when obtaining the table. */
    public void setColumnFamilyName(String columnFamilyName) {
        this.columnFamilyName = columnFamilyName;
    }
}