/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.alignment.hbase; import java.io.IOException; import java.io.InterruptedIOException; import java.util.LinkedList; import java.util.List; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException; import org.apache.hadoop.hbase.util.Bytes; import org.opencb.biodata.models.alignment.AlignmentRegion; import org.opencb.biodata.models.alignment.stats.MeanCoverage; import org.opencb.biodata.models.alignment.stats.RegionCoverage; import org.opencb.commons.io.DataWriter; import org.opencb.opencga.storage.alignment.proto.AlignmentProto; /** * Created with IntelliJ IDEA. * User: jcoll * Date: 2/20/14 * Time: 4:58 PM * To change this template use File | Settings | File Templates. */ public class AlignmentRegionCoverageHBaseDataWriter implements DataWriter<AlignmentRegion> { private final HBaseManager hBaseManager; private HTable table; private final String tableName; private final String sample; private String columnFamilyName = AlignmentHBase.ALIGNMENT_COVERAGE_COLUMN_FAMILY_NAME; private List<Put> puts; private int bucketSize = AlignmentHBase.ALIGNMENT_BUCKET_SIZE; private AlignmentProto.Coverage.Builder coverageBuilder; private long coverageStart; private long coverageEnd; private String chromosome = ""; private int a, c, g, t; //Counters for null values in coverage. public AlignmentRegionCoverageHBaseDataWriter(Configuration config, String tableName, String sampleName) { this.puts = new LinkedList<>(); this.tableName = tableName; this.sample = sampleName; this.hBaseManager = new HBaseManager(config); } public AlignmentRegionCoverageHBaseDataWriter(Properties props, String tableName, String sampleName) { this.puts = new LinkedList<>(); this.tableName = tableName; this.sample = sampleName; this.hBaseManager = new HBaseManager(props); } // public AlignmentRegionCoverageHBaseDataWriter(MonbaseCredentials credentials, String tableName) { // // HBase configuration // config = HBaseConfiguration.create(); // config.set("hbase.master", credentials.getHbaseMasterHost() + ":" + credentials.getHbaseMasterPort()); // config.set("hbase.zookeeper.quorum", credentials.getHbaseZookeeperQuorum()); // config.set("hbase.zookeeper.property.clientPort", String.valueOf(credentials.getHbaseZookeeperClientPort())); // // this.puts = new LinkedList<>(); // this.tableName = tableName; // } @Override public boolean open() { return hBaseManager.connect(); } @Override public boolean close() { return hBaseManager.disconnect(); } @Override public boolean pre() { table = hBaseManager.createTable(tableName,columnFamilyName); //Creates or get table return true; } @Override public boolean post() { if(coverageBuilder != null){ putCoverage(); } try { table.put(puts); puts.clear(); return true; } catch (InterruptedIOException ex) { Logger.getLogger(AlignmentRegionCoverageHBaseDataWriter.class.getName()).log(Level.SEVERE, null, ex); } catch (RetriesExhaustedWithDetailsException ex) { Logger.getLogger(AlignmentRegionCoverageHBaseDataWriter.class.getName()).log(Level.SEVERE, null, ex); } return false; } @Override public boolean write(AlignmentRegion alignmentRegion) { //System.out.println("Write " + alignmentRegion.getStart() + "Elements " + alignmentRegion.getCoverage().getAll().length); RegionCoverage regionCoverage = alignmentRegion.getCoverage(); if(coverageBuilder == null){ this.initBuilder(regionCoverage, 0); } //Check if it's the same range that if(regionCoverage.getStart() > coverageEnd || !regionCoverage.getChromosome().equals(chromosome)){ this.putCoverage(); this.initBuilder(regionCoverage, 0); } //write RegionCoverage per nucleotide for(int i = 0; i < regionCoverage.getAll().length; i++){ if(regionCoverage.getStart()+i > coverageEnd){ this.putCoverage(); this.initBuilder(regionCoverage, i); } if (regionCoverage.getA()[i] != 0) { coverageBuilder.addA(a); coverageBuilder.addA(regionCoverage.getA()[i]); a = 0; } else { a++; } if (regionCoverage.getC()[i] != 0) { coverageBuilder.addC(c); coverageBuilder.addC(regionCoverage.getC()[i]); c=0; } else { c++; } if (regionCoverage.getG()[i] != 0) { coverageBuilder.addG(g); coverageBuilder.addG(regionCoverage.getG()[i]); g=0; } else { g++; }if (regionCoverage.getT()[i] != 0) { coverageBuilder.addT(t); coverageBuilder.addT(regionCoverage.getT()[i]); t=0; } else { t++; } // coverageBuilder.addA (regionCoverage.getA()[i]); // coverageBuilder.addC (regionCoverage.getC()[i]); // coverageBuilder.addG (regionCoverage.getG()[i]); // coverageBuilder.addT (regionCoverage.getT()[i]); coverageBuilder.addAll(regionCoverage.getAll()[i]); } //write mean coverage for(MeanCoverage meanCoverage : alignmentRegion.getMeanCoverage()){ String coverageName = meanCoverage.getName(); float[] coverage = meanCoverage.getCoverage(); for(int i = 0; i < coverage.length; i++){ Put put = new Put(Bytes.toBytes(AlignmentHBase.getMeanCoverageRowKey(chromosome, coverageName, (meanCoverage.getInitPosition()/meanCoverage.getSize()) + i))); String columnName = sample; byte[] value = AlignmentProto.MeanCoverage.newBuilder().setCoverage(coverage[i]).build().toByteArray(); put.add(Bytes.toBytes(columnFamilyName), Bytes.toBytes(columnName), value); puts.add(put); // System.out.println("value.length = " + value.length); // try { // float l; // System.out.println("Snappy(value).length = " + (l=Snappy.compress(value).length)); // System.out.println("value/snappy = " + value.length/l); // } catch (IOException ex) { // Logger.getLogger(AlignmentRegionCoverageHBaseDataWriter.class.getName()).log(Level.SEVERE, null, ex); // } } } // System.out.println("End Write"); try { table.put(puts); puts.clear(); } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. return false; } // System.out.println("Table.Puts"); return true; } @Override public boolean write(List<AlignmentRegion> alignmentRegions) { for(AlignmentRegion alignmentRegion : alignmentRegions){ if(!write(alignmentRegion)){ return false; } } return true; } private void initBuilder(RegionCoverage regionCoverage, int offset) { coverageBuilder = AlignmentProto.Coverage.newBuilder(); coverageStart = ((regionCoverage.getStart() + offset) / bucketSize) * bucketSize; coverageEnd = coverageStart + bucketSize; chromosome = regionCoverage.getChromosome(); a = c = g = t = 0; } private void putCoverage() { byte[] value; Put put = new Put(Bytes.toBytes(AlignmentHBase.getBucketRowkey(chromosome, coverageStart, bucketSize))); put.add(Bytes.toBytes(columnFamilyName), Bytes.toBytes(sample), value = coverageBuilder.build().toByteArray()); puts.add(put); // System.out.println("value.length = " + value.length); // try { // float l; // System.out.println("Snappy(value).length = " + (l = Snappy.compress(value).length)); // System.out.println("value/snappy = " + value.length / l); // } catch (IOException ex) { // Logger.getLogger(AlignmentRegionCoverageHBaseDataWriter.class.getName()).log(Level.SEVERE, null, ex); // } } public String getSample() { return sample; } public String getTableName() { return tableName; } public String getColumnFamilyName() { return columnFamilyName; } public void setColumnFamilyName(String columnFamilyName) { this.columnFamilyName = columnFamilyName; } }