package org.apache.hadoop.hbase.regionserver.memstore;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.column.ColumnDescriptor;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import parquet.schema.Type;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicLong;
/**
*
* Created by wangxiaoyi on 15/5/6.
*
* implement memstore for mutation
*
*/
public class PMemStoreImpl implements PMemStore{

    // Log under the implementation class, not the PMemStore interface, so log
    // lines are attributed to the class that actually emits them.
    private static final Log LOG = LogFactory.getLog(PMemStoreImpl.class);

    /** Shallow size of this object: header + 4 references + 2 longs. */
    public final static long FIXED_OVERHEAD = ClassSize.align(
            ClassSize.OBJECT + (4 * ClassSize.REFERENCE) + (2 * Bytes.SIZEOF_LONG));

    /** Deep size: shallow overhead plus the AtomicLong and the two skip-list maps. */
    public final static long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD +
            ClassSize.ATOMIC_LONG + (0 * ClassSize.TIMERANGE_TRACKER) +
            (0 * ClassSize.CELL_SKIPLIST_SET) + (2 * ClassSize.CONCURRENT_SKIPLISTMAP));

    private Configuration conf;

    // Live edits keyed by row; keys compared lexicographically as byte[].
    private volatile Map<byte[], Mutation> rowInMem;
    // Edits frozen by snapshot() and awaiting flush / clearSnapshot().
    private volatile Map<byte[], Mutation> snapshotRowInMem;

    // Smallest / largest row key currently held in rowInMem (null while empty).
    private volatile byte[] startkey = null;
    private volatile byte[] endkey = null;

    // Used to track own heapSize
    private AtomicLong memstoreSize;
    private volatile long snapshotSize;

    // Used to track when to flush
    volatile long timeOfOldestEdit = Long.MAX_VALUE;
    volatile long snapshotId;

    /**
     * @param conf configuration handle; stored but not read in this class
     */
    public PMemStoreImpl(Configuration conf){
        this.conf = conf;
        rowInMem = new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);
        snapshotRowInMem = new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);
        memstoreSize = new AtomicLong(DEEP_OVERHEAD);
        snapshotSize = 0;
    }
/**
 * Insert a mutation into the memstore, merging it with any mutation
 * already buffered for the same row.
 *
 * @param m mutation to insert
 * @return approximate heap size contributed by the mutation
 */
@Override
public long add(Mutation m) throws IOException{
    byte[] row = m.getRow();
    Mutation existing = rowInMem.get(row);
    if (existing == null) {
        //TODO : make a efficient implementation
        // First occurrence of this row: widen the [startkey, endkey] range if needed.
        if (startkey == null || Bytes.compareTo(row, startkey) < 0) {
            startkey = row;
        }
        if (endkey == null || Bytes.compareTo(endkey, row) < 0) {
            endkey = row;
        }
        rowInMem.put(row, m);
    } else {
        if (m instanceof Put) {
            // Merge the new Put into the one already buffered for this row.
            ((Put) existing).mergePut((Put) m);
        } else {
            //TODO: other mutation different merge function
        }
    }
    long delta = m.heapSize();
    memstoreSize.getAndAdd(delta);
    setOldestEditTimeToNow();
    return delta;
}
/**
 * Look up the mutation buffered for the given row.
 *
 * @param row row key to look up
 * @return the buffered mutation, or null when the row is absent
 */
@Override
public Mutation get(byte[] row) {
    return rowInMem.get(row);
}
/**
 * @return number of distinct rows currently buffered in the live map
 */
public int getRecordCount(){
    return this.rowInMem.size();
}
/**
 * Remove the mutation buffered for the passed mutation's row.
 *
 * @param m mutation identifying the row to remove; may be null
 * @return approximate heap size released, or 0 when nothing was removed
 */
@Override
public long delete(Mutation m) {
    // remove() both tests for and deletes the row in one step, avoiding the
    // get-then-remove race the original had.
    if (m == null || rowInMem.remove(m.getRow()) == null) {
        return 0;
    }
    long released = m.heapSize();
    // Atomic decrement: the original's getAndSet(get() - x) pair could lose
    // concurrent updates made between the read and the write.
    memstoreSize.addAndGet(-released);
    setOldestEditTimeToNow();
    return released;
}
/**
 * Creates a snapshot of the current memstore. Snapshot must be cleared by call to
 * {@link #clearSnapshot(long)}.
 *
 * @return {@link PMemStoreSnapshot}, or null when a previous snapshot is
 *         still outstanding
 */
@Override
public PMemStoreSnapshot snapshot() {
// A previous snapshot has not been cleared yet; refuse to overwrite it.
if (!this.snapshotRowInMem.isEmpty()) {
//snapshotRowInMem.clear();
LOG.warn("Snapshot called again without clearing previous. " +
"Doing nothing. Another ongoing flush or did we fail last attempt?");
return null;
}else {
// The snapshot id doubles as its creation timestamp.
snapshotId = EnvironmentEdgeManager.currentTime();
this.snapshotSize = dataSize();
// Swap the live map into the snapshot slot and start a fresh live map;
// heap accounting resets to the empty-store overhead.
if(! rowInMem.isEmpty()){
this.snapshotRowInMem = this.rowInMem;
this.rowInMem = new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);
this.memstoreSize.set(DEEP_OVERHEAD);
timeOfOldestEdit = Long.MAX_VALUE;
}
// Build the snapshot over the frozen map; the key range captured here
// describes the data that was just frozen.
PMemStoreSnapshot snapshot = new PMemStoreSnapshot(snapshotId,
snapshotRowInMem.size(),
snapshotSize,
getScanner(this.snapshotRowInMem, null),
startkey, endkey);
// Reset the live range; it will be re-grown by subsequent add() calls.
// NOTE(review): this also runs when rowInMem was empty — confirm intended.
this.startkey = null;
this.endkey = null;
return snapshot;
}
}
/**
 * Clears the current snapshot of the Memstore. The caller must pass the id
 * of the snapshot it obtained from {@link #snapshot()}.
 *
 * @param id snapshot id expected to match the outstanding snapshot
 * @throws UnexpectedStateException when the id does not match
 * @see #snapshot()
 */
@Override
public void clearSnapshot(long id) throws UnexpectedStateException {
    if (id != this.snapshotId) {
        throw new UnexpectedStateException("Current snapshot id is " + this.snapshotId + ",passed "
            + id);
    }
    // Same snapshot as current. If not-empty, swap in a fresh map and let
    // the old one be garbage collected rather than clearing it in place.
    if (!this.snapshotRowInMem.isEmpty()) {
        this.snapshotRowInMem = new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);
    }
    this.snapshotId = -1L;
    this.snapshotSize = 0L;
}
/**
 * @return id of the outstanding snapshot, or -1 after it has been cleared
 */
public long getCurrSnapshotId(){
    return this.snapshotId;
}
/**
 * @return Approximate 'exclusive deep size' of implementing object,
 *         including payload and hosting-object overhead.
 */
@Override
public long heapSize() {
    return memstoreSize.get();
}
/**
 * @return size of the outstanding snapshot when one exists, otherwise the
 *         current total memstore size
 */
@Override
public long getFlushableSize() {
    if (this.snapshotSize > 0) {
        return snapshotSize;
    }
    return size();
}
/**
 * @return Oldest timestamp of all the Mutations in the MemStore
 *         (Long.MAX_VALUE when no edit has arrived since the last reset)
 */
@Override
public long timeOfOldestEdit() {
    return timeOfOldestEdit;
}
/**
 * @return smallest row key seen in the live map, or null when empty
 */
@Override
public byte[] getStartKey() {
    return startkey;
}
/**
 * @return largest row key seen in the live map, or null when empty
 */
@Override
public byte[] getEndKey() {
    return endkey;
}
/**
 * @return Total memory occupied by this MemStore (same value as
 *         {@link #heapSize()}).
 */
@Override
public long size() {
    return this.memstoreSize.get();
}
/**
 * @return payload size only: total heap size minus the fixed empty-store
 *         overhead
 */
public long dataSize(){
    return heapSize() - DEEP_OVERHEAD;
}
// Record the current time as the oldest-edit timestamp, but only for the
// first edit after a reset (later edits keep the earlier timestamp).
void setOldestEditTimeToNow() {
    if (timeOfOldestEdit != Long.MAX_VALUE) {
        return;
    }
    timeOfOldestEdit = EnvironmentEdgeManager.currentTime();
}
/**
 * create scanner for {@link PMemStore} over the live map
 *
 * @return {@link PMemStoreScanner}
 */
@Override
public RowScanner getScanner(Scan scan) {
    // Delegate to the map-taking overload with the live map.
    return getScanner(this.rowInMem, scan);
}
/**
 * @return scanner over the frozen snapshot map rather than the live map
 */
public RecordScanner getSnapshotScanner(Scan scan){
    return new PMemStoreScanner(this.snapshotRowInMem, scan);
}
/**
 * @return scanner over an arbitrary row map (live or snapshot)
 */
public RowScanner getScanner(Map<byte[], Mutation> rowInMem, Scan scan) {
    return new PMemStoreScanner(rowInMem, scan);
}
/**
 * Build the scanner heap combining the snapshot scanner (when a snapshot
 * exists) and the live memstore scanner (when non-empty), each seeked to
 * the given start row.
 *
 * @param startkey row to seek each scanner to
 * @param scan scan specification forwarded to each scanner
 * @return the combined heap, or null when heap construction failed
 */
public ScannerHeap getRecordScanner(byte[] startkey, Scan scan){
    List<RecordScanner> scanners = new LinkedList<>();
    if (!snapshotRowInMem.isEmpty()) {
        PMemStoreScanner snapshotScanner = new PMemStoreScanner(snapshotRowInMem, scan);
        snapshotScanner.seek(startkey);
        scanners.add(snapshotScanner);
    }
    if (!rowInMem.isEmpty()) {
        PMemStoreScanner memStoreScanner = new PMemStoreScanner(rowInMem, scan);
        memStoreScanner.seek(startkey);
        scanners.add(memStoreScanner);
    }
    ScannerHeap heap = null;
    try {
        heap = new ScannerHeap(scanners, new RecordScannerComparator());
    } catch (IOException ioe) {
        LOG.error("create record scanner error : " + ioe);
    }
    // Return AFTER the try/catch: the original returned from a finally
    // block, which silently discards any unchecked exception thrown in the
    // try body, not just the logged IOException.
    return heap;
}
// Debug helper: log every buffered mutation as JSON.
void dump() {
    for (Mutation m : rowInMem.values()) {
        try {
            LOG.info(m.toJSON());
        } catch (IOException ioe) {
            LOG.error(ioe);
        }
    }
}
/**
 * row scanner for {@link PMemStore}
 *
 * Iterates the backing row map in key order with a one-row lookahead
 * window (curr / next), filtering cells by the scan's read schema.
 */
class PMemStoreScanner implements RowScanner, InternalRecordScanner{
// private final Logger LOG = LoggerFactory.getLogger(PMemStoreScanner.class);
// Current row key and the one after it (the lookahead window).
private byte[] curr = null;
private byte[] next = null;
// Iterator over the backing map's key set; assigned by seek().
private Iterator<byte []> it =null;
// Rows not yet returned; initialized to the map size.
private int countLeft = 0;
// Qualifiers accepted by the read schema; empty means "accept all".
private List<byte[]> filterColumns = new LinkedList<>();
// Backing map: either the live rowInMem or a snapshot map.
private Map<byte[], Mutation> rowInMem;
/**
 * @param rowInMem map of rows to scan (live or snapshot)
 * @param scan scan whose schema attribute seeds the column filter
 */
public PMemStoreScanner(Map<byte[], Mutation> rowInMem, Scan scan){
this.rowInMem = rowInMem;
countLeft = rowInMem.size();
initScanFilter(scan);
seek();
}
/**
 * Initialise the column filter from the read schema carried in the scan's
 * {@link HConstants#SCAN_TABLE_SCHEMA} attribute. When the scan or the
 * attribute is absent, the filter stays empty and every column matches.
 *
 * @param scan scan that may carry the table schema attribute; may be null
 */
public void initScanFilter(Scan scan){
    try {
        // Null-check BEFORE dereferencing: the original called
        // scan.getAttribute() (and new String(null)) ahead of its own
        // null checks, throwing NPE outside the try block.
        if (scan == null) {
            return;
        }
        byte[] rawSchema = scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA);
        if (rawSchema == null) {
            return;
        }
        String schema = new String(rawSchema);
        if (!schema.isEmpty()) {
            MessageType readSchema = MessageTypeParser.parseMessageType(schema);
            for (Type type : readSchema.getFields()) {
                String columnName = type.getName();
                // Field names look like "cf:<qualifier>" — strip the family
                // prefix to get the real column name.
                if (columnName.startsWith("cf")) {
                    columnName = columnName.substring(3);
                }
                filterColumns.add(columnName.getBytes());
            }
        }
    } catch (Exception e) {
        //TODO: send the exception back to the client
        LOG.error("parse the message schema error" + e);
    }
}
/**
 * Position the scanner at the first row whose key is &gt;= the given row.
 * A null or empty start row leaves the scanner where it is.
 *
 * @param row row key to seek to
 */
public void seek(byte[] row){
    // The null check must come before Bytes.compareTo: the original
    // compared first and would NPE on a null row.
    if (row == null
            || Bytes.compareTo(row, HConstants.EMPTY_START_ROW) == 0
            || rowInMem == null
            || rowInMem.size() == 0) {
        return;
    }
    it = rowInMem.keySet().iterator();
    boolean seekEd = false;
    while (it.hasNext()) {
        curr = it.next();
        if (Bytes.compareTo(curr, row) >= 0) {
            seekEd = true;
            break;
        }
    }
    if (it.hasNext()) {
        next = it.next();
    }
    if (!seekEd) { // the requested row lies beyond this scanner's range
        curr = null;
        next = null;
    }
}
/**
 * Initial positioning: load the first row key into curr and the second
 * (when present) into next.
 */
public void seek(){
    if (rowInMem == null || rowInMem.size() == 0) {
        return;
    }
    it = rowInMem.keySet().iterator();
    if (it.hasNext()) {
        curr = it.next();
        if (it.hasNext()) {
            next = it.next();
        }
    }
}
/**
 * @return true while there is still a row to return
 */
public boolean hasNext(){
    // An empty or missing backing map invalidates the cursor.
    if (rowInMem == null || rowInMem.size() == 0) {
        curr = null;
    }
    return curr != null;
}
/**
 * Return the mutation at the current position and advance the scanner.
 *
 * @return mutation for the current row
 */
public Mutation nextRow(){
    Mutation result = rowInMem.get(curr);
    // Slide the (curr, next) lookahead window forward by one row.
    curr = next;
    next = it.hasNext() ? it.next() : null;
    countLeft--;
    return result;
}
/**
 * @return max result count left of this scanner
 */
@Override
public long getMaxResultsCount() {
    return this.countLeft;
}
/**
 * @return total records' count of this scanner (size of the backing map)
 */
@Override
public long getRecordCount() {
    return this.rowInMem.size();
}
/**
 * @return start key of this scanner
 *         TODO: currently reads the enclosing memstore's start key, not a
 *         per-scanner value
 */
@Override
public byte[] getStartKey() {
    return PMemStoreImpl.this.startkey;
}
/**
 * Return all cells of the next row that pass the column filter, advancing
 * the scanner by one row.
 *
 * @return matching cells, possibly empty
 */
@Override
public List<Cell> next() {
    List<Cell> cells = new LinkedList<>();
    Mutation m = nextRow();
    if (m != null) {
        try {
            CellScanner cellScanner = m.cellScanner();
            while (cellScanner.advance()) {
                Cell c = cellScanner.current();
                if (match(c)) {
                    cells.add(c);
                }
            }
        } catch (IOException ioe) {
            LOG.error(ioe);
        }
    }
    return cells;
}
/**
 * Check whether a cell is accepted by the read schema.
 *
 * @param cell cell to test
 * @return true when the cell's qualifier is in the filter, or when the
 *         filter is empty (accept-all)
 */
private boolean match(Cell cell){
    if (filterColumns.isEmpty()) {
        return true;
    }
    for (byte[] column : filterColumns) {
        if (CellUtil.matchingQualifier(cell, column)) {
            return true;
        }
    }
    return false;
}
/**
 * @return end key of this scanner (read from the enclosing memstore)
 */
@Override
public byte[] getEndKey() {
    return PMemStoreImpl.this.endkey;
}
/**
 * Return the cells of the current row without advancing the scanner.
 * Unlike {@link #next()}, no column filtering is applied here.
 *
 * @return cells of the current row, or an empty list when exhausted
 */
@Override
public List<Cell> peek() {
    List<Cell> cells = new LinkedList<>();
    if (curr == null) {
        return cells;
    }
    Mutation m = rowInMem.get(curr);
    if (m != null) {
        try {
            CellScanner cellScanner = m.cellScanner();
            while (cellScanner.advance()) {
                cells.add(cellScanner.current());
            }
        } catch (IOException ioe) {
            LOG.error(ioe);
        }
    }
    return cells;
}
/**
 * Closes this stream and releases any system resources associated
 * with it. If the stream is already closed then invoking this
 * method has no effect.
 *
 * @throws IOException if an I/O error occurs (never thrown here)
 */
@Override
public void close() throws IOException {
    // Drop the cursor window; hasNext() will report false from now on.
    next = null;
    curr = null;
}
}
/**
 * Ad-hoc smoke test: parse a sample Parquet message schema.
 * The original assigned the parsed schema and discarded it, producing no
 * observable output; print it so running main actually demonstrates the
 * parse succeeded.
 */
public static void main(String []args){
    MessageType schema = MessageTypeParser.parseMessageType( // parquet file schema
            " message people { " +
                    "required binary rowkey;" +
                    "required binary cf:name;" +
                    "required binary cf:age;" +
                    "required int64 timestamp;" +
                    " }");
    System.out.println(schema);
}
}