package com.ctriposs.sdb.merge;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.concurrent.CountDownLatch;
import com.ctriposs.sdb.stats.SDBStats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.ctriposs.sdb.LevelQueue;
import com.ctriposs.sdb.SDB;
import com.ctriposs.sdb.table.AbstractMapTable;
import com.ctriposs.sdb.table.AbstractSortedMapTable;
import com.ctriposs.sdb.table.FCMapTable;
import com.ctriposs.sdb.table.IMapEntry;
import com.ctriposs.sdb.utils.BytesUtil;
import com.ctriposs.sdb.utils.DateFormatter;
/**
* Level 1 to 2 merge sorting thread
*
* @author bulldog
*
*/
public class Level1Merger extends Thread {
static final Logger log = LoggerFactory.getLogger(Level1Merger.class);
private static final int MAX_SLEEP_TIME = 5 * 1000; // 5 seconds
private static final int DEFAULT_MERGE_WAYS = 4; // 4 way merge
private static final int CACHED_MAP_ENTRIES = 32;
private List<LevelQueue> levelQueueList;
private SDB sdb;
private final SDBStats stats;
private volatile boolean stop = false;
private CountDownLatch countDownLatch;
private short shard;
public Level1Merger(SDB sdb, List<LevelQueue> levelQueueList, CountDownLatch countDownLatch, short shard,
SDBStats stats) {
this.sdb = sdb;
this.levelQueueList = levelQueueList;
this.countDownLatch = countDownLatch;
this.shard = shard;
this.stats = stats;
}
@Override
public void run() {
while(!stop) {
try {
boolean merged = false;
LevelQueue lq1 = levelQueueList.get(SDB.LEVEL1);
LevelQueue lq2 = levelQueueList.get(SDB.LEVEL2);
boolean hasLevel2MapTable = lq2.size() > 0;
if ((!hasLevel2MapTable && lq1.size() >= DEFAULT_MERGE_WAYS) ||
(hasLevel2MapTable && lq1.size() >= DEFAULT_MERGE_WAYS - 1)) {
log.info("Start running level 1 merging at " + DateFormatter.formatCurrentDate());
log.info("Current queue size at level 1 is " + lq1.size());
log.info("Current queue size at level 2 is " + lq2.size());
long start = System.nanoTime();
mergeSort(lq1, lq2, DEFAULT_MERGE_WAYS, sdb.getDir(), shard);
stats.recordMerging(SDB.LEVEL1, System.nanoTime() - start);
merged = true;
log.info("End running level 1 to 2 merging at " + DateFormatter.formatCurrentDate());
}
if (!merged) {
Thread.sleep(MAX_SLEEP_TIME);
}
} catch (Exception ex) {
log.error("Error occured in the level 1 to 2 merger", ex);
}
}
this.countDownLatch.countDown();
log.info("Stopped level 1 to 2 merge thread " + this.getName());
}
public static void mergeSort(LevelQueue lq1, LevelQueue lq2, int ways, String dir, short shard)
throws IOException, ClassNotFoundException {
boolean hasLevel2MapTable = lq2.size() > 0;
List<AbstractMapTable> tables = new ArrayList<AbstractMapTable>(ways);
lq1.getReadLock().lock();
try {
Iterator<AbstractMapTable> iter = lq1.descendingIterator();
for(int i = 0; i < ways - 1; i++) {
tables.add(iter.next());
}
if (hasLevel2MapTable) {
tables.add(lq2.get(0));
} else {
tables.add(iter.next());
}
} finally {
lq1.getReadLock().unlock();
}
long expectedInsertions = 0;
for(AbstractMapTable table : tables) {
expectedInsertions += table.getAppendedSize();
}
if (expectedInsertions > Integer.MAX_VALUE) expectedInsertions = Integer.MAX_VALUE;
// target table
AbstractSortedMapTable sortedMapTable = new FCMapTable(dir, shard, SDB.LEVEL2, System.nanoTime(), (int)expectedInsertions);
PriorityQueue<QueueElement> pq = new PriorityQueue<QueueElement>();
// build initial heap
for(AbstractMapTable table : tables) {
QueueElement qe = new QueueElement();
qe.sortedMapTable = table;
qe.size = qe.sortedMapTable.getAppendedSize();
qe.index = 0;
qe.queue = new LinkedList<IMapEntry>();
IMapEntry me = qe.getNextMapEntry();
if (me != null) {
qe.key = me.getKey();
qe.mapEntry = me;
qe.keyHash = me.getKeyHash();
pq.add(qe);
}
}
LinkedList<IMapEntry> targetCacheQueue = new LinkedList<IMapEntry>();
// merge sort
while(pq.size() > 0) {
QueueElement qe1 = pq.poll();
// remove old/stale entries
while(pq.peek() != null && qe1.keyHash == pq.peek().keyHash && BytesUtil.compare(qe1.key, pq.peek().key) == 0) {
QueueElement qe2 = pq.poll();
IMapEntry me = qe2.getNextMapEntry();
if (me != null) {
qe2.key = me.getKey();
qe2.mapEntry = me;
qe2.keyHash = me.getKeyHash();
pq.add(qe2);
}
}
// remove deleted or expired entries in final merge sorting
if (!qe1.mapEntry.isDeleted() && !qe1.mapEntry.isExpired()) {
targetCacheQueue.add(qe1.mapEntry);
}
if (targetCacheQueue.size() >= CACHED_MAP_ENTRIES * DEFAULT_MERGE_WAYS) {
while(targetCacheQueue.size() > 0) {
IMapEntry mapEntry = targetCacheQueue.poll();
byte[] value = mapEntry.getValue();
// disk space optimization
if (mapEntry.isExpired()) {
continue;
}
sortedMapTable.appendNew(mapEntry.getKey(), mapEntry.getKeyHash(), value, mapEntry.getTimeToLive(),
mapEntry.getCreatedTime(), mapEntry.isDeleted(), mapEntry.isCompressed());
}
}
IMapEntry me = qe1.getNextMapEntry();
if (me != null) {
qe1.key = me.getKey();
qe1.mapEntry = me;
qe1.keyHash = me.getKeyHash();
pq.add(qe1);
}
}
// remaining cached entries
while(targetCacheQueue.size() > 0) {
IMapEntry mapEntry = targetCacheQueue.poll();
byte[] value = mapEntry.getValue();
// disk space optimization
if (mapEntry.isExpired()) {
continue;
}
sortedMapTable.appendNew(mapEntry.getKey(), mapEntry.getKeyHash(), value, mapEntry.getTimeToLive(),
mapEntry.getCreatedTime(), mapEntry.isDeleted(), mapEntry.isCompressed());
}
// persist metadata
sortedMapTable.reMap();
sortedMapTable.saveMetadata();
// switching
lq1.getWriteLock().lock();
lq2.getWriteLock().lock();
try {
for(int i = 0; i < ways - 1; i++) {
lq1.removeLast();
}
if (hasLevel2MapTable) {
lq2.removeLast();
} else {
lq1.removeLast();
}
for(AbstractMapTable table : tables) {
table.markUsable(false);
}
sortedMapTable.markUsable(true);
lq2.addFirst(sortedMapTable);
} finally {
lq2.getWriteLock().unlock();
lq1.getWriteLock().unlock();
}
for(AbstractMapTable table : tables) {
table.close();
table.delete();
}
}
public void setStop() {
this.stop = true;
log.info("Stopping level 1 to 2 merge thread " + this.getName());
}
static class QueueElement implements Comparable<QueueElement> {
AbstractMapTable sortedMapTable;
long size;
int index;
byte[] key;
int keyHash;
IMapEntry mapEntry;
LinkedList<IMapEntry> queue;
// cache optimization
public IMapEntry getNextMapEntry() throws IOException {
IMapEntry me = queue.poll();
if (me != null) return me;
if (me == null) {
int count = 0;
while(index < size && count < CACHED_MAP_ENTRIES) {
IMapEntry mapEntry = sortedMapTable.getMapEntry(index);
// eager loading
mapEntry.getKey();
mapEntry.getValue();
mapEntry.getTimeToLive();
mapEntry.getCreatedTime();
queue.add(mapEntry);
index++;
count++;
}
}
return queue.poll();
}
@Override
public int compareTo(QueueElement other) {
if (keyHash < other.keyHash) return -1;
else if (keyHash > other.keyHash) return 1;
else {
if (BytesUtil.compare(key, other.key) < 0) {
return -1;
} else if (BytesUtil.compare(key, other.key) > 0) {
return 1;
} else {
if (this.sortedMapTable.getLevel() == SDB.LEVEL1 && other.sortedMapTable.getLevel() == SDB.LEVEL1) {
if (sortedMapTable.getCreatedTime() > other.sortedMapTable.getCreatedTime()) {
return -1;
} else if (sortedMapTable.getCreatedTime() < other.sortedMapTable.getCreatedTime()) {
return 1;
} else {
return 0;
}
} else {
if (this.sortedMapTable.getLevel() == SDB.LEVEL1) return -1;
else return 1;
}
}
}
}
}
}