/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.store.common;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.zip.GZIPInputStream;
import com.addthis.basis.io.GZOut;
import com.addthis.basis.util.MemoryCounter;
import com.addthis.basis.util.Parameter;
import com.addthis.basis.util.Varint;
import com.addthis.codec.codables.BytesCodable;
import com.addthis.hydra.store.kv.KeyCoder;
import com.addthis.hydra.store.kv.PageEncodeType;
import com.addthis.hydra.store.skiplist.LockMode;
import com.addthis.hydra.store.skiplist.SkipListCache;
import com.google.common.base.Throwables;
import com.jcraft.jzlib.Deflater;
import com.jcraft.jzlib.DeflaterOutputStream;
import com.jcraft.jzlib.InflaterInputStream;
import com.ning.compress.lzf.LZFInputStream;
import com.ning.compress.lzf.LZFOutputStream;
import com.yammer.metrics.core.Histogram;
import org.xerial.snappy.SnappyInputStream;
import org.xerial.snappy.SnappyOutputStream;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.ByteBufOutputStream;
import io.netty.buffer.Unpooled;
public abstract class AbstractPage<K, V extends BytesCodable> implements Page<K, V> {
public static final int gzlevel = Parameter.intValue("eps.gz.level", 1);
public static final int gztype = Parameter.intValue("eps.gz.type", 1);
public static final int gzbuf = Parameter.intValue("eps.gz.buffer", 1024);
public static final int estimateMissingFactor = Parameter.intValue("eps.mem.estimate.missing.factor", 8);
public static final int memEstimationStrategy = Parameter.intValue("eps.mem.estimate.method", 1);
public static final int estimateRollMin = Parameter.intValue("eps.mem.estimate.roll.min", 1000);
public static final int estimateRollFactor = Parameter.intValue("eps.mem.estimate.roll.factor", 100);
protected final AbstractPageCache<K, V> parent;
public final K firstKey;
public K nextFirstKey;
/**
* This value is updated each time the node is accessed.
*/
volatile long timeStamp;
int size;
@Nullable
private ArrayList<K> keys;
@Nullable
private ArrayList<V> values;
@Nullable
private ArrayList<byte[]> rawValues;
@Nonnull
private ExternalMode state;
private int estimateTotal, estimates, avgEntrySize;
private int memoryEstimate;
private PageEncodeType encodeType;
public static final int ESTIMATES_BIT_OFFSET = 4;
public static final int TYPE_BIT_OFFSET = 5;
public static final int FLAGS_HAS_ESTIMATES = 1 << ESTIMATES_BIT_OFFSET;
public final KeyCoder<K, V> keyCoder;
private final ReentrantReadWriteLock lock;
/**
* This value is incremented each time the write lock
* is released.
*/
@GuardedBy("lock")
long writeStamp;
@Override
public long getWriteStamp() {
return writeStamp;
}
public void incrementWriteStamp() {
writeStamp++;
}
public AbstractPage(AbstractPageCache<K, V> cache, K firstKey, K nextFirstKey, PageEncodeType encodeType) {
this.parent = cache;
this.keyCoder = parent != null ? parent.keyCoder : null;
this.firstKey = firstKey;
this.nextFirstKey = nextFirstKey;
this.timeStamp = AbstractPageCache.generateTimestamp();
this.state = ExternalMode.DISK_MEMORY_IDENTICAL;
this.encodeType = encodeType;
this.lock = initLock();
}
public ReentrantReadWriteLock initLock() {
// default implementation does nothing, subclasses may override
return null;
}
public AbstractPage(AbstractPageCache<K, V> cache, K firstKey,
K nextFirstKey, int size, ArrayList<K> keys, ArrayList<V> values,
ArrayList<byte[]> rawValues, PageEncodeType encodeType) {
assert (keys != null);
assert (values != null);
assert (rawValues != null);
assert (keys.size() == size);
assert (values.size() == size);
assert (rawValues.size() == size);
this.parent = cache;
this.keyCoder = parent.keyCoder;
this.firstKey = firstKey;
this.nextFirstKey = nextFirstKey;
this.size = size;
this.keys = keys;
this.values = values;
this.rawValues = rawValues;
this.timeStamp = AbstractPageCache.generateTimestamp();
this.state = ExternalMode.DISK_MEMORY_IDENTICAL;
this.encodeType = encodeType;
this.lock = initLock();
}
public void readLock() {
lock.readLock().lock();
}
public void readUnlock() {
modeUnlock(LockMode.READMODE);
}
public void writeLock() {
lock.writeLock().lock();
}
public boolean writeTryLock() {
return lock.writeLock().tryLock();
}
public void writeUnlock() {
writeStamp++;
lock.writeLock().unlock();
}
public void modeLock(LockMode mode) {
switch (mode) {
case READMODE:
lock.readLock().lock();
break;
case WRITEMODE:
lock.writeLock().lock();
break;
}
}
public void modeUnlock(LockMode mode) {
switch (mode) {
case READMODE:
lock.readLock().unlock();
break;
case WRITEMODE:
writeUnlock();
break;
}
}
public void downgradeLock() {
assert (lock.isWriteLockedByCurrentThread());
readLock();
writeUnlock();
}
public boolean isWriteLockedByCurrentThread() {
return lock.isWriteLockedByCurrentThread();
}
public boolean isReadLockedByCurrentThread() {
return lock.getReadHoldCount() > 0;
}
/**
* Generate a blank page.
*/
@Override
public void initialize() {
keys = new ArrayList<>();
values = new ArrayList<>();
rawValues = new ArrayList<>();
size = 0;
timeStamp = AbstractPageCache.generateTimestamp();
}
protected final void updateHistogram(Histogram histogram, int value, boolean record) {
/**
* The JIT compiler should be smart enough to eliminate this code
* when {@link SkipListCache.trackEncodingByteUsage} is false.
*/
if (AbstractPageCache.trackEncodingByteUsage && record) {
histogram.update(value);
}
}
@Override
public byte[] encode(ByteBufOutputStream out) {
return encode(out, true);
}
public byte[] encode(ByteBufOutputStream out, boolean record) {
PageCacheMetrics<K, V> metrics = parent.metrics;
parent.numPagesEncoded.getAndIncrement();
PageEncodeType upgradeType = PageEncodeType.defaultType();
try {
OutputStream os = out;
out.write(gztype | FLAGS_HAS_ESTIMATES | (upgradeType.ordinal() << TYPE_BIT_OFFSET));
switch (gztype) {
case 0:
break;
case 1:
os = new DeflaterOutputStream(out, new Deflater(gzlevel));
break;
case 2:
os = new GZOut(out, gzbuf, gzlevel);
break;
case 3:
os = new LZFOutputStream(out);
break;
case 4:
os = new SnappyOutputStream(out);
break;
default:
throw new RuntimeException("invalid gztype: " + gztype);
}
DataOutputStream dos = new DataOutputStream(os);
byte[] firstKeyEncoded = keyCoder.keyEncode(firstKey);
byte[] nextFirstKeyEncoded = keyCoder.keyEncode(nextFirstKey);
updateHistogram(metrics.encodeNextFirstKeySize, nextFirstKeyEncoded.length, record);
Varint.writeUnsignedVarInt(size, dos);
Varint.writeUnsignedVarInt(firstKeyEncoded.length, dos);
dos.write(firstKeyEncoded);
Varint.writeUnsignedVarInt(nextFirstKeyEncoded.length, dos);
if (nextFirstKeyEncoded.length > 0) {
dos.write(nextFirstKeyEncoded);
}
for (int i = 0; i < size; i++) {
byte[] keyEncoded = keyCoder.keyEncode(keys.get(i), firstKey, upgradeType);
byte[] rawVal = rawValues.get(i);
if (rawVal == null || upgradeType != encodeType) {
fetchValue(i);
rawVal = keyCoder.valueEncode(values.get(i), upgradeType);
}
updateHistogram(metrics.encodeKeySize, keyEncoded.length, record);
updateHistogram(metrics.encodeValueSize, rawVal.length, record);
Varint.writeUnsignedVarInt(keyEncoded.length, dos);
dos.write(keyEncoded);
Varint.writeUnsignedVarInt(rawVal.length, dos);
dos.write(rawVal);
}
Varint.writeUnsignedVarInt((estimateTotal > 0 ? estimateTotal : 1), dos);
Varint.writeUnsignedVarInt((estimates > 0 ? estimates : 1), dos);
switch (gztype) {
case 1:
((DeflaterOutputStream) os).finish();
break;
case 2:
((GZOut) os).finish();
break;
}
os.flush(); // flush should be called by dos.close(), but better safe than sorry
dos.close();
ByteBuf buffer = out.buffer();
byte[] returnValue = new byte[out.writtenBytes()];
buffer.readBytes(returnValue);
buffer.clear();
updateHistogram(metrics.numberKeysPerPage, size, record);
updateHistogram(metrics.encodePageSize, returnValue.length, record);
return returnValue;
} catch (Exception ex) {
throw Throwables.propagate(ex);
}
}
public void decode(byte[] page) {
parent.numPagesDecoded.getAndIncrement();
ByteBuf buffer = Unpooled.wrappedBuffer(page);
try {
InputStream in = new ByteBufInputStream(buffer);
int flags = in.read() & 0xff;
int gztype = flags & 0x0f;
int pageType = flags >>> TYPE_BIT_OFFSET;
boolean hasEstimates = (flags & FLAGS_HAS_ESTIMATES) != 0;
switch (gztype) {
case 1:
in = new InflaterInputStream(in);
break;
case 2:
in = new GZIPInputStream(in);
break;
case 3:
in = new LZFInputStream(in);
break;
case 4:
in = new SnappyInputStream(in);
break;
}
DataInputStream dis = null;
switch (pageType) {
case 0:
encodeType = PageEncodeType.LEGACY;
break;
case 1:
encodeType = PageEncodeType.SPARSE;
dis = new DataInputStream(in);
break;
case 2:
encodeType = PageEncodeType.LONGIDS;
dis = new DataInputStream(in);
break;
}
decodeKeysAndValues(encodeType, in, dis, hasEstimates);
in.close();
} catch (Exception ex) {
throw Throwables.propagate(ex);
} finally {
buffer.release();
}
}
/**
* @param encodeType
* @param in
* @param dis
* @param hasEstimates
* @throws IOException
*/
private void decodeKeysAndValues(PageEncodeType encodeType, InputStream in,
DataInputStream dis, boolean hasEstimates) throws IOException {
K firstKey;
byte[] nextFirstKeyBytes;
int readEstimateTotal;
int readEstimates;
int entries = encodeType.readInt(in, dis);
firstKey = keyCoder.keyDecode(encodeType.readBytes(in, dis));
nextFirstKeyBytes = encodeType.nextFirstKey(in, dis);
nextFirstKey = keyCoder.keyDecode(nextFirstKeyBytes);
assert (this.firstKey.equals(firstKey));
int bytes = 0;
size = entries;
keys = new ArrayList<>(size);
values = new ArrayList<>(size);
rawValues = new ArrayList<>(size);
for (int i = 0; i < entries; i++) {
byte[] kb = encodeType.readBytes(in, dis);
byte[] vb = encodeType.readBytes(in, dis);
bytes += kb.length + vb.length;
keys.add(keyCoder.keyDecode(kb, firstKey, encodeType));
values.add(null);
rawValues.add(vb);
}
if (hasEstimates) {
readEstimateTotal = encodeType.readInt(in, dis);
readEstimates = encodeType.readInt(in, dis);
setAverage(readEstimateTotal, readEstimates);
} else {
/** use a pessimistic/conservative byte/entry estimate */
setAverage(bytes * estimateMissingFactor, entries);
}
updateMemoryEstimate();
}
private int estimatedMem() {
/**
* We want to account for the three pointers that point
* the key, the value, and the raw value. The 64-bit JVM
* should have 8-byte pointers but the HotSpot JVM uses
* compressed pointers so the true value is somewhere between
* 4 and 8. Use 4 bytes as an approximation.
* (3 pointers * 4 bytes) = 12 bytes.
*/
int weightedAvg = avgEntrySize + 12;
return (weightedAvg * size);
}
public void updateAverage(K key, V val, int count) {
long next = parent.estimateCounter.incrementAndGet();
if (avgEntrySize == 0 ||
(parent.getEstimateInterval() <= 0 && estimates > 0 && next % estimates == 0) ||
(parent.getEstimateInterval() > 0 && next % parent.getEstimateInterval() == 0)) {
switch (memEstimationStrategy) {
case 0:
/** use encoded byte size as crude proxy for mem size */
updateAverage((keyCoder.keyEncode(key).length +
keyCoder.valueEncode(val, encodeType).length), count);
break;
case 1:
/** walk objects and estimate. possibly slower and not demonstrably more accurate */
updateAverage((int) (MemoryCounter.estimateSize(key) + MemoryCounter.estimateSize(val)), count);
break;
default:
throw new IllegalStateException("invalid sample strategy: " + memEstimationStrategy);
}
}
}
private void updateAverage(int byteCount, int count) {
assert (byteCount > 0);
int byteTotal = byteCount * count;
if (estimates > Math.min(estimateRollMin, size * estimateRollFactor)) {
estimates = 1;
estimateTotal = avgEntrySize;
} else {
estimates += count;
estimateTotal += byteTotal;
avgEntrySize = estimateTotal / estimates;
}
}
private void setAverage(int total, int count) {
if ((count == 0) || (total == 1 && count == 1)) {
avgEntrySize = 0;
estimates = 0;
estimateTotal = 0;
} else {
avgEntrySize = total / count;
estimates = count;
estimateTotal = total;
}
}
public int getMemoryEstimate() {
return memoryEstimate;
}
public void updateMemoryEstimate() {
memoryEstimate = estimatedMem();
}
public boolean interval(Comparable<? super K> ckey) {
assert (ckey.compareTo(firstKey) >= 0);
if (nextFirstKey == null) {
return true;
} else {
if (ckey.compareTo(nextFirstKey) < 0) {
return true;
}
}
return false;
}
/**
* Given a integer position if {@link #values} is storing a null entry
* and {@link #rawValues} is storing the representation of a
* non-null entry then populate {@link #values} with the decoded
* result of {@link #rawValues}.
*/
public void fetchValue(int position) {
V value = values.get(position);
byte[] rawValue = rawValues.get(position);
if (value == null) {
values.set(position, keyCoder.valueDecode(rawValue, encodeType));
}
}
public boolean splitCondition() {
if (size == 1) {
return false;
} else if (parent.getMaxPageMem() > 0 && estimatedMem() > parent.getMaxPageMem()) {
return true;
} else if (parent.getMaxPageSize() > 0) {
if (size > parent.getMaxPageSize()) {
return true;
}
} else if (size > AbstractPageCache.defaultMaxPageEntries) {
return true;
}
return false;
}
@Override
public int size() {
return size;
}
@Override
public void setSize(int size) {
this.size = size;
}
@Override
public K getNextFirstKey() {
return nextFirstKey;
}
@Override
public void setNextFirstKey(K nextFirstKey) {
this.nextFirstKey = nextFirstKey;
}
@Override
public int getAvgEntrySize() {
return avgEntrySize;
}
@Override
public void setAvgEntrySize(int avgEntrySize) {
this.avgEntrySize = avgEntrySize;
}
@Override
public int getEstimates() {
return estimates;
}
@Override
public void setEstimates(int estimates) {
this.estimates = estimates;
}
@Override
public int getEstimateTotal() {
return estimateTotal;
}
@Override
public void setEstimateTotal(int estimateTotal) {
this.estimateTotal = estimateTotal;
}
@Override
public void setTimeStamp(long timeStamp) {
this.timeStamp = timeStamp;
}
@Override
public void setKeys(ArrayList<K> keys) {
this.keys = keys;
}
@Override
public void setValues(ArrayList<V> values) {
this.values = values;
}
@Override
public void setRawValues(ArrayList<byte[]> rawValues) {
this.rawValues = rawValues;
}
@Override
public long getTimeStamp() {
return timeStamp;
}
@Override
public boolean inTransientState() {
return state.isTransient();
}
@Override
public PageEncodeType getEncodeType() {
return encodeType;
}
@Override
public ExternalMode getState() {
return state;
}
@Override
public void setState(ExternalMode state) {
this.state = state;
}
@Override
public ArrayList<K> keys() {
return keys;
}
@Override
public ArrayList<V> values() {
return values;
}
@Override
public ArrayList<byte[]> rawValues() {
return rawValues;
}
@Override
public K getFirstKey() {
return firstKey;
}
}