/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.contrib.streaming.state;
import org.apache.flink.api.common.state.State;
import org.apache.flink.api.common.state.StateDescriptor;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.core.memory.ByteArrayInputStreamWithPos;
import org.apache.flink.core.memory.ByteArrayOutputStreamWithPos;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import org.apache.flink.runtime.query.netty.message.KvStateRequestSerializer;
import org.apache.flink.runtime.state.internal.InternalKvState;
import org.apache.flink.runtime.state.KeyGroupRangeAssignment;
import org.apache.flink.util.Preconditions;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteOptions;
import java.io.IOException;
/**
 * Base class for {@link State} implementations that store state in a RocksDB database.
 *
 * <p>State is not stored in this class but in the {@link org.rocksdb.RocksDB} instance that
 * the {@link RocksDBStateBackend} manages and checkpoints.
 *
 * <p>Entries are addressed by a composite key that this class serializes as
 * {@code [key-group prefix][key][key length?][namespace][namespace length?]}. The two length
 * suffixes are written only when both the key serializer and the namespace serializer report
 * a negative length from {@code getLength()}.
 *
 * @param <K> The type of the key.
 * @param <N> The type of the namespace.
 * @param <S> The type of {@link State}.
 * @param <SD> The type of {@link StateDescriptor}.
 * @param <V> The value type argument of the {@link StateDescriptor}.
 */
public abstract class AbstractRocksDBState<K, N, S extends State, SD extends StateDescriptor<S, V>, V>
        implements InternalKvState<N>, State {

    /** Serializer for the namespace. */
    final TypeSerializer<N> namespaceSerializer;

    /** The current namespace, which the next value methods will refer to. */
    private N currentNamespace;

    /** Backend that holds the actual RocksDB instance where we store state. */
    protected RocksDBKeyedStateBackend<K> backend;

    /** The column family of this particular instance of state. */
    protected ColumnFamilyHandle columnFamily;

    /** State descriptor from which to create this state instance. */
    protected final SD stateDesc;

    /**
     * We disable writes to the write-ahead-log here.
     */
    private final WriteOptions writeOptions;

    /** Reusable buffer into which the composite key of the current key/namespace is written. */
    protected final ByteArrayOutputStreamWithPos keySerializationStream;

    /** Data output view wrapping {@link #keySerializationStream}. */
    protected final DataOutputView keySerializationDataOutputView;

    /**
     * True if both the key serializer and the namespace serializer report a negative length;
     * in that case a length suffix is appended after the key and after the namespace.
     */
    private final boolean ambiguousKeyPossible;

    /**
     * Creates a new RocksDB backed state.
     *
     * @param columnFamily The column family in which this state's entries are stored.
     * @param namespaceSerializer The serializer for the namespace.
     * @param stateDesc The state descriptor from which this state instance is created.
     * @param backend The backend that holds the RocksDB instance and the key context.
     * @throws NullPointerException if {@code namespaceSerializer}, {@code stateDesc} or
     *         {@code backend} is {@code null}.
     */
    protected AbstractRocksDBState(
            ColumnFamilyHandle columnFamily,
            TypeSerializer<N> namespaceSerializer,
            SD stateDesc,
            RocksDBKeyedStateBackend<K> backend) {

        // Validate everything this class dereferences before any RocksDB object is created,
        // so that a bad argument fails fast with a descriptive message and does not leave
        // a freshly created WriteOptions behind.
        this.namespaceSerializer = Preconditions.checkNotNull(namespaceSerializer, "Namespace serializer");
        this.stateDesc = Preconditions.checkNotNull(stateDesc, "State Descriptor");
        this.backend = Preconditions.checkNotNull(backend, "Backend");
        this.columnFamily = columnFamily;

        this.ambiguousKeyPossible = (backend.getKeySerializer().getLength() < 0)
                && (namespaceSerializer.getLength() < 0);

        this.keySerializationStream = new ByteArrayOutputStreamWithPos(128);
        this.keySerializationDataOutputView = new DataOutputViewStreamWrapper(keySerializationStream);

        this.writeOptions = new WriteOptions();
        this.writeOptions.setDisableWAL(true);
    }

    // ------------------------------------------------------------------------

    /**
     * Removes the entry stored under the backend's current key and the current namespace.
     *
     * @throws RuntimeException wrapping any {@link IOException} or {@link RocksDBException}
     *         raised while building the key or removing the entry.
     */
    @Override
    public void clear() {
        try {
            writeCurrentKeyWithGroupAndNamespace();
            byte[] key = keySerializationStream.toByteArray();
            backend.db.remove(columnFamily, writeOptions, key);
        } catch (IOException | RocksDBException e) {
            throw new RuntimeException("Error while removing entry from RocksDB", e);
        }
    }

    /**
     * Sets the namespace that subsequent operations on the current key refer to.
     *
     * @param namespace The namespace to use; must not be {@code null}.
     */
    @Override
    public void setCurrentNamespace(N namespace) {
        this.currentNamespace = Preconditions.checkNotNull(namespace, "Namespace");
    }

    /**
     * Looks up the value bytes stored for a serialized key and namespace.
     *
     * <p>The request bytes are deserialized, the key group is recomputed from the key, and the
     * composite RocksDB key is rebuilt in a method-local buffer before the lookup.
     *
     * @param serializedKeyAndNamespace The key and namespace in the format understood by
     *        {@link KvStateRequestSerializer}; must not be {@code null}.
     * @return The bytes returned by the RocksDB lookup for the rebuilt key.
     * @throws Exception if deserialization, key serialization or the RocksDB lookup fails.
     */
    @Override
    @SuppressWarnings("unchecked")
    public byte[] getSerializedValue(byte[] serializedKeyAndNamespace) throws Exception {
        Preconditions.checkNotNull(serializedKeyAndNamespace, "Serialized key and namespace");

        //TODO make KvStateRequestSerializer key-group aware to save this round trip and key-group computation
        Tuple2<K, N> des = KvStateRequestSerializer.<K, N>deserializeKeyAndNamespace(
                serializedKeyAndNamespace,
                backend.getKeySerializer(),
                namespaceSerializer);

        int keyGroup = KeyGroupRangeAssignment.assignToKeyGroup(des.f0, backend.getNumberOfKeyGroups());

        // we cannot reuse the keySerializationStream member since this method
        // is called concurrently to the other ones and it may thus contain garbage
        ByteArrayOutputStreamWithPos tmpKeySerializationStream = new ByteArrayOutputStreamWithPos(128);
        DataOutputViewStreamWrapper tmpKeySerializationDataOutputView =
                new DataOutputViewStreamWrapper(tmpKeySerializationStream);

        writeKeyWithGroupAndNamespace(keyGroup, des.f0, des.f1,
                tmpKeySerializationStream, tmpKeySerializationDataOutputView);

        return backend.db.get(columnFamily, tmpKeySerializationStream.toByteArray());
    }

    /**
     * Writes the composite key for the backend's current key group, current key and the
     * current namespace into the shared {@link #keySerializationStream}.
     *
     * @throws IOException if serializing the key or namespace fails.
     */
    protected void writeCurrentKeyWithGroupAndNamespace() throws IOException {
        writeKeyWithGroupAndNamespace(
                backend.getCurrentKeyGroupIndex(),
                backend.getCurrentKey(),
                currentNamespace,
                keySerializationStream,
                keySerializationDataOutputView);
    }

    /**
     * Resets the given stream and writes the key-group prefix, the key and the namespace
     * into it, in that order.
     *
     * @param keyGroup The key group whose prefix bytes are written first.
     * @param key The key; must not be {@code null}.
     * @param namespace The namespace to serialize after the key.
     * @param keySerializationStream The buffer that receives the composite key.
     * @param keySerializationDataOutputView The data output view used for writing; callers in
     *        this class pass a view that wraps {@code keySerializationStream}.
     * @throws IOException if serialization fails.
     */
    protected void writeKeyWithGroupAndNamespace(
            int keyGroup, K key, N namespace,
            ByteArrayOutputStreamWithPos keySerializationStream,
            DataOutputView keySerializationDataOutputView) throws IOException {

        Preconditions.checkNotNull(key, "No key set. This method should not be called outside of a keyed context.");

        keySerializationStream.reset();
        writeKeyGroup(keyGroup, keySerializationDataOutputView);
        writeKey(key, keySerializationStream, keySerializationDataOutputView);
        writeNameSpace(namespace, keySerializationStream, keySerializationDataOutputView);
    }

    /**
     * Writes the lowest {@code backend.getKeyGroupPrefixBytes()} bytes of the key group,
     * most significant byte first.
     */
    private void writeKeyGroup(
            int keyGroup,
            DataOutputView keySerializationDataOutputView) throws IOException {
        // i counts down from (prefixBytes - 1) to 0; (i << 3) is the bit offset of byte i.
        for (int i = backend.getKeyGroupPrefixBytes(); --i >= 0;) {
            keySerializationDataOutputView.writeByte(keyGroup >>> (i << 3));
        }
    }

    /**
     * Serializes the key and, if length suffixes are in use, appends the number of bytes the
     * key occupied.
     */
    private void writeKey(
            K key,
            ByteArrayOutputStreamWithPos keySerializationStream,
            DataOutputView keySerializationDataOutputView) throws IOException {
        //write key
        int beforeWrite = keySerializationStream.getPosition();
        backend.getKeySerializer().serialize(key, keySerializationDataOutputView);

        if (ambiguousKeyPossible) {
            //write size of key
            writeLengthFrom(beforeWrite, keySerializationStream,
                    keySerializationDataOutputView);
        }
    }

    /**
     * Serializes the namespace and, if length suffixes are in use, appends the number of
     * bytes the namespace occupied.
     */
    private void writeNameSpace(
            N namespace,
            ByteArrayOutputStreamWithPos keySerializationStream,
            DataOutputView keySerializationDataOutputView) throws IOException {
        int beforeWrite = keySerializationStream.getPosition();
        namespaceSerializer.serialize(namespace, keySerializationDataOutputView);

        if (ambiguousKeyPossible) {
            //write length of namespace
            writeLengthFrom(beforeWrite, keySerializationStream,
                    keySerializationDataOutputView);
        }
    }

    /**
     * Writes the number of bytes added to the stream since {@code fromPosition} as a
     * variable-length integer.
     */
    private static void writeLengthFrom(
            int fromPosition,
            ByteArrayOutputStreamWithPos keySerializationStream,
            DataOutputView keySerializationDataOutputView) throws IOException {
        int length = keySerializationStream.getPosition() - fromPosition;
        writeVariableIntBytes(length, keySerializationDataOutputView);
    }

    /**
     * Writes {@code value} one byte at a time, lowest byte first, stopping once the remaining
     * higher-order bits are all zero. At least one byte is always written.
     */
    private static void writeVariableIntBytes(
            int value,
            DataOutputView keySerializationDataOutputView)
            throws IOException {
        do {
            keySerializationDataOutputView.writeByte(value);
            value >>>= 8;
        } while (value != 0);
    }

    /**
     * Reads a composite key back into its parts, in the order in which
     * {@link #writeKeyWithGroupAndNamespace} writes them.
     *
     * @param inputStream The stream holding the serialized composite key; its position is used
     *        to measure how many bytes the key and the namespace consumed.
     * @param inputView The data input view used for reading; NOTE(review): it is expected to
     *        read from {@code inputStream}, otherwise the measured lengths are meaningless.
     * @return A tuple of (key group, key, namespace).
     * @throws IOException if deserialization fails.
     */
    protected Tuple3<Integer, K, N> readKeyWithGroupAndNamespace(
            ByteArrayInputStreamWithPos inputStream, DataInputView inputView) throws IOException {
        int keyGroup = readKeyGroup(inputView);
        K key = readKey(inputStream, inputView);
        N namespace = readNamespace(inputStream, inputView);

        return new Tuple3<>(keyGroup, key, namespace);
    }

    /**
     * Reads {@code backend.getKeyGroupPrefixBytes()} bytes and assembles them, most
     * significant byte first, into the key group.
     */
    private int readKeyGroup(DataInputView inputView) throws IOException {
        int keyGroup = 0;
        for (int i = 0; i < backend.getKeyGroupPrefixBytes(); ++i) {
            keyGroup <<= 8;
            // mask to treat the byte as unsigned
            keyGroup |= (inputView.readByte() & 0xFF);
        }
        return keyGroup;
    }

    /**
     * Deserializes the key and, if length suffixes are in use, skips the suffix that follows it.
     */
    private K readKey(ByteArrayInputStreamWithPos inputStream, DataInputView inputView) throws IOException {
        int beforeRead = inputStream.getPosition();
        K key = backend.getKeySerializer().deserialize(inputView);
        if (ambiguousKeyPossible) {
            // The suffix encodes the number of bytes just consumed, so that number
            // determines how many suffix bytes have to be skipped.
            int length = inputStream.getPosition() - beforeRead;
            readVariableIntBytes(inputView, length);
        }
        return key;
    }

    /**
     * Deserializes the namespace and, if length suffixes are in use, skips the suffix that
     * follows it.
     */
    private N readNamespace(ByteArrayInputStreamWithPos inputStream, DataInputView inputView) throws IOException {
        int beforeRead = inputStream.getPosition();
        N namespace = namespaceSerializer.deserialize(inputView);
        if (ambiguousKeyPossible) {
            // The suffix encodes the number of bytes just consumed, so that number
            // determines how many suffix bytes have to be skipped.
            int length = inputStream.getPosition() - beforeRead;
            readVariableIntBytes(inputView, length);
        }
        return namespace;
    }

    /**
     * Reads and discards as many bytes as {@link #writeVariableIntBytes} emits for
     * {@code value}.
     */
    private void readVariableIntBytes(DataInputView inputView, int value) throws IOException {
        do {
            inputView.readByte();
            value >>>= 8;
        } while (value != 0);
    }
}