package org.yamcs.yarch;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yamcs.utils.DatabaseCorruptionException;
import org.yamcs.utils.StringConverter;
import org.yamcs.yarch.ColumnSerializerFactory.EnumColumnSerializer;
import org.yamcs.yarch.PartitioningSpec._type;
import org.yamcs.yarch.streamsql.ColumnNotFoundException;
import org.yamcs.yarch.streamsql.GenericStreamSqlException;
import org.yamcs.yarch.streamsql.NotSupportedException;
import org.yamcs.yarch.streamsql.StreamSqlException;
import org.yamcs.yarch.streamsql.StreamSqlException.ErrCode;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
/**
 * A table definition consists of a (key, value) pair of tuple definitions.
 * A tuple has to contain all the columns from the key, but it may contain only a subset of the columns from
 * the value (basically it's a sparse table).
 * <p>
 * The key is encoded as a bytestream of all the key columns in order.
 * The value is encoded as a bytestream of the value columns, each preceded by its index.
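 * <p>
 * Illustrative layout (the byte size of each column depends on its {@link ColumnSerializer}):
 * <pre>
 * key:   [col 0][col 1]...[col n]      all key columns, in key order
 * value: [idx][col][idx][col]...[-1]   a 4-byte index before each present value column,
 *                                      -1 as end-of-value marker
 * </pre>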
 * <p>
 * A table can also be partitioned in multiple files on disk, according to the partitioningSpec.
* @author nm
*
*/
public class TableDefinition {
    static Logger log = LoggerFactory.getLogger(TableDefinition.class);
    /* table version history
     * 0: yamcs version < 3.0
     * 1: - the histograms were stored in a separate rocksdb database.
     *    - pp table contained a column ppgroup instead of group
     * 2: - the PROTOBUF(org.yamcs.protobuf.Pvalue$ParameterValue) is replaced by PARAMETER_VALUE in the pp table
     *
     * To switch to the latest version, use the bin/yamcs archive --upgrade command
     */
public static final int CURRENT_FORMAT_VERSION = 2;
private int formatVersion = CURRENT_FORMAT_VERSION;
    //used for rocksdb - IN_KEY: store the partition in front of the key
    //                 - COLUMN_FAMILY: store data for each partition in a different column family
    //this is used only if the table is partitioned by value
public enum PartitionStorage {IN_KEY, COLUMN_FAMILY}
private PartitionStorage partitionStorage = PartitionStorage.IN_KEY;
private final TupleDefinition keyDef;
    //the definition of all the value columns that the table can have. A particular row can have fewer columns.
    //We keep two references: one that is written to disk as part of the serialization and one that is actually used.
    //We do this to prevent a column from being used before the serialization has been flushed to disk.
TupleDefinition serializedValueDef = new TupleDefinition();
private volatile TupleDefinition valueDef = serializedValueDef;
// keyDef+valueDef
private volatile TupleDefinition tupleDef;
private YarchDatabase ydb;
    private boolean customDataDir=false; //if true, dataDir represents a directory different from the YarchDatabase root.
    //It will not be discarded after serialization.
private String dataDir;
private boolean compressed;
private PartitioningSpec partitioningSpec = PartitioningSpec.noneSpec();
private String storageEngineName = YarchDatabase.RDB_ENGINE_NAME;
    transient private String name; //we make this transient such that table names can be changed by changing the filename
private List<String> histoColumns;
private List<ColumnSerializer<?>> keySerializers=new ArrayList<ColumnSerializer<?>>();
private List<ColumnSerializer<?>> valueSerializers=new ArrayList<ColumnSerializer<?>>();
//mapping from String to short for the columns of type enum
Map<String, BiMap<String,Short>> serializedEmumValues;
private volatile Map<String, BiMap<String,Short>> enumValues;
/**
     * Used when creating an "empty" table (i.e. one without enum values) via SQL.
* @param name
* @param tdef
* @param primaryKey
* @throws StreamSqlException
*/
public TableDefinition(String name, TupleDefinition tdef, List<String> primaryKey) throws StreamSqlException {
keyDef = new TupleDefinition();
this.name=name;
for(String s:primaryKey) {
ColumnDefinition c=tdef.getColumn(s);
if(c==null) {
throw new ColumnNotFoundException(s);
}
keyDef.addColumn(c);
keySerializers.add(ColumnSerializerFactory.getColumnSerializer(this, c));
}
for(ColumnDefinition c:tdef.getColumnDefinitions()) {
if(keyDef.getColumn(c.getName())==null) {
valueDef.addColumn(c);
valueSerializers.add(ColumnSerializerFactory.getColumnSerializer(this, c));
}
}
computeTupleDef();
}
/**
* Used when creating the table from the def file on disk
* @param keyDef
* @param valueDef
* @param enumValues
*/
TableDefinition(TupleDefinition keyDef, TupleDefinition valueDef, Map<String, BiMap<String,Short>> enumValues) {
this.valueDef = valueDef;
this.serializedValueDef = valueDef;
this.keyDef = keyDef;
computeTupleDef();
this.enumValues = enumValues;
this.serializedEmumValues = enumValues;
for(ColumnDefinition cd:keyDef.getColumnDefinitions()) {
ColumnSerializer<?> cs = ColumnSerializerFactory.getColumnSerializer(this, cd);
keySerializers.add(cs);
if((cd.getType()==DataType.ENUM) && enumValues.containsKey(cd.getName())) {
((EnumColumnSerializer)cs).setEnumValues(enumValues.get(cd.getName()));
}
}
for(ColumnDefinition cd:valueDef.getColumnDefinitions()) {
ColumnSerializer<?> cs = ColumnSerializerFactory.getColumnSerializer(this, cd);
valueSerializers.add(cs);
if((cd.getType()==DataType.ENUM) && enumValues.containsKey(cd.getName())) {
((EnumColumnSerializer)cs).setEnumValues(enumValues.get(cd.getName()));
}
}
}
public void setDb(YarchDatabase ydb) {
this.ydb=ydb;
}
/**
     * Time-based partitioning can only be done on the first column of the key (which has to be of type TIMESTAMP).
     * Value-based partitioning can be done on any other column (from the key or the value part).
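     * <p>
     * Sketch (the column name is hypothetical; the constructor form is the one also used in
     * {@link #renameColumn(String, String)}):
     * <pre>
     * tableDefinition.setPartitioningSpec(new PartitioningSpec(_type.TIME, "gentime", null));
     * </pre>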
* @param pspec
*/
public void setPartitioningSpec(PartitioningSpec pspec) throws StreamSqlException {
if((pspec.type==PartitioningSpec._type.TIME) ||
(pspec.type==PartitioningSpec._type.TIME_AND_VALUE)){
ColumnDefinition cd=keyDef.getColumn(pspec.timeColumn);
if(cd==null) {
throw new GenericStreamSqlException("time partition specified on a column not part of the primary key: '"+pspec.timeColumn+"'");
}
if(cd.getType()!=DataType.TIMESTAMP) {
throw new GenericStreamSqlException("time partition specified on a column of type "+cd.getType());
}
if(!keyDef.getColumn(0).getName().equals(pspec.timeColumn)){
throw new GenericStreamSqlException("time partition supported only on the first column of the primary key");
}
}
if((pspec.type==PartitioningSpec._type.VALUE) ||
(pspec.type==PartitioningSpec._type.TIME_AND_VALUE)) {
ColumnDefinition c;
if(keyDef.hasColumn(pspec.valueColumn)) {
c = keyDef.getColumn(pspec.valueColumn);
} else if(valueDef.hasColumn(pspec.valueColumn)) {
c = valueDef.getColumn(pspec.valueColumn);
} else {
throw new ColumnNotFoundException(pspec.valueColumn);
}
pspec.setValueColumnType(c.getType());
}
this.partitioningSpec=pspec;
}
private void computeTupleDef() {
tupleDef=new TupleDefinition();
for(ColumnDefinition cd:keyDef.getColumnDefinitions()) {
tupleDef.addColumn(cd);
}
for(ColumnDefinition cd:valueDef.getColumnDefinitions()) {
tupleDef.addColumn(cd);
}
}
public TupleDefinition getKeyDefinition() {
return keyDef;
}
public TupleDefinition getValueDefinition() {
return valueDef;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name=name;
}
/**
     * sets the customDataDir flag
     * - if true, the dataDir will not be discarded after serialization, so after a server restart it keeps the value it was set to.
     * - if false, at restart the dataDir will be set to the YarchDatabase dataDir
* @param customDataDir
*/
public void setCustomDataDir(boolean customDataDir) {
this.customDataDir = customDataDir;
}
public boolean hasCustomDataDir() {
return customDataDir;
}
public String getDataDir() {
return dataDir;
}
/**
* sets dataDir to this value
* @param dataDir
*/
public void setDataDir(String dataDir) {
this.dataDir = dataDir;
}
public TupleDefinition getTupleDefinition() {
return tupleDef;
}
/**
* Checks that the table definition is valid:
     * - no primary key column of type binary, except for the last one in the key (otherwise the binary sorting does not work properly)
* @throws StreamSqlException
*/
public void validate() throws StreamSqlException{
for(int i=0; i<keyDef.getColumnDefinitions().size()-1; i++) {
ColumnDefinition cd=keyDef.getColumnDefinitions().get(i);
if(cd.getType()==DataType.BINARY) {
                throw new NotSupportedException("Primary key columns of type binary are only supported as the last column of the key (otherwise the binary sorting does not work properly)");
}
}
}
/**
* Transforms the key part of the tuple into a byte array to be written to disk.
     * The tuple must contain all the key columns; they are written in order (so that the sorting matches the definition of the primary key).
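     * <p>
     * Sketch (assuming a table keyed on a single TIMESTAMP column named "gentime"; the column name is hypothetical):
     * <pre>
     * byte[] key = tableDefinition.serializeKey(t); // t must contain a "gentime" value
     * </pre>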
* @param t
* @return serialized key value
*/
public byte[] serializeKey(Tuple t) {
try ( ByteArrayOutputStream baos = new ByteArrayOutputStream()){
DataOutputStream dos = new DataOutputStream(baos);
for(int i=0; i<keyDef.size(); i++) {
ColumnSerializer cs = keySerializers.get(i);
String colName = keyDef.getColumn(i).getName();
Object v = t.getColumn(colName);
if(v==null){
throw new IllegalArgumentException("Tuple does not have mandatory column '"+colName+"'");
}
cs.serialize(dos, v);
}
return baos.toByteArray();
} catch (IOException e) {
throw new IllegalArgumentException("Cannot serialize key from tuple "+t+": ", e);
}
}
/**
* adds a column to the value part and serializes the table definition to disk
* @param cd
*/
private synchronized void addValueColumn(ColumnDefinition cd) {
serializedValueDef = valueDef.copy();
serializedValueDef.addColumn(cd);
ydb.serializeTableDefinition(this);
valueDef = serializedValueDef;
valueSerializers.add(ColumnSerializerFactory.getColumnSerializer(this, cd));
computeTupleDef();
}
/**
* Changes the formatVersion and serializes the table definition to disk
* @param formatVersion new format version
*/
public synchronized void changeFormatDefinition(int formatVersion) {
this.formatVersion = formatVersion;
ydb.serializeTableDefinition(this);
}
/**
* Renames column and serializes the table definition to disk.
*
     * Should not be used while the table is in use (e.g. by a table writer or reader).
*
* @param oldName - old name of the column
* @param newName - new name of the column
*/
public synchronized void renameColumn(String oldName, String newName) {
if(keyDef.hasColumn(oldName)) {
keyDef.renameColumn(oldName, newName);
} else if(valueDef.hasColumn(oldName)) {
valueDef.renameColumn(oldName, newName);
} else {
throw new IllegalArgumentException("no column named '"+oldName+"'");
}
if(oldName.equals(partitioningSpec.timeColumn)) {
PartitioningSpec newSpec = new PartitioningSpec(partitioningSpec.type, newName, partitioningSpec.valueColumn);
newSpec.setTimePartitioningSchema(partitioningSpec.getTimePartitioningSchema());
partitioningSpec = newSpec;
} else if (oldName.equals(partitioningSpec.valueColumn)) {
PartitioningSpec newSpec = new PartitioningSpec(partitioningSpec.type, partitioningSpec.timeColumn, newName);
newSpec.setTimePartitioningSchema(partitioningSpec.getTimePartitioningSchema());
partitioningSpec = newSpec;
}
        if(histoColumns!=null) {
            int idx = histoColumns.indexOf(oldName);
            if(idx!=-1) {
                histoColumns.set(idx, newName);
            }
        }
if((enumValues!=null) && (enumValues.containsKey(oldName))) {
BiMap<String, Short> b = enumValues.remove(oldName);
serializedEmumValues.put(newName, b);
}
ydb.serializeTableDefinition(this);
enumValues = serializedEmumValues;
}
/**
     * Adds a value to an enum and writes the table definition to disk.
     * We first modify serializedEmumValues to make sure that nobody else sees the new enum id
     * before the serialization is finished (i.e. flushed to disk).
*
*/
synchronized void addEnumValue(EnumColumnSerializer cs, String v) {
String columnName = cs.getColumnName();
BiMap<String, Short> b;
//first check if it's not already in the map
if((enumValues!=null) && ((b=enumValues.get(columnName))!=null) && b.containsKey(v)) {
return;
}
log.debug("Adding enum value {} for {}.{}", v, name, columnName);
serializedEmumValues = new HashMap<>();
if(enumValues!=null) {
serializedEmumValues.putAll(enumValues);
}
b = serializedEmumValues.remove(columnName);
BiMap<String, Short> b2= HashBiMap.create();
if(b!=null) {
b2.putAll(b);
}
b2.put(v, (short)b2.size());
serializedEmumValues.put(columnName, b2);
ydb.serializeTableDefinition(this);
enumValues = serializedEmumValues;
cs.setEnumValues(b2);
}
/**
     * Gets the enum id corresponding to the given (column, value) pair, creating it if it does not exist.
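     * <p>
     * Ids are assigned in insertion order, starting at 0; the column name below is hypothetical:
     * <pre>
     * Short a = tableDefinition.addAndGetEnumValue("ppgroup", "SOL"); // 0 on first call
     * Short b = tableDefinition.addAndGetEnumValue("ppgroup", "SOL"); // still 0
     * </pre>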
     * @return the short id of the enum value
*/
public Short addAndGetEnumValue(String columnName, String value) {
Short enumValue;
Short v1;
BiMap<String, Short> b;
if((enumValues==null) || ((b=enumValues.get(columnName))==null) || (v1=b.get(value))==null) {
EnumColumnSerializer cs = (EnumColumnSerializer)getColumnSerializer(columnName);
addEnumValue(cs, value);
enumValue = enumValues.get(columnName).get(value);
} else {
enumValue = v1;
}
return enumValue;
}
/**
     * Transforms the value part of the tuple into a byte array to be written to disk.
* Each column is preceded by a tag (the column index).
* If there are columns in the tuple which are not in the valueDef, they are added and the TableDefinition is
* serialized on disk.
*
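     * <p>
     * Sketch of the resulting layout ({@code DataOutputStream.writeInt} emits 4-byte big-endian indices):
     * <pre>
     * [int idx0][col idx0 bytes][int idx1][col idx1 bytes]...[int -1]
     * </pre>
     *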
* @param t
* @return the serialized version of the value part of the tuple
*/
public byte[] serializeValue(Tuple t) {
TupleDefinition tdef=t.getDefinition();
try ( ByteArrayOutputStream baos = new ByteArrayOutputStream()){
DataOutputStream dos = new DataOutputStream(baos);
for(int i=0; i<tdef.size(); i++ ) {
ColumnDefinition tupleCd = tdef.getColumn(i);
if(keyDef.hasColumn(tupleCd.getName())){
continue;
}
int cidx = valueDef.getColumnIndex(tupleCd.getName());
            if(cidx==-1) { //add the column to the table, then call this function again
addValueColumn(tupleCd);
return serializeValue(t);
}
ColumnDefinition tableCd = valueDef.getColumn(cidx);
Object v = t.getColumn(i);
Object v1 = DataType.castAs(tupleCd.type, tableCd.type, v);
ColumnSerializer tcs = valueSerializers.get(cidx);
dos.writeInt(cidx);
tcs.serialize(dos, v1);
}
//add a final -1 eof marker
dos.writeInt(-1);
return baos.toByteArray();
} catch (IOException e) {
            throw new IllegalArgumentException("Cannot serialize value from tuple "+t+": ", e);
}
}
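    /**
     * Rebuilds a tuple from the serialized (key, value) byte arrays, reversing {@link #serializeKey(Tuple)}
     * and {@link #serializeValue(Tuple)}. The returned tuple contains all key columns plus whatever value
     * columns were present in the serialized value.
     * <p>
     * Round-trip sketch:
     * <pre>
     * Tuple t2 = tableDefinition.deserialize(tableDefinition.serializeKey(t),
     *         tableDefinition.serializeValue(t));
     * </pre>
     * @throws DatabaseCorruptionException if the byte arrays cannot be decoded
     */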
public Tuple deserialize(byte[] k, byte[] v) {
TupleDefinition tdef=keyDef.copy();
ArrayList<Object> cols = new ArrayList<>();
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(k));
try {
//deserialize the key
for(int i=0;i<keyDef.size(); i++) {
ColumnDefinition cd = keyDef.getColumn(i);
ColumnSerializer cs = keySerializers.get(i);
Object o = cs.deserialize(dis, cd);
cols.add(o);
}
//deserialize the value
dis = new DataInputStream(new ByteArrayInputStream(v));
while(true) {
int cidx = dis.readInt(); //column index
if(cidx==-1) {
break;
}
if(cidx>=valueDef.size()){
throw new IllegalArgumentException("Reference to index "+cidx+" found but the table definition does not have this column");
}
ColumnDefinition cd = valueDef.getColumn(cidx);
ColumnSerializer cs = valueSerializers.get(cidx);
Object o = cs.deserialize(dis, cd);
tdef.addColumn(cd);
cols.add(o);
}
} catch (IOException e) {
throw new DatabaseCorruptionException("cannot deserialize ("
+StringConverter.byteBufferToHexString(ByteBuffer.wrap(k))+ ","
+StringConverter.byteBufferToHexString(ByteBuffer.wrap(v))
+")", e);
}
return new Tuple(tdef, cols.toArray());
}
public boolean isCompressed() {
return compressed;
}
/**
* @param cname the column name
* @return true if cname is the first column of the key
*/
public boolean isIndexedByKey(String cname) {
return keyDef.getColumnIndex(cname)==0;
}
public ColumnDefinition getColumnDefinition(String cname) {
if(keyDef.hasColumn(cname)) {
return keyDef.getColumn(cname);
}
if(valueDef.hasColumn(cname)) {
return valueDef.getColumn(cname);
}
return null;
}
public boolean hasPartitioning() {
return partitioningSpec!=null;
}
public PartitioningSpec getPartitioningSpec() {
return partitioningSpec;
}
public void setCompressed(boolean compressed) {
this.compressed = compressed;
}
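    /**
     * Sets the columns on which histograms are maintained. The first column of the primary key must be of type
     * TIMESTAMP and cannot itself be a histogram column; each histogram column must exist in the table.
     * <p>
     * Sketch (the column name is hypothetical):
     * <pre>
     * tableDefinition.setHistogramColumns(Arrays.asList("pname"));
     * </pre>
     * @throws StreamSqlException if one of the conditions above is not met
     */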
public void setHistogramColumns(List<String> histoColumns) throws StreamSqlException {
if(keyDef.getColumn(0).getType()!=DataType.TIMESTAMP)
            throw new StreamSqlException(ErrCode.INVALID_HISTOGRAM_COLUMN, "Histograms can only be created on tables whose first primary key column is of type TIMESTAMP");
for(String hc:histoColumns) {
            if(keyDef.getColumn(0).getName().equals(hc))
throw new StreamSqlException(ErrCode.INVALID_HISTOGRAM_COLUMN, "Cannot create histogram on the first column of the primary key");
if(!tupleDef.hasColumn(hc))
throw new StreamSqlException(ErrCode.INVALID_HISTOGRAM_COLUMN, "Invalid column specified for histogram: "+hc);
}
this.histoColumns=histoColumns;
}
public boolean hasHistogram() {
return histoColumns!=null;
}
public BiMap<String, Short> getEnumValues(String columnName) {
if(enumValues==null){
return null;
}
return enumValues.get(columnName);
}
public List<String> getHistogramColumns() {
return histoColumns;
}
@Override
public String toString() {
StringBuilder sb=new StringBuilder();
        sb.append(name).append("(").append(keyDef.toString()).append(", ").append(valueDef.toString())
                .append(", primaryKey(").append(keyDef).append("))");
return sb.toString();
}
public ColumnSerializer getColumnSerializer(String columnName) {
if (keyDef.hasColumn(columnName)) {
int idx=keyDef.getColumnIndex(columnName);
return keySerializers.get(idx);
} else if (valueDef.hasColumn(columnName)) {
int idx=valueDef.getColumnIndex(columnName);
return valueSerializers.get(idx);
} else {
throw new IllegalArgumentException("Cannot find a serializer for invalid column "+columnName);
}
}
public String getStorageEngineName() {
return storageEngineName;
}
public void setStorageEngineName(String storageEngineName) {
this.storageEngineName = storageEngineName;
}
public PartitionStorage getPartitionStorage() {
return partitionStorage;
}
public void setPartitionStorage(PartitionStorage partitionStorage) {
this.partitionStorage = partitionStorage;
}
public boolean isPartitionedByValue() {
        return partitioningSpec.type==_type.TIME_AND_VALUE || partitioningSpec.type==_type.VALUE;
}
public int getFormatVersion() {
return formatVersion;
}
void setFormatVersion(int formatVersion) {
this.formatVersion = formatVersion;
}
}