package org.yamcs.yarch.rocksdb; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.ColumnFamilyOptions; import org.rocksdb.DBOptions; import org.rocksdb.Options; import org.rocksdb.ReadOptions; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.rocksdb.RocksIterator; import org.yamcs.utils.ByteArrayWrapper; import org.yamcs.utils.StringConverter; import org.yamcs.yarch.rocksdb.RdbConfig.TableConfig; /** * wrapper around RocksDB that keeps track of column families * * @author nm * */ public class YRDB { //keep mapping from raw byte array and the object that is used by some applications Map<ByteArrayWrapper, ColumnFamilyHandle> columnFamilies = new HashMap<>(); private final RocksDB db; private boolean isClosed = false; private final String path; private final ColumnFamilyOptions cfoptions; private final DBOptions dbOptions; /** * Create or open a new RocksDb. * * @param dir - if it exists, it has to be a directory * @param cfSerializer - column family serializer * @throws RocksDBException * @throws IOException */ YRDB(String dir) throws RocksDBException, IOException { File f = new File(dir); if(f.exists() && !f.isDirectory()) { throw new IOException("'"+dir+"' exists and it is not a directory"); } RdbConfig rdbConfig = RdbConfig.getInstance(); TableConfig tc = rdbConfig.getTableConfig(f.getName()); cfoptions = (tc==null)? rdbConfig.getDefaultColumnFamilyOptions():tc.getColumnFamilyOptions(); Options opt = (tc==null)? rdbConfig.getDefaultOptions():tc.getOptions(); dbOptions = (tc==null)? rdbConfig.getDefaultDBOptions():tc.getDBOptions(); this.path = dir; File current = new File(dir+File.separatorChar+"CURRENT"); if(current.exists()) { List<byte[]> cfl = RocksDB.listColumnFamilies(opt, dir); if(cfl!=null) { List<ColumnFamilyDescriptor> cfdList = new ArrayList<ColumnFamilyDescriptor>(cfl.size()); for(byte[] b: cfl) { cfdList.add(new ColumnFamilyDescriptor(b, cfoptions)); } List<ColumnFamilyHandle> cfhList = new ArrayList<ColumnFamilyHandle>(cfl.size()); db = RocksDB.open(dbOptions, dir, cfdList, cfhList); for(int i=0;i<cfl.size();i++) { byte[] b = cfl.get(i); columnFamilies.put(new ByteArrayWrapper(b), cfhList.get(i)); } } else { //no existing column families db = RocksDB.open(opt, dir); } } else { //new DB db = RocksDB.open(opt, dir); } } /** * Close the database. Shall only be done from the RDBFactory */ void close() { db.close(); isClosed = true; } /** * @return true if the database is open */ public boolean isOpen() { return !isClosed; } public List<RocksIterator> newIterators(List<ColumnFamilyHandle> cfhList, boolean tailing) throws RocksDBException { ReadOptions ro = new ReadOptions(); ro.setTailing(tailing); return db.newIterators(cfhList, ro); } public RocksIterator newIterator(ColumnFamilyHandle cfh) throws RocksDBException { return db.newIterator(cfh); } public synchronized ColumnFamilyHandle getColumnFamilyHandle(byte[] cfname) { ColumnFamilyHandle cfh = columnFamilies.get(new ByteArrayWrapper(cfname)); //in yamcs 0.29.3 and older we used to create a column family for null values (i.e. when not partitioning on a value) //starting with yamcs 0.29.4 we use the default column family for this // the old tables are still supported because at startup the columnFamilies map will be populated with the null key if((cfname==null) && (cfh==null)) { return db.getDefaultColumnFamily(); } return cfh; } public synchronized ColumnFamilyHandle getColumnFamilyHandle(String cfname) { ColumnFamilyHandle cfh = columnFamilies.get(new ByteArrayWrapper(cfname.getBytes(StandardCharsets.UTF_8))); //in yamcs 0.29.3 and older we used to create a column family for null values (i.e. when not partitioning on a value) //starting with yamcs 0.29.4 we use the default column family for this // the old tables are still supported because at startup the columnFamilies map will be populated with the null key if((cfname==null) && (cfh==null)) { return db.getDefaultColumnFamily(); } return cfh; } public byte[] get(ColumnFamilyHandle cfh, byte[] key) throws RocksDBException { return db.get(cfh, key); } public byte[] get(byte[] k) throws RocksDBException { return db.get(k); } public synchronized ColumnFamilyHandle createColumnFamily(byte[] cfname) throws RocksDBException { ColumnFamilyDescriptor cfd = new ColumnFamilyDescriptor(cfname, cfoptions); ColumnFamilyHandle cfh = db.createColumnFamily(cfd); columnFamilies.put(new ByteArrayWrapper(cfname), cfh); return cfh; } public synchronized ColumnFamilyHandle createColumnFamily(String name) throws RocksDBException { return createColumnFamily(name.getBytes(StandardCharsets.UTF_8)); } public void put(ColumnFamilyHandle cfh, byte[] k, byte[] v) throws RocksDBException { db.put(cfh, k, v); } public void put(byte[] k, byte[] v) throws RocksDBException { db.put(k, v); } public List<byte[]> getColumnFamilies() { List<byte[]> l = new ArrayList<>(); for(ByteArrayWrapper baw: columnFamilies.keySet()) { l.add(baw.getData()); } return l; } public Collection<String> getColumnFamiliesAsStrings() { List<String> l = new ArrayList<>(); for(ByteArrayWrapper baw: columnFamilies.keySet()) { l.add(new String(baw.getData(),StandardCharsets.UTF_8)); } return l; } public String getPath() { return path; } public String getProperites() throws RocksDBException { if(isClosed) { throw new IllegalStateException("Database is closed"); } final List<String> mlprops = Arrays.asList("rocksdb.stats", "rocksdb.sstables", "rocksdb.cfstats", "rocksdb.dbstats", "rocksdb.levelstats" , "rocksdb.aggregated-table-properties"); final List<String> slprops = Arrays.asList("rocksdb.num-immutable-mem-table", "rocksdb.num-immutable-mem-table-flushed" , "rocksdb.mem-table-flush-pending", "rocksdb.num-running-flushes" , "rocksdb.compaction-pending", "rocksdb.num-running-compactions", "rocksdb.background-errors", "rocksdb.cur-size-active-mem-table" , "rocksdb.cur-size-all-mem-tables", "rocksdb.size-all-mem-tables", "rocksdb.num-entries-active-mem-table", "rocksdb.num-entries-imm-mem-tables" , "rocksdb.num-deletes-active-mem-table", "rocksdb.num-deletes-imm-mem-tables", "rocksdb.estimate-num-keys", "rocksdb.estimate-table-readers-mem" , "rocksdb.is-file-deletions-enabled" , "rocksdb.num-snapshots","rocksdb.oldest-snapshot-time" , "rocksdb.num-live-versions" , "rocksdb.current-super-version-number", "rocksdb.estimate-live-data-size", "rocksdb.base-level"); StringBuilder sb = new StringBuilder(); for(Map.Entry<ByteArrayWrapper, ColumnFamilyHandle> e: columnFamilies.entrySet()) { Object o = cfNameToString(e.getKey().getData()); ColumnFamilyHandle chf = e.getValue(); sb.append("============== Column Family: "+o+"========\n"); for(String p:slprops) { sb.append(p).append(": "); sb.append(db.getProperty(chf, p)); sb.append("\n"); } for(String p:mlprops) { sb.append("---------- "+p+"----------------\n"); sb.append(db.getProperty(chf, p)); sb.append("\n"); } } return sb.toString(); } static public String cfNameToString(byte[] cfname) { for(byte b: cfname) { if(b==0) { return "HEX["+StringConverter.arrayToHexString(cfname)+"]"; } } return new String(cfname, StandardCharsets.UTF_8); } public RocksDB getDb() { return db; } public synchronized void dropColumnFamily(ColumnFamilyHandle cfh) throws RocksDBException { for(Map.Entry<ByteArrayWrapper, ColumnFamilyHandle> e: columnFamilies.entrySet()) { if(e.getValue()==cfh) { db.dropColumnFamily(cfh); columnFamilies.remove(e.getKey()); break; } } } /** * scans and returns a list of all prefixes of specified size * @param size * @return list of partitions * @throws IOException */ public List<byte[]> scanPartitions(int size) throws IOException { try (RocksIterator it = db.newIterator()) { List<byte[]> l = new ArrayList<byte[]>(); byte[] k = new byte[size]; while(true) { it.seek(k); if(!it.isValid()) { break; } byte[]found = it.key(); if(found.length<size) { throw new IOException("Found key smaller than the partition length: "+found.length+" vs "+size+". Database corruption?"); } l.add(Arrays.copyOf(found, size)); System.arraycopy(found, 0, k, 0, size); int i = size-1; while(i>=0 && k[i] == -1) { k[i] = 0; i--; } if(i<0) { break; } else { k[i] = (byte) (Byte.toUnsignedInt(k[i])+1); } } it.close(); return l; } } }