Fix dboptions init (#2391)

For the block cache to be shared between column families, the options
instance must be shared between the various column families being
created. This also ensures that there is only one source of truth for
configuration options instead of having two different sets depending on
how the tables were initialized.

This PR also removes the re-opening mechanism which can double startup
time - every time the database is opened, the log is replayed - a large
log file will take a long time to open.

Finally, several options got correctly implemented as column family
options, including one that puts a hash index in the SST files.
This commit is contained in:
Jacek Sieka 2024-06-19 10:55:57 +02:00 committed by GitHub
parent 83f6f89869
commit 41cf81f80b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 180 additions and 285 deletions

View File

@ -38,21 +38,23 @@ export
proc newAristoRdbDbRef( proc newAristoRdbDbRef(
basePath: string; basePath: string;
opts: DbOptions; dbOpts: DbOptionsRef;
): Result[AristoDbRef, AristoError]= cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(AristoDbRef, seq[ColFamilyReadWrite]), AristoError]=
let let
be = ? rocksDbBackend(basePath, opts) (be, oCfs) = ? rocksDbBackend(basePath, dbOpts, cfOpts, guestCFs)
vTop = block: vTop = block:
let rc = be.getTuvFn() let rc = be.getTuvFn()
if rc.isErr: if rc.isErr:
be.closeFn(eradicate = false) be.closeFn(eradicate = false)
return err(rc.error) return err(rc.error)
rc.value rc.value
ok AristoDbRef( ok((AristoDbRef(
top: LayerRef( top: LayerRef(
delta: LayerDeltaRef(vTop: vTop), delta: LayerDeltaRef(vTop: vTop),
final: LayerFinalRef()), final: LayerFinalRef()),
backend: be) backend: be), oCfs))
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Public database constuctors, destructor # Public database constuctors, destructor
@ -62,36 +64,14 @@ proc init*(
T: type AristoDbRef; T: type AristoDbRef;
B: type RdbBackendRef; B: type RdbBackendRef;
basePath: string; basePath: string;
opts: DbOptions dbOpts: DbOptionsRef;
): Result[T, AristoError] = cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(T, seq[ColFamilyReadWrite]), AristoError] =
## Generic constructor, `basePath` argument is ignored for memory backend ## Generic constructor, `basePath` argument is ignored for memory backend
## databases (which also unconditionally succeed initialising.) ## databases (which also unconditionally succeed initialising.)
## ##
basePath.newAristoRdbDbRef opts basePath.newAristoRdbDbRef dbOpts, cfOpts, guestCFs
proc reinit*(
db: AristoDbRef;
cfs: openArray[ColFamilyDescriptor];
): Result[seq[ColFamilyReadWrite],AristoError] =
## Re-initialise the `RocksDb` backend database with additional or changed
## column family settings. This can be used to make space for guest use of
## the backend used by `Aristo`. The function returns a list of column family
## descriptors in the same order as the `cfs` argument.
##
## The argument `cfs` list replaces and extends the CFs already on disk by
## its options except for the ones defined for use with `Aristo`.
##
## Even though tx layers and filters might not be affected by this function,
## it is prudent to have them clean and saved on the backend database before
## changing it. On error conditions, data might get lost.
##
case db.backend.kind:
of BackendRocksDB:
db.backend.rocksDbUpdateCfs cfs
of BackendRdbHosting:
err(RdbBeWrTriggerActiveAlready)
else:
return err(RdbBeTypeUnsupported)
proc activateWrTrigger*( proc activateWrTrigger*(
db: AristoDbRef; db: AristoDbRef;

View File

@ -250,19 +250,22 @@ proc putBegHostingFn(db: RdbBackendRef): PutBegFn =
proc rocksDbBackend*( proc rocksDbBackend*(
path: string; path: string;
opts: DbOptions dbOpts: DbOptionsRef;
): Result[BackendRef,AristoError] = cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(BackendRef, seq[ColFamilyReadWrite]),AristoError] =
let db = RdbBackendRef( let db = RdbBackendRef(
beKind: BackendRocksDB) beKind: BackendRocksDB)
# Initialise RocksDB # Initialise RocksDB
block: let oCfs = block:
let rc = db.rdb.init(path, opts) let rc = db.rdb.init(path, dbOpts, cfOpts, guestCFs)
if rc.isErr: if rc.isErr:
when extraTraceMessages: when extraTraceMessages:
trace logTxt "constructor failed", trace logTxt "constructor failed",
error=rc.error[0], info=rc.error[1] error=rc.error[0], info=rc.error[1]
return err(rc.error[0]) return err(rc.error[0])
rc.value()
db.getVtxFn = getVtxFn db db.getVtxFn = getVtxFn db
db.getKeyFn = getKeyFn db db.getKeyFn = getKeyFn db
@ -277,19 +280,7 @@ proc rocksDbBackend*(
db.putEndFn = putEndFn db db.putEndFn = putEndFn db
db.closeFn = closeFn db db.closeFn = closeFn db
ok db ok((db, oCfs))
proc rocksDbUpdateCfs*(
be: BackendRef;
cfs: openArray[ColFamilyDescriptor];
): Result[seq[ColFamilyReadWrite],AristoError] =
## Reopen with extended column families given as argument.
let
db = RdbBackendRef(be)
rCfs = db.rdb.reinit(cfs).valueOr:
return err(error[0])
ok rCfs
proc rocksDbSetEventTrigger*( proc rocksDbSetEventTrigger*(

View File

@ -18,7 +18,6 @@ import
eth/common, eth/common,
rocksdb, rocksdb,
stew/[endians2, keyed_queue], stew/[endians2, keyed_queue],
../../../opts,
../../aristo_desc, ../../aristo_desc,
../init_common ../init_common
@ -43,7 +42,6 @@ type
rdVtxLru*: KeyedQueue[VertexID,VertexRef] ## Read cache rdVtxLru*: KeyedQueue[VertexID,VertexRef] ## Read cache
basePath*: string ## Database directory basePath*: string ## Database directory
opts*: DbOptions ## Just a copy here for re-opening
trgWriteEvent*: RdbWriteEventCb ## Database piggiback call back handler trgWriteEvent*: RdbWriteEventCb ## Database piggiback call back handler
AristoCFs* = enum AristoCFs* = enum

View File

@ -25,115 +25,17 @@ import
# Private constructor # Private constructor
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
proc getInitOptions(
opts: DbOptions;
): tuple[cfOpts: ColFamilyOptionsRef, dbOpts: DbOptionsRef] =
# TODO the configuration options below have not been tuned but are rather
# based on gut feeling, guesses and by looking at other clients - it
# would make sense to test different settings and combinations once the
# data model itself has settled down as their optimal values will depend
# on the shape of the data - it'll also be different per column family..
let cfOpts = defaultColFamilyOptions()
if opts.writeBufferSize > 0:
cfOpts.setWriteBufferSize(opts.writeBufferSize)
# When data is written to rocksdb, it is first put in an in-memory table
# whose index is a skip list. Since the mem table holds the most recent data,
# all reads must go through this skiplist which results in slow lookups for
# already-written data.
# We enable a bloom filter on the mem table to avoid this lookup in the cases
# where the data is actually on disk already (ie wasn't updated recently).
# TODO there's also a hashskiplist that has both a hash index and a skip list
# which maybe could be used - uses more memory, requires a key prefix
# extractor
cfOpts.setMemtableWholeKeyFiltering(true)
cfOpts.setMemtablePrefixBloomSizeRatio(0.1)
# LZ4 seems to cut database size to 2/3 roughly, at the time of writing
# Using it for the bottom-most level means it applies to 90% of data but
# delays compression until data has settled a bit, which seems like a
# reasonable tradeoff.
# TODO evaluate zstd compression with a trained dictionary
# https://github.com/facebook/rocksdb/wiki/Compression
cfOpts.setBottommostCompression(Compression.lz4Compression)
let dbOpts = defaultDbOptions()
dbOpts.setMaxOpenFiles(opts.maxOpenFiles)
dbOpts.setMaxBytesForLevelBase(opts.writeBufferSize)
if opts.rowCacheSize > 0:
# Good for GET queries, which is what we do most of the time - if we start
# using range queries, we should probably give more attention to the block
# cache
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L1276
dbOpts.setRowCache(cacheCreateLRU(opts.rowCacheSize))
# We mostly look up data we know is there, so we don't need filters at the
# last level of the database - this option saves 90% bloom filter memory usage
# TODO verify this point
# https://github.com/EighteenZi/rocksdb_wiki/blob/master/Memory-usage-in-RocksDB.md#indexes-and-filter-blocks
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/advanced_options.h#L696
dbOpts.setOptimizeFiltersForHits(true)
# Without this option, WAL files might never get removed since a small column
# family (like the admin CF) with only tiny writes might keep it open - this
# negatively affects startup times since the WAL is replayed on every startup.
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L719
# Flushing the oldest
let writeBufferSize =
if opts.writeBufferSize > 0:
opts.writeBufferSize
else:
64 * 1024 * 1024 # TODO read from rocksdb?
dbOpts.setMaxTotalWalSize(2 * writeBufferSize)
let tableOpts = defaultTableOptions()
# This bloom filter helps avoid having to read multiple SST files when looking
# for a value.
# A 9.9-bits-per-key ribbon filter takes ~7 bits per key and has a 1% false
# positive rate which feels like a good enough starting point, though this
# should be better investigated.
# https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#ribbon-filter
# https://github.com/facebook/rocksdb/blob/d64eac28d32a025770cba641ea04e697f475cdd6/include/rocksdb/filter_policy.h#L208
tableOpts.setFilterPolicy(createRibbonHybrid(9.9))
if opts.blockCacheSize > 0:
tableOpts.setBlockCache(cacheCreateLRU(opts.rowCacheSize))
# Single-level indices might cause long stalls due to their large size -
# two-level indexing allows the first level to be kept in memory at all times
# while the second level is partitioned resulting in smoother loading
# https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters#how-to-use-it
tableOpts.setIndexType(IndexType.twoLevelIndexSearch)
tableOpts.setPinTopLevelIndexAndFilter(true)
tableOpts.setCacheIndexAndFilterBlocksWithHighPriority(true)
tableOpts.setPartitionFilters(true) # TODO do we need this?
# This option adds a small hash index to each data block, presumably speeding
# up Get queries (but again not range queries) - takes up space, apparently
# a good tradeoff for most workloads
# https://github.com/facebook/rocksdb/wiki/Data-Block-Hash-Index
tableOpts.setDataBlockIndexType(DataBlockIndexType.binarySearchAndHash)
tableOpts.setDataBlockHashRatio(0.75)
dbOpts.setBlockBasedTableFactory(tableOpts)
(cfOpts,dbOpts)
proc initImpl( proc initImpl(
rdb: var RdbInst; rdb: var RdbInst;
basePath: string; basePath: string;
opts: DbOptions; dbOpts: DbOptionsRef,
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor] = []; guestCFs: openArray[ColFamilyDescriptor] = [];
): Result[void,(AristoError,string)] = ): Result[seq[ColFamilyReadWrite],(AristoError,string)] =
## Database backend constructor ## Database backend constructor
const initFailed = "RocksDB/init() failed" const initFailed = "RocksDB/init() failed"
rdb.basePath = basePath rdb.basePath = basePath
rdb.opts = opts
let let
dataDir = rdb.dataDir dataDir = rdb.dataDir
@ -142,9 +44,6 @@ proc initImpl(
except OSError, IOError: except OSError, IOError:
return err((RdbBeCantCreateDataDir, "")) return err((RdbBeCantCreateDataDir, ""))
# Expand argument `opts` to rocksdb options
let (cfOpts, dbOpts) = opts.getInitOptions()
# Column familiy names to allocate when opening the database. This list # Column familiy names to allocate when opening the database. This list
# might be extended below. # might be extended below.
var useCFs = AristoCFs.mapIt($it).toHashSet var useCFs = AristoCFs.mapIt($it).toHashSet
@ -182,7 +81,7 @@ proc initImpl(
rdb.keyCol = baseDb.withColFamily($KeyCF).valueOr: rdb.keyCol = baseDb.withColFamily($KeyCF).valueOr:
raiseAssert initFailed & " cannot initialise KeyCF descriptor: " & error raiseAssert initFailed & " cannot initialise KeyCF descriptor: " & error
ok() ok(guestCFs.mapIt(baseDb.withColFamily(it.name).expect("loaded cf")))
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Public constructor # Public constructor
@ -191,43 +90,12 @@ proc initImpl(
proc init*( proc init*(
rdb: var RdbInst; rdb: var RdbInst;
basePath: string; basePath: string;
opts: DbOptions; dbOpts: DbOptionsRef;
): Result[void,(AristoError,string)] = cfOpts: ColFamilyOptionsRef;
## Temporarily define a guest CF list here. guestCFs: openArray[ColFamilyDescriptor];
rdb.initImpl(basePath, opts)
proc reinit*(
rdb: var RdbInst;
cfs: openArray[ColFamilyDescriptor];
): Result[seq[ColFamilyReadWrite],(AristoError,string)] = ): Result[seq[ColFamilyReadWrite],(AristoError,string)] =
## Re-open database with changed parameters. Even though tx layers and ## Temporarily define a guest CF list here.
## filters might not be affected it is prudent to have them clean and rdb.initImpl(basePath, dbOpts, cfOpts, guestCFs)
## saved on the backend database before changing it.
##
## The function returns a list of column family descriptors in the same
## order as the `cfs` argument.
##
## The `cfs` list replaces and extends the CFs already on disk by its
## options except for the ones defined with `AristoCFs`.
##
const initFailed = "RocksDB/reinit() failed"
if not rdb.session.isNil:
return err((RdbBeWrSessionUnfinished,""))
if not rdb.baseDb.isClosed():
rdb.baseDb.close()
rdb.initImpl(rdb.basePath, rdb.opts, cfs).isOkOr:
return err(error)
# Assemble list of column family descriptors
var guestCols = newSeq[ColFamilyReadWrite](cfs.len)
for n,col in cfs:
guestCols[n] = rdb.baseDb.withColFamily(col.name).valueOr:
raiseAssert initFailed & " cannot initialise " &
col.name & " descriptor: " & error
ok guestCols
proc destroy*(rdb: var RdbInst; eradicate: bool) = proc destroy*(rdb: var RdbInst; eradicate: bool) =

View File

@ -11,20 +11,22 @@
{.push raises: [].} {.push raises: [].}
import import
std/sequtils,
eth/common, eth/common,
rocksdb,
results, results,
../../aristo, ../../aristo,
../../aristo/aristo_persistent as use_ari, ../../aristo/aristo_init/rocks_db as use_ari,
../../aristo/[aristo_desc, aristo_walk/persistent, aristo_tx], ../../aristo/[aristo_desc, aristo_walk/persistent, aristo_tx],
../../kvt, ../../kvt,
../../kvt/kvt_persistent as use_kvt, ../../kvt/kvt_persistent as use_kvt,
../../kvt/kvt_init/rocks_db/rdb_init,
../base, ../base,
./aristo_db, ./aristo_db,
./aristo_db/[common_desc, handlers_aristo], ./aristo_db/[common_desc, handlers_aristo],
../../opts ../../opts
include include ./aristo_db/aristo_replicate
./aristo_db/aristo_replicate
const const
# Expectation messages # Expectation messages
@ -34,16 +36,122 @@ const
# Annotation helper(s) # Annotation helper(s)
{.pragma: rlpRaise, gcsafe, raises: [AristoApiRlpError].} {.pragma: rlpRaise, gcsafe, raises: [AristoApiRlpError].}
proc toRocksDb*(
opts: DbOptions
): tuple[dbOpts: DbOptionsRef, cfOpts: ColFamilyOptionsRef] =
# TODO the configuration options below have not been tuned but are rather
# based on gut feeling, guesses and by looking at other clients - it
# would make sense to test different settings and combinations once the
# data model itself has settled down as their optimal values will depend
# on the shape of the data - it'll also be different per column family..
let tableOpts = defaultTableOptions()
# This bloom filter helps avoid having to read multiple SST files when looking
# for a value.
# A 9.9-bits-per-key ribbon filter takes ~7 bits per key and has a 1% false
# positive rate which feels like a good enough starting point, though this
# should be better investigated.
# https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#ribbon-filter
# https://github.com/facebook/rocksdb/blob/d64eac28d32a025770cba641ea04e697f475cdd6/include/rocksdb/filter_policy.h#L208
tableOpts.filterPolicy = createRibbonHybrid(9.9)
if opts.blockCacheSize > 0:
# Share a single block cache instance between all column families
tableOpts.blockCache = cacheCreateLRU(opts.blockCacheSize)
# Single-level indices might cause long stalls due to their large size -
# two-level indexing allows the first level to be kept in memory at all times
# while the second level is partitioned resulting in smoother loading
# https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters#how-to-use-it
tableOpts.indexType = IndexType.twoLevelIndexSearch
tableOpts.pinTopLevelIndexAndFilter = true
tableOpts.cacheIndexAndFilterBlocksWithHighPriority = true
tableOpts.partitionFilters = true # TODO do we need this?
# This option adds a small hash index to each data block, presumably speeding
# up Get queries (but again not range queries) - takes up space, apparently
# a good tradeoff for most workloads
# https://github.com/facebook/rocksdb/wiki/Data-Block-Hash-Index
tableOpts.dataBlockIndexType = DataBlockIndexType.binarySearchAndHash
tableOpts.dataBlockHashRatio = 0.75
let cfOpts = defaultColFamilyOptions()
cfOpts.blockBasedTableFactory = tableOpts
if opts.writeBufferSize > 0:
cfOpts.writeBufferSize = opts.writeBufferSize
# When data is written to rocksdb, it is first put in an in-memory table
# whose index is a skip list. Since the mem table holds the most recent data,
# all reads must go through this skiplist which results in slow lookups for
# already-written data.
# We enable a bloom filter on the mem table to avoid this lookup in the cases
# where the data is actually on disk already (ie wasn't updated recently).
# TODO there's also a hashskiplist that has both a hash index and a skip list
# which maybe could be used - uses more memory, requires a key prefix
# extractor
cfOpts.memtableWholeKeyFiltering = true
cfOpts.memtablePrefixBloomSizeRatio = 0.1
# LZ4 seems to cut database size to 2/3 roughly, at the time of writing
# Using it for the bottom-most level means it applies to 90% of data but
# delays compression until data has settled a bit, which seems like a
# reasonable tradeoff.
# TODO evaluate zstd compression with a trained dictionary
# https://github.com/facebook/rocksdb/wiki/Compression
cfOpts.bottommostCompression = Compression.lz4Compression
# We mostly look up data we know is there, so we don't need filters at the
# last level of the database - this option saves 90% bloom filter memory usage
# TODO verify this point
# https://github.com/EighteenZi/rocksdb_wiki/blob/master/Memory-usage-in-RocksDB.md#indexes-and-filter-blocks
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/advanced_options.h#L696
cfOpts.optimizeFiltersForHits = true
cfOpts.maxBytesForLevelBase = opts.writeBufferSize
let dbOpts = defaultDbOptions()
dbOpts.maxOpenFiles = opts.maxOpenFiles
if opts.rowCacheSize > 0:
# Good for GET queries, which is what we do most of the time - if we start
# using range queries, we should probably give more attention to the block
# cache
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L1276
dbOpts.rowCache = cacheCreateLRU(opts.rowCacheSize)
# Without this option, WAL files might never get removed since a small column
# family (like the admin CF) with only tiny writes might keep it open - this
# negatively affects startup times since the WAL is replayed on every startup.
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L719
# Flushing the oldest
let writeBufferSize =
if opts.writeBufferSize > 0:
opts.writeBufferSize
else:
cfOpts.writeBufferSize
dbOpts.maxTotalWalSize = 2 * writeBufferSize
dbOpts.keepLogFileNum = 16 # No point keeping 1000 log files around...
(dbOpts, cfOpts)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Public constructor # Public constructor
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
proc newAristoRocksDbCoreDbRef*(path: string, opts: DbOptions): CoreDbRef = proc newAristoRocksDbCoreDbRef*(path: string, opts: DbOptions): CoreDbRef =
## This funcion piggybacks the `KVT` on the `Aristo` backend. ## This funcion piggybacks the `KVT` on the `Aristo` backend.
let let
adb = AristoDbRef.init(use_ari.RdbBackendRef, path, opts).valueOr: # Sharing opts means we also share caches between column families!
(dbOpts, cfOpts) = opts.toRocksDb()
guestCFs = RdbInst.guestCFs(cfOpts)
(adb, oCfs) = AristoDbRef.init(use_ari.RdbBackendRef, path, dbOpts, cfOpts, guestCFs).valueOr:
raiseAssert aristoFail & ": " & $error raiseAssert aristoFail & ": " & $error
kdb = KvtDbRef.init(use_kvt.RdbBackendRef, adb, opts).valueOr: kdb = KvtDbRef.init(use_kvt.RdbBackendRef, adb, oCfs).valueOr:
raiseAssert kvtFail & ": " & $error raiseAssert kvtFail & ": " & $error
AristoDbRocks.create(kdb, adb) AristoDbRocks.create(kdb, adb)
@ -51,9 +159,10 @@ proc newAristoDualRocksDbCoreDbRef*(path: string, opts: DbOptions): CoreDbRef =
## This is only for debugging. The KVT is run on a completely separate ## This is only for debugging. The KVT is run on a completely separate
## database backend. ## database backend.
let let
adb = AristoDbRef.init(use_ari.RdbBackendRef, path, opts).valueOr: (dbOpts, cfOpts) = opts.toRocksDb()
(adb, _) = AristoDbRef.init(use_ari.RdbBackendRef, path, dbOpts, cfOpts, []).valueOr:
raiseAssert aristoFail & ": " & $error raiseAssert aristoFail & ": " & $error
kdb = KvtDbRef.init(use_kvt.RdbBackendRef, path, opts).valueOr: kdb = KvtDbRef.init(use_kvt.RdbBackendRef, path, dbOpts, cfOpts).valueOr:
raiseAssert kvtFail & ": " & $error raiseAssert kvtFail & ": " & $error
AristoDbRocks.create(kdb, adb) AristoDbRocks.create(kdb, adb)
@ -61,10 +170,10 @@ proc newAristoDualRocksDbCoreDbRef*(path: string, opts: DbOptions): CoreDbRef =
# Public aristo iterators # Public aristo iterators
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
iterator aristoReplicateRdb*(dsc: CoreDxMptRef): (Blob,Blob) {.rlpRaise.} = iterator aristoReplicateRdb*(dsc: CoreDxMptRef): (Blob, Blob) {.rlpRaise.} =
## Instantiation for `VoidBackendRef` ## Instantiation for `VoidBackendRef`
for k,v in aristoReplicate[use_ari.RdbBackendRef](dsc): for k, v in aristoReplicate[use_ari.RdbBackendRef](dsc):
yield (k,v) yield (k, v)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End

View File

@ -18,8 +18,6 @@ import
export kvstore export kvstore
const maxOpenFiles = 512
type type
RocksStoreRef* = ref object of RootObj RocksStoreRef* = ref object of RootObj
db: RocksDbReadWriteRef db: RocksDbReadWriteRef
@ -86,7 +84,6 @@ proc init*(
return err("RocksStoreRef: cannot create database directory") return err("RocksStoreRef: cannot create database directory")
let dbOpts = defaultDbOptions() let dbOpts = defaultDbOptions()
dbOpts.setMaxOpenFiles(maxOpenFiles)
let db = ? openRocksDb(dataDir, dbOpts, let db = ? openRocksDb(dataDir, dbOpts,
columnFamilies = namespaces.mapIt(initColFamilyDescriptor(it))) columnFamilies = namespaces.mapIt(initColFamilyDescriptor(it)))

View File

@ -19,6 +19,7 @@
{.push raises: [].} {.push raises: [].}
import import
rocksdb,
results, results,
../../aristo, ../../aristo,
../../opts, ../../opts,
@ -44,19 +45,20 @@ proc init*(
T: type KvtDbRef; T: type KvtDbRef;
B: type RdbBackendRef; B: type RdbBackendRef;
basePath: string; basePath: string;
opts: DbOptions; dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
): Result[KvtDbRef,KvtError] = ): Result[KvtDbRef,KvtError] =
## Generic constructor for `RocksDb` backend ## Generic constructor for `RocksDb` backend
## ##
ok KvtDbRef( ok KvtDbRef(
top: LayerRef.init(), top: LayerRef.init(),
backend: ? rocksDbKvtBackend(basePath, opts).mapErr toErr0) backend: ? rocksDbKvtBackend(basePath, dbOpts, cfOpts).mapErr toErr0)
proc init*( proc init*(
T: type KvtDbRef; T: type KvtDbRef;
B: type RdbBackendRef; B: type RdbBackendRef;
adb: AristoDbRef; adb: AristoDbRef;
opts: DbOptions; oCfs: openArray[ColFamilyReadWrite];
): Result[KvtDbRef,KvtError] = ): Result[KvtDbRef,KvtError] =
## Constructor for `RocksDb` backend which piggybacks on the `Aristo` ## Constructor for `RocksDb` backend which piggybacks on the `Aristo`
## backend. The following changes will occur after successful instantiation: ## backend. The following changes will occur after successful instantiation:
@ -83,7 +85,7 @@ proc init*(
## ##
ok KvtDbRef( ok KvtDbRef(
top: LayerRef.init(), top: LayerRef.init(),
backend: ? rocksDbKvtTriggeredBackend(adb, opts).mapErr toErr0) backend: ? rocksDbKvtTriggeredBackend(adb, oCfs).mapErr toErr0)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End

View File

@ -255,13 +255,14 @@ proc writeEvCb(db: RdbBackendRef): RdbWriteEventCb =
proc rocksDbKvtBackend*( proc rocksDbKvtBackend*(
path: string; path: string;
opts: DbOptions; dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
): Result[BackendRef,(KvtError,string)] = ): Result[BackendRef,(KvtError,string)] =
let db = RdbBackendRef( let db = RdbBackendRef(
beKind: BackendRocksDB) beKind: BackendRocksDB)
# Initialise RocksDB # Initialise RocksDB
db.rdb.init(path, opts).isOkOr: db.rdb.init(path, dbOpts, cfOpts).isOkOr:
when extraTraceMessages: when extraTraceMessages:
trace logTxt "constructor failed", error=error[0], info=error[1] trace logTxt "constructor failed", error=error[0], info=error[1]
return err(error) return err(error)
@ -280,13 +281,13 @@ proc rocksDbKvtBackend*(
proc rocksDbKvtTriggeredBackend*( proc rocksDbKvtTriggeredBackend*(
adb: AristoDbRef; adb: AristoDbRef;
opts: DbOptions; oCfs: openArray[ColFamilyReadWrite];
): Result[BackendRef,(KvtError,string)] = ): Result[BackendRef,(KvtError,string)] =
let db = RdbBackendRef( let db = RdbBackendRef(
beKind: BackendRdbTriggered) beKind: BackendRdbTriggered)
# Initialise RocksDB piggy-backed on `Aristo` backend. # Initialise RocksDB piggy-backed on `Aristo` backend.
db.rdb.init(adb, opts).isOkOr: db.rdb.init(oCfs).isOkOr:
when extraTraceMessages: when extraTraceMessages:
trace logTxt "constructor failed", error=error[0], info=error[1] trace logTxt "constructor failed", error=error[0], info=error[1]
return err(error) return err(error)

View File

@ -17,62 +17,12 @@ import
std/[sequtils, os], std/[sequtils, os],
rocksdb, rocksdb,
results, results,
../../../aristo/aristo_init/persistent,
../../../opts, ../../../opts,
../../kvt_desc, ../../kvt_desc,
../../kvt_desc/desc_error as kdb, ../../kvt_desc/desc_error as kdb,
./rdb_desc ./rdb_desc
# ------------------------------------------------------------------------------ export rdb_desc, results
# Private helpers
# ------------------------------------------------------------------------------
proc getCFInitOptions(opts: DbOptions): ColFamilyOptionsRef =
# TODO the configuration options below have not been tuned but are rather
# based on gut feeling, guesses and by looking at other clients - it
# would make sense to test different settings and combinations once the
# data model itself has settled down as their optimal values will depend
# on the shape of the data - it'll also be different per column family..
let cfOpts = defaultColFamilyOptions()
if opts.writeBufferSize > 0:
cfOpts.setWriteBufferSize(opts.writeBufferSize)
# When data is written to rocksdb, it is first put in an in-memory table
# whose index is a skip list. Since the mem table holds the most recent data,
# all reads must go through this skiplist which results in slow lookups for
# already-written data.
# We enable a bloom filter on the mem table to avoid this lookup in the cases
# where the data is actually on disk already (ie wasn't updated recently).
# TODO there's also a hashskiplist that has both a hash index and a skip list
# which maybe could be used - uses more memory, requires a key prefix
# extractor
cfOpts.setMemtableWholeKeyFiltering(true)
cfOpts.setMemtablePrefixBloomSizeRatio(0.1)
# LZ4 seems to cut database size to 2/3 roughly, at the time of writing
# Using it for the bottom-most level means it applies to 90% of data but
# delays compression until data has settled a bit, which seems like a
# reasonable tradeoff.
# TODO evaluate zstd compression with a trained dictionary
# https://github.com/facebook/rocksdb/wiki/Compression
cfOpts.setBottommostCompression(Compression.lz4Compression)
cfOpts
proc getDbInitOptions(opts: DbOptions): DbOptionsRef =
result = defaultDbOptions()
result.setMaxOpenFiles(opts.maxOpenFiles)
result.setMaxBytesForLevelBase(opts.writeBufferSize)
if opts.rowCacheSize > 0:
result.setRowCache(cacheCreateLRU(opts.rowCacheSize))
if opts.blockCacheSize > 0:
let tableOpts = defaultTableOptions()
tableOpts.setBlockCache(cacheCreateLRU(opts.rowCacheSize))
result.setBlockBasedTableFactory(tableOpts)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Public constructor # Public constructor
@ -81,7 +31,8 @@ proc getDbInitOptions(opts: DbOptions): DbOptionsRef =
proc init*( proc init*(
rdb: var RdbInst; rdb: var RdbInst;
basePath: string; basePath: string;
opts: DbOptions; dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
): Result[void,(KvtError,string)] = ): Result[void,(KvtError,string)] =
## Database backend constructor for stand-alone version ## Database backend constructor for stand-alone version
## ##
@ -96,9 +47,6 @@ proc init*(
except OSError, IOError: except OSError, IOError:
return err((kdb.RdbBeCantCreateDataDir, "")) return err((kdb.RdbBeCantCreateDataDir, ""))
# Expand argument `opts` to rocksdb options
let (cfOpts, dbOpts) = (opts.getCFInitOptions, opts.getDbInitOptions)
# Column familiy names to allocate when opening the database. # Column familiy names to allocate when opening the database.
let cfs = KvtCFs.mapIt(($it).initColFamilyDescriptor cfOpts) let cfs = KvtCFs.mapIt(($it).initColFamilyDescriptor cfOpts)
@ -113,20 +61,15 @@ proc init*(
$col & " descriptor: " & error $col & " descriptor: " & error
ok() ok()
proc guestCFs*(T: type RdbInst, cfOpts: ColFamilyOptionsRef): seq =
KvtCFs.toSeq.mapIt(initColFamilyDescriptor($it, cfOpts))
proc init*( proc init*(
rdb: var RdbInst; rdb: var RdbInst;
adb: AristoDbRef; oCfs: openArray[ColFamilyReadWrite];
opts: DbOptions;
): Result[void,(KvtError,string)] = ): Result[void,(KvtError,string)] =
## Initalise column handlers piggy-backing on the `Aristo` backend. ## Initalise column handlers piggy-backing on the `Aristo` backend.
## ##
let
cfOpts = opts.getCFInitOptions()
iCfs = KvtCFs.toSeq.mapIt(initColFamilyDescriptor($it, cfOpts))
oCfs = adb.reinit(iCfs).valueOr:
return err((RdbBeHostError,$error))
# Collect column family descriptors (this stores implicitely `baseDb`) # Collect column family descriptors (this stores implicitely `baseDb`)
for n in KvtCFs: for n in KvtCFs:
assert oCfs[n.ord].name != "" # debugging only assert oCfs[n.ord].name != "" # debugging only

View File

@ -17,6 +17,7 @@ import
results, results,
unittest2, unittest2,
../../nimbus/db/opts, ../../nimbus/db/opts,
../../nimbus/db/core_db/backend/aristo_rocksdb,
../../nimbus/db/aristo/[ ../../nimbus/db/aristo/[
aristo_check, aristo_check,
aristo_debug, aristo_debug,
@ -104,10 +105,11 @@ iterator quadripartite(td: openArray[ProofTrieData]): LeafQuartet =
proc dbTriplet(w: LeafQuartet; rdbPath: string): Result[DbTriplet,AristoError] = proc dbTriplet(w: LeafQuartet; rdbPath: string): Result[DbTriplet,AristoError] =
let db = block: let db = block:
if 0 < rdbPath.len: if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init()) let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0: xCheckRc rc.error == 0:
result = err(rc.error) result = err(rc.error)
rc.value rc.value()[0]
else: else:
AristoDbRef.init MemBackendRef AristoDbRef.init MemBackendRef

View File

@ -17,6 +17,7 @@ import
unittest2, unittest2,
stew/endians2, stew/endians2,
../../nimbus/db/opts, ../../nimbus/db/opts,
../../nimbus/db/core_db/backend/aristo_rocksdb,
../../nimbus/db/aristo/[ ../../nimbus/db/aristo/[
aristo_check, aristo_check,
aristo_debug, aristo_debug,
@ -330,9 +331,10 @@ proc testTxMergeAndDeleteOneByOne*(
# Start with brand new persistent database. # Start with brand new persistent database.
db = block: db = block:
if 0 < rdbPath.len: if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init()) let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0 xCheckRc rc.error == 0
rc.value rc.value()[0]
else: else:
AristoDbRef.init(MemBackendRef) AristoDbRef.init(MemBackendRef)
@ -441,9 +443,10 @@ proc testTxMergeAndDeleteSubTree*(
# Start with brand new persistent database. # Start with brand new persistent database.
db = block: db = block:
if 0 < rdbPath.len: if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init()) let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0 xCheckRc rc.error == 0
rc.value rc.value()[0]
else: else:
AristoDbRef.init(MemBackendRef) AristoDbRef.init(MemBackendRef)
@ -545,9 +548,10 @@ proc testTxMergeProofAndKvpList*(
db = block: db = block:
# New DB with disabled filter slots management # New DB with disabled filter slots management
if 0 < rdbPath.len: if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init()) let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0 xCheckRc rc.error == 0
rc.value rc.value()[0]
else: else:
AristoDbRef.init(MemBackendRef) AristoDbRef.init(MemBackendRef)

2
vendor/nim-rocksdb vendored

@ -1 +1 @@
Subproject commit 293dc0745ea8386237546acb352a265a4bc874b5 Subproject commit f5dcb34ae83648bf5868618bc7fe916073b4455f