Fix dboptions init (#2391)

For the block cache to be shared between column families, the options
instance must be shared between the various column families being
created. This also ensures that there is only one source of truth for
configuration options instead of having two different sets depending on
how the tables were initialized.

This PR also removes the re-opening mechanism which can double startup
time - every time the database is opened, the log is replayed - a large
log file will take a long time to open.

Finally, several options got correctly implemented as column family
options, including one that puts a hash index in the SST files.
This commit is contained in:
Jacek Sieka 2024-06-19 10:55:57 +02:00 committed by GitHub
parent 83f6f89869
commit 41cf81f80b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 180 additions and 285 deletions

View File

@ -38,21 +38,23 @@ export
proc newAristoRdbDbRef(
basePath: string;
opts: DbOptions;
): Result[AristoDbRef, AristoError]=
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(AristoDbRef, seq[ColFamilyReadWrite]), AristoError]=
let
be = ? rocksDbBackend(basePath, opts)
(be, oCfs) = ? rocksDbBackend(basePath, dbOpts, cfOpts, guestCFs)
vTop = block:
let rc = be.getTuvFn()
if rc.isErr:
be.closeFn(eradicate = false)
return err(rc.error)
rc.value
ok AristoDbRef(
ok((AristoDbRef(
top: LayerRef(
delta: LayerDeltaRef(vTop: vTop),
final: LayerFinalRef()),
backend: be)
backend: be), oCfs))
# ------------------------------------------------------------------------------
# Public database constructors, destructor
@ -62,36 +64,14 @@ proc init*(
T: type AristoDbRef;
B: type RdbBackendRef;
basePath: string;
opts: DbOptions
): Result[T, AristoError] =
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(T, seq[ColFamilyReadWrite]), AristoError] =
## Generic constructor, `basePath` argument is ignored for memory backend
## databases (which also unconditionally succeed initialising.)
##
basePath.newAristoRdbDbRef opts
proc reinit*(
db: AristoDbRef;
cfs: openArray[ColFamilyDescriptor];
): Result[seq[ColFamilyReadWrite],AristoError] =
## Re-initialise the `RocksDb` backend database with additional or changed
## column family settings. This can be used to make space for guest use of
## the backend used by `Aristo`. The function returns a list of column family
## descriptors in the same order as the `cfs` argument.
##
## The argument `cfs` list replaces and extends the CFs already on disk by
## its options except for the ones defined for use with `Aristo`.
##
## Even though tx layers and filters might not be affected by this function,
## it is prudent to have them clean and saved on the backend database before
## changing it. On error conditions, data might get lost.
##
case db.backend.kind:
of BackendRocksDB:
db.backend.rocksDbUpdateCfs cfs
of BackendRdbHosting:
err(RdbBeWrTriggerActiveAlready)
else:
return err(RdbBeTypeUnsupported)
basePath.newAristoRdbDbRef dbOpts, cfOpts, guestCFs
proc activateWrTrigger*(
db: AristoDbRef;

View File

@ -250,19 +250,22 @@ proc putBegHostingFn(db: RdbBackendRef): PutBegFn =
proc rocksDbBackend*(
path: string;
opts: DbOptions
): Result[BackendRef,AristoError] =
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[(BackendRef, seq[ColFamilyReadWrite]),AristoError] =
let db = RdbBackendRef(
beKind: BackendRocksDB)
# Initialise RocksDB
block:
let rc = db.rdb.init(path, opts)
let oCfs = block:
let rc = db.rdb.init(path, dbOpts, cfOpts, guestCFs)
if rc.isErr:
when extraTraceMessages:
trace logTxt "constructor failed",
error=rc.error[0], info=rc.error[1]
return err(rc.error[0])
rc.value()
db.getVtxFn = getVtxFn db
db.getKeyFn = getKeyFn db
@ -277,19 +280,7 @@ proc rocksDbBackend*(
db.putEndFn = putEndFn db
db.closeFn = closeFn db
ok db
proc rocksDbUpdateCfs*(
be: BackendRef;
cfs: openArray[ColFamilyDescriptor];
): Result[seq[ColFamilyReadWrite],AristoError] =
## Reopen with extended column families given as argument.
let
db = RdbBackendRef(be)
rCfs = db.rdb.reinit(cfs).valueOr:
return err(error[0])
ok rCfs
ok((db, oCfs))
proc rocksDbSetEventTrigger*(

View File

@ -18,7 +18,6 @@ import
eth/common,
rocksdb,
stew/[endians2, keyed_queue],
../../../opts,
../../aristo_desc,
../init_common
@ -43,7 +42,6 @@ type
rdVtxLru*: KeyedQueue[VertexID,VertexRef] ## Read cache
basePath*: string ## Database directory
opts*: DbOptions ## Just a copy here for re-opening
trgWriteEvent*: RdbWriteEventCb ## Database piggyback callback handler
AristoCFs* = enum

View File

@ -25,115 +25,17 @@ import
# Private constructor
# ------------------------------------------------------------------------------
proc getInitOptions(
opts: DbOptions;
): tuple[cfOpts: ColFamilyOptionsRef, dbOpts: DbOptionsRef] =
# TODO the configuration options below have not been tuned but are rather
# based on gut feeling, guesses and by looking at other clients - it
# would make sense to test different settings and combinations once the
# data model itself has settled down as their optimal values will depend
# on the shape of the data - it'll also be different per column family..
let cfOpts = defaultColFamilyOptions()
if opts.writeBufferSize > 0:
cfOpts.setWriteBufferSize(opts.writeBufferSize)
# When data is written to rocksdb, it is first put in an in-memory table
# whose index is a skip list. Since the mem table holds the most recent data,
# all reads must go through this skiplist which results in slow lookups for
# already-written data.
# We enable a bloom filter on the mem table to avoid this lookup in the cases
# where the data is actually on disk already (ie wasn't updated recently).
# TODO there's also a hashskiplist that has both a hash index and a skip list
# which maybe could be used - uses more memory, requires a key prefix
# extractor
cfOpts.setMemtableWholeKeyFiltering(true)
cfOpts.setMemtablePrefixBloomSizeRatio(0.1)
# LZ4 seems to cut database size to 2/3 roughly, at the time of writing
# Using it for the bottom-most level means it applies to 90% of data but
# delays compression until data has settled a bit, which seems like a
# reasonable tradeoff.
# TODO evaluate zstd compression with a trained dictionary
# https://github.com/facebook/rocksdb/wiki/Compression
cfOpts.setBottommostCompression(Compression.lz4Compression)
let dbOpts = defaultDbOptions()
dbOpts.setMaxOpenFiles(opts.maxOpenFiles)
dbOpts.setMaxBytesForLevelBase(opts.writeBufferSize)
if opts.rowCacheSize > 0:
# Good for GET queries, which is what we do most of the time - if we start
# using range queries, we should probably give more attention to the block
# cache
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L1276
dbOpts.setRowCache(cacheCreateLRU(opts.rowCacheSize))
# We mostly look up data we know is there, so we don't need filters at the
# last level of the database - this option saves 90% bloom filter memory usage
# TODO verify this point
# https://github.com/EighteenZi/rocksdb_wiki/blob/master/Memory-usage-in-RocksDB.md#indexes-and-filter-blocks
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/advanced_options.h#L696
dbOpts.setOptimizeFiltersForHits(true)
# Without this option, WAL files might never get removed since a small column
# family (like the admin CF) with only tiny writes might keep it open - this
# negatively affects startup times since the WAL is replayed on every startup.
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L719
# Flushing the oldest
let writeBufferSize =
if opts.writeBufferSize > 0:
opts.writeBufferSize
else:
64 * 1024 * 1024 # TODO read from rocksdb?
dbOpts.setMaxTotalWalSize(2 * writeBufferSize)
let tableOpts = defaultTableOptions()
# This bloom filter helps avoid having to read multiple SST files when looking
# for a value.
# A 9.9-bits-per-key ribbon filter takes ~7 bits per key and has a 1% false
# positive rate which feels like a good enough starting point, though this
# should be better investigated.
# https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#ribbon-filter
# https://github.com/facebook/rocksdb/blob/d64eac28d32a025770cba641ea04e697f475cdd6/include/rocksdb/filter_policy.h#L208
tableOpts.setFilterPolicy(createRibbonHybrid(9.9))
if opts.blockCacheSize > 0:
tableOpts.setBlockCache(cacheCreateLRU(opts.rowCacheSize))
# Single-level indices might cause long stalls due to their large size -
# two-level indexing allows the first level to be kept in memory at all times
# while the second level is partitioned resulting in smoother loading
# https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters#how-to-use-it
tableOpts.setIndexType(IndexType.twoLevelIndexSearch)
tableOpts.setPinTopLevelIndexAndFilter(true)
tableOpts.setCacheIndexAndFilterBlocksWithHighPriority(true)
tableOpts.setPartitionFilters(true) # TODO do we need this?
# This option adds a small hash index to each data block, presumably speeding
# up Get queries (but again not range queries) - takes up space, apparently
# a good tradeoff for most workloads
# https://github.com/facebook/rocksdb/wiki/Data-Block-Hash-Index
tableOpts.setDataBlockIndexType(DataBlockIndexType.binarySearchAndHash)
tableOpts.setDataBlockHashRatio(0.75)
dbOpts.setBlockBasedTableFactory(tableOpts)
(cfOpts,dbOpts)
proc initImpl(
rdb: var RdbInst;
basePath: string;
opts: DbOptions;
dbOpts: DbOptionsRef,
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor] = [];
): Result[void,(AristoError,string)] =
): Result[seq[ColFamilyReadWrite],(AristoError,string)] =
## Database backend constructor
const initFailed = "RocksDB/init() failed"
rdb.basePath = basePath
rdb.opts = opts
let
dataDir = rdb.dataDir
@ -142,9 +44,6 @@ proc initImpl(
except OSError, IOError:
return err((RdbBeCantCreateDataDir, ""))
# Expand argument `opts` to rocksdb options
let (cfOpts, dbOpts) = opts.getInitOptions()
# Column family names to allocate when opening the database. This list
# might be extended below.
var useCFs = AristoCFs.mapIt($it).toHashSet
@ -182,7 +81,7 @@ proc initImpl(
rdb.keyCol = baseDb.withColFamily($KeyCF).valueOr:
raiseAssert initFailed & " cannot initialise KeyCF descriptor: " & error
ok()
ok(guestCFs.mapIt(baseDb.withColFamily(it.name).expect("loaded cf")))
# ------------------------------------------------------------------------------
# Public constructor
@ -191,43 +90,12 @@ proc initImpl(
proc init*(
rdb: var RdbInst;
basePath: string;
opts: DbOptions;
): Result[void,(AristoError,string)] =
## Temporarily define a guest CF list here.
rdb.initImpl(basePath, opts)
proc reinit*(
rdb: var RdbInst;
cfs: openArray[ColFamilyDescriptor];
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
guestCFs: openArray[ColFamilyDescriptor];
): Result[seq[ColFamilyReadWrite],(AristoError,string)] =
## Re-open database with changed parameters. Even though tx layers and
## filters might not be affected it is prudent to have them clean and
## saved on the backend database before changing it.
##
## The function returns a list of column family descriptors in the same
## order as the `cfs` argument.
##
## The `cfs` list replaces and extends the CFs already on disk by its
## options except for the ones defined with `AristoCFs`.
##
const initFailed = "RocksDB/reinit() failed"
if not rdb.session.isNil:
return err((RdbBeWrSessionUnfinished,""))
if not rdb.baseDb.isClosed():
rdb.baseDb.close()
rdb.initImpl(rdb.basePath, rdb.opts, cfs).isOkOr:
return err(error)
# Assemble list of column family descriptors
var guestCols = newSeq[ColFamilyReadWrite](cfs.len)
for n,col in cfs:
guestCols[n] = rdb.baseDb.withColFamily(col.name).valueOr:
raiseAssert initFailed & " cannot initialise " &
col.name & " descriptor: " & error
ok guestCols
## Temporarily define a guest CF list here.
rdb.initImpl(basePath, dbOpts, cfOpts, guestCFs)
proc destroy*(rdb: var RdbInst; eradicate: bool) =

View File

@ -11,20 +11,22 @@
{.push raises: [].}
import
std/sequtils,
eth/common,
rocksdb,
results,
../../aristo,
../../aristo/aristo_persistent as use_ari,
../../aristo/aristo_init/rocks_db as use_ari,
../../aristo/[aristo_desc, aristo_walk/persistent, aristo_tx],
../../kvt,
../../kvt/kvt_persistent as use_kvt,
../../kvt/kvt_init/rocks_db/rdb_init,
../base,
./aristo_db,
./aristo_db/[common_desc, handlers_aristo],
../../opts
include
./aristo_db/aristo_replicate
include ./aristo_db/aristo_replicate
const
# Expectation messages
@ -34,16 +36,122 @@ const
# Annotation helper(s)
{.pragma: rlpRaise, gcsafe, raises: [AristoApiRlpError].}
proc toRocksDb*(
opts: DbOptions
): tuple[dbOpts: DbOptionsRef, cfOpts: ColFamilyOptionsRef] =
# TODO the configuration options below have not been tuned but are rather
# based on gut feeling, guesses and by looking at other clients - it
# would make sense to test different settings and combinations once the
# data model itself has settled down as their optimal values will depend
# on the shape of the data - it'll also be different per column family..
let tableOpts = defaultTableOptions()
# This bloom filter helps avoid having to read multiple SST files when looking
# for a value.
# A 9.9-bits-per-key ribbon filter takes ~7 bits per key and has a 1% false
# positive rate which feels like a good enough starting point, though this
# should be better investigated.
# https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#ribbon-filter
# https://github.com/facebook/rocksdb/blob/d64eac28d32a025770cba641ea04e697f475cdd6/include/rocksdb/filter_policy.h#L208
tableOpts.filterPolicy = createRibbonHybrid(9.9)
if opts.blockCacheSize > 0:
# Share a single block cache instance between all column families
tableOpts.blockCache = cacheCreateLRU(opts.blockCacheSize)
# Single-level indices might cause long stalls due to their large size -
# two-level indexing allows the first level to be kept in memory at all times
# while the second level is partitioned resulting in smoother loading
# https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters#how-to-use-it
tableOpts.indexType = IndexType.twoLevelIndexSearch
tableOpts.pinTopLevelIndexAndFilter = true
tableOpts.cacheIndexAndFilterBlocksWithHighPriority = true
tableOpts.partitionFilters = true # TODO do we need this?
# This option adds a small hash index to each data block, presumably speeding
# up Get queries (but again not range queries) - takes up space, apparently
# a good tradeoff for most workloads
# https://github.com/facebook/rocksdb/wiki/Data-Block-Hash-Index
tableOpts.dataBlockIndexType = DataBlockIndexType.binarySearchAndHash
tableOpts.dataBlockHashRatio = 0.75
let cfOpts = defaultColFamilyOptions()
cfOpts.blockBasedTableFactory = tableOpts
if opts.writeBufferSize > 0:
cfOpts.writeBufferSize = opts.writeBufferSize
# When data is written to rocksdb, it is first put in an in-memory table
# whose index is a skip list. Since the mem table holds the most recent data,
# all reads must go through this skiplist which results in slow lookups for
# already-written data.
# We enable a bloom filter on the mem table to avoid this lookup in the cases
# where the data is actually on disk already (ie wasn't updated recently).
# TODO there's also a hashskiplist that has both a hash index and a skip list
# which maybe could be used - uses more memory, requires a key prefix
# extractor
cfOpts.memtableWholeKeyFiltering = true
cfOpts.memtablePrefixBloomSizeRatio = 0.1
# LZ4 seems to cut database size to 2/3 roughly, at the time of writing
# Using it for the bottom-most level means it applies to 90% of data but
# delays compression until data has settled a bit, which seems like a
# reasonable tradeoff.
# TODO evaluate zstd compression with a trained dictionary
# https://github.com/facebook/rocksdb/wiki/Compression
cfOpts.bottommostCompression = Compression.lz4Compression
# We mostly look up data we know is there, so we don't need filters at the
# last level of the database - this option saves 90% bloom filter memory usage
# TODO verify this point
# https://github.com/EighteenZi/rocksdb_wiki/blob/master/Memory-usage-in-RocksDB.md#indexes-and-filter-blocks
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/advanced_options.h#L696
cfOpts.optimizeFiltersForHits = true
cfOpts.maxBytesForLevelBase = opts.writeBufferSize
let dbOpts = defaultDbOptions()
dbOpts.maxOpenFiles = opts.maxOpenFiles
if opts.rowCacheSize > 0:
# Good for GET queries, which is what we do most of the time - if we start
# using range queries, we should probably give more attention to the block
# cache
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L1276
dbOpts.rowCache = cacheCreateLRU(opts.rowCacheSize)
# Without this option, WAL files might never get removed since a small column
# family (like the admin CF) with only tiny writes might keep it open - this
# negatively affects startup times since the WAL is replayed on every startup.
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/options.h#L719
# Flushing the oldest
let writeBufferSize =
if opts.writeBufferSize > 0:
opts.writeBufferSize
else:
cfOpts.writeBufferSize
dbOpts.maxTotalWalSize = 2 * writeBufferSize
dbOpts.keepLogFileNum = 16 # No point keeping 1000 log files around...
(dbOpts, cfOpts)
# ------------------------------------------------------------------------------
# Public constructor
# ------------------------------------------------------------------------------
proc newAristoRocksDbCoreDbRef*(path: string, opts: DbOptions): CoreDbRef =
## This function piggybacks the `KVT` on the `Aristo` backend.
let
adb = AristoDbRef.init(use_ari.RdbBackendRef, path, opts).valueOr:
# Sharing opts means we also share caches between column families!
(dbOpts, cfOpts) = opts.toRocksDb()
guestCFs = RdbInst.guestCFs(cfOpts)
(adb, oCfs) = AristoDbRef.init(use_ari.RdbBackendRef, path, dbOpts, cfOpts, guestCFs).valueOr:
raiseAssert aristoFail & ": " & $error
kdb = KvtDbRef.init(use_kvt.RdbBackendRef, adb, opts).valueOr:
kdb = KvtDbRef.init(use_kvt.RdbBackendRef, adb, oCfs).valueOr:
raiseAssert kvtFail & ": " & $error
AristoDbRocks.create(kdb, adb)
@ -51,9 +159,10 @@ proc newAristoDualRocksDbCoreDbRef*(path: string, opts: DbOptions): CoreDbRef =
## This is only for debugging. The KVT is run on a completely separate
## database backend.
let
adb = AristoDbRef.init(use_ari.RdbBackendRef, path, opts).valueOr:
(dbOpts, cfOpts) = opts.toRocksDb()
(adb, _) = AristoDbRef.init(use_ari.RdbBackendRef, path, dbOpts, cfOpts, []).valueOr:
raiseAssert aristoFail & ": " & $error
kdb = KvtDbRef.init(use_kvt.RdbBackendRef, path, opts).valueOr:
kdb = KvtDbRef.init(use_kvt.RdbBackendRef, path, dbOpts, cfOpts).valueOr:
raiseAssert kvtFail & ": " & $error
AristoDbRocks.create(kdb, adb)
@ -61,10 +170,10 @@ proc newAristoDualRocksDbCoreDbRef*(path: string, opts: DbOptions): CoreDbRef =
# Public aristo iterators
# ------------------------------------------------------------------------------
iterator aristoReplicateRdb*(dsc: CoreDxMptRef): (Blob,Blob) {.rlpRaise.} =
iterator aristoReplicateRdb*(dsc: CoreDxMptRef): (Blob, Blob) {.rlpRaise.} =
## Instantiation for `VoidBackendRef`
for k,v in aristoReplicate[use_ari.RdbBackendRef](dsc):
yield (k,v)
for k, v in aristoReplicate[use_ari.RdbBackendRef](dsc):
yield (k, v)
# ------------------------------------------------------------------------------
# End

View File

@ -18,8 +18,6 @@ import
export kvstore
const maxOpenFiles = 512
type
RocksStoreRef* = ref object of RootObj
db: RocksDbReadWriteRef
@ -86,7 +84,6 @@ proc init*(
return err("RocksStoreRef: cannot create database directory")
let dbOpts = defaultDbOptions()
dbOpts.setMaxOpenFiles(maxOpenFiles)
let db = ? openRocksDb(dataDir, dbOpts,
columnFamilies = namespaces.mapIt(initColFamilyDescriptor(it)))

View File

@ -19,6 +19,7 @@
{.push raises: [].}
import
rocksdb,
results,
../../aristo,
../../opts,
@ -44,19 +45,20 @@ proc init*(
T: type KvtDbRef;
B: type RdbBackendRef;
basePath: string;
opts: DbOptions;
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
): Result[KvtDbRef,KvtError] =
## Generic constructor for `RocksDb` backend
##
ok KvtDbRef(
top: LayerRef.init(),
backend: ? rocksDbKvtBackend(basePath, opts).mapErr toErr0)
backend: ? rocksDbKvtBackend(basePath, dbOpts, cfOpts).mapErr toErr0)
proc init*(
T: type KvtDbRef;
B: type RdbBackendRef;
adb: AristoDbRef;
opts: DbOptions;
oCfs: openArray[ColFamilyReadWrite];
): Result[KvtDbRef,KvtError] =
## Constructor for `RocksDb` backend which piggybacks on the `Aristo`
## backend. The following changes will occur after successful instantiation:
@ -83,7 +85,7 @@ proc init*(
##
ok KvtDbRef(
top: LayerRef.init(),
backend: ? rocksDbKvtTriggeredBackend(adb, opts).mapErr toErr0)
backend: ? rocksDbKvtTriggeredBackend(adb, oCfs).mapErr toErr0)
# ------------------------------------------------------------------------------
# End

View File

@ -255,13 +255,14 @@ proc writeEvCb(db: RdbBackendRef): RdbWriteEventCb =
proc rocksDbKvtBackend*(
path: string;
opts: DbOptions;
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
): Result[BackendRef,(KvtError,string)] =
let db = RdbBackendRef(
beKind: BackendRocksDB)
# Initialise RocksDB
db.rdb.init(path, opts).isOkOr:
db.rdb.init(path, dbOpts, cfOpts).isOkOr:
when extraTraceMessages:
trace logTxt "constructor failed", error=error[0], info=error[1]
return err(error)
@ -280,13 +281,13 @@ proc rocksDbKvtBackend*(
proc rocksDbKvtTriggeredBackend*(
adb: AristoDbRef;
opts: DbOptions;
oCfs: openArray[ColFamilyReadWrite];
): Result[BackendRef,(KvtError,string)] =
let db = RdbBackendRef(
beKind: BackendRdbTriggered)
# Initialise RocksDB piggy-backed on `Aristo` backend.
db.rdb.init(adb, opts).isOkOr:
db.rdb.init(oCfs).isOkOr:
when extraTraceMessages:
trace logTxt "constructor failed", error=error[0], info=error[1]
return err(error)

View File

@ -17,62 +17,12 @@ import
std/[sequtils, os],
rocksdb,
results,
../../../aristo/aristo_init/persistent,
../../../opts,
../../kvt_desc,
../../kvt_desc/desc_error as kdb,
./rdb_desc
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc getCFInitOptions(opts: DbOptions): ColFamilyOptionsRef =
# TODO the configuration options below have not been tuned but are rather
# based on gut feeling, guesses and by looking at other clients - it
# would make sense to test different settings and combinations once the
# data model itself has settled down as their optimal values will depend
# on the shape of the data - it'll also be different per column family..
let cfOpts = defaultColFamilyOptions()
if opts.writeBufferSize > 0:
cfOpts.setWriteBufferSize(opts.writeBufferSize)
# When data is written to rocksdb, it is first put in an in-memory table
# whose index is a skip list. Since the mem table holds the most recent data,
# all reads must go through this skiplist which results in slow lookups for
# already-written data.
# We enable a bloom filter on the mem table to avoid this lookup in the cases
# where the data is actually on disk already (ie wasn't updated recently).
# TODO there's also a hashskiplist that has both a hash index and a skip list
# which maybe could be used - uses more memory, requires a key prefix
# extractor
cfOpts.setMemtableWholeKeyFiltering(true)
cfOpts.setMemtablePrefixBloomSizeRatio(0.1)
# LZ4 seems to cut database size to 2/3 roughly, at the time of writing
# Using it for the bottom-most level means it applies to 90% of data but
# delays compression until data has settled a bit, which seems like a
# reasonable tradeoff.
# TODO evaluate zstd compression with a trained dictionary
# https://github.com/facebook/rocksdb/wiki/Compression
cfOpts.setBottommostCompression(Compression.lz4Compression)
cfOpts
proc getDbInitOptions(opts: DbOptions): DbOptionsRef =
result = defaultDbOptions()
result.setMaxOpenFiles(opts.maxOpenFiles)
result.setMaxBytesForLevelBase(opts.writeBufferSize)
if opts.rowCacheSize > 0:
result.setRowCache(cacheCreateLRU(opts.rowCacheSize))
if opts.blockCacheSize > 0:
let tableOpts = defaultTableOptions()
tableOpts.setBlockCache(cacheCreateLRU(opts.rowCacheSize))
result.setBlockBasedTableFactory(tableOpts)
export rdb_desc, results
# ------------------------------------------------------------------------------
# Public constructor
@ -81,7 +31,8 @@ proc getDbInitOptions(opts: DbOptions): DbOptionsRef =
proc init*(
rdb: var RdbInst;
basePath: string;
opts: DbOptions;
dbOpts: DbOptionsRef;
cfOpts: ColFamilyOptionsRef;
): Result[void,(KvtError,string)] =
## Database backend constructor for stand-alone version
##
@ -96,9 +47,6 @@ proc init*(
except OSError, IOError:
return err((kdb.RdbBeCantCreateDataDir, ""))
# Expand argument `opts` to rocksdb options
let (cfOpts, dbOpts) = (opts.getCFInitOptions, opts.getDbInitOptions)
# Column family names to allocate when opening the database.
let cfs = KvtCFs.mapIt(($it).initColFamilyDescriptor cfOpts)
@ -113,20 +61,15 @@ proc init*(
$col & " descriptor: " & error
ok()
proc guestCFs*(T: type RdbInst, cfOpts: ColFamilyOptionsRef): seq =
KvtCFs.toSeq.mapIt(initColFamilyDescriptor($it, cfOpts))
proc init*(
rdb: var RdbInst;
adb: AristoDbRef;
opts: DbOptions;
oCfs: openArray[ColFamilyReadWrite];
): Result[void,(KvtError,string)] =
## Initialise column handlers piggy-backing on the `Aristo` backend.
##
let
cfOpts = opts.getCFInitOptions()
iCfs = KvtCFs.toSeq.mapIt(initColFamilyDescriptor($it, cfOpts))
oCfs = adb.reinit(iCfs).valueOr:
return err((RdbBeHostError,$error))
# Collect column family descriptors (this stores `baseDb` implicitly)
for n in KvtCFs:
assert oCfs[n.ord].name != "" # debugging only

View File

@ -17,6 +17,7 @@ import
results,
unittest2,
../../nimbus/db/opts,
../../nimbus/db/core_db/backend/aristo_rocksdb,
../../nimbus/db/aristo/[
aristo_check,
aristo_debug,
@ -104,10 +105,11 @@ iterator quadripartite(td: openArray[ProofTrieData]): LeafQuartet =
proc dbTriplet(w: LeafQuartet; rdbPath: string): Result[DbTriplet,AristoError] =
let db = block:
if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init())
let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0:
result = err(rc.error)
rc.value
rc.value()[0]
else:
AristoDbRef.init MemBackendRef

View File

@ -17,6 +17,7 @@ import
unittest2,
stew/endians2,
../../nimbus/db/opts,
../../nimbus/db/core_db/backend/aristo_rocksdb,
../../nimbus/db/aristo/[
aristo_check,
aristo_debug,
@ -330,9 +331,10 @@ proc testTxMergeAndDeleteOneByOne*(
# Start with brand new persistent database.
db = block:
if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init())
let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0
rc.value
rc.value()[0]
else:
AristoDbRef.init(MemBackendRef)
@ -441,9 +443,10 @@ proc testTxMergeAndDeleteSubTree*(
# Start with brand new persistent database.
db = block:
if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init())
let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0
rc.value
rc.value()[0]
else:
AristoDbRef.init(MemBackendRef)
@ -545,9 +548,10 @@ proc testTxMergeProofAndKvpList*(
db = block:
# New DB with disabled filter slots management
if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init())
let (dbOpts, cfOpts) = DbOptions.init().toRocksDb()
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, dbOpts, cfOpts, [])
xCheckRc rc.error == 0
rc.value
rc.value()[0]
else:
AristoDbRef.init(MemBackendRef)

2
vendor/nim-rocksdb vendored

@ -1 +1 @@
Subproject commit 293dc0745ea8386237546acb352a265a4bc874b5
Subproject commit f5dcb34ae83648bf5868618bc7fe916073b4455f