Add force pruning for the storage-capacity option (#1901)

This commit is contained in:
Kim De Mey 2023-11-23 11:44:57 +01:00 committed by GitHub
parent a7bb52e5b5
commit c46706ac75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 110 additions and 17 deletions

View File

@ -1,4 +1,4 @@
# Nimbus # Fluffy
# Copyright (c) 2021-2023 Status Research & Development GmbH # Copyright (c) 2021-2023 Status Research & Development GmbH
# Licensed and distributed under either of # Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT). # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
@ -235,7 +235,7 @@ type
hidden hidden
desc: "Force the pruning of the database. This should be used when the " & desc: "Force the pruning of the database. This should be used when the " &
"database is decreased in size, e.g. when a lower static radius " & "database is decreased in size, e.g. when a lower static radius " &
"is set. Only supported for statically set radius." "or a lower storage capacity is set."
defaultValue: false defaultValue: false
name: "force-prune" .}: bool name: "force-prune" .}: bool

View File

@ -63,6 +63,7 @@ type
contentSizeStmt: SqliteStmt[NoParams, int64] contentSizeStmt: SqliteStmt[NoParams, int64]
getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo] getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo]
deleteOutOfRadiusStmt: SqliteStmt[(array[32, byte], array[32, byte]), void] deleteOutOfRadiusStmt: SqliteStmt[(array[32, byte], array[32, byte]), void]
largestDistanceStmt: SqliteStmt[array[32, byte], array[32, byte]]
PutResultType* = enum PutResultType* = enum
ContentStored, DbPruned ContentStored, DbPruned
@ -129,6 +130,10 @@ proc new*(
"DELETE FROM kvstore WHERE isInRadius(?, key, ?) == 0", "DELETE FROM kvstore WHERE isInRadius(?, key, ?) == 0",
(array[32, byte], array[32, byte]), void).get() (array[32, byte], array[32, byte]), void).get()
let largestDistanceStmt = db.prepareStmt(
"SELECT max(xorDistance(?, key)) FROM kvstore",
array[32, byte], array[32, byte]).get()
ContentDB( ContentDB(
kv: kvStore, kv: kvStore,
backend: db, backend: db,
@ -140,7 +145,8 @@ proc new*(
contentSizeStmt: contentSizeStmt, contentSizeStmt: contentSizeStmt,
contentCountStmt: contentCountStmt, contentCountStmt: contentCountStmt,
getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt, getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt,
deleteOutOfRadiusStmt: deleteOutOfRadiusStmt deleteOutOfRadiusStmt: deleteOutOfRadiusStmt,
largestDistanceStmt: largestDistanceStmt
) )
template disposeSafe(s: untyped): untyped = template disposeSafe(s: untyped): untyped =
@ -156,6 +162,7 @@ proc close*(db: ContentDB) =
db.contentSizeStmt.disposeSafe() db.contentSizeStmt.disposeSafe()
db.getAllOrderedByDistanceStmt.disposeSafe() db.getAllOrderedByDistanceStmt.disposeSafe()
db.deleteOutOfRadiusStmt.disposeSafe() db.deleteOutOfRadiusStmt.disposeSafe()
db.largestDistanceStmt.disposeSafe()
discard db.kv.close() discard db.kv.close()
## Private KvStoreRef Calls ## Private KvStoreRef Calls
@ -267,6 +274,27 @@ proc contentCount*(db: ContentDB): int64 =
## Pruning related calls ## Pruning related calls
proc getLargestDistance*(db: ContentDB, localId: UInt256): UInt256 =
  ## Returns the largest XOR distance between `localId` and any key stored in
  ## the `kvstore` table, as computed by the prepared
  ## `SELECT max(xorDistance(?, key)) FROM kvstore` statement.
  ##
  ## NOTE(review): when the row callback is never invoked the buffer below
  ## stays zero-initialized, so the result is presumably zero for an empty
  ## table - confirm how the statement wrapper handles a NULL `max()`.
  var furthest: array[32, byte]
  let queryResult = db.largestDistanceStmt.exec(localId.toBytesBE(),
    proc(res: array[32, byte]) =
      # Copy the single aggregate row out of the callback.
      furthest = res
    )
  # The boolean outcome is not needed; only a database failure matters here.
  discard queryResult.expectDb()
  UInt256.fromBytesBE(furthest)
func estimateNewRadius(
    currentSize: uint64, storageCapacity: uint64,
    currentRadius: UInt256): UInt256 =
  ## Estimates the radius needed to shrink a database of `currentSize` bytes
  ## down to `storageCapacity` bytes, by dividing `currentRadius` by the
  ## integer ratio `currentSize div storageCapacity`.
  ##
  ## Returns zero when `storageCapacity` is zero, and `currentRadius`
  ## unchanged when the ratio truncates to zero (i.e. when
  ## `currentSize < storageCapacity`).
  ##
  ## Note: the ratio is an integer division, so any size below twice the
  ## capacity yields a ratio of at most 1 and leaves the radius unchanged.
  if storageCapacity == 0:
    # Nothing may be stored at all; also avoids the DivByZeroDefect that the
    # `div` below would raise.
    return 0'u64.stuint(256)

  let sizeRatio = currentSize div storageCapacity
  if sizeRatio > 0:
    currentRadius div sizeRatio.stuint(256)
  else:
    currentRadius
func estimateNewRadius*(db: ContentDB, currentRadius: UInt256): UInt256 =
  ## Estimates a new radius for `db` from its currently used size and its
  ## configured `storageCapacity`, starting from `currentRadius`.
  let usedBytes = uint64(db.usedSize())
  estimateNewRadius(usedBytes, db.storageCapacity, currentRadius)
proc deleteContentFraction*( proc deleteContentFraction*(
db: ContentDB, db: ContentDB,
target: UInt256, target: UInt256,
@ -330,7 +358,6 @@ proc forcePrune*(db: ContentDB, localId: UInt256, radius: UInt256) =
if db.manualCheckpoint: if db.manualCheckpoint:
notice "Truncating WAL file" notice "Truncating WAL file"
db.backend.checkpoint(SqStoreCheckpointKind.truncate) db.backend.checkpoint(SqStoreCheckpointKind.truncate)
db.close()
notice "Finished database pruning" notice "Finished database pruning"
proc put*( proc put*(
@ -344,18 +371,28 @@ proc put*(
# size will reach the size specified in db.storageCapacity and will stay # size will reach the size specified in db.storageCapacity and will stay
# around that size throughout the node's lifetime, as after content deletion # around that size throughout the node's lifetime, as after content deletion
# due to pruning, the free pages will be re-used. # due to pruning, the free pages will be re-used.
# TODO: #
# 1. Devise vacuum strategy - after few pruning cycles database can become # Note:
# fragmented which may impact performance, so at some point in time `VACUUM` # The `forcePrune` call must be used when database storage capacity is lowered
# will need to be run to defragment the db. # either when setting a lower `storageCapacity` or when lowering a configured
# 2. Deal with the edge case where a user configures max db size lower than # static radius.
# current db.size(). With such config the database would try to prune itself # When not using the `forcePrune` functionality, pruning to the required
# with each addition. # capacity will not be very effective and free pages will not be returned.
let dbSize = db.usedSize() let dbSize = db.usedSize()
if dbSize < int64(db.storageCapacity): if dbSize < int64(db.storageCapacity):
return PutResult(kind: ContentStored) return PutResult(kind: ContentStored)
else: else:
# Note:
# An approach of deleting a full fraction is chosen here, in an attempt
# to not continuously require radius updates, which could have a negative
# impact on the network. However this should be further investigated, as
# doing a large fraction deletion could cause a temporary node performance
# degradation. The `contentDeletionFraction` might need further tuning or
# one could opt for a much more granular approach using sql statement
# in the trend of:
# "SELECT key FROM kvstore ORDER BY xorDistance(?, key) DESC LIMIT 1"
# Potentially adjusting the LIMIT for how many items require deletion.
let ( let (
distanceOfFurthestElement, distanceOfFurthestElement,
deletedBytes, deletedBytes,

View File

@ -124,12 +124,29 @@ proc run(config: PortalConf) {.raises: [CatchableError].} =
d.open() d.open()
# Force pruning # Force pruning
if config.forcePrune and config.radiusConfig.kind == Static: if config.forcePrune:
let db = ContentDB.new(config.dataDir / "db" / "contentdb_" & let db = ContentDB.new(config.dataDir / "db" / "contentdb_" &
d.localNode.id.toBytesBE().toOpenArray(0, 8).toHex(), d.localNode.id.toBytesBE().toOpenArray(0, 8).toHex(),
storageCapacity = config.storageCapacityMB * 1_000_000, storageCapacity = config.storageCapacityMB * 1_000_000,
manualCheckpoint = true) manualCheckpoint = true)
db.forcePrune(d.localNode.id, UInt256.fromLogRadius(config.radiusConfig.logRadius))
let radius =
if config.radiusConfig.kind == Static:
UInt256.fromLogRadius(config.radiusConfig.logRadius)
else:
let oldRadiusApproximation = db.getLargestDistance(d.localNode.id)
db.estimateNewRadius(oldRadiusApproximation)
# Note: In the case of dynamical radius this is all an approximation that
# heavily relies on uniformly distributed content and thus will always
# have an error margin, either down or up of the requested capacity.
# TODO I: Perhaps we want to add an offset to counter the latter.
# TODO II: Perhaps for dynamical radius, we want to also apply the vacuum
# without the forcePrune flag and purely by checking the amount of free
# space versus the pruning fraction.
# TODO III: Adding Radius metadata to the db could be yet another way to
# decide whether or not to force prune, instead of this flag.
db.forcePrune(d.localNode.id, radius)
db.close() db.close()
# Store the database at contentdb prefixed with the first 8 chars of node id. # Store the database at contentdb prefixed with the first 8 chars of node id.

View File

@ -1,4 +1,4 @@
# Nimbus # Fluffy
# Copyright (c) 2021-2023 Status Research & Development GmbH # Copyright (c) 2021-2023 Status Research & Development GmbH
# Licensed and distributed under either of # Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT). # * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
@ -96,8 +96,8 @@ suite "Content Database":
# Need to rework either this test, or the pruning mechanism, or probably # Need to rework either this test, or the pruning mechanism, or probably
# both. # both.
let let
maxDbSize = uint32(100_000) storageCapacity = 100_000'u64
db = ContentDB.new("", maxDbSize, inMemory = true) db = ContentDB.new("", storageCapacity, inMemory = true)
furthestElement = u256(40) furthestElement = u256(40)
secondFurthest = u256(30) secondFurthest = u256(30)
@ -129,10 +129,49 @@ suite "Content Database":
check: check:
pr10.deletedElements == 2 pr10.deletedElements == 2
uint32(db.usedSize()) < maxDbSize uint64(db.usedSize()) < storageCapacity
# With the current settings the 2 furthest elements will be deleted, # With the current settings the 2 furthest elements will be deleted,
# i.e key 30 and 40. The furthest non deleted one will have key 20. # i.e key 30 and 40. The furthest non deleted one will have key 20.
pr10.distanceOfFurthestElement == thirdFurthest pr10.distanceOfFurthestElement == thirdFurthest
db.get(furthestElement).isNone() db.get(furthestElement).isNone()
db.get(secondFurthest).isNone() db.get(secondFurthest).isNone()
db.get(thirdFurthest).isSome() db.get(thirdFurthest).isSome()
test "ContentDB force pruning":
  # Fills an in-memory database with evenly spaced keys, lowers the storage
  # capacity, force prunes with an estimated radius and checks that the
  # resulting size lands within 20% of the new capacity.
  const
    # This start capacity doesn't really matter here as we are directly
    # putting data in the db without additional size checks.
    startCapacity = 14_159_872'u64
    endCapacity = 500_000'u64
    amountOfItems = 10_000

  let
    db = ContentDB.new("", startCapacity, inMemory = true)
    localId = UInt256.fromHex(
      "30994892f3e4889d99deb5340050510d1842778acc7a7948adffa475fed51d6e")
    content = genByteSeq(1000)

  # Note: We could randomly generate the above localId and the content keys
  # that are added to the database below. However we opt for a more
  # deterministic test case as the randomness makes it difficult to choose a
  # reasonable value to check if pruning was successful.
  let
    increment = UInt256.high div amountOfItems
    remainder = UInt256.high mod amountOfItems

  # Insert `amountOfItems` keys spread uniformly over the whole key space.
  var id = u256(0)
  while id < UInt256.high - remainder:
    db.put(id, content)
    id = id + increment

  db.storageCapacity = endCapacity

  let
    oldRadiusApproximation = db.getLargestDistance(localId)
    newRadius = db.estimateNewRadius(oldRadiusApproximation)

  db.forcePrune(localId, newRadius)

  let diff = abs(db.size() - int64(db.storageCapacity))
  # Quite a big margin (20%) is added as it is all an approximation.
  check diff < int64(float(db.storageCapacity) * 0.20)