Add force pruning for the storage-capacity option (#1901)

This commit is contained in:
Kim De Mey 2023-11-23 11:44:57 +01:00 committed by GitHub
parent a7bb52e5b5
commit c46706ac75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 110 additions and 17 deletions

View File

@ -1,4 +1,4 @@
# Nimbus
# Fluffy
# Copyright (c) 2021-2023 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
@ -235,7 +235,7 @@ type
hidden
desc: "Force the pruning of the database. This should be used when the " &
"database is decreased in size, e.g. when a lower static radius " &
"is set. Only supported for statically set radius."
"or a lower storage capacity is set."
defaultValue: false
name: "force-prune" .}: bool

View File

@ -63,6 +63,7 @@ type
contentSizeStmt: SqliteStmt[NoParams, int64]
getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo]
deleteOutOfRadiusStmt: SqliteStmt[(array[32, byte], array[32, byte]), void]
largestDistanceStmt: SqliteStmt[array[32, byte], array[32, byte]]
PutResultType* = enum
ContentStored, DbPruned
@ -129,6 +130,10 @@ proc new*(
"DELETE FROM kvstore WHERE isInRadius(?, key, ?) == 0",
(array[32, byte], array[32, byte]), void).get()
let largestDistanceStmt = db.prepareStmt(
"SELECT max(xorDistance(?, key)) FROM kvstore",
array[32, byte], array[32, byte]).get()
ContentDB(
kv: kvStore,
backend: db,
@ -140,7 +145,8 @@ proc new*(
contentSizeStmt: contentSizeStmt,
contentCountStmt: contentCountStmt,
getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt,
deleteOutOfRadiusStmt: deleteOutOfRadiusStmt
deleteOutOfRadiusStmt: deleteOutOfRadiusStmt,
largestDistanceStmt: largestDistanceStmt
)
template disposeSafe(s: untyped): untyped =
@ -156,6 +162,7 @@ proc close*(db: ContentDB) =
db.contentSizeStmt.disposeSafe()
db.getAllOrderedByDistanceStmt.disposeSafe()
db.deleteOutOfRadiusStmt.disposeSafe()
db.largestDistanceStmt.disposeSafe()
discard db.kv.close()
## Private KvStoreRef Calls
@ -267,6 +274,27 @@ proc contentCount*(db: ContentDB): int64 =
## Pruning related calls
proc getLargestDistance*(db: ContentDB, localId: UInt256): UInt256 =
  ## Runs the prepared `largestDistanceStmt` with the big-endian bytes of
  ## `localId` as its parameter and returns the 32 byte row it yields,
  ## decoded as a big-endian `UInt256`.
  ##
  ## If the row callback is never invoked, the zero-initialized buffer is
  ## decoded instead, which gives a distance of 0.
  var largest: array[32, byte]
  let queryResult = db.largestDistanceStmt.exec(
    localId.toBytesBE(),
    proc(row: array[32, byte]) =
      largest = row
  )
  # The value carried by the result is not needed; `expectDb` is still applied
  # so that a failed query is handled the same way as elsewhere in this module.
  discard queryResult.expectDb()
  UInt256.fromBytesBE(largest)
func estimateNewRadius(
    currentSize: uint64, storageCapacity: uint64,
    currentRadius: UInt256): UInt256 =
  ## Estimates the radius needed to shrink a database of `currentSize` bytes
  ## down to `storageCapacity` bytes, by dividing `currentRadius` by the
  ## integer ratio `currentSize div storageCapacity`.
  ##
  ## Returns `currentRadius` unchanged when that ratio is 0, i.e. when
  ## `currentSize` is smaller than `storageCapacity`.
  ## Returns a radius of 0 when `storageCapacity` is 0.
  ##
  ## Note: the ratio is truncated, so a `currentSize` anywhere between one
  ## and two times `storageCapacity` gives a ratio of 1 and thus also leaves
  ## the radius unchanged.
  if storageCapacity == 0:
    # Without this guard the division below would divide by zero. With no
    # capacity at all, the smallest possible radius is the only sane estimate.
    return 0'u64.stuint(256)

  let sizeRatio = currentSize div storageCapacity
  if sizeRatio > 0:
    currentRadius div sizeRatio.stuint(256)
  else:
    currentRadius
func estimateNewRadius*(db: ContentDB, currentRadius: UInt256): UInt256 =
  ## Estimates a new radius for `db`, starting from `currentRadius`, based on
  ## the database's currently used size and its configured `storageCapacity`.
  let
    usedBytes = uint64(db.usedSize())
    capacity = db.storageCapacity
  estimateNewRadius(usedBytes, capacity, currentRadius)
proc deleteContentFraction*(
db: ContentDB,
target: UInt256,
@ -330,7 +358,6 @@ proc forcePrune*(db: ContentDB, localId: UInt256, radius: UInt256) =
if db.manualCheckpoint:
notice "Truncating WAL file"
db.backend.checkpoint(SqStoreCheckpointKind.truncate)
db.close()
notice "Finished database pruning"
proc put*(
@ -344,18 +371,28 @@ proc put*(
# size will reach the size specified in db.storageCapacity and will stay
# around that size throughout the node's lifetime, as after content deletion
# due to pruning, the free pages will be re-used.
# TODO:
# 1. Devise vacuum strategy - after few pruning cycles database can become
# fragmented which may impact performance, so at some point in time `VACUUM`
# will need to be run to defragment the db.
# 2. Deal with the edge case where a user configures max db size lower than
# current db.size(). With such config the database would try to prune itself
# with each addition.
#
# Note:
# The `forcePrune` call must be used when database storage capacity is lowered
# either when setting a lower `storageCapacity` or when lowering a configured
# static radius.
# When not using the `forcePrune` functionality, pruning to the required
# capacity will not be very effective and free pages will not be returned.
let dbSize = db.usedSize()
if dbSize < int64(db.storageCapacity):
return PutResult(kind: ContentStored)
else:
# Note:
# The approach of deleting a full fraction is chosen here, in an attempt
# to not continuously require radius updates, which could have a negative
# impact on the network. However this should be further investigated, as
# doing a large fraction deletion could cause a temporary node performance
# degradation. The `contentDeletionFraction` might need further tuning or
# one could opt for a much more granular approach using an sql statement
# along the lines of:
# "SELECT key FROM kvstore ORDER BY xorDistance(?, key) DESC LIMIT 1"
# Potentially adjusting the LIMIT for how many items require deletion.
let (
distanceOfFurthestElement,
deletedBytes,

View File

@ -124,12 +124,29 @@ proc run(config: PortalConf) {.raises: [CatchableError].} =
d.open()
# Force pruning
if config.forcePrune and config.radiusConfig.kind == Static:
if config.forcePrune:
let db = ContentDB.new(config.dataDir / "db" / "contentdb_" &
d.localNode.id.toBytesBE().toOpenArray(0, 8).toHex(),
storageCapacity = config.storageCapacityMB * 1_000_000,
manualCheckpoint = true)
db.forcePrune(d.localNode.id, UInt256.fromLogRadius(config.radiusConfig.logRadius))
let radius =
if config.radiusConfig.kind == Static:
UInt256.fromLogRadius(config.radiusConfig.logRadius)
else:
let oldRadiusApproximation = db.getLargestDistance(d.localNode.id)
db.estimateNewRadius(oldRadiusApproximation)
# Note: In the case of a dynamic radius this is all an approximation that
# heavily relies on uniformly distributed content and thus will always
# have an error margin, ending up either below or above the requested
# capacity.
# TODO I: Perhaps we want to add an offset to counter the latter.
# TODO II: Perhaps for dynamical radius, we want to also apply the vacuum
# without the forcePrune flag and purely by checking the amount of free
# space versus the pruning fraction.
# TODO III: Adding Radius metadata to the db could be yet another way to
# decide whether or not to force prune, instead of this flag.
db.forcePrune(d.localNode.id, radius)
db.close()
# Store the database at contentdb prefixed with the first 8 chars of node id.

View File

@ -1,4 +1,4 @@
# Nimbus
# Fluffy
# Copyright (c) 2021-2023 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
@ -96,8 +96,8 @@ suite "Content Database":
# Need to rework either this test, or the pruning mechanism, or probably
# both.
let
maxDbSize = uint32(100_000)
db = ContentDB.new("", maxDbSize, inMemory = true)
storageCapacity = 100_000'u64
db = ContentDB.new("", storageCapacity, inMemory = true)
furthestElement = u256(40)
secondFurthest = u256(30)
@ -129,10 +129,49 @@ suite "Content Database":
check:
pr10.deletedElements == 2
uint32(db.usedSize()) < maxDbSize
uint64(db.usedSize()) < storageCapacity
# With the current settings the 2 furthest elements will be deleted,
# i.e key 30 and 40. The furthest non deleted one will have key 20.
pr10.distanceOfFurthestElement == thirdFurthest
db.get(furthestElement).isNone()
db.get(secondFurthest).isNone()
db.get(thirdFurthest).isSome()
test "ContentDB force pruning":
  # Fills a database with evenly spaced content ids, lowers the storage
  # capacity, force prunes with an estimated radius and checks that the
  # resulting database size ends up close to the new capacity.
  const
    # This start capacity doesn't really matter here as we are directly
    # putting data in the db without additional size checks.
    startCapacity = 14_159_872'u64
    endCapacity = 500_000'u64
    amountOfItems = 10_000

  let
    db = ContentDB.new("", startCapacity, inMemory = true)
    localId = UInt256.fromHex(
      "30994892f3e4889d99deb5340050510d1842778acc7a7948adffa475fed51d6e")
    content = genByteSeq(1000)

  # Note: We could randomly generate the above localId and the content keys
  # that are added to the database below. However we opt for a more
  # deterministic test case as the randomness makes it difficult to choose a
  # reasonable value to check if pruning was successful.
  let
    increment = UInt256.high div amountOfItems
    remainder = UInt256.high mod amountOfItems

  # Spread `amountOfItems` content ids evenly over the full id space.
  var id = u256(0)
  while id < UInt256.high - remainder:
    db.put(id, content)
    id = id + increment

  db.storageCapacity = endCapacity

  let
    oldRadiusApproximation = db.getLargestDistance(localId)
    newRadius = db.estimateNewRadius(oldRadiusApproximation)

  db.forcePrune(localId, newRadius)

  let diff = abs(db.size() - int64(db.storageCapacity))
  # Quite a big margin (20%) is added as it is all an approximation.
  check diff < int64(float(db.storageCapacity) * 0.20)