From c46706ac75a37657e68100c7ca12e41dab533267 Mon Sep 17 00:00:00 2001
From: Kim De Mey
Date: Thu, 23 Nov 2023 11:44:57 +0100
Subject: [PATCH] Add force pruning for the storage-capacity option (#1901)

---
 fluffy/conf.nim                  |  4 +--
 fluffy/content_db.nim            | 55 ++++++++++++++++++++++++++------
 fluffy/fluffy.nim                | 21 ++++++++++--
 fluffy/tests/test_content_db.nim | 47 ++++++++++++++++++++++++---
 4 files changed, 110 insertions(+), 17 deletions(-)

diff --git a/fluffy/conf.nim b/fluffy/conf.nim
index 6a66a56f0..3985aa7cf 100644
--- a/fluffy/conf.nim
+++ b/fluffy/conf.nim
@@ -1,4 +1,4 @@
-# Nimbus
+# Fluffy
 # Copyright (c) 2021-2023 Status Research & Development GmbH
 # Licensed and distributed under either of
 #   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
@@ -235,7 +235,7 @@ type
       hidden
       desc:
        "Force the pruning of the database. This should be used when the " &
        "database is decreased in size, e.g. when a lower static radius " &
-       "is set. Only supported for statically set radius."
+       "or a lower storage capacity is set."
      defaultValue: false
      name: "force-prune" .}: bool
diff --git a/fluffy/content_db.nim b/fluffy/content_db.nim
index b34d39b8e..24481d1fc 100644
--- a/fluffy/content_db.nim
+++ b/fluffy/content_db.nim
@@ -63,6 +63,7 @@ type
     contentSizeStmt: SqliteStmt[NoParams, int64]
     getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo]
     deleteOutOfRadiusStmt: SqliteStmt[(array[32, byte], array[32, byte]), void]
+    largestDistanceStmt: SqliteStmt[array[32, byte], array[32, byte]]

   PutResultType* = enum
     ContentStored, DbPruned
@@ -129,6 +130,10 @@ proc new*(
     "DELETE FROM kvstore WHERE isInRadius(?, key, ?) == 0",
     (array[32, byte], array[32, byte]), void).get()

+  let largestDistanceStmt = db.prepareStmt(
+    "SELECT max(xorDistance(?, key)) FROM kvstore",
+    array[32, byte], array[32, byte]).get()
+
   ContentDB(
     kv: kvStore,
     backend: db,
@@ -140,7 +145,8 @@ proc new*(
     contentSizeStmt: contentSizeStmt,
     contentCountStmt: contentCountStmt,
     getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt,
-    deleteOutOfRadiusStmt: deleteOutOfRadiusStmt
+    deleteOutOfRadiusStmt: deleteOutOfRadiusStmt,
+    largestDistanceStmt: largestDistanceStmt
   )

 template disposeSafe(s: untyped): untyped =
@@ -156,6 +162,7 @@ proc close*(db: ContentDB) =
   db.contentSizeStmt.disposeSafe()
   db.getAllOrderedByDistanceStmt.disposeSafe()
   db.deleteOutOfRadiusStmt.disposeSafe()
+  db.largestDistanceStmt.disposeSafe()
   discard db.kv.close()

 ## Private KvStoreRef Calls
@@ -267,6 +274,27 @@ proc contentCount*(db: ContentDB): int64 =

 ## Pruning related calls

+proc getLargestDistance*(db: ContentDB, localId: UInt256): UInt256 =
+  var distanceBytes: array[32, byte]
+  discard (db.largestDistanceStmt.exec(localId.toBytesBE(),
+    proc(res: array[32, byte]) =
+      distanceBytes = res
+  )).expectDb()
+
+  return UInt256.fromBytesBE(distanceBytes)
+
+func estimateNewRadius(
+    currentSize: uint64, storageCapacity: uint64,
+    currentRadius: UInt256): UInt256 =
+  let sizeRatio = currentSize div storageCapacity
+  if sizeRatio > 0:
+    currentRadius div sizeRatio.stuint(256)
+  else:
+    currentRadius
+
+func estimateNewRadius*(db: ContentDB, currentRadius: UInt256): UInt256 =
+  estimateNewRadius(uint64(db.usedSize()), db.storageCapacity, currentRadius)
+
 proc deleteContentFraction*(
   db: ContentDB,
   target: UInt256,
@@ -330,7 +358,6 @@ proc forcePrune*(db: ContentDB, localId: UInt256, radius: UInt256) =
   if db.manualCheckpoint:
     notice "Truncating WAL file"
     db.backend.checkpoint(SqStoreCheckpointKind.truncate)
-  db.close()
   notice "Finished database pruning"

 proc put*(
@@ -344,18 +371,28 @@ proc put*(
   # size will reach the size specified in db.storageCapacity and will stay
   # around that size throughout the node's lifetime, as after content deletion
   # due to pruning, the free pages will be re-used.
-  # TODO:
-  # 1. Devise vacuum strategy - after few pruning cycles database can become
-  # fragmented which may impact performance, so at some point in time `VACUUM`
-  # will need to be run to defragment the db.
-  # 2. Deal with the edge case where a user configures max db size lower than
-  # current db.size(). With such config the database would try to prune itself
-  # with each addition.
+  #
+  # Note:
+  # The `forcePrune` call must be used when the database storage capacity is
+  # lowered, either by setting a lower `storageCapacity` or by lowering a
+  # configured static radius.
+  # When not using the `forcePrune` functionality, pruning to the required
+  # capacity will not be very effective and the free pages will not be returned.
   let dbSize = db.usedSize()

   if dbSize < int64(db.storageCapacity):
     return PutResult(kind: ContentStored)
   else:
+    # Note:
+    # An approach of deleting a full fraction is chosen here, in an attempt to
+    # avoid continuously requiring radius updates, which could have a negative
+    # impact on the network. However, this should be further investigated, as
+    # deleting a large fraction could cause a temporary node performance
+    # degradation. The `contentDeletionFraction` might need further tuning, or
+    # one could opt for a much more granular approach using an SQL statement
+    # along the lines of:
+    # "SELECT key FROM kvstore ORDER BY xorDistance(?, key) DESC LIMIT 1"
+    # potentially adjusting the LIMIT to the number of items that need deleting.
     let (
       distanceOfFurthestElement,
       deletedBytes,
diff --git a/fluffy/fluffy.nim b/fluffy/fluffy.nim
index 753660d5c..c0c5b3d9a 100644
--- a/fluffy/fluffy.nim
+++ b/fluffy/fluffy.nim
@@ -124,12 +124,29 @@ proc run(config: PortalConf) {.raises: [CatchableError].} =
   d.open()

   # Force pruning
-  if config.forcePrune and config.radiusConfig.kind == Static:
+  if config.forcePrune:
     let db = ContentDB.new(config.dataDir / "db" / "contentdb_" &
       d.localNode.id.toBytesBE().toOpenArray(0, 8).toHex(),
       storageCapacity = config.storageCapacityMB * 1_000_000,
       manualCheckpoint = true)
-    db.forcePrune(d.localNode.id, UInt256.fromLogRadius(config.radiusConfig.logRadius))
+
+    let radius =
+      if config.radiusConfig.kind == Static:
+        UInt256.fromLogRadius(config.radiusConfig.logRadius)
+      else:
+        let oldRadiusApproximation = db.getLargestDistance(d.localNode.id)
+        db.estimateNewRadius(oldRadiusApproximation)
+
+    # Note: In the case of a dynamic radius this is all an approximation that
+    # heavily relies on uniformly distributed content and thus will always
+    # have an error margin, either below or above the requested capacity.
+    # TODO I: Perhaps we want to add an offset to counter the latter.
+    # TODO II: Perhaps for a dynamic radius, we want to also apply the vacuum
+    # without the forcePrune flag, purely by checking the amount of free
+    # space versus the pruning fraction.
+    # TODO III: Adding radius metadata to the db could be yet another way to
+    # decide whether or not to force prune, instead of this flag.
+    db.forcePrune(d.localNode.id, radius)
     db.close()

   # Store the database at contentdb prefixed with the first 8 chars of node id.
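
The dynamic-radius branch above reduces to simple integer scaling: the largest stored distance approximates the current radius, and dividing it by the used-size to capacity ratio approximates a radius that fits the new capacity. A minimal sketch of that arithmetic, not part of the patch, with made-up numbers and plain uint64 values standing in for the UInt256 radius:

    # Sketch only: the integer scaling performed by estimateNewRadius, shown
    # with hypothetical sizes instead of a real ContentDB.
    let
      currentSize = 2_000_000'u64   # what db.usedSize() might report
      newCapacity = 500_000'u64     # the lowered storage capacity
      sizeRatio = currentSize div newCapacity  # = 4

    let
      oldRadiusApproximation = 1_000_000'u64  # stand-in for getLargestDistance()
      newRadius =
        if sizeRatio > 0:
          oldRadiusApproximation div sizeRatio
        else:
          oldRadiusApproximation

    echo newRadius  # 250000, i.e. roughly a quarter of the old radius is kept

With roughly uniformly distributed content this lands the post-prune size near the requested capacity, which is why the note above flags an error margin in either direction.
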
diff --git a/fluffy/tests/test_content_db.nim b/fluffy/tests/test_content_db.nim
index 025b3b24a..c8f6fa9d0 100644
--- a/fluffy/tests/test_content_db.nim
+++ b/fluffy/tests/test_content_db.nim
@@ -1,4 +1,4 @@
-# Nimbus
+# Fluffy
 # Copyright (c) 2021-2023 Status Research & Development GmbH
 # Licensed and distributed under either of
 #   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
@@ -96,8 +96,8 @@ suite "Content Database":
     # Need to rework either this test, or the pruning mechanism, or probably
     # both.
     let
-      maxDbSize = uint32(100_000)
-      db = ContentDB.new("", maxDbSize, inMemory = true)
+      storageCapacity = 100_000'u64
+      db = ContentDB.new("", storageCapacity, inMemory = true)

       furthestElement = u256(40)
       secondFurthest = u256(30)
@@ -129,10 +129,49 @@ suite "Content Database":

     check:
       pr10.deletedElements == 2
-      uint32(db.usedSize()) < maxDbSize
+      uint64(db.usedSize()) < storageCapacity
       # With the current settings the 2 furthest elements will be deleted,
       # i.e key 30 and 40. The furthest non deleted one will have key 20.
       pr10.distanceOfFurthestElement == thirdFurthest
       db.get(furthestElement).isNone()
       db.get(secondFurthest).isNone()
       db.get(thirdFurthest).isSome()
+
+  test "ContentDB force pruning":
+    const
+      # This start capacity doesn't really matter here as we are directly
+      # putting data in the db without additional size checks.
+      startCapacity = 14_159_872'u64
+      endCapacity = 500_000'u64
+      amountOfItems = 10_000
+
+    let
+      rng = newRng()
+      db = ContentDB.new("", startCapacity, inMemory = true)
+      localId = UInt256.fromHex(
+        "30994892f3e4889d99deb5340050510d1842778acc7a7948adffa475fed51d6e")
+      content = genByteSeq(1000)
+
+    # Note: We could randomly generate the above localId and the content keys
+    # that are added to the database below. However, we opt for a more
+    # deterministic test case, as the randomness makes it difficult to choose a
+    # reasonable value to check whether the pruning was successful.
+    let
+      increment = UInt256.high div amountOfItems
+      remainder = UInt256.high mod amountOfItems
+    var id = u256(0)
+    while id < UInt256.high - remainder:
+      db.put(id, content)
+      id = id + increment
+
+    db.storageCapacity = endCapacity
+
+    let
+      oldRadiusApproximation = db.getLargestDistance(localId)
+      newRadius = db.estimateNewRadius(oldRadiusApproximation)
+
+    db.forcePrune(localId, newRadius)
+
+    let diff = abs(db.size() - int64(db.storageCapacity))
+    # Quite a big margin (20%) is allowed, as it is all an approximation.
+    check diff < int64(float(db.storageCapacity) * 0.20)
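
For completeness, the pieces added in this patch compose into a short flow mirroring what fluffy.nim does when started with the force-prune flag. This is a sketch only, not part of the patch: the local id and capacity are hypothetical, an in-memory database is used for brevity, and it assumes the same imports as the test above (content_db and stint):

    # Sketch of the force-prune flow using the new ContentDB calls from this
    # patch; values are illustrative, not taken from a real node.
    let
      db = ContentDB.new("", 500_000'u64, inMemory = true)
      localId = u256(12345)  # hypothetical local node id

    # Approximate the old radius from the furthest stored key, scale it down to
    # the new capacity, then delete out-of-radius content and vacuum in one pass.
    let
      oldRadiusApproximation = db.getLargestDistance(localId)
      newRadius = db.estimateNewRadius(oldRadiusApproximation)

    db.forcePrune(localId, newRadius)
    db.close()

In operational terms this corresponds to lowering the storage capacity option (or the static radius) and starting the node once with --force-prune, as handled in the fluffy.nim hunk above.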