Do not use vacuum when pruning (#1103)

* Do not use vacuum when pruning
This commit is contained in:
KonradStaniec 2022-05-26 08:26:08 +02:00 committed by GitHub
parent 8b0d700b45
commit af10e8f179
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 7 deletions

View File

@ -49,6 +49,7 @@ type
kv: KvStoreRef
maxSize: uint32
sizeStmt: SqliteStmt[NoParams, int64]
unusedSizeStmt: SqliteStmt[NoParams, int64]
vacStmt: SqliteStmt[NoParams, void]
getAll: SqliteStmt[NoParams, RowInfo]
@ -85,6 +86,10 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
"SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();",
NoParams, int64).get()
let unusedSize = db.prepareStmt(
"SELECT freelist_count * page_size as size FROM pragma_freelist_count(), pragma_page_size();",
NoParams, int64).get()
let vacStmt = db.prepareStmt(
"VACUUM;",
NoParams, void).get()
@ -99,7 +104,13 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
).get()
ContentDB(
kv: kvStore, maxSize: maxSize, sizeStmt: getSizeStmt, vacStmt: vacStmt, getAll: getKeysStmt)
kv: kvStore,
maxSize: maxSize,
sizeStmt: getSizeStmt,
vacStmt: vacStmt,
getAll: getKeysStmt,
unusedSizeStmt: unusedSize
)
proc getNFurthestElements*(
db: ContentDB, target: UInt256, n: uint64): (seq[ObjInfo], int64) =
@ -172,6 +183,18 @@ proc size*(db: ContentDB): int64 =
size = res).expectDb()
return size
proc unusedSize(db: ContentDB): int64 =
  ## Total size of the pages currently sitting on the SQLite freelist
  ## (`freelist_count * page_size`), i.e. pages that belong to the database
  ## file but hold no content and can be re-used for new content.
  ##
  ## Stays at 0 when the statement does not invoke the callback.
  var freeBytes = 0'i64
  # The callback receives the single `size` column of the prepared query;
  # the exec result itself is only checked through `expectDb`.
  discard (db.unusedSizeStmt.exec do(bytes: int64):
    freeBytes = bytes).expectDb()
  freeBytes
proc realSize*(db: ContentDB): int64 =
  ## Size of the database that is actually occupied by content: the total
  ## size reported by `size` minus the size of the free (re-usable) pages
  ## reported by `unusedSize`.
  let
    totalBytes = db.size()
    freeBytes = db.unusedSize()
  totalBytes - freeBytes
proc get*(db: ContentDB, key: openArray[byte]): Option[seq[byte]] =
var res: Option[seq[byte]]
proc onData(data: openArray[byte]) = res = some(@data)
@ -210,7 +233,7 @@ proc contains*(db: ContentDB, key: ContentId): bool =
proc del*(db: ContentDB, key: ContentId) =
db.del(key.toByteArrayBE())
proc deleteFractionOfContent(
proc deleteFractionOfContent*(
db: ContentDB,
target: Uint256,
targetFraction: float64): (UInt256, int64, int64, int64) =
@ -253,8 +276,18 @@ proc put*(
target: UInt256): PutResult =
db.put(key, value)
let dbSize = db.size()
# We use the real size for our pruning threshold, which means that the database
# file will reach the size specified in db.maxSize and will stay that size
# throughout the node's lifetime, since after content deletion the free pages
# will be reused.
# TODO:
# 1. Devise vacuum strategy - after few pruning cycles database can become
# fragmented which may impact performance, so at some point in time `VACUUM`
# will need to be run to defragment the db.
# 2. Deal with the edge case where a user configures max db size lower than
# current db.size(). With such config the database would try to prune itself with
# each addition.
let dbSize = db.realSize()
if dbSize < int64(db.maxSize):
return PutResult(kind: ContentStored)
@ -270,8 +303,6 @@ proc put*(
let deletedFraction = float64(deletedBytes) / float64(totalContentSize)
db.reclaimSpace()
return PutResult(
kind: DbPruned,
furthestStoredElementDistance: furthestNonDeletedElement,

View File

@ -72,27 +72,35 @@ suite "Content Database":
let size3 = db.size()
discard db.put(u256(2), genByteSeq(numBytes), testId)
let size4 = db.size()
let realSize = db.realSize()
check:
size2 > size1
size3 > size2
size3 == size4
realSize == size4
db.del(u256(2))
db.del(u256(1))
let realSize1 = db.realSize()
let size5 = db.size()
check:
size4 == size5
# real size will be smaller as after del, there are free pages in sqlite
# which can be re-used for further additions
realSize1 < size5
db.reclaimSpace()
let size6 = db.size()
let realSize2 = db.realSize()
check:
# After space reclamation size of db should be equal to initial size
size6 == size1
realSize2 == size6
type TestCase = object
keys: seq[UInt256]
@ -183,7 +191,7 @@ suite "Content Database":
check:
pr10.numOfDeletedElements == 2
uint32(db.size()) < maxDbSize
uint32(db.realSize()) < maxDbSize
# With current settings the 2 furthest elements will be deleted, i.e. 30
# and 40, so the furthest non-deleted one will be 20
pr10.furthestStoredElementDistance == thirdFurthest