mirror of
https://github.com/status-im/nimbus-eth1.git
synced 2025-02-03 07:45:18 +00:00
Use queries with custom xor function (#1113)
* Use queries with custom xor function
This commit is contained in:
parent
5bd134e2f0
commit
0776f35e0c
@ -29,16 +29,11 @@ export kvstore_sqlite3
|
|||||||
# 3. Or databases are created per network (and kvstores per content type) and
|
# 3. Or databases are created per network (and kvstores per content type) and
|
||||||
# thus depending on the network the right db needs to be selected.
|
# thus depending on the network the right db needs to be selected.
|
||||||
|
|
||||||
const
|
|
||||||
# Maximal number of ObjInfo objects held in memory per database scan. 100k
|
|
||||||
# objects should result in memory usage of around 7mb which should be
|
|
||||||
# appropriate for even low resource devices
|
|
||||||
maxObjPerScan = 100000
|
|
||||||
|
|
||||||
type
|
type
|
||||||
RowInfo = tuple
|
RowInfo = tuple
|
||||||
contentId: array[32, byte]
|
contentId: array[32, byte]
|
||||||
payloadLength: int64
|
payloadLength: int64
|
||||||
|
distance: array[32, byte]
|
||||||
|
|
||||||
ObjInfo* = object
|
ObjInfo* = object
|
||||||
contentId*: array[32, byte]
|
contentId*: array[32, byte]
|
||||||
@ -51,7 +46,8 @@ type
|
|||||||
sizeStmt: SqliteStmt[NoParams, int64]
|
sizeStmt: SqliteStmt[NoParams, int64]
|
||||||
unusedSizeStmt: SqliteStmt[NoParams, int64]
|
unusedSizeStmt: SqliteStmt[NoParams, int64]
|
||||||
vacStmt: SqliteStmt[NoParams, void]
|
vacStmt: SqliteStmt[NoParams, void]
|
||||||
getAll: SqliteStmt[NoParams, RowInfo]
|
contentSizeStmt: SqliteStmt[NoParams, int64]
|
||||||
|
getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo]
|
||||||
|
|
||||||
PutResultType* = enum
|
PutResultType* = enum
|
||||||
ContentStored, DbPruned
|
ContentStored, DbPruned
|
||||||
@ -65,9 +61,21 @@ type
|
|||||||
fractionOfDeletedContent*: float64
|
fractionOfDeletedContent*: float64
|
||||||
numOfDeletedElements*: int64
|
numOfDeletedElements*: int64
|
||||||
|
|
||||||
# Objects must be sorted from largest to closest distance
|
func xorDistance(
|
||||||
proc `<`(a, b: ObjInfo): bool =
|
a: openArray[byte],
|
||||||
return a.distFrom < b.distFrom
|
b: openArray[byte]
|
||||||
|
): Result[seq[byte], cstring] {.cdecl.} =
|
||||||
|
var s: seq[byte] = newSeq[byte](32)
|
||||||
|
|
||||||
|
if len(a) != 32 or len(b) != 32:
|
||||||
|
return err("Blobs should have 32 byte length")
|
||||||
|
|
||||||
|
var i = 0
|
||||||
|
while i < 32:
|
||||||
|
s[i] = a[i] xor b[i]
|
||||||
|
inc i
|
||||||
|
|
||||||
|
return ok(s)
|
||||||
|
|
||||||
template expectDb(x: auto): untyped =
|
template expectDb(x: auto): untyped =
|
||||||
# There's no meaningful error handling implemented for a corrupt database or
|
# There's no meaningful error handling implemented for a corrupt database or
|
||||||
@ -82,6 +90,9 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
|
|||||||
else:
|
else:
|
||||||
SqStoreRef.init(path, "fluffy").expectDb()
|
SqStoreRef.init(path, "fluffy").expectDb()
|
||||||
|
|
||||||
|
db.registerCustomScalarFunction("xorDistance", xorDistance)
|
||||||
|
.expect("Couldn't register custom xor function")
|
||||||
|
|
||||||
let getSizeStmt = db.prepareStmt(
|
let getSizeStmt = db.prepareStmt(
|
||||||
"SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();",
|
"SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();",
|
||||||
NoParams, int64).get()
|
NoParams, int64).get()
|
||||||
@ -96,11 +107,14 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
|
|||||||
|
|
||||||
let kvStore = kvStore db.openKvStore().expectDb()
|
let kvStore = kvStore db.openKvStore().expectDb()
|
||||||
|
|
||||||
# This needs to go after `openKvStore`, as it checks whether the table name
|
let contentSizeStmt = db.prepareStmt(
|
||||||
# kvstore already exists.
|
"SELECT SUM(length(value)) FROM kvstore",
|
||||||
let getKeysStmt = db.prepareStmt(
|
NoParams, int64
|
||||||
"SELECT key, length(value) FROM kvstore",
|
).get()
|
||||||
NoParams, RowInfo
|
|
||||||
|
let getAllOrderedByDistanceStmt = db.prepareStmt(
|
||||||
|
"SELECT key, length(value), xorDistance(?, key) as distance FROM kvstore ORDER BY distance DESC",
|
||||||
|
array[32, byte], RowInfo
|
||||||
).get()
|
).get()
|
||||||
|
|
||||||
ContentDB(
|
ContentDB(
|
||||||
@ -108,59 +122,11 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
|
|||||||
maxSize: maxSize,
|
maxSize: maxSize,
|
||||||
sizeStmt: getSizeStmt,
|
sizeStmt: getSizeStmt,
|
||||||
vacStmt: vacStmt,
|
vacStmt: vacStmt,
|
||||||
getAll: getKeysStmt,
|
unusedSizeStmt: unusedSize,
|
||||||
unusedSizeStmt: unusedSize
|
contentSizeStmt: contentSizeStmt,
|
||||||
|
getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt
|
||||||
)
|
)
|
||||||
|
|
||||||
proc getNFurthestElements*(
|
|
||||||
db: ContentDB, target: UInt256, n: uint64): (seq[ObjInfo], int64) =
|
|
||||||
## Get at most n furthest elements from db in order from furthest to closest.
|
|
||||||
## Payload lengths are also returned so the caller can decide how many of
|
|
||||||
## those elements need to be deleted.
|
|
||||||
##
|
|
||||||
## Currently it uses xor metric
|
|
||||||
##
|
|
||||||
## Currently works by querying for all elements in database and doing all
|
|
||||||
## necessary work on program level. This is mainly due to two facts:
|
|
||||||
## - sqlite does not have a built-in xor function, also it does not handle bitwise
|
|
||||||
## operations on blobs as expected
|
|
||||||
## - our nim wrapper for sqlite does not support create_function api of sqlite
|
|
||||||
## so we cannot create custom function comparing blobs at sql level. If that
|
|
||||||
## would be possible we may be able to do all this work by one sql query
|
|
||||||
|
|
||||||
if n == 0:
|
|
||||||
return (newSeq[ObjInfo](), 0'i64)
|
|
||||||
|
|
||||||
var heap = initHeapQueue[ObjInfo]()
|
|
||||||
var totalContentSize: int64 = 0
|
|
||||||
|
|
||||||
var ri: RowInfo
|
|
||||||
for e in db.getAll.exec(ri):
|
|
||||||
let contentId = UInt256.fromBytesBE(ri.contentId)
|
|
||||||
# TODO: Currently it assumes xor distance, but when we start testing
|
|
||||||
# networks with other distance functions this needs to be adjusted to the
|
|
||||||
# custom distance function
|
|
||||||
let dist = contentId xor target
|
|
||||||
let obj = ObjInfo(
|
|
||||||
contentId: ri.contentId, payloadLength: ri.payloadLength, distFrom: dist)
|
|
||||||
|
|
||||||
if (uint64(len(heap)) < n):
|
|
||||||
heap.push(obj)
|
|
||||||
else:
|
|
||||||
if obj > heap[0]:
|
|
||||||
discard heap.replace(obj)
|
|
||||||
|
|
||||||
totalContentSize = totalContentSize + ri.payloadLength
|
|
||||||
|
|
||||||
var res: seq[ObjInfo] = newSeq[ObjInfo](heap.len())
|
|
||||||
|
|
||||||
var i = heap.len() - 1
|
|
||||||
while heap.len() > 0:
|
|
||||||
res[i] = heap.pop()
|
|
||||||
dec i
|
|
||||||
|
|
||||||
return (res, totalContentSize)
|
|
||||||
|
|
||||||
proc reclaimSpace*(db: ContentDB): void =
|
proc reclaimSpace*(db: ContentDB): void =
|
||||||
## Runs sqlite VACUUM commands which rebuilds the db, repacking it into a
|
## Runs sqlite VACUUM commands which rebuilds the db, repacking it into a
|
||||||
## minimal amount of disk space.
|
## minimal amount of disk space.
|
||||||
@ -195,6 +161,13 @@ proc unusedSize(db: ContentDB): int64 =
|
|||||||
proc realSize*(db: ContentDB): int64 =
|
proc realSize*(db: ContentDB): int64 =
|
||||||
db.size() - db.unusedSize()
|
db.size() - db.unusedSize()
|
||||||
|
|
||||||
|
proc contentSize(db: ContentDB): int64 =
|
||||||
|
## Returns total size of content stored in DB
|
||||||
|
var size: int64 = 0
|
||||||
|
discard (db.contentSizeStmt.exec do(res: int64):
|
||||||
|
size = res).expectDb()
|
||||||
|
return size
|
||||||
|
|
||||||
proc get*(db: ContentDB, key: openArray[byte]): Option[seq[byte]] =
|
proc get*(db: ContentDB, key: openArray[byte]): Option[seq[byte]] =
|
||||||
var res: Option[seq[byte]]
|
var res: Option[seq[byte]]
|
||||||
proc onData(data: openArray[byte]) = res = some(@data)
|
proc onData(data: openArray[byte]) = res = some(@data)
|
||||||
@ -233,41 +206,37 @@ proc contains*(db: ContentDB, key: ContentId): bool =
|
|||||||
proc del*(db: ContentDB, key: ContentId) =
|
proc del*(db: ContentDB, key: ContentId) =
|
||||||
db.del(key.toByteArrayBE())
|
db.del(key.toByteArrayBE())
|
||||||
|
|
||||||
proc deleteFractionOfContent*(
|
proc deleteContentFraction(
|
||||||
db: ContentDB,
|
db: ContentDB,
|
||||||
target: Uint256,
|
target: UInt256,
|
||||||
targetFraction: float64): (UInt256, int64, int64, int64) =
|
fraction: float64): (UInt256, int64, int64, int64) =
|
||||||
## Procedure which tries to delete fraction of database by scanning maxObjPerScan
|
## Deletes at most the given `fraction` (a value in (0, 1)) of content from the database.
|
||||||
## furthest elements.
|
## First, content furthest from provided `target` is deleted.
|
||||||
## If the maxObjPerScan furthest elements, is not enough to attain required fraction
|
|
||||||
## procedure deletes all but one element and report how many bytes have been
|
|
||||||
## deleted
|
|
||||||
## Procedure do not call reclaim space, it is left to the caller.
|
|
||||||
|
|
||||||
let (furthestElements, totalContentSize) = db.getNFurthestElements(target, maxObjPerScan)
|
doAssert(
|
||||||
var bytesDeleted: int64 = 0
|
fraction > 0 and fraction < 1,
|
||||||
let bytesToDelete = int64(targetFraction * float64(totalContentSize))
|
"Deleted fraction should be > 0 and < 1"
|
||||||
let numOfElements = len(furthestElements)
|
)
|
||||||
|
|
||||||
|
let totalContentSize = db.contentSize()
|
||||||
|
let bytesToDelete = int64(fraction * float64(totalContentSize))
|
||||||
var numOfDeletedElements: int64 = 0
|
var numOfDeletedElements: int64 = 0
|
||||||
|
|
||||||
if numOfElements == 0:
|
var ri: RowInfo
|
||||||
# no elements in database, return some zero value
|
var bytesDeleted: int64 = 0
|
||||||
return (UInt256.zero, 0'i64, 0'i64, 0'i64)
|
let targetBytes = target.toByteArrayBE()
|
||||||
|
for e in db.getAllOrderedByDistanceStmt.exec(targetBytes, ri):
|
||||||
let lastIdx = len(furthestElements) - 1
|
if bytesDeleted + ri.payloadLength < bytesToDelete:
|
||||||
|
db.del(ri.contentId)
|
||||||
for i, elem in furthestElements:
|
bytesDeleted = bytesDeleted + ri.payloadLength
|
||||||
if i == lastIdx:
|
|
||||||
# this is our last element, do not delete it and report it as last non deleted
|
|
||||||
# element
|
|
||||||
return (elem.distFrom, bytesDeleted, totalContentSize, numOfDeletedElements)
|
|
||||||
|
|
||||||
if bytesDeleted + elem.payloadLength < bytesToDelete:
|
|
||||||
db.del(elem.contentId)
|
|
||||||
bytesDeleted = bytesDeleted + elem.payloadLength
|
|
||||||
inc numOfDeletedElements
|
inc numOfDeletedElements
|
||||||
else:
|
else:
|
||||||
return (elem.distFrom, bytesDeleted, totalContentSize, numOfDeletedElements)
|
return (
|
||||||
|
UInt256.fromBytesBE(ri.distance),
|
||||||
|
bytesDeleted,
|
||||||
|
totalContentSize,
|
||||||
|
numOfDeletedElements
|
||||||
|
)
|
||||||
|
|
||||||
proc put*(
|
proc put*(
|
||||||
db: ContentDB,
|
db: ContentDB,
|
||||||
@ -299,7 +268,7 @@ proc put*(
|
|||||||
totalContentSize,
|
totalContentSize,
|
||||||
deletedElements
|
deletedElements
|
||||||
) =
|
) =
|
||||||
db.deleteFractionOfContent(target, 0.25)
|
db.deleteContentFraction(target, 0.25)
|
||||||
|
|
||||||
let deletedFraction = float64(deletedBytes) / float64(totalContentSize)
|
let deletedFraction = float64(deletedBytes) / float64(totalContentSize)
|
||||||
|
|
||||||
|
@ -102,59 +102,6 @@ suite "Content Database":
|
|||||||
size6 == size1
|
size6 == size1
|
||||||
realSize2 == size6
|
realSize2 == size6
|
||||||
|
|
||||||
type TestCase = object
|
|
||||||
keys: seq[UInt256]
|
|
||||||
n: uint64
|
|
||||||
|
|
||||||
proc init(T: type TestCase, keys: seq[UInt256], n: uint64): T =
|
|
||||||
TestCase(keys: keys, n: n)
|
|
||||||
|
|
||||||
proc hasCorrectOrder(s: seq[ObjInfo], expectedOrder: seq[Uint256]): bool =
|
|
||||||
var i = 0
|
|
||||||
for e in s:
|
|
||||||
if (e.distFrom != expectedOrder[i]):
|
|
||||||
return false
|
|
||||||
inc i
|
|
||||||
return true
|
|
||||||
|
|
||||||
test "Get N furthest elements from db":
|
|
||||||
# we check distances from zero as num xor 0 = num, so each uint in sequence is valid
|
|
||||||
# distance
|
|
||||||
let zero = u256(0)
|
|
||||||
let testCases = @[
|
|
||||||
TestCase.init(@[], 10),
|
|
||||||
TestCase.init(@[u256(1), u256(2)], 1),
|
|
||||||
TestCase.init(@[u256(1), u256(2)], 2),
|
|
||||||
TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 2),
|
|
||||||
TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 4),
|
|
||||||
TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 2),
|
|
||||||
TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 4),
|
|
||||||
TestCase.init(generateNRandomU256(rng[], 10), 5),
|
|
||||||
TestCase.init(generateNRandomU256(rng[], 10), 10)
|
|
||||||
]
|
|
||||||
|
|
||||||
for testCase in testCases:
|
|
||||||
let
|
|
||||||
db = ContentDB.new("", uint32.high, inMemory = true)
|
|
||||||
|
|
||||||
for elem in testCase.keys:
|
|
||||||
discard db.put(elem, genByteSeq(32), testId)
|
|
||||||
|
|
||||||
let (furthest, _) = db.getNFurthestElements(zero, testCase.n)
|
|
||||||
|
|
||||||
var sortedKeys = testCase.keys
|
|
||||||
|
|
||||||
sortedKeys.sort(SortOrder.Descending)
|
|
||||||
|
|
||||||
if uint64(len(testCase.keys)) < testCase.n:
|
|
||||||
check:
|
|
||||||
len(furthest) == len(testCase.keys)
|
|
||||||
else:
|
|
||||||
check:
|
|
||||||
uint64(len(furthest)) == testCase.n
|
|
||||||
check:
|
|
||||||
furthest.hasCorrectOrder(sortedKeys)
|
|
||||||
|
|
||||||
test "ContentDB pruning":
|
test "ContentDB pruning":
|
||||||
let
|
let
|
||||||
maxDbSize = uint32(100000)
|
maxDbSize = uint32(100000)
|
||||||
|
2
vendor/nim-eth
vendored
2
vendor/nim-eth
vendored
@ -1 +1 @@
|
|||||||
Subproject commit dffaa78cbedd47d3ee00ba1fdf2b130c47e75793
|
Subproject commit dacf827a8653459429623be7ceaf6ecca20fcf35
|
Loading…
x
Reference in New Issue
Block a user