Use queries with custom xor function (#1113)

* Use queries with custom xor function
This commit is contained in:
KonradStaniec 2022-06-03 13:44:42 +02:00 committed by GitHub
parent 5bd134e2f0
commit 0776f35e0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 151 deletions

View File

@ -29,16 +29,11 @@ export kvstore_sqlite3
# 3. Or databases are created per network (and kvstores per content type) and
# thus depending on the network the right db needs to be selected.
const
# Maximal number of ObjInfo objects held in memory per database scan. 100k
# objects should result in memory usage of around 7mb which should be
# appropriate for even low resource devices
maxObjPerScan = 100000
type
RowInfo = tuple
contentId: array[32, byte]
payloadLength: int64
distance: array[32, byte]
ObjInfo* = object
contentId*: array[32, byte]
@ -51,7 +46,8 @@ type
sizeStmt: SqliteStmt[NoParams, int64]
unusedSizeStmt: SqliteStmt[NoParams, int64]
vacStmt: SqliteStmt[NoParams, void]
getAll: SqliteStmt[NoParams, RowInfo]
contentSizeStmt: SqliteStmt[NoParams, int64]
getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo]
PutResultType* = enum
ContentStored, DbPruned
@ -65,9 +61,21 @@ type
fractionOfDeletedContent*: float64
numOfDeletedElements*: int64
# Ordering used when ranking objects by distance: `a` sorts before `b` when
# its `distFrom` is the smaller of the two.
proc `<`(a, b: ObjInfo): bool =
  a.distFrom < b.distFrom
func xorDistance(
    a: openArray[byte],
    b: openArray[byte]
): Result[seq[byte], cstring] {.cdecl.} =
  ## Byte-wise xor of two 32 byte blobs.
  ##
  ## Returns a 32 byte sequence where element `i` is `a[i] xor b[i]`, or an
  ## error when either input is not exactly 32 bytes long.
  if a.len != 32 or b.len != 32:
    return err("Blobs should have 32 byte length")

  var distance = newSeq[byte](32)
  for idx in 0 ..< 32:
    distance[idx] = a[idx] xor b[idx]

  return ok(distance)
template expectDb(x: auto): untyped =
# There's no meaningful error handling implemented for a corrupt database or
@ -82,6 +90,9 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
else:
SqStoreRef.init(path, "fluffy").expectDb()
db.registerCustomScalarFunction("xorDistance", xorDistance)
.expect("Couldn't register custom xor function")
let getSizeStmt = db.prepareStmt(
"SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();",
NoParams, int64).get()
@ -96,11 +107,14 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
let kvStore = kvStore db.openKvStore().expectDb()
# This needs to go after `openKvStore`, as it checks whether the table name
# kvstore already exists.
let getKeysStmt = db.prepareStmt(
"SELECT key, length(value) FROM kvstore",
NoParams, RowInfo
let contentSizeStmt = db.prepareStmt(
"SELECT SUM(length(value)) FROM kvstore",
NoParams, int64
).get()
let getAllOrderedByDistanceStmt = db.prepareStmt(
"SELECT key, length(value), xorDistance(?, key) as distance FROM kvstore ORDER BY distance DESC",
array[32, byte], RowInfo
).get()
ContentDB(
@ -108,59 +122,11 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C
maxSize: maxSize,
sizeStmt: getSizeStmt,
vacStmt: vacStmt,
getAll: getKeysStmt,
unusedSizeStmt: unusedSize
unusedSizeStmt: unusedSize,
contentSizeStmt: contentSizeStmt,
getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt
)
proc getNFurthestElements*(
    db: ContentDB, target: UInt256, n: uint64): (seq[ObjInfo], int64) =
  ## Returns at most `n` elements of the database that are furthest from
  ## `target`, ordered from furthest to closest, together with the summed
  ## payload length of every row visited by the scan. The payload lengths let
  ## the caller decide how many of the returned elements need to be deleted.
  ##
  ## Distance is currently the xor of the content id and `target`.
  ##
  ## All rows are read and ranked at program level, for two reasons:
  ## - sqlite has no built-in xor function and does not handle bitwise
  ##   operations on blobs as expected
  ## - our nim wrapper for sqlite does not support the create_function api of
  ##   sqlite, so a custom blob-comparing function cannot be used at sql level
  if n == 0:
    return (newSeq[ObjInfo](), 0'i64)

  var
    # Min-heap on `distFrom`: the closest of the kept elements sits at index 0
    # and is the one evicted when a further element shows up.
    furthest = initHeapQueue[ObjInfo]()
    contentBytes: int64 = 0
    row: RowInfo

  for _ in db.getAll.exec(row):
    # TODO: Currently it assumes xor distance, but when we start testing
    # networks with other distance functions this needs to be adjusted to the
    # custom distance function
    let candidate = ObjInfo(
      contentId: row.contentId,
      payloadLength: row.payloadLength,
      distFrom: UInt256.fromBytesBE(row.contentId) xor target)

    if uint64(furthest.len) < n:
      furthest.push(candidate)
    elif furthest[0] < candidate:
      discard furthest.replace(candidate)

    contentBytes += row.payloadLength

  # The heap pops closest-first, so fill the output from the back to end up
  # with furthest-first ordering.
  var ordered = newSeq[ObjInfo](furthest.len)
  for slot in countdown(ordered.high, 0):
    ordered[slot] = furthest.pop()

  (ordered, contentBytes)
proc reclaimSpace*(db: ContentDB): void =
## Runs sqlite VACUUM commands which rebuilds the db, repacking it into a
## minimal amount of disk space.
@ -195,6 +161,13 @@ proc unusedSize(db: ContentDB): int64 =
proc realSize*(db: ContentDB): int64 =
  ## Size reported by `size()` with the amount reported by `unusedSize()`
  ## subtracted.
  let
    total = db.size()
    unusedPart = db.unusedSize()
  total - unusedPart
proc contentSize(db: ContentDB): int64 =
  ## Returns total size of content stored in DB, as produced by the prepared
  ## `contentSizeStmt` query.
  var total: int64 = 0

  # Row handler: remembers the value delivered by the statement.
  proc onRow(value: int64) =
    total = value

  discard db.contentSizeStmt.exec(onRow).expectDb()
  total
proc get*(db: ContentDB, key: openArray[byte]): Option[seq[byte]] =
var res: Option[seq[byte]]
proc onData(data: openArray[byte]) = res = some(@data)
@ -233,41 +206,37 @@ proc contains*(db: ContentDB, key: ContentId): bool =
proc del*(db: ContentDB, key: ContentId) =
  ## Deletes the entry for `key`, addressing it by the big-endian byte form of
  ## the content id.
  let rawKey = key.toByteArrayBE()
  db.del(rawKey)
proc deleteFractionOfContent*(
db: ContentDB,
target: Uint256,
targetFraction: float64): (UInt256, int64, int64, int64) =
## Tries to delete a fraction of the database by scanning the maxObjPerScan
## furthest elements.
## If the maxObjPerScan furthest elements are not enough to attain the required
## fraction, the procedure deletes all but one element and reports how many
## bytes have been deleted.
## The procedure does not call reclaim space; that is left to the caller.
proc deleteContentFraction(
db: ContentDB,
target: UInt256,
fraction: float64): (UInt256, int64, int64, int64) =
## Deletes at most the given `fraction` (between 0 and 1) of the content from the database.
## First, content furthest from provided `target` is deleted.
let (furthestElements, totalContentSize) = db.getNFurthestElements(target, maxObjPerScan)
var bytesDeleted: int64 = 0
let bytesToDelete = int64(targetFraction * float64(totalContentSize))
let numOfElements = len(furthestElements)
doAssert(
fraction > 0 and fraction < 1,
"Deleted fraction should be > 0 and < 1"
)
let totalContentSize = db.contentSize()
let bytesToDelete = int64(fraction * float64(totalContentSize))
var numOfDeletedElements: int64 = 0
if numOfElements == 0:
# no elements in database, return some zero value
return (UInt256.zero, 0'i64, 0'i64, 0'i64)
let lastIdx = len(furthestElements) - 1
for i, elem in furthestElements:
if i == lastIdx:
# this is our last element, do not delete it and report it as last non deleted
# element
return (elem.distFrom, bytesDeleted, totalContentSize, numOfDeletedElements)
if bytesDeleted + elem.payloadLength < bytesToDelete:
db.del(elem.contentId)
bytesDeleted = bytesDeleted + elem.payloadLength
var ri: RowInfo
var bytesDeleted: int64 = 0
let targetBytes = target.toByteArrayBE()
for e in db.getAllOrderedByDistanceStmt.exec(targetBytes, ri):
if bytesDeleted + ri.payloadLength < bytesToDelete:
db.del(ri.contentId)
bytesDeleted = bytesDeleted + ri.payloadLength
inc numOfDeletedElements
else:
return (elem.distFrom, bytesDeleted, totalContentSize, numOfDeletedElements)
return (
UInt256.fromBytesBE(ri.distance),
bytesDeleted,
totalContentSize,
numOfDeletedElements
)
proc put*(
db: ContentDB,
@ -299,7 +268,7 @@ proc put*(
totalContentSize,
deletedElements
) =
db.deleteFractionOfContent(target, 0.25)
db.deleteContentFraction(target, 0.25)
let deletedFraction = float64(deletedBytes) / float64(totalContentSize)

View File

@ -102,59 +102,6 @@ suite "Content Database":
size6 == size1
realSize2 == size6
# Parameters for one scenario of the "Get N furthest elements from db" test.
type TestCase = object
# keys stored in the database before querying
keys: seq[UInt256]
# value passed as `n` to getNFurthestElements
n: uint64
proc init(T: type TestCase, keys: seq[UInt256], n: uint64): T =
  ## Builds a `TestCase` holding the given keys and element limit.
  result.keys = keys
  result.n = n
proc hasCorrectOrder(s: seq[ObjInfo], expectedOrder: seq[Uint256]): bool =
  ## True when the `distFrom` of every element of `s` equals the entry of
  ## `expectedOrder` at the same index. Only the first `s.len` entries of
  ## `expectedOrder` are looked at.
  for idx, obj in s:
    if obj.distFrom != expectedOrder[idx]:
      return false
  true
test "Get N furthest elements from db":
  # Distances are measured from zero: num xor 0 = num, so every key is its own
  # distance and the keys sorted in descending order give the expected result.
  let
    origin = u256(0)
    scenarios = @[
      TestCase.init(@[], 10),
      TestCase.init(@[u256(1), u256(2)], 1),
      TestCase.init(@[u256(1), u256(2)], 2),
      TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 2),
      TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 4),
      TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 2),
      TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 4),
      TestCase.init(generateNRandomU256(rng[], 10), 5),
      TestCase.init(generateNRandomU256(rng[], 10), 10)
    ]

  for scenario in scenarios:
    let db = ContentDB.new("", uint32.high, inMemory = true)

    for key in scenario.keys:
      discard db.put(key, genByteSeq(32), testId)

    let (furthest, _) = db.getNFurthestElements(origin, scenario.n)

    var expectedOrder = scenario.keys
    expectedOrder.sort(SortOrder.Descending)

    # The result holds every key when fewer than `n` are stored, otherwise
    # exactly `n` of them.
    if uint64(scenario.keys.len) < scenario.n:
      check len(furthest) == len(scenario.keys)
    else:
      check uint64(len(furthest)) == scenario.n

    check furthest.hasCorrectOrder(expectedOrder)
test "ContentDB pruning":
let
maxDbSize = uint32(100000)

2
vendor/nim-eth vendored

@ -1 +1 @@
Subproject commit dffaa78cbedd47d3ee00ba1fdf2b130c47e75793
Subproject commit dacf827a8653459429623be7ceaf6ecca20fcf35