Add api to get n furthest elements from db (#1026)

* Add api to get n furthest elements from db
This commit is contained in:
KonradStaniec 2022-04-03 15:14:44 +02:00 committed by GitHub
parent e04c69df18
commit b3570fae6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 136 additions and 2 deletions

View File

@ -8,7 +8,7 @@
{.push raises: [Defect].}
import
std/options,
std/[options, heapqueue],
eth/db/kvstore,
eth/db/kvstore_sqlite3,
stint,
@ -30,10 +30,24 @@ export kvstore_sqlite3
# thus depending on the network the right db needs to be selected.
type
# Row shape produced by the `getAll` statement; one row per entry of the
# `kvstore` table.
RowInfo = tuple
# Raw 32-byte key of the entry (the `key` column).
contentId: array[32, byte]
# Size of the stored value in bytes (`length(value)` in the query).
payloadLength: int64
# Public description of a stored element relative to some target id, as
# returned by `getNFurthestElements`.
ObjInfo* = object
# Raw 32-byte key of the element.
contentId*: array[32, byte]
# Size of the stored value in bytes.
payloadLength*: int64
# Distance between the element id and the target it was compared against
# (currently computed with xor).
distFrom*: UInt256
# Handle to the content database: the key-value store plus the prepared
# statements used by the helper procs of this module.
ContentDB* = ref object
# Underlying key-value store.
kv: KvStoreRef
# Prepared statement yielding a single int64 (used for size queries).
sizeStmt: SqliteStmt[NoParams, int64]
# Prepared `VACUUM;` statement.
vacStmt: SqliteStmt[NoParams, void]
# Prepared statement listing the key and value length of every entry.
getAll: SqliteStmt[NoParams, RowInfo]
# An element with a smaller distance compares as "less". A HeapQueue built on
# this ordering keeps the closest element at index 0, which is the one
# getNFurthestElements evicts first; the furthest-to-closest order of its
# result is produced there, when the heap is drained.
proc `<`(a, b: ObjInfo): bool =
  a.distFrom < b.distFrom
template expectDb(x: auto): untyped =
# There's no meaningful error handling implemented for a corrupt database or
@ -56,7 +70,59 @@ proc new*(T: type ContentDB, path: string, inMemory = false): ContentDB =
"VACUUM;",
NoParams, void).get()
ContentDB(kv: kvStore db.openKvStore().expectDb(), sizeStmt: getSizeStmt, vacStmt: vacStmt)
let kvStore = kvStore db.openKvStore().expectDb()
# This needs to go after `openKvStore`, as it checks that the table name kvstore
# already exists.
let getKeysStmt = db.prepareStmt(
"SELECT key, length(value) FROM kvstore",
NoParams, RowInfo
).get()
ContentDB(kv: kvStore, sizeStmt: getSizeStmt, vacStmt: vacStmt, getAll: getKeysStmt)
proc getNFurthestElements*(db: ContentDB, target: UInt256, n: uint64): seq[ObjInfo] =
  ## Returns at most `n` elements of the database that are furthest from
  ## `target`, ordered from furthest to closest. Each entry also carries the
  ## payload length, so the caller can decide how many of the elements need to
  ## be deleted.
  ##
  ## The distance is currently the xor metric.
  ##
  ## All rows are fetched and the selection is done at program level, for two
  ## reasons:
  ## - sqlite has no built-in xor function and does not handle bitwise
  ##   operations on blobs as expected
  ## - our nim wrapper for sqlite does not support the create_function api of
  ##   sqlite, so a custom blob comparing function cannot be registered at sql
  ##   level. If that were possible, all of this work might be done by a
  ##   single sql query.
  if n == 0:
    # `result` is still the default empty seq here.
    return

  var
    # Ordered by `<` on ObjInfo, so index 0 holds the closest element kept.
    furthest = initHeapQueue[ObjInfo]()
    row: RowInfo

  for _ in db.getAll.exec(row):
    let id = UInt256.fromBytesBE(row.contentId)
    # TODO: Currently it assumes xor distance, but when we start testing
    # networks with other distance functions this needs to be adjusted to the
    # custom distance function
    let candidate = ObjInfo(
      contentId: row.contentId,
      payloadLength: row.payloadLength,
      distFrom: id xor target
    )

    if uint64(furthest.len) < n:
      furthest.push(candidate)
    elif candidate > furthest[0]:
      # Heap is full: swap out the closest kept element for a further one.
      discard furthest.replace(candidate)

  # `pop` hands out the closest element first, so fill the result from its
  # last slot towards the first to end up with furthest-to-closest order.
  result = newSeq[ObjInfo](furthest.len)
  for slot in countdown(furthest.len - 1, 0):
    result[slot] = furthest.pop()
proc reclaimSpace*(db: ContentDB): void =
## Runs the sqlite VACUUM command, which rebuilds the db, repacking it into a minimal amount of disk space

View File

@ -8,7 +8,9 @@
{.used.}
import
std/algorithm,
unittest2, stint,
eth/keys,
../network/state/state_content,
../content_db
@ -20,7 +22,20 @@ proc genByteSeq(length: int): seq[byte] =
inc i
return resultSeq
proc generateNRandomU256(rng: var BrHmacDrbgContext, n: int): seq[UInt256] =
  ## Produces `n` numbers, each built from 32 bytes generated by `rng` and
  ## read as a big-endian integer. Yields an empty seq when `n` is not
  ## positive.
  var generated = newSeq[UInt256]()
  for _ in 0 ..< n:
    var buf = newSeq[byte](32)
    brHmacDrbgGenerate(rng, buf)
    generated.add(UInt256.fromBytesBE(buf))
  return generated
suite "Content Database":
let rng = newRng()
# Note: We are currently not really testing something new here just basic
# underlying kvstore.
test "ContentDB basic API":
@ -85,3 +100,56 @@ suite "Content Database":
check:
# After space reclamation size of db should be equal to initial size
size6 == size1
# One scenario for the "Get N furthest elements from db" test.
type TestCase = object
# Keys inserted into a fresh database before querying.
keys: seq[UInt256]
# Number of furthest elements requested from the database.
n: uint64
proc init(T: type TestCase, keys: seq[UInt256], n: uint64): T =
  ## Builds a test scenario from the keys to store and the number of
  ## elements to request.
  result.keys = keys
  result.n = n
proc hasCorrectOrder(s: seq[ObjInfo], expectedOrder: seq[UInt256]): bool =
  ## True when the distance of every element of `s` equals the entry of
  ## `expectedOrder` at the same index. Only the first `s.len` entries of
  ## `expectedOrder` are read, so `s` may be shorter than `expectedOrder`.
  result = true
  for idx, info in s:
    if info.distFrom != expectedOrder[idx]:
      return false
test "Get N furthest elements from db":
  # Distances are measured from zero: `num xor 0 == num`, so every key is its
  # own distance and the keys sorted in descending order are the expected
  # result order.
  let
    target = u256(0)
    scenarios = @[
      TestCase.init(@[], 10),
      TestCase.init(@[u256(1), u256(2)], 1),
      TestCase.init(@[u256(1), u256(2)], 2),
      TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 2),
      TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 4),
      TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 2),
      TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 4),
      TestCase.init(generateNRandomU256(rng[], 10), 5),
      TestCase.init(generateNRandomU256(rng[], 10), 10)
    ]

  for scenario in scenarios:
    # Every scenario runs against its own empty in-memory database.
    let db = ContentDB.new("", inMemory = true)

    for key in scenario.keys:
      db.put(key, genByteSeq(32))

    let
      furthest = db.getNFurthestElements(target, scenario.n)
      expectedOrder = scenario.keys.sorted(SortOrder.Descending)
      keyCount = len(scenario.keys)

    # The db cannot return more elements than it holds.
    if uint64(keyCount) < scenario.n:
      check len(furthest) == keyCount
    else:
      check uint64(len(furthest)) == scenario.n

    check furthest.hasCorrectOrder(expectedOrder)