Jacek Sieka ac5e3c733b
chore: bump taskpools
.. to support generic workers which allows getting rid of MerkleTask
2025-12-17 13:53:18 +01:00

397 lines
12 KiB
Nim

## Nim-Codex
## Copyright (c) 2023 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
{.push raises: [].}
import std/[bitops, atomics, sequtils]
import stew/assign2
import pkg/questionable/results
import pkg/taskpools
import pkg/chronos
import pkg/chronos/threadsync
import ../errors
import ../utils/sharedbuf
export sharedbuf
template nodeData(
    data: openArray[byte], offsets: openArray[int], nodeSize, i, j: int
): openArray[byte] =
  ## View of the `nodeSize` bytes that make up node `j` of level `i`, where
  ## level 0 holds the leaves and `offsets[i]` is the index of the first node
  ## of level `i` within the flattened `data`.
  let
    first = (offsets[i] + j) * nodeSize
    last = first + nodeSize - 1
  data.toOpenArray(first, last)
type
  # Combines the two hashes `x` and `y` into a single hash under the given
  # `key`.
  #
  # TODO hash functions don't fail - removing the ?! from this function would
  # significantly simplify the flow below
  CompressFn*[H, K] = proc(x, y: H, key: K): ?!H {.noSideEffect, raises: [].}

  CompressData[H, K] = object
    ## Everything needed to compress the nodes of one particular tree
    fn: CompressFn[H, K] ## Compression function applied to pairs of nodes
    nodeSize: int ## Size of one stored node, taken from `len` of the first node
    zero: H ## Value used in place of a missing sibling

  MerkleTreeObj*[H, K] = object of RootObj
    store*: seq[byte]
      ## Flattened merkle tree where hashes are assumed to be trivial bytes and
      ## uniform in size.
      ##
      ## Each layer of the tree is stored serially starting with the leaves and
      ## ending with the root.
      ##
      ## Because the tree might not be balanced, `layerOffsets` contains the
      ## index of the starting point of each level, for easy lookup.
    layerOffsets*: seq[int]
      ## Starting point of each level in the tree, starting from the leaves -
      ## multiplied by the entry size, this is the offset in the payload where
      ## the entries of that level start
      ##
      ## For example, a tree with 4 leaves will have [0, 4, 6] stored here.
      ##
      ## See nodesPerLevel function, from which this sequence is derived
    compress*: CompressData[H, K]
      ## Compression function, node size and zero value used by this tree

  MerkleTree*[H, K] = ref MerkleTreeObj[H, K]

  MerkleProof*[H, K] = ref object of RootObj
    index*: int # linear index of the leaf, starting from 0
    path*: seq[H] # order: from the bottom to the top
    nleaves*: int # number of leaves in the tree (=size of input)
    compress*: CompressFn[H, K] # compress function
    zero*: H # zero value
func levels*[H, K](self: MerkleTree[H, K]): int =
  ## Number of levels stored in the tree, i.e. the number of entries in
  ## `layerOffsets`.
  self.layerOffsets.len
func depth*[H, K](self: MerkleTree[H, K]): int =
  ## One less than the number of levels in the tree; this is also the length of
  ## the path that `getProof` produces.
  self.layerOffsets.len - 1
func nodesInLayer(offsets: openArray[int], layer: int): int =
  ## Number of nodes in `layer`, derived from the distance between the starting
  ## offsets of consecutive layers. The last layer has no successor and is
  ## reported as holding a single node.
  let isLastLayer = layer == offsets.high
  if isLastLayer:
    return 1
  offsets[layer + 1] - offsets[layer]
func nodesInLayer(self: MerkleTree | MerkleTreeObj, layer: int): int =
  ## Number of nodes in `layer` of the given tree, based on its `layerOffsets`.
  nodesInLayer(self.layerOffsets, layer)
func leavesCount*[H, K](self: MerkleTree[H, K]): int =
  ## Number of leaves in the tree, i.e. the number of nodes in layer 0.
  nodesInLayer(self, 0)
func nodesPerLevel(nleaves: int): seq[int] =
  ## Node count of every layer of a tree with `nleaves` leaves, listed from the
  ## leaf layer up to the root.
  ##
  ## A tree of 4 leaves yields `[4, 2, 1]`. A single leaf still gets a separate
  ## root layer (`[1, 1]`), and a non-positive count yields an empty seq.
  if nleaves <= 0:
    return @[]
  if nleaves == 1:
    return @[1, 1] # leaf and root

  var width = nleaves
  result.add width
  while width > 1:
    # Each parent covers two children, the last one possibly only one
    width = (width + 1) div 2
    result.add width
func layerOffsets(nleaves: int): seq[int] =
  ## Index of the first node of every layer in the flattened node store of a
  ## tree with `nleaves` leaves, i.e. the running sum of `nodesPerLevel`.
  ##
  ## A tree of 4 leaves yields `[0, 4, 6]`.
  let widths = nodesPerLevel(nleaves)
  result = newSeqOfCap[int](widths.len)

  var next = 0
  for width in widths:
    result.add next
    next += width
template nodeData(self: MerkleTreeObj, i, j: int): openArray[byte] =
  ## View of the bytes of node `j` in level `i` of `self` (level 0 holds the
  ## leaves), located through the tree's own offsets and node size.
  nodeData(self.store, self.layerOffsets, self.compress.nodeSize, i, j)
func layer*[H, K](
    self: MerkleTree[H, K], layer: int
): seq[H] {.deprecated: "Expensive".} =
  ## Copy of all nodes of `layer`, each converted from its stored bytes to `H`.
  result = newSeq[H](self.nodesInLayer(layer))
  for pos in 0 ..< result.len:
    assign(result[pos], self[].nodeData(layer, pos))
func leaves*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} =
  ## Copy of the nodes of layer 0, i.e. the leaves.
  layer(self, 0)
iterator layers*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} =
  ## Yield a copy of every layer, from the leaves up to the root.
  for level in 0 .. self.layerOffsets.high:
    yield layer(self, level)
proc layers*[H, K](self: MerkleTree[H, K]): seq[seq[H]] {.deprecated: "Expensive".} =
  ## Collect the copies yielded by the `layers` iterator into a single seq.
  for nodesOfLayer in self.layers():
    add(result, nodesOfLayer)
iterator nodes*[H, K](self: MerkleTree[H, K]): H =
  ## Yield every node of the tree, layer by layer, beginning with the leaves.
  # A single `H` is reused as the conversion target for all yielded nodes
  var current: H
  for level in 0 ..< self.layerOffsets.len:
    for pos in 0 ..< self.nodesInLayer(level):
      assign(current, self[].nodeData(level, pos))
      yield current
func root*[H, K](self: MerkleTree[H, K]): ?!H =
  ## The single node of the topmost layer, or the failure "invalid tree" when
  ## the tree has no layers at all.
  mixin assign

  let top = self.layerOffsets.high
  if top < 0:
    return failure "invalid tree"

  var rootHash: H
  assign(rootHash, self[].nodeData(top, 0))
  success rootHash
func getProof*[H, K](
    self: MerkleTree[H, K], index: int, proof: MerkleProof[H, K]
): ?!void =
  ## Fill `proof` with the inclusion proof of the leaf at `index`: the sibling
  ## of every node on the way from the leaf layer up to (but not including)
  ## the root, together with the leaf count, compression function and zero
  ## value of the tree.
  ##
  ## Fails with "invalid tree" when the tree has no layers and with
  ## "index out of bounds" when `index` does not refer to a leaf. `proof` is
  ## left untouched on failure.
  if self.layerOffsets.len == 0:
    # Without this guard, `leavesCount` would index into an empty offset list
    return failure "invalid tree"

  let depth = self.depth
  let nleaves = self.leavesCount

  if index < 0 or index >= nleaves:
    return failure "index out of bounds"

  var path = newSeq[H](depth)
  var k = index # position of the current node within its layer
  var m = nleaves # number of nodes in the current layer
  for i in 0 ..< depth:
    let j = k xor 1 # position of the sibling
    if j < m:
      assign(path[i], self[].nodeData(i, j))
    else:
      # Last node of a layer with an odd node count: no sibling is stored
      path[i] = self.compress.zero
    k = k shr 1
    m = (m + 1) shr 1

  proof.index = index
  proof.path = path
  proof.nleaves = nleaves
  proof.compress = self.compress.fn
  proof.zero = self.compress.zero
  success()
func getProof*[H, K](self: MerkleTree[H, K], index: int): ?!MerkleProof[H, K] =
  ## Allocate a fresh proof and fill it in for the leaf at `index`, forwarding
  ## any failure reported by the filling overload.
  let proof = MerkleProof[H, K]()
  ?getProof(self, index, proof)
  success(proof)
func reconstructRoot*[H, K](proof: MerkleProof[H, K], leaf: H): ?!H =
  ## Recompute the root that `leaf` leads to by folding the entries of
  ## `proof.path` into it from the bottom up, using `proof.compress`. Any
  ## failure of the compression function is returned as is.
  var
    width = proof.nleaves # number of nodes in the current layer
    pos = proof.index # position of the running hash within the current layer
    acc = leaf
    layerKey = K.KeyBottomLayer

  for sibling in proof.path:
    if (pos and 1) != 0:
      # Odd position: the sibling sits on the left, and the parent always has
      # two children so it cannot be an odd node
      acc = ?proof.compress(sibling, acc, layerKey)
    elif pos == width - 1:
      # Even position at the very end of the layer: single child => odd node
      acc = ?proof.compress(acc, sibling, K(layerKey.ord + 2))
    else:
      # Even position with a sibling on the right
      acc = ?proof.compress(acc, sibling, layerKey)

    # Only the first step happens on the bottom layer
    layerKey = K.KeyNone
    pos = pos shr 1
    width = (width + 1) shr 1

  success acc
func verify*[H, K](proof: MerkleProof[H, K], leaf: H, root: H): ?!bool =
  ## Whether the root reconstructed from `leaf` and `proof` equals `root`;
  ## a failure to reconstruct the root is forwarded.
  let rebuilt = ?proof.reconstructRoot(leaf)
  success bool(root == rebuilt)
func fromNodes*[H, K](
    self: MerkleTree[H, K],
    compressor: CompressFn,
    zero: H,
    nodes: openArray[H],
    nleaves: int,
): ?!void =
  ## Initialise the tree from an already computed, flattened list of `nodes`
  ## (all layers, leaves first, root last) belonging to a tree of `nleaves`
  ## leaves. The nodes are copied into the tree's byte store.
  ##
  ## Fails with "Not enough nodes" when fewer than two nodes are given, with
  ## "No leaves" when `nleaves` is not positive and with "bad node count" when
  ## `nodes.len` does not match the node count of a tree with `nleaves`
  ## leaves. The tree is only modified once all checks have passed.
  mixin assign

  if nodes.len < 2: # At least leaf and root
    return failure "Not enough nodes"

  # A negative count would give an empty offset list and crash on `[^1]` below
  if nleaves <= 0:
    return failure "No leaves"

  let offsets = layerOffsets(nleaves)
  # The last layer holds exactly one node (the root)
  if offsets[^1] + 1 != nodes.len:
    return failure "bad node count"

  let entrySize = nodes[0].len
  self.compress = CompressData[H, K](fn: compressor, nodeSize: entrySize, zero: zero)
  self.layerOffsets = offsets
  self.store = newSeqUninit[byte](nodes.len * entrySize)

  for i in 0 ..< nodes.len:
    assign(
      self[].store.toOpenArray(i * entrySize, (i + 1) * entrySize - 1), nodes[i]
    )

  success()
func merkleTreeWorker[H, K](
    store: var openArray[byte],
    offsets: openArray[int],
    compress: CompressData[H, K],
    layer: int,
    isBottomLayer: static bool,
): ?!void =
  ## Worker used to compute the merkle tree from the leaves that are assumed to
  ## already be stored at the beginning of the `store`, as done by `prepare`.
  ##
  ## Each call compresses the nodes of `layer` pairwise into `layer + 1` and
  ## then recurses on the next layer, stopping at a non-bottom layer that
  ## holds a single node. A failure of the compression function is returned
  ## immediately.

  # Throughout, we use `assign` to convert from H to bytes and back, assuming
  # this assignment can be done somewhat efficiently (ie memcpy) - because
  # the code must work with multihash where len(H) can differ, we cannot
  # simply use a fixed-size array here.
  mixin assign

  template nodeData(i, j: int): openArray[byte] =
    # Pick out the bytes of node j in layer i
    store.nodeData(offsets, compress.nodeSize, i, j)

  let m = offsets.nodesInLayer(layer)

  # A bottom layer with a single leaf is still compressed once (as an odd
  # node), any other single-node layer is the root and ends the recursion
  when not isBottomLayer:
    if m == 1:
      return success()

  let halfn: int = m div 2
  let n: int = 2 * halfn
  let isOdd: bool = (n != m)

  # Because the compression function we work with works with H and not bytes,
  # we need to extract H from the raw data - a little abstraction tax that
  # ensures that properties like alignment of H are respected.
  var a, b, tmp: H

  # Full pairs: nodes 2i and 2i+1 of this layer become node i of the next one
  for i in 0 ..< halfn:
    const key = when isBottomLayer: K.KeyBottomLayer else: K.KeyNone

    assign(a, nodeData(layer, i * 2))
    assign(b, nodeData(layer, i * 2 + 1))

    tmp = ?compress.fn(a, b, key = key)

    assign(nodeData(layer + 1, i), tmp)

  # Leftover node without a sibling: compressed together with the zero value
  if isOdd:
    const key = when isBottomLayer: K.KeyOddAndBottomLayer else: K.KeyOdd

    assign(a, nodeData(layer, n))

    tmp = ?compress.fn(a, compress.zero, key = key)

    assign(nodeData(layer + 1, halfn), tmp)

  merkleTreeWorker(store, offsets, compress, layer + 1, false)
proc merkleTreeWorker[H, K](
    store: SharedBuf[byte],
    offsets: SharedBuf[int],
    compress: ptr CompressData[H, K],
    signal: ThreadSignalPtr,
): bool =
  ## Task entry point: computes the whole tree over the shared buffers,
  ## starting at the bottom layer, and returns whether that succeeded.
  ## `signal` is fired once the computation is over, whatever its outcome.
  try:
    result = merkleTreeWorker(
      store.toOpenArray(), offsets.toOpenArray(), compress[], 0, isBottomLayer = true
    ).isOk()
  finally:
    discard signal.fireSync()
func prepare*[H, K](
    self: MerkleTree[H, K], compressor: CompressFn, zero: H, leaves: openArray[H]
): ?!void =
  ## Set the instance up for computing the merkle tree of `leaves` with the
  ## given compression function; the computation itself is done by a
  ## subsequent call to `compute`. The leaves are copied into the tree, so the
  ## caller may free them once this call returns.
  ##
  ## Fails with "No leaves" when `leaves` is empty.
  if leaves.len == 0:
    return failure "No leaves"

  let leafSize = leaves[0].len
  self.compress = CompressData[H, K](fn: compressor, nodeSize: leafSize, zero: zero)

  self.layerOffsets = layerOffsets(leaves.len)
  # The last layer starts at the final offset and holds a single node
  let totalNodes = self.layerOffsets[^1] + 1
  self.store = newSeqUninit[byte](totalNodes * leafSize)

  # Only the leaf layer is filled in here
  for idx in 0 .. leaves.high:
    assign(self[].nodeData(0, idx), leaves[idx])

  success()
proc compute*[H, K](self: MerkleTree[H, K]): ?!void =
  ## Compute all layers above the leaves on the calling thread, starting from
  ## the bottom layer of the tree's own store.
  merkleTreeWorker(
    self.store, self.layerOffsets, self.compress, layer = 0, isBottomLayer = true
  )
proc compute*[H, K](
    self: MerkleTree[H, K], tp: Taskpool
): Future[?!void] {.async: (raises: []).} =
  ## Compute the tree, offloading the work to a task spawned on `tp` unless the
  ## pool reports a single thread, in which case the computation runs inline.
  ##
  ## Fails when the thread signal cannot be created or when the spawned task
  ## reports failure.
  if tp.numThreads == 1:
    # With a single thread, there's no point creating a separate task
    return self.compute()

  # TODO this signal would benefit from reuse across computations
  without signal =? ThreadSignalPtr.new():
    return failure("Unable to create thread signal")

  defer:
    signal.close().expect("closing once works")

  # The task receives views of the store and offsets plus the address of
  # `self.compress`, and fires `signal` once it is done
  let res = tp.spawn merkleTreeWorker(
    SharedBuf.view(self.store),
    SharedBuf.view(self.layerOffsets),
    addr self.compress,
    signal,
  )

  # To support cancellation, we'd have to ensure the task we posted to taskpools
  # exits early - since we're not doing that, block cancellation attempts
  try:
    await noCancel signal.wait()
  except AsyncError as exc:
    # Since we initialized the signal, the OS or chronos is misbehaving. In any
    # case, it would mean the task is still running which would cause a
    # memory violation if we let it run - panic instead
    raiseAssert "Could not wait for signal, was it initialized? " & exc.msg

  # The signal has fired, so the task's result can now be collected
  if not res.sync():
    return failure("merkle tree task failed")

  return success()