From d627b1738bf00d156a1f5287af15e55e45adc463 Mon Sep 17 00:00:00 2001 From: E M <5089238+emizzle@users.noreply.github.com> Date: Mon, 19 Jan 2026 17:01:30 +1100 Subject: [PATCH] Move base merkletree implementation to nim-merkletree submodule - use nim-merkletree as base merkletree implementation - CodexTree uses nim-merkletree as the base merkletree impl --- .gitmodules | 5 +- codex/merkletree.nim | 4 +- codex/merkletree/{codex => }/coders.nim | 7 +- codex/merkletree/codex.nim | 4 - codex/merkletree/codex/codex.nim | 249 ----------- codex/merkletree/merkletree.nim | 563 +++++++++--------------- tests/codex/stores/testrepostore.nim | 2 +- vendor/nim-merkletree | 1 + 8 files changed, 219 insertions(+), 616 deletions(-) rename codex/merkletree/{codex => }/coders.nim (98%) delete mode 100644 codex/merkletree/codex.nim delete mode 100644 codex/merkletree/codex/codex.nim create mode 160000 vendor/nim-merkletree diff --git a/.gitmodules b/.gitmodules index b8260d32..240f4b5c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -206,4 +206,7 @@ path = vendor/nim-ngtcp2 url = https://github.com/vacp2p/nim-ngtcp2.git ignore = untracked - branch = main \ No newline at end of file + branch = main +[submodule "vendor/nim-merkletree"] + path = vendor/nim-merkletree + url = https://github.com/logos-storage/nim-merkletree diff --git a/codex/merkletree.nim b/codex/merkletree.nim index 2f4a4472..fde7910a 100644 --- a/codex/merkletree.nim +++ b/codex/merkletree.nim @@ -1,4 +1,4 @@ import ./merkletree/merkletree -import ./merkletree/codex +import ./merkletree/coders -export codex, merkletree +export merkletree, coders \ No newline at end of file diff --git a/codex/merkletree/codex/coders.nim b/codex/merkletree/coders.nim similarity index 98% rename from codex/merkletree/codex/coders.nim rename to codex/merkletree/coders.nim index d81979c1..3125e338 100644 --- a/codex/merkletree/codex/coders.nim +++ b/codex/merkletree/coders.nim @@ -15,10 +15,9 @@ import pkg/questionable/results import pkg/stew/byteutils import pkg/serde/json -import ../../units -import ../../errors - -import ./codex +import ../units +import ../errors +import ./merkletree const MaxMerkleTreeSize = 100.MiBs.uint const MaxMerkleProofSize = 1.MiBs.uint diff --git a/codex/merkletree/codex.nim b/codex/merkletree/codex.nim deleted file mode 100644 index 0e9a5874..00000000 --- a/codex/merkletree/codex.nim +++ /dev/null @@ -1,4 +0,0 @@ -import ./codex/codex -import ./codex/coders - -export codex, coders diff --git a/codex/merkletree/codex/codex.nim b/codex/merkletree/codex/codex.nim deleted file mode 100644 index 173befd0..00000000 --- a/codex/merkletree/codex/codex.nim +++ /dev/null @@ -1,249 +0,0 @@ -## Logos Storage -## Copyright (c) 2023 Status Research & Development GmbH -## Licensed under either of -## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) -## * MIT license ([LICENSE-MIT](LICENSE-MIT)) -## at your option. -## This file may not be copied, modified, or distributed except according to -## those terms. - -{.push raises: [].} - -import std/bitops -import std/[atomics, sequtils] - -import pkg/questionable -import pkg/questionable/results -import pkg/libp2p/[cid, multicodec, multihash] -import pkg/constantine/hashes -import pkg/taskpools -import pkg/chronos/threadsync -import ../../utils -import ../../rng -import ../../errors -import ../../codextypes - -from ../../utils/digest import digestBytes - -import ../merkletree - -export merkletree - -logScope: - topics = "codex merkletree" - -type - ByteTreeKey* {.pure.} = enum - KeyNone = 0x0.byte - KeyBottomLayer = 0x1.byte - KeyOdd = 0x2.byte - KeyOddAndBottomLayer = 0x3.byte - - ByteHash* = seq[byte] - ByteTree* = MerkleTree[ByteHash, ByteTreeKey] - ByteProof* = MerkleProof[ByteHash, ByteTreeKey] - - CodexTree* = ref object of ByteTree - mcodec*: MultiCodec - - CodexProof* = ref object of ByteProof - mcodec*: MultiCodec - -func getProof*(self: CodexTree, index: int): ?!CodexProof = - var proof = CodexProof(mcodec: self.mcodec) - - ?self.getProof(index, proof) - - success proof - -func verify*(self: CodexProof, leaf: MultiHash, root: MultiHash): ?!bool = - ## Verify hash - ## - - let - rootBytes = root.digestBytes - leafBytes = leaf.digestBytes - - if self.mcodec != root.mcodec or self.mcodec != leaf.mcodec: - return failure "Hash codec mismatch" - - if rootBytes.len != root.size and leafBytes.len != leaf.size: - return failure "Invalid hash length" - - self.verify(leafBytes, rootBytes) - -func verify*(self: CodexProof, leaf: Cid, root: Cid): ?!bool = - self.verify(?leaf.mhash.mapFailure, ?leaf.mhash.mapFailure) - -proc rootCid*(self: CodexTree, version = CIDv1, dataCodec = DatasetRootCodec): ?!Cid = - if (?self.root).len == 0: - return failure "Empty root" - - let mhash = ?MultiHash.init(self.mcodec, ?self.root).mapFailure - - Cid.init(version, DatasetRootCodec, mhash).mapFailure - -func getLeafCid*( - self: CodexTree, i: Natural, version = CIDv1, dataCodec = BlockCodec -): ?!Cid = - if i >= self.leavesCount: - return failure "Invalid leaf index " & $i - - let - leaf = self.leaves[i] - mhash = ?MultiHash.init($self.mcodec, leaf).mapFailure - - Cid.init(version, dataCodec, mhash).mapFailure - -proc `$`*(self: CodexTree): string = - let root = - if self.root.isOk: - byteutils.toHex(self.root.get) - else: - "none" - "CodexTree(" & " root: " & root & ", leavesCount: " & $self.leavesCount & ", levels: " & - $self.levels & ", mcodec: " & $self.mcodec & " )" - -proc `$`*(self: CodexProof): string = - "CodexProof(" & " nleaves: " & $self.nleaves & ", index: " & $self.index & ", path: " & - $self.path.mapIt(byteutils.toHex(it)) & ", mcodec: " & $self.mcodec & " )" - -func compress*(x, y: openArray[byte], key: ByteTreeKey, codec: MultiCodec): ?!ByteHash = - ## Compress two hashes - ## - let input = @x & @y & @[key.byte] - let digest = ?MultiHash.digest(codec, input).mapFailure - success digest.digestBytes - -func initTree(mcodec: MultiCodec, leaves: openArray[ByteHash]): ?!CodexTree = - if leaves.len == 0: - return failure "Empty leaves" - - let - compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = - compress(x, y, key, mcodec) - digestSize = ?mcodec.digestSize.mapFailure - Zero: ByteHash = newSeq[byte](digestSize) - - if digestSize != leaves[0].len: - return failure "Invalid hash length" - - var self = CodexTree(mcodec: mcodec) - ?self.prepare(compressor, Zero, leaves) - success self - -func init*( - _: type CodexTree, mcodec: MultiCodec = Sha256HashCodec, leaves: openArray[ByteHash] -): ?!CodexTree = - let tree = ?initTree(mcodec, leaves) - ?tree.compute() - success tree - -proc init*( - _: type CodexTree, - tp: Taskpool, - mcodec: MultiCodec = Sha256HashCodec, - leaves: seq[ByteHash], -): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = - let tree = ?initTree(mcodec, leaves) - ?await tree.compute(tp) - success tree - -func init*(_: type CodexTree, leaves: openArray[MultiHash]): ?!CodexTree = - if leaves.len == 0: - return failure "Empty leaves" - - let - mcodec = leaves[0].mcodec - leaves = leaves.mapIt(it.digestBytes) - - CodexTree.init(mcodec, leaves) - -proc init*( - _: type CodexTree, tp: Taskpool, leaves: seq[MultiHash] -): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = - if leaves.len == 0: - return failure "Empty leaves" - - let - mcodec = leaves[0].mcodec - leaves = leaves.mapIt(it.digestBytes) - - await CodexTree.init(tp, mcodec, leaves) - -func init*(_: type CodexTree, leaves: openArray[Cid]): ?!CodexTree = - if leaves.len == 0: - return failure "Empty leaves" - - let - mcodec = (?leaves[0].mhash.mapFailure).mcodec - leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes) - - CodexTree.init(mcodec, leaves) - -proc init*( - _: type CodexTree, tp: Taskpool, leaves: seq[Cid] -): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = - if leaves.len == 0: - return failure("Empty leaves") - - let - mcodec = (?leaves[0].mhash.mapFailure).mcodec - leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes) - - await CodexTree.init(tp, mcodec, leaves) - -proc fromNodes*( - _: type CodexTree, - mcodec: MultiCodec = Sha256HashCodec, - nodes: openArray[ByteHash], - nleaves: int, -): ?!CodexTree = - if nodes.len == 0: - return failure "Empty nodes" - - let - digestSize = ?mcodec.digestSize.mapFailure - Zero = newSeq[byte](digestSize) - compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = - compress(x, y, key, mcodec) - - if digestSize != nodes[0].len: - return failure "Invalid hash length" - - var self = CodexTree(mcodec: mcodec) - ?self.fromNodes(compressor, Zero, nodes, nleaves) - - let - index = Rng.instance.rand(nleaves - 1) - proof = ?self.getProof(index) - - if not ?proof.verify(self.leaves[index], ?self.root): # sanity check - return failure "Unable to verify tree built from nodes" - - success self - -func init*( - _: type CodexProof, - mcodec: MultiCodec = Sha256HashCodec, - index: int, - nleaves: int, - nodes: openArray[ByteHash], -): ?!CodexProof = - if nodes.len == 0: - return failure "Empty nodes" - - let - digestSize = ?mcodec.digestSize.mapFailure - Zero = newSeq[byte](digestSize) - compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!seq[byte] {.noSideEffect.} = - compress(x, y, key, mcodec) - - success CodexProof( - compress: compressor, - zero: Zero, - mcodec: mcodec, - index: index, - nleaves: nleaves, - path: @nodes, - ) diff --git a/codex/merkletree/merkletree.nim b/codex/merkletree/merkletree.nim index 56491d84..1eb88a59 100644 --- a/codex/merkletree/merkletree.nim +++ b/codex/merkletree/merkletree.nim @@ -9,388 +9,241 @@ {.push raises: [].} -import std/[bitops, atomics, sequtils] -import stew/assign2 +import std/bitops +import std/[atomics, sequtils] +import pkg/questionable import pkg/questionable/results +import pkg/libp2p/[cid, multicodec, multihash] +import pkg/constantine/hashes import pkg/taskpools -import pkg/chronos import pkg/chronos/threadsync - +import pkg/merkletree +import ../utils +import ../rng import ../errors -import ../utils/sharedbuf +import ../codextypes -export sharedbuf +from ../utils/digest import digestBytes -template nodeData( - data: openArray[byte], offsets: openArray[int], nodeSize, i, j: int -): openArray[byte] = - ## Bytes of the j'th entry of the i'th level in the tree, starting with the - ## leaves (at level 0). - let start = (offsets[i] + j) * nodeSize - data.toOpenArray(start, start + nodeSize - 1) + +export merkletree + +logScope: + topics = "codex merkletree" type - # TODO hash functions don't fail - removing the ?! from this function would - # significantly simplify the flow below - CompressFn*[H, K] = proc(x, y: H, key: K): ?!H {.noSideEffect, raises: [].} + ByteTreeKey* {.pure.} = enum + KeyNone = 0x0.byte + KeyBottomLayer = 0x1.byte + KeyOdd = 0x2.byte + KeyOddAndBottomLayer = 0x3.byte - CompressData[H, K] = object - fn: CompressFn[H, K] - nodeSize: int - zero: H + ByteHash* = seq[byte] + ByteTree* = MerkleTree[ByteHash, ByteTreeKey] + ByteProof* = MerkleProof[ByteHash, ByteTreeKey] - MerkleTreeObj*[H, K] = object of RootObj - store*: seq[byte] - ## Flattened merkle tree where hashes are assumed to be trivial bytes and - ## uniform in size. - ## - ## Each layer of the tree is stored serially starting with the leaves and - ## ending with the root. - ## - ## Beacuse the tree might not be balanced, `layerOffsets` contains the - ## index of the starting point of each level, for easy lookup. - layerOffsets*: seq[int] - ## Starting point of each level in the tree, starting from the leaves - - ## multiplied by the entry size, this is the offset in the payload where - ## the entries of that level start - ## - ## For example, a tree with 4 leaves will have [0, 4, 6] stored here. - ## - ## See nodesPerLevel function, from whic this sequence is derived - compress*: CompressData[H, K] + CodexTree* = ref object of ByteTree + mcodec*: MultiCodec - MerkleTree*[H, K] = ref MerkleTreeObj[H, K] + CodexProof* = ref object of ByteProof + mcodec*: MultiCodec - MerkleProof*[H, K] = ref object of RootObj - index*: int # linear index of the leaf, starting from 0 - path*: seq[H] # order: from the bottom to the top - nleaves*: int # number of leaves in the tree (=size of input) - compress*: CompressFn[H, K] # compress function - zero*: H # zero value - -func levels*[H, K](self: MerkleTree[H, K]): int = - return self.layerOffsets.len - -func depth*[H, K](self: MerkleTree[H, K]): int = - return self.levels() - 1 - -func nodesInLayer(offsets: openArray[int], layer: int): int = - if layer == offsets.high: - 1 - else: - offsets[layer + 1] - offsets[layer] - -func nodesInLayer(self: MerkleTree | MerkleTreeObj, layer: int): int = - self.layerOffsets.nodesInLayer(layer) - -func leavesCount*[H, K](self: MerkleTree[H, K]): int = - return self.nodesInLayer(0) - -func nodesPerLevel(nleaves: int): seq[int] = - ## Given a number of leaves, return a seq with the number of nodes at each - ## layer of the tree (from the bottom/leaves to the root) - ## - ## Ie For a tree of 4 leaves, return `[4, 2, 1]` - if nleaves <= 0: - return @[] - elif nleaves == 1: - return @[1, 1] # leaf and root - - var nodes: seq[int] = @[] - var m = nleaves - while true: - nodes.add(m) - if m == 1: - break - # Next layer size is ceil(m/2) - m = (m + 1) shr 1 - - nodes - -func layerOffsets(nleaves: int): seq[int] = - ## Given a number of leaves, return a seq of the starting offsets of each - ## layer in the node store that results from flattening the binary tree - ## - ## Ie For a tree of 4 leaves, return `[0, 4, 6]` - let nodes = nodesPerLevel(nleaves) - var tot = 0 - let offsets = nodes.mapIt: - let cur = tot - tot += it - cur - offsets - -template nodeData(self: MerkleTreeObj, i, j: int): openArray[byte] = - ## Bytes of the j'th node of the i'th level in the tree, starting with the - ## leaves (at level 0). - self.store.nodeData(self.layerOffsets, self.compress.nodeSize, i, j) - -func layer*[H, K]( - self: MerkleTree[H, K], layer: int -): seq[H] {.deprecated: "Expensive".} = - var nodes = newSeq[H](self.nodesInLayer(layer)) - for i, h in nodes.mpairs: - assign(h, self[].nodeData(layer, i)) - return nodes - -func leaves*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} = - self.layer(0) - -iterator layers*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} = - for i in 0 ..< self.layerOffsets.len: - yield self.layer(i) - -proc layers*[H, K](self: MerkleTree[H, K]): seq[seq[H]] {.deprecated: "Expensive".} = - for l in self.layers(): - result.add l - -iterator nodes*[H, K](self: MerkleTree[H, K]): H = - ## Iterate over the nodes of each layer starting with the leaves - var node: H - for i in 0 ..< self.layerOffsets.len: - let nodesInLayer = self.nodesInLayer(i) - for j in 0 ..< nodesInLayer: - assign(node, self[].nodeData(i, j)) - yield node - -func root*[H, K](self: MerkleTree[H, K]): ?!H = - mixin assign - if self.layerOffsets.len == 0: - return failure "invalid tree" - - var h: H - assign(h, self[].nodeData(self.layerOffsets.high(), 0)) - return success h - -func getProof*[H, K]( - self: MerkleTree[H, K], index: int, proof: MerkleProof[H, K] -): ?!void = - let depth = self.depth - let nleaves = self.leavesCount - - if not (index >= 0 and index < nleaves): - return failure "index out of bounds" - - var path: seq[H] = newSeq[H](depth) - var k = index - var m = nleaves - for i in 0 ..< depth: - let j = k xor 1 - - if (j < m): - assign(path[i], self[].nodeData(i, j)) - else: - path[i] = self.compress.zero - - k = k shr 1 - m = (m + 1) shr 1 - - proof.index = index - proof.path = path - proof.nleaves = nleaves - proof.compress = self.compress.fn - - success() - -func getProof*[H, K](self: MerkleTree[H, K], index: int): ?!MerkleProof[H, K] = - var proof = MerkleProof[H, K]() +func getProof*(self: CodexTree, index: int): ?!CodexProof = + var proof = CodexProof(mcodec: self.mcodec) ?self.getProof(index, proof) success proof -func reconstructRoot*[H, K](proof: MerkleProof[H, K], leaf: H): ?!H = - var - m = proof.nleaves - j = proof.index - h = leaf - bottomFlag = K.KeyBottomLayer +func verify*(self: CodexProof, leaf: MultiHash, root: MultiHash): ?!bool = + ## Verify hash + ## - for p in proof.path: - let oddIndex: bool = (bitand(j, 1) != 0) - if oddIndex: - # the index of the child is odd, so the node itself can't be odd (a bit counterintuitive, yeah :) - h = ?proof.compress(p, h, bottomFlag) + let + rootBytes = root.digestBytes + leafBytes = leaf.digestBytes + + if self.mcodec != root.mcodec or self.mcodec != leaf.mcodec: + return failure "Hash codec mismatch" + + if rootBytes.len != root.size and leafBytes.len != leaf.size: + return failure "Invalid hash length" + + self.verify(leafBytes, rootBytes) + +func verify*(self: CodexProof, leaf: Cid, root: Cid): ?!bool = + self.verify(?leaf.mhash.mapFailure, ?leaf.mhash.mapFailure) + +proc rootCid*(self: CodexTree, version = CIDv1, dataCodec = DatasetRootCodec): ?!Cid = + if (?self.root).len == 0: + return failure "Empty root" + + let mhash = ?MultiHash.init(self.mcodec, ?self.root).mapFailure + + Cid.init(version, DatasetRootCodec, mhash).mapFailure + +func getLeafCid*( + self: CodexTree, i: Natural, version = CIDv1, dataCodec = BlockCodec +): ?!Cid = + if i >= self.leavesCount: + return failure "Invalid leaf index " & $i + + let + leaf = self.leaves[i] + mhash = ?MultiHash.init($self.mcodec, leaf).mapFailure + + Cid.init(version, dataCodec, mhash).mapFailure + +proc `$`*(self: CodexTree): string = + let root = + if self.root.isOk: + byteutils.toHex(self.root.get) else: - if j == m - 1: - # single child => odd node - h = ?proof.compress(h, p, K(bottomFlag.ord + 2)) - else: - # even node - h = ?proof.compress(h, p, bottomFlag) - bottomFlag = K.KeyNone - j = j shr 1 - m = (m + 1) shr 1 + "none" + "CodexTree(" & " root: " & root & ", leavesCount: " & $self.leavesCount & ", levels: " & + $self.levels & ", mcodec: " & $self.mcodec & " )" - return success h +proc `$`*(self: CodexProof): string = + "CodexProof(" & " nleaves: " & $self.nleaves & ", index: " & $self.index & ", path: " & + $self.path.mapIt(byteutils.toHex(it)) & ", mcodec: " & $self.mcodec & " )" -func verify*[H, K](proof: MerkleProof[H, K], leaf: H, root: H): ?!bool = - success bool(root == ?proof.reconstructRoot(leaf)) - -func fromNodes*[H, K]( - self: MerkleTree[H, K], - compressor: CompressFn, - zero: H, - nodes: openArray[H], - nleaves: int, -): ?!void = - mixin assign - - if nodes.len < 2: # At least leaf and root - return failure "Not enough nodes" - - if nleaves == 0: - return failure "No leaves" - - self.compress = CompressData[H, K](fn: compressor, nodeSize: nodes[0].len, zero: zero) - self.layerOffsets = layerOffsets(nleaves) - - if self.layerOffsets[^1] + 1 != nodes.len: - return failure "bad node count" - - self.store = newSeqUninit[byte](nodes.len * self.compress.nodeSize) - - for i in 0 ..< nodes.len: - assign( - self[].store.toOpenArray( - i * self.compress.nodeSize, (i + 1) * self.compress.nodeSize - 1 - ), - nodes[i], - ) - - success() - -func merkleTreeWorker[H, K]( - store: var openArray[byte], - offsets: openArray[int], - compress: CompressData[H, K], - layer: int, - isBottomLayer: static bool, -): ?!void = - ## Worker used to compute the merkle tree from the leaves that are assumed to - ## already be stored at the beginning of the `store`, as done by `prepare`. - - # Throughout, we use `assign` to convert from H to bytes and back, assuming - # this assignment can be done somewhat efficiently (ie memcpy) - because - # the code must work with multihash where len(H) is can differ, we cannot - # simply use a fixed-size array here. - mixin assign - - template nodeData(i, j: int): openArray[byte] = - # Pick out the bytes of node j in layer i - store.nodeData(offsets, compress.nodeSize, i, j) - - let m = offsets.nodesInLayer(layer) - - when not isBottomLayer: - if m == 1: - return success() - - let halfn: int = m div 2 - let n: int = 2 * halfn - let isOdd: bool = (n != m) - - # Because the compression function we work with works with H and not bytes, - # we need to extract H from the raw data - a little abstraction tax that - # ensures that properties like alignment of H are respected. - var a, b, tmp: H - - for i in 0 ..< halfn: - const key = when isBottomLayer: K.KeyBottomLayer else: K.KeyNone - - assign(a, nodeData(layer, i * 2)) - assign(b, nodeData(layer, i * 2 + 1)) - - tmp = ?compress.fn(a, b, key = key) - - assign(nodeData(layer + 1, i), tmp) - - if isOdd: - const key = when isBottomLayer: K.KeyOddAndBottomLayer else: K.KeyOdd - - assign(a, nodeData(layer, n)) - - tmp = ?compress.fn(a, compress.zero, key = key) - - assign(nodeData(layer + 1, halfn), tmp) - - merkleTreeWorker(store, offsets, compress, layer + 1, false) - -proc merkleTreeWorker[H, K]( - store: SharedBuf[byte], - offsets: SharedBuf[int], - compress: ptr CompressData[H, K], - signal: ThreadSignalPtr, -): bool = - defer: - discard signal.fireSync() - - let res = merkleTreeWorker( - store.toOpenArray(), offsets.toOpenArray(), compress[], 0, isBottomLayer = true - ) - - return res.isOk() - -func prepare*[H, K]( - self: MerkleTree[H, K], compressor: CompressFn, zero: H, leaves: openArray[H] -): ?!void = - ## Prepare the instance for computing the merkle tree of the given leaves using - ## the given compression function. After preparation, `compute` should be - ## called to perform the actual computation. `leaves` will be copied into the - ## tree so they can be freed after the call. +func compress*(x, y: openArray[byte], key: ByteTreeKey, codec: MultiCodec): ?!ByteHash = + ## Compress two hashes + ## + let input = @x & @y & @[key.byte] + let digest = ?MultiHash.digest(codec, input).mapFailure + success digest.digestBytes +func initTree(mcodec: MultiCodec, leaves: openArray[ByteHash]): ?!CodexTree = if leaves.len == 0: - return failure "No leaves" + return failure "Empty leaves" - self.compress = - CompressData[H, K](fn: compressor, nodeSize: leaves[0].len, zero: zero) - self.layerOffsets = layerOffsets(leaves.len) + let + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mcodec) + digestSize = ?mcodec.digestSize.mapFailure + Zero: ByteHash = newSeq[byte](digestSize) - self.store = newSeqUninit[byte]((self.layerOffsets[^1] + 1) * self.compress.nodeSize) + if digestSize != leaves[0].len: + return failure "Invalid hash length" - for j in 0 ..< leaves.len: - assign(self[].nodeData(0, j), leaves[j]) + var self = CodexTree(mcodec: mcodec) + ?self.prepare(compressor, Zero, leaves) + success self - return success() +func init*( + _: type CodexTree, mcodec: MultiCodec = Sha256HashCodec, leaves: openArray[ByteHash] +): ?!CodexTree = + let tree = ?initTree(mcodec, leaves) + ?tree.compute() + success tree -proc compute*[H, K](self: MerkleTree[H, K]): ?!void = - merkleTreeWorker( - self.store, self.layerOffsets, self.compress, 0, isBottomLayer = true +proc init*( + _: type CodexTree, + tp: Taskpool, + mcodec: MultiCodec = Sha256HashCodec, + leaves: seq[ByteHash], +): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = + let tree = ?initTree(mcodec, leaves) + ?await tree.compute(tp) + success tree + +func init*(_: type CodexTree, leaves: openArray[MultiHash]): ?!CodexTree = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = leaves[0].mcodec + leaves = leaves.mapIt(it.digestBytes) + + CodexTree.init(mcodec, leaves) + +proc init*( + _: type CodexTree, tp: Taskpool, leaves: seq[MultiHash] +): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = leaves[0].mcodec + leaves = leaves.mapIt(it.digestBytes) + + await CodexTree.init(tp, mcodec, leaves) + +func init*(_: type CodexTree, leaves: openArray[Cid]): ?!CodexTree = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = (?leaves[0].mhash.mapFailure).mcodec + leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes) + + CodexTree.init(mcodec, leaves) + +proc init*( + _: type CodexTree, tp: Taskpool, leaves: seq[Cid] +): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = + if leaves.len == 0: + return failure("Empty leaves") + + let + mcodec = (?leaves[0].mhash.mapFailure).mcodec + leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes) + + await CodexTree.init(tp, mcodec, leaves) + +proc fromNodes*( + _: type CodexTree, + mcodec: MultiCodec = Sha256HashCodec, + nodes: openArray[ByteHash], + nleaves: int, +): ?!CodexTree = + if nodes.len == 0: + return failure "Empty nodes" + + let + digestSize = ?mcodec.digestSize.mapFailure + Zero = newSeq[byte](digestSize) + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mcodec) + + if digestSize != nodes[0].len: + return failure "Invalid hash length" + + var self = CodexTree(mcodec: mcodec) + ?self.fromNodes(compressor, Zero, nodes, nleaves) + + let + index = Rng.instance.rand(nleaves - 1) + proof = ?self.getProof(index) + + if not ?proof.verify(self.leaves[index], ?self.root): # sanity check + return failure "Unable to verify tree built from nodes" + + success self + +func init*( + _: type CodexProof, + mcodec: MultiCodec = Sha256HashCodec, + index: int, + nleaves: int, + nodes: openArray[ByteHash], +): ?!CodexProof = + if nodes.len == 0: + return failure "Empty nodes" + + let + digestSize = ?mcodec.digestSize.mapFailure + Zero = newSeq[byte](digestSize) + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!seq[byte] {.noSideEffect.} = + compress(x, y, key, mcodec) + + success CodexProof( + compress: compressor, + zero: Zero, + mcodec: mcodec, + index: index, + nleaves: nleaves, + path: @nodes, ) - -proc compute*[H, K]( - self: MerkleTree[H, K], tp: Taskpool -): Future[?!void] {.async: (raises: []).} = - if tp.numThreads == 1: - # With a single thread, there's no point creating a separate task - return self.compute() - - # TODO this signal would benefit from reuse across computations - without signal =? ThreadSignalPtr.new(): - return failure("Unable to create thread signal") - - defer: - signal.close().expect("closing once works") - - let res = tp.spawn merkleTreeWorker( - SharedBuf.view(self.store), - SharedBuf.view(self.layerOffsets), - addr self.compress, - signal, - ) - - # To support cancellation, we'd have to ensure the task we posted to taskpools - # exits early - since we're not doing that, block cancellation attempts - try: - await noCancel signal.wait() - except AsyncError as exc: - # Since we initialized the signal, the OS or chronos is misbehaving. In any - # case, it would mean the task is still running which would cause a memory - # a memory violation if we let it run - panic instead - raiseAssert "Could not wait for signal, was it initialized? " & exc.msg - - if not res.sync(): - return failure("merkle tree task failed") - - return success() diff --git a/tests/codex/stores/testrepostore.nim b/tests/codex/stores/testrepostore.nim index 7eb9fd0d..b524c860 100644 --- a/tests/codex/stores/testrepostore.nim +++ b/tests/codex/stores/testrepostore.nim @@ -16,7 +16,7 @@ import pkg/codex/stores/repostore/operations import pkg/codex/blocktype as bt import pkg/codex/clock import pkg/codex/utils/safeasynciter -import pkg/codex/merkletree/codex +import pkg/codex/merkletree import ../../asynctest import ../helpers diff --git a/vendor/nim-merkletree b/vendor/nim-merkletree new file mode 160000 index 00000000..e465f38e --- /dev/null +++ b/vendor/nim-merkletree @@ -0,0 +1 @@ +Subproject commit e465f38efae72fbbe5f9f941a5c66a11660f3b5f