From dee521f7117c83a213bf301bd9ec82ca2b481b17 Mon Sep 17 00:00:00 2001 From: E M <5089238+emizzle@users.noreply.github.com> Date: Fri, 16 Jan 2026 22:03:42 +1100 Subject: [PATCH] WIP: Add entire merkletree module from codex This commit does not compile. There is a circular dependency importing the MerkleTree type. The interface needs to be defined in such a way that we can avoid this circular dependency. --- .gitignore | 1 + merkletree.nimble | 5 + src/merkletree.nim | 9 +- src/merkletree/{submodule.nim => codex.nim} | 9 +- src/merkletree/codex/coders.nim | 115 ++++++ src/merkletree/codex/codex.nim | 241 ++++++++++++ src/merkletree/merkletree.nim | 394 ++++++++++++++++++++ src/merkletree/utils/digest.nim | 7 + src/merkletree/utils/sharedbuf.nim | 24 ++ tests/helpers.nim | 11 + tests/test1.nim | 12 - tests/testcodexcoders.nim | 44 +++ tests/testcodextree.nim | 93 +++++ tests/testgenerictree.nim | 111 ++++++ 14 files changed, 1052 insertions(+), 24 deletions(-) create mode 100644 .gitignore rename src/merkletree/{submodule.nim => codex.nim} (62%) create mode 100644 src/merkletree/codex/coders.nim create mode 100644 src/merkletree/codex/codex.nim create mode 100644 src/merkletree/merkletree.nim create mode 100644 src/merkletree/utils/digest.nim create mode 100644 src/merkletree/utils/sharedbuf.nim create mode 100644 tests/helpers.nim delete mode 100644 tests/test1.nim create mode 100644 tests/testcodexcoders.nim create mode 100644 tests/testcodextree.nim create mode 100644 tests/testgenerictree.nim diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e0cc33 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +nimble diff --git a/merkletree.nimble b/merkletree.nimble index dc02e2f..76ef1b1 100644 --- a/merkletree.nimble +++ b/merkletree.nimble @@ -10,3 +10,8 @@ srcDir = "src" # Dependencies requires "nim >= 2.2.6" +requires "questionable ~= 0.10.15" +requires "stew ~= 0.4.2" +requires "unittest2 ~= 0.2.5" +requires "libp2p ~= 1.14.3" +requires "taskpools 
~= 0.1.0" diff --git a/src/merkletree.nim b/src/merkletree.nim index b7a2480..d6b4be7 100644 --- a/src/merkletree.nim +++ b/src/merkletree.nim @@ -1,7 +1,4 @@ -# This is just an example to get you started. A typical library package -# exports the main API in this file. Note that you cannot rename this file -# but you can remove it if you wish. +import pkg/merkletree/merkletree +import pkg/merkletree/codex -proc add*(x, y: int): int = - ## Adds two numbers together. - return x + y +export codex, merkletree \ No newline at end of file diff --git a/src/merkletree/submodule.nim b/src/merkletree/codex.nim similarity index 62% rename from src/merkletree/submodule.nim rename to src/merkletree/codex.nim index a70ab64..f3240e0 100644 --- a/src/merkletree/submodule.nim +++ b/src/merkletree/codex.nim @@ -3,10 +3,7 @@ # remove this file altogether. You may create additional modules alongside # this file as required. -type - Submodule* = object - name*: string +import ./codex/codex +import ./codex/coders -proc initSubmodule*(): Submodule = - ## Initialises a new ``Submodule`` object. - Submodule(name: "Anonymous") +export codex, coders diff --git a/src/merkletree/codex/coders.nim b/src/merkletree/codex/coders.nim new file mode 100644 index 0000000..f465ff4 --- /dev/null +++ b/src/merkletree/codex/coders.nim @@ -0,0 +1,115 @@ +## Logos Storage +## Copyright (c) 2023 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. 
+ +{.push raises: [], gcsafe.} + +import pkg/libp2p +import pkg/questionable +import pkg/questionable/results +import pkg/stew/byteutils +import pkg/serde/json + +# import ../../units +# import ../../errors + +# import ./codex + +const MaxMerkleTreeSize = 100.MiBs.uint +const MaxMerkleProofSize = 1.MiBs.uint + +proc encode*(self: CodexTree): seq[byte] = + var pb = initProtoBuffer() + pb.write(1, self.mcodec.uint64) + pb.write(2, self.leavesCount.uint64) + for node in self.nodes: + var nodesPb = initProtoBuffer() + nodesPb.write(1, node) + nodesPb.finish() + pb.write(3, nodesPb) + + pb.finish + pb.buffer + +proc decode*(_: type CodexTree, data: seq[byte]): ?!CodexTree = + var pb = initProtoBuffer(data) + var mcodecCode: uint64 + var leavesCount: uint64 + discard ?pb.getField(1, mcodecCode).mapFailure + discard ?pb.getField(2, leavesCount).mapFailure + + let mcodec = MultiCodec.codec(mcodecCode.int) + if mcodec == InvalidMultiCodec: + return failure("Invalid MultiCodec code " & $mcodecCode) + + var + nodesBuff: seq[seq[byte]] + nodes: seq[ByteHash] + + if ?pb.getRepeatedField(3, nodesBuff).mapFailure: + for nodeBuff in nodesBuff: + var node: ByteHash + discard ?initProtoBuffer(nodeBuff).getField(1, node).mapFailure + nodes.add node + + CodexTree.fromNodes(mcodec, nodes, leavesCount.int) + +proc encode*(self: CodexProof): seq[byte] = + var pb = initProtoBuffer() + pb.write(1, self.mcodec.uint64) + pb.write(2, self.index.uint64) + pb.write(3, self.nleaves.uint64) + + for node in self.path: + var nodesPb = initProtoBuffer() + nodesPb.write(1, node) + nodesPb.finish() + pb.write(4, nodesPb) + + pb.finish + pb.buffer + +proc decode*(_: type CodexProof, data: seq[byte]): ?!CodexProof = + var pb = initProtoBuffer(data) + var mcodecCode: uint64 + var index: uint64 + var nleaves: uint64 + discard ?pb.getField(1, mcodecCode).mapFailure + + let mcodec = MultiCodec.codec(mcodecCode.int) + if mcodec == InvalidMultiCodec: + return failure("Invalid MultiCodec code " & $mcodecCode) 
+
+  discard ?pb.getField(2, index).mapFailure
+  discard ?pb.getField(3, nleaves).mapFailure
+
+  var
+    nodesBuff: seq[seq[byte]]
+    nodes: seq[ByteHash]
+
+  if ?pb.getRepeatedField(4, nodesBuff).mapFailure:
+    for nodeBuff in nodesBuff:
+      var node: ByteHash
+      let nodePb = initProtoBuffer(nodeBuff)
+      discard ?nodePb.getField(1, node).mapFailure
+      nodes.add node
+
+  CodexProof.init(mcodec, index.int, nleaves.int, nodes)
+
+proc fromJson*(_: type CodexProof, json: JsonNode): ?!CodexProof =
+  expectJsonKind(CodexProof, JString, json) # report the type actually being parsed
+  var bytes: seq[byte]
+  try:
+    bytes = hexToSeqByte(json.str) # the proof travels as a hex string, see `%`
+  except ValueError as err:
+    return failure(err)
+
+  CodexProof.decode(bytes)
+
+func `%`*(proof: CodexProof): JsonNode =
+  %byteutils.toHex(proof.encode())
diff --git a/src/merkletree/codex/codex.nim b/src/merkletree/codex/codex.nim
new file mode 100644
index 0000000..29cf582
--- /dev/null
+++ b/src/merkletree/codex/codex.nim
@@ -0,0 +1,241 @@
+## Logos Storage
+## Copyright (c) 2023 Status Research & Development GmbH
+## Licensed under either of
+##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
+##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
+## at your option.
+## This file may not be copied, modified, or distributed except according to
+## those terms.
+
+{.push raises: [].}
+
+import std/bitops
+import std/sequtils
+
+import pkg/questionable
+import pkg/questionable/results
+import pkg/libp2p/[cid, multicodec, multihash]
+import ../merkletree # generic tree module; the package umbrella imports this file back
+# import ../../utils
+# import ../../rng
+# import ../../errors
+# import ../../blocktype
+import ../utils/digest
+
+export merkletree
+
+type
+  ByteTreeKey* {.pure.} = enum
+    KeyNone = 0x0.byte
+    KeyBottomLayer = 0x1.byte
+    KeyOdd = 0x2.byte
+    KeyOddAndBottomLayer = 0x3.byte
+
+  ByteHash* = seq[byte]
+  ByteTree* = MerkleTree[ByteHash, ByteTreeKey]
+  ByteProof* = MerkleProof[ByteHash, ByteTreeKey]
+
+  CodexTree* = ref object of ByteTree
+    mcodec*: MultiCodec
+
+  CodexProof* = ref object of ByteProof
+    mcodec*: MultiCodec
+
+func getProof*(self: CodexTree, index: int): ?!CodexProof =
+  var proof = CodexProof(mcodec: self.mcodec)
+
+  ?self.getProof(index, proof)
+
+  success proof
+
+func verify*(self: CodexProof, leaf: MultiHash, root: MultiHash): ?!bool =
+  ## Verify that `leaf` hashes up to `root` along this proof; fails when either
+  ## hash uses a codec other than the proof's or carries a malformed digest.
+
+  let
+    rootBytes = root.digestBytes
+    leafBytes = leaf.digestBytes
+
+  if self.mcodec != root.mcodec or self.mcodec != leaf.mcodec:
+    return failure "Hash codec mismatch"
+
+  if rootBytes.len != root.size or leafBytes.len != leaf.size: # either one bad is enough
+    return failure "Invalid hash length"
+
+  self.verify(leafBytes, rootBytes)
+
+func verify*(self: CodexProof, leaf: Cid, root: Cid): ?!bool =
+  self.verify(?leaf.mhash.mapFailure, ?root.mhash.mapFailure)
+
+proc rootCid*(self: CodexTree, version = CIDv1, dataCodec = DatasetRootCodec): ?!Cid =
+  if (?self.root).len == 0:
+    return failure "Empty root"
+
+  let mhash = ?MultiHash.init(self.mcodec, ?self.root).mapFailure
+
+  Cid.init(version, dataCodec, mhash).mapFailure # honour the caller-supplied codec
+
+func getLeafCid*(
+    self: CodexTree, i: Natural, version = CIDv1, dataCodec = BlockCodec
+): ?!Cid =
+  if i >= self.leavesCount:
+    return failure "Invalid leaf index " & $i
+
+  let
+    leaf = self.leaves[i]
+    mhash = ?MultiHash.init($self.mcodec, leaf).mapFailure
+
+  Cid.init(version,
dataCodec, mhash).mapFailure + +proc `$`*(self: CodexTree): string = + let root = + if self.root.isOk: + byteutils.toHex(self.root.get) + else: + "none" + "CodexTree(" & " root: " & root & ", leavesCount: " & $self.leavesCount & ", levels: " & + $self.levels & ", mcodec: " & $self.mcodec & " )" + +proc `$`*(self: CodexProof): string = + "CodexProof(" & " nleaves: " & $self.nleaves & ", index: " & $self.index & ", path: " & + $self.path.mapIt(byteutils.toHex(it)) & ", mcodec: " & $self.mcodec & " )" + +func compress*(x, y: openArray[byte], key: ByteTreeKey, codec: MultiCodec): ?!ByteHash = + ## Compress two hashes + ## + let input = @x & @y & @[key.byte] + let digest = ?MultiHash.digest(codec, input).mapFailure + success digest.digestBytes + +func initTree(mcodec: MultiCodec, leaves: openArray[ByteHash]): ?!CodexTree = + if leaves.len == 0: + return failure "Empty leaves" + + let + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mcodec) + digestSize = ?mcodec.digestSize.mapFailure + Zero: ByteHash = newSeq[byte](digestSize) + + if digestSize != leaves[0].len: + return failure "Invalid hash length" + + var self = CodexTree(mcodec: mcodec) + ?self.prepare(compressor, Zero, leaves) + success self + +func init*( + _: type CodexTree, mcodec: MultiCodec = Sha256HashCodec, leaves: openArray[ByteHash] +): ?!CodexTree = + let tree = ?initTree(mcodec, leaves) + ?tree.compute() + success tree + +proc init*( + _: type CodexTree, + tp: Taskpool, + mcodec: MultiCodec = Sha256HashCodec, + leaves: seq[ByteHash], +): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = + let tree = ?initTree(mcodec, leaves) + ?await tree.compute(tp) + success tree + +func init*(_: type CodexTree, leaves: openArray[MultiHash]): ?!CodexTree = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = leaves[0].mcodec + leaves = leaves.mapIt(it.digestBytes) + + CodexTree.init(mcodec, leaves) + +proc init*( + _: type 
CodexTree, tp: Taskpool, leaves: seq[MultiHash] +): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = leaves[0].mcodec + leaves = leaves.mapIt(it.digestBytes) + + await CodexTree.init(tp, mcodec, leaves) + +func init*(_: type CodexTree, leaves: openArray[Cid]): ?!CodexTree = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = (?leaves[0].mhash.mapFailure).mcodec + leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes) + + CodexTree.init(mcodec, leaves) + +proc init*( + _: type CodexTree, tp: Taskpool, leaves: seq[Cid] +): Future[?!CodexTree] {.async: (raises: [CancelledError]).} = + if leaves.len == 0: + return failure("Empty leaves") + + let + mcodec = (?leaves[0].mhash.mapFailure).mcodec + leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes) + + await CodexTree.init(tp, mcodec, leaves) + +proc fromNodes*( + _: type CodexTree, + mcodec: MultiCodec = Sha256HashCodec, + nodes: openArray[ByteHash], + nleaves: int, +): ?!CodexTree = + if nodes.len == 0: + return failure "Empty nodes" + + let + digestSize = ?mcodec.digestSize.mapFailure + Zero = newSeq[byte](digestSize) + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mcodec) + + if digestSize != nodes[0].len: + return failure "Invalid hash length" + + var self = CodexTree(mcodec: mcodec) + ?self.fromNodes(compressor, Zero, nodes, nleaves) + + let + index = Rng.instance.rand(nleaves - 1) + proof = ?self.getProof(index) + + if not ?proof.verify(self.leaves[index], ?self.root): # sanity check + return failure "Unable to verify tree built from nodes" + + success self + +func init*( + _: type CodexProof, + mcodec: MultiCodec = Sha256HashCodec, + index: int, + nleaves: int, + nodes: openArray[ByteHash], +): ?!CodexProof = + if nodes.len == 0: + return failure "Empty nodes" + + let + digestSize = ?mcodec.digestSize.mapFailure + Zero = 
newSeq[byte](digestSize) + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!seq[byte] {.noSideEffect.} = + compress(x, y, key, mcodec) + + success CodexProof( + compress: compressor, + zero: Zero, + mcodec: mcodec, + index: index, + nleaves: nleaves, + path: @nodes, + ) diff --git a/src/merkletree/merkletree.nim b/src/merkletree/merkletree.nim new file mode 100644 index 0000000..a3cc7af --- /dev/null +++ b/src/merkletree/merkletree.nim @@ -0,0 +1,394 @@ +## Logos Storage +## Copyright (c) 2023 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +{.push raises: [].} + +import std/[bitops, atomics, sequtils] +import pkg/taskpools +import pkg/chronos +import pkg/chronos/threadsync +# import pkg/libp2p/[multicodec] +import pkg/stew/assign2 +import pkg/questionable/results +import pkg/merkletree/utils/sharedbuf + +export sharedbuf + +template nodeData( + data: openArray[byte], offsets: openArray[int], nodeSize, i, j: int +): openArray[byte] = + ## Bytes of the j'th entry of the i'th level in the tree, starting with the + ## leaves (at level 0). + let start = (offsets[i] + j) * nodeSize + data.toOpenArray(start, start + nodeSize - 1) + +type + # TODO hash functions don't fail - removing the ?! from this function would + # significantly simplify the flow below + CompressFn*[H, K] = proc(x, y: H, key: K): ?!H {.noSideEffect, raises: [].} + + CompressData[H, K] = object + fn: CompressFn[H, K] + nodeSize: int + zero: H + + MerkleTreeObj*[H, K] = object of RootObj + store*: seq[byte] + ## Flattened merkle tree where hashes are assumed to be trivial bytes and + ## uniform in size. + ## + ## Each layer of the tree is stored serially starting with the leaves and + ## ending with the root. 
+      ##
+      ## Because the tree might not be balanced, `layerOffsets` contains the
+      ## index of the starting point of each level, for easy lookup.
+    layerOffsets*: seq[int]
+      ## Starting point of each level in the tree, starting from the leaves -
+      ## multiplied by the entry size, this is the offset in the payload where
+      ## the entries of that level start
+      ##
+      ## For example, a tree with 4 leaves will have [0, 4, 6] stored here.
+      ##
+      ## See nodesPerLevel function, from which this sequence is derived
+    compress*: CompressData[H, K]
+
+  MerkleTree*[H, K] = ref MerkleTreeObj[H, K]
+
+  MerkleProof*[H, K] = ref object of RootObj
+    index*: int # linear index of the leaf, starting from 0
+    path*: seq[H] # order: from the bottom to the top
+    nleaves*: int # number of leaves in the tree (=size of input)
+    compress*: CompressFn[H, K] # compress function
+    zero*: H # zero value
+
+func levels*[H, K](self: MerkleTree[H, K]): int =
+  return self.layerOffsets.len
+
+func depth*[H, K](self: MerkleTree[H, K]): int =
+  return self.levels() - 1
+
+func nodesInLayer(offsets: openArray[int], layer: int): int =
+  if layer == offsets.high:
+    1
+  else:
+    offsets[layer + 1] - offsets[layer]
+
+func nodesInLayer(self: MerkleTree | MerkleTreeObj, layer: int): int =
+  self.layerOffsets.nodesInLayer(layer)
+
+func leavesCount*[H, K](self: MerkleTree[H, K]): int =
+  return self.nodesInLayer(0)
+
+func nodesPerLevel(nleaves: int): seq[int] =
+  ## Given a number of leaves, return a seq with the number of nodes at each
+  ## layer of the tree (from the bottom/leaves to the root)
+  ##
+  ## I.e. for a tree of 4 leaves, return `[4, 2, 1]`
+  if nleaves <= 0:
+    return @[]
+  elif nleaves == 1:
+    return @[1, 1] # leaf and root
+
+  var nodes: seq[int] = @[]
+  var m = nleaves
+  while true:
+    nodes.add(m)
+    if m == 1:
+      break
+    # Next layer size is ceil(m/2)
+    m = (m + 1) shr 1
+
+  nodes
+
+func layerOffsets(nleaves: int): seq[int] =
+  ## Given a number of leaves, return a seq of the starting offsets of each
+ ## layer in the node store that results from flattening the binary tree + ## + ## Ie For a tree of 4 leaves, return `[0, 4, 6]` + let nodes = nodesPerLevel(nleaves) + var tot = 0 + let offsets = nodes.mapIt: + let cur = tot + tot += it + cur + offsets + +template nodeData(self: MerkleTreeObj, i, j: int): openArray[byte] = + ## Bytes of the j'th node of the i'th level in the tree, starting with the + ## leaves (at level 0). + self.store.nodeData(self.layerOffsets, self.compress.nodeSize, i, j) + +func layer*[H, K]( + self: MerkleTree[H, K], layer: int +): seq[H] {.deprecated: "Expensive".} = + var nodes = newSeq[H](self.nodesInLayer(layer)) + for i, h in nodes.mpairs: + assign(h, self[].nodeData(layer, i)) + return nodes + +func leaves*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} = + self.layer(0) + +iterator layers*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} = + for i in 0 ..< self.layerOffsets.len: + yield self.layer(i) + +proc layers*[H, K](self: MerkleTree[H, K]): seq[seq[H]] {.deprecated: "Expensive".} = + for l in self.layers(): + result.add l + +iterator nodes*[H, K](self: MerkleTree[H, K]): H = + ## Iterate over the nodes of each layer starting with the leaves + var node: H + for i in 0 ..< self.layerOffsets.len: + let nodesInLayer = self.nodesInLayer(i) + for j in 0 ..< nodesInLayer: + assign(node, self[].nodeData(i, j)) + yield node + +func root*[H, K](self: MerkleTree[H, K]): ?!H = + mixin assign + if self.layerOffsets.len == 0: + return failure "invalid tree" + + var h: H + assign(h, self[].nodeData(self.layerOffsets.high(), 0)) + return success h + +func getProof*[H, K]( + self: MerkleTree[H, K], index: int, proof: MerkleProof[H, K] +): ?!void = + let depth = self.depth + let nleaves = self.leavesCount + + if not (index >= 0 and index < nleaves): + return failure "index out of bounds" + + var path: seq[H] = newSeq[H](depth) + var k = index + var m = nleaves + for i in 0 ..< depth: + let j = k xor 1 + + if 
(j < m):
+      assign(path[i], self[].nodeData(i, j))
+    else:
+      path[i] = self.compress.zero
+
+    k = k shr 1
+    m = (m + 1) shr 1
+
+  proof.index = index
+  proof.path = path
+  proof.nleaves = nleaves
+  proof.compress = self.compress.fn
+
+  success()
+
+func getProof*[H, K](self: MerkleTree[H, K], index: int): ?!MerkleProof[H, K] =
+  var proof = MerkleProof[H, K]()
+
+  ?self.getProof(index, proof)
+
+  success proof
+
+func reconstructRoot*[H, K](proof: MerkleProof[H, K], leaf: H): ?!H =
+  var
+    m = proof.nleaves
+    j = proof.index
+    h = leaf
+    bottomFlag = K.KeyBottomLayer
+
+  for p in proof.path:
+    let oddIndex: bool = (bitand(j, 1) != 0)
+    if oddIndex:
+      # the index of the child is odd, so the node itself can't be odd (a bit counterintuitive, yeah :)
+      h = ?proof.compress(p, h, bottomFlag)
+    else:
+      if j == m - 1:
+        # single child => odd node
+        h = ?proof.compress(h, p, K(bottomFlag.ord + 2))
+      else:
+        # even node
+        h = ?proof.compress(h, p, bottomFlag)
+    bottomFlag = K.KeyNone
+    j = j shr 1
+    m = (m + 1) shr 1
+
+  return success h
+
+func verify*[H, K](proof: MerkleProof[H, K], leaf: H, root: H): ?!bool =
+  success bool(root == ?proof.reconstructRoot(leaf))
+
+func fromNodes*[H, K](
+    self: MerkleTree[H, K],
+    compressor: CompressFn,
+    zero: H,
+    nodes: openArray[H],
+    nleaves: int,
+): ?!void =
+  mixin assign
+
+  if nodes.len < 2: # At least leaf and root
+    return failure "Not enough nodes"
+
+  if nleaves <= 0: # a negative count would leave layerOffsets empty below
+    return failure "No leaves"
+
+  self.compress = CompressData[H, K](fn: compressor, nodeSize: nodes[0].len, zero: zero)
+  self.layerOffsets = layerOffsets(nleaves)
+
+  if self.layerOffsets[^1] + 1 != nodes.len:
+    return failure "bad node count"
+
+  self.store = newSeqUninit[byte](nodes.len * self.compress.nodeSize)
+
+  for i in 0 ..< nodes.len:
+    assign(
+      self[].store.toOpenArray(
+        i * self.compress.nodeSize, (i + 1) * self.compress.nodeSize - 1
+      ),
+      nodes[i],
+    )
+
+  success()
+
+func merkleTreeWorker[H, K](
+    store: var openArray[byte],
+    offsets: openArray[int],
+    compress: CompressData[H, K],
+    layer: int,
+    isBottomLayer: static bool,
+): ?!void =
+  ## Worker used to compute the merkle tree from the leaves that are assumed to
+  ## already be stored at the beginning of the `store`, as done by `prepare`.
+
+  # Throughout, we use `assign` to convert from H to bytes and back, assuming
+  # this assignment can be done somewhat efficiently (ie memcpy) - because
+  # the code must work with multihash where len(H) can differ, we cannot
+  # simply use a fixed-size array here.
+  mixin assign
+
+  template nodeData(i, j: int): openArray[byte] =
+    # Pick out the bytes of node j in layer i
+    store.nodeData(offsets, compress.nodeSize, i, j)
+
+  let m = offsets.nodesInLayer(layer)
+
+  when not isBottomLayer:
+    if m == 1:
+      return success()
+
+  let halfn: int = m div 2
+  let n: int = 2 * halfn
+  let isOdd: bool = (n != m)
+
+  # Because the compression function we work with works with H and not bytes,
+  # we need to extract H from the raw data - a little abstraction tax that
+  # ensures that properties like alignment of H are respected.
+ var a, b, tmp: H + + for i in 0 ..< halfn: + const key = when isBottomLayer: K.KeyBottomLayer else: K.KeyNone + + assign(a, nodeData(layer, i * 2)) + assign(b, nodeData(layer, i * 2 + 1)) + + tmp = ?compress.fn(a, b, key = key) + + assign(nodeData(layer + 1, i), tmp) + + if isOdd: + const key = when isBottomLayer: K.KeyOddAndBottomLayer else: K.KeyOdd + + assign(a, nodeData(layer, n)) + + tmp = ?compress.fn(a, compress.zero, key = key) + + assign(nodeData(layer + 1, halfn), tmp) + + merkleTreeWorker(store, offsets, compress, layer + 1, false) + +proc merkleTreeWorker[H, K]( + store: SharedBuf[byte], + offsets: SharedBuf[int], + compress: ptr CompressData[H, K], + signal: ThreadSignalPtr, +): bool = + defer: + discard signal.fireSync() + + let res = merkleTreeWorker( + store.toOpenArray(), offsets.toOpenArray(), compress[], 0, isBottomLayer = true + ) + + return res.isOk() + +func prepare*[H, K]( + self: MerkleTree[H, K], compressor: CompressFn, zero: H, leaves: openArray[H] +): ?!void = + ## Prepare the instance for computing the merkle tree of the given leaves using + ## the given compression function. After preparation, `compute` should be + ## called to perform the actual computation. `leaves` will be copied into the + ## tree so they can be freed after the call. 
+
+  if leaves.len == 0:
+    return failure "No leaves"
+
+  self.compress =
+    CompressData[H, K](fn: compressor, nodeSize: leaves[0].len, zero: zero)
+  self.layerOffsets = layerOffsets(leaves.len)
+
+  self.store = newSeqUninit[byte]((self.layerOffsets[^1] + 1) * self.compress.nodeSize)
+
+  for j in 0 ..< leaves.len:
+    assign(self[].nodeData(0, j), leaves[j])
+
+  return success()
+
+proc compute*[H, K](self: MerkleTree[H, K]): ?!void =
+  merkleTreeWorker(
+    self.store, self.layerOffsets, self.compress, 0, isBottomLayer = true
+  )
+
+proc compute*[H, K](
+    self: MerkleTree[H, K], tp: Taskpool
+): Future[?!void] {.async: (raises: []).} =
+  if tp.numThreads == 1:
+    # With a single thread, there's no point creating a separate task
+    return self.compute()
+
+  # TODO this signal would benefit from reuse across computations
+  without signal =? ThreadSignalPtr.new():
+    return failure("Unable to create thread signal")
+
+  defer:
+    signal.close().expect("closing once works")
+
+  let res = tp.spawn merkleTreeWorker(
+    SharedBuf.view(self.store),
+    SharedBuf.view(self.layerOffsets),
+    addr self.compress,
+    signal,
+  )
+
+  # To support cancellation, we'd have to ensure the task we posted to taskpools
+  # exits early - since we're not doing that, block cancellation attempts
+  try:
+    await noCancel signal.wait()
+  except AsyncError as exc:
+    # Since we initialized the signal, the OS or chronos is misbehaving. In any
+    # case, it would mean the task is still running which would cause a
+    # memory violation if we let it run - panic instead
+    raiseAssert "Could not wait for signal, was it initialized? " & exc.msg
+
+  if not res.sync():
+    return failure("merkle tree task failed")
+
+  return success()
diff --git a/src/merkletree/utils/digest.nim b/src/merkletree/utils/digest.nim
new file mode 100644
index 0000000..c756da6
--- /dev/null
+++ b/src/merkletree/utils/digest.nim
@@ -0,0 +1,7 @@
+from pkg/libp2p import MultiHash
+
+func digestBytes*(mhash: MultiHash): seq[byte] =
+  ## Return a copy of the `mhash.size` bytes of the multihash buffer that
+  ## start at `mhash.dpos`.
+
+  mhash.data.buffer[mhash.dpos ..< mhash.dpos + mhash.size]
\ No newline at end of file
diff --git a/src/merkletree/utils/sharedbuf.nim b/src/merkletree/utils/sharedbuf.nim
new file mode 100644
index 0000000..186d712
--- /dev/null
+++ b/src/merkletree/utils/sharedbuf.nim
@@ -0,0 +1,24 @@
+import stew/ptrops
+
+type SharedBuf*[T] = object
+  payload*: ptr UncheckedArray[T]
+  len*: int
+
+proc view*[T](_: type SharedBuf, v: openArray[T]): SharedBuf[T] =
+  if v.len > 0:
+    SharedBuf[T](payload: makeUncheckedArray(addr v[0]), len: v.len)
+  else:
+    default(SharedBuf[T])
+
+template checkIdx(v: SharedBuf, i: int) =
+  doAssert i >= 0 and i < v.len # valid indices are 0 .. len - 1
+
+proc `[]`*[T](v: SharedBuf[T], i: int): var T =
+  v.checkIdx(i)
+  v.payload[i]
+
+template toOpenArray*[T](v: SharedBuf[T]): var openArray[T] =
+  v.payload.toOpenArray(0, v.len - 1)
+
+template toOpenArray*[T](v: SharedBuf[T], s, e: int): var openArray[T] =
+  v.toOpenArray().toOpenArray(s, e)
diff --git a/tests/helpers.nim b/tests/helpers.nim
new file mode 100644
index 0000000..3715ae1
--- /dev/null
+++ b/tests/helpers.nim
@@ -0,0 +1,11 @@
+import pkg/merkletree
+# import ./helpers
+
+export merkletree#, helpers
+
+proc `==`*(a, b: CodexTree): bool =
+  (a.mcodec == b.mcodec) and (a.leavesCount == b.leavesCount) and (a.levels == b.levels)
+
+proc `==`*(a, b: CodexProof): bool =
+  (a.mcodec == b.mcodec) and (a.nleaves == b.nleaves) and (a.path == b.path) and
+    (a.index == b.index)
diff --git a/tests/test1.nim b/tests/test1.nim
deleted file mode 100644
index 4bf04d6..0000000
--- a/tests/test1.nim
+++ /dev/null
@@ -1,12 +0,0
@@ -# This is just an example to get you started. You may wish to put all of your -# tests into a single file, or separate them into multiple `test1`, `test2` -# etc. files (better names are recommended, just make sure the name starts with -# the letter 't'). -# -# To run these tests, simply execute `nimble test`. - -import unittest - -import merkletree -test "can add": - check add(5, 5) == 10 diff --git a/tests/testcodexcoders.nim b/tests/testcodexcoders.nim new file mode 100644 index 0000000..2c5ebfc --- /dev/null +++ b/tests/testcodexcoders.nim @@ -0,0 +1,44 @@ +import pkg/unittest2 + +import pkg/questionable/results +import pkg/stew/byteutils + +import pkg/merkletree +import ./helpers + +const data = [ + "00000000000000000000000000000001".toBytes, + "00000000000000000000000000000002".toBytes, + "00000000000000000000000000000003".toBytes, + "00000000000000000000000000000004".toBytes, + "00000000000000000000000000000005".toBytes, + "00000000000000000000000000000006".toBytes, + "00000000000000000000000000000007".toBytes, + "00000000000000000000000000000008".toBytes, + "00000000000000000000000000000009".toBytes, "00000000000000000000000000000010".toBytes, +] + +suite "merkletree - coders": + test "encoding and decoding a tree yields the same tree": + let + tree = CodexTree.init(Sha256HashCodec, data).tryGet() + encodedBytes = tree.encode() + decodedTree = CodexTree.decode(encodedBytes).tryGet() + + check: + tree == decodedTree + + test "encoding and decoding a proof yields the same proof": + let + tree = CodexTree.init(Sha256HashCodec, data).tryGet() + proof = tree.getProof(4).tryGet() + + check: + proof.verify(tree.leaves[4], tree.root.tryGet).isOk + + let + encodedBytes = proof.encode() + decodedProof = CodexProof.decode(encodedBytes).tryGet() + + check: + proof == decodedProof diff --git a/tests/testcodextree.nim b/tests/testcodextree.nim new file mode 100644 index 0000000..91ccd7f --- /dev/null +++ b/tests/testcodextree.nim @@ -0,0 +1,93 @@ +import std/sequtils 
+ +import pkg/unittest2 +import pkg/questionable/results +import pkg/stew/byteutils +import pkg/libp2p + +import pkg/codex/codextypes +import pkg/codex/merkletree +import pkg/codex/utils/digest + +import ./helpers +import ./generictreetests + +# TODO: Generalize to other hashes + +const + data = [ + "00000000000000000000000000000001".toBytes, + "00000000000000000000000000000002".toBytes, + "00000000000000000000000000000003".toBytes, + "00000000000000000000000000000004".toBytes, + "00000000000000000000000000000005".toBytes, + "00000000000000000000000000000006".toBytes, + "00000000000000000000000000000007".toBytes, + "00000000000000000000000000000008".toBytes, + "00000000000000000000000000000009".toBytes, + "00000000000000000000000000000010".toBytes, + ] + sha256 = Sha256HashCodec + +suite "Test CodexTree": + test "Cannot init tree without any multihash leaves": + check: + CodexTree.init(leaves = newSeq[MultiHash]()).isErr + + test "Cannot init tree without any cid leaves": + check: + CodexTree.init(leaves = newSeq[Cid]()).isErr + + test "Cannot init tree without any byte leaves": + check: + CodexTree.init(sha256, leaves = newSeq[ByteHash]()).isErr + + test "Should build tree from multihash leaves": + var expectedLeaves = data.mapIt(MultiHash.digest($sha256, it).tryGet()) + + var tree = CodexTree.init(leaves = expectedLeaves) + check: + tree.isOk + tree.get().leaves == expectedLeaves.mapIt(it.digestBytes) + tree.get().mcodec == sha256 + + test "Should build tree from cid leaves": + var expectedLeaves = data.mapIt( + Cid.init(CidVersion.CIDv1, BlockCodec, MultiHash.digest($sha256, it).tryGet).tryGet + ) + + let tree = CodexTree.init(leaves = expectedLeaves) + + check: + tree.isOk + tree.get().leaves == expectedLeaves.mapIt(it.mhash.tryGet.digestBytes) + tree.get().mcodec == sha256 + + test "Should build from raw digestbytes (should not hash leaves)": + let tree = CodexTree.init(sha256, leaves = data).tryGet + + check: + tree.mcodec == sha256 + tree.leaves == data + + 
test "Should build from nodes": + let + tree = CodexTree.init(sha256, leaves = data).tryGet + fromNodes = CodexTree.fromNodes( + nodes = toSeq(tree.nodes), nleaves = tree.leavesCount + ).tryGet + + check: + tree.mcodec == sha256 + tree == fromNodes + +let + digestSize = sha256.digestSize.get + zero: seq[byte] = newSeq[byte](digestSize) + compress = proc(x, y: seq[byte], key: ByteTreeKey): seq[byte] = + compress(x, y, key, sha256).tryGet + + makeTree = proc(data: seq[seq[byte]]): CodexTree = + CodexTree.init(sha256, leaves = data).tryGet + +testGenericTree("CodexTree", @data, zero, compress, makeTree) diff --git a/tests/testgenerictree.nim b/tests/testgenerictree.nim new file mode 100644 index 0000000..9d39864 --- /dev/null +++ b/tests/testgenerictree.nim @@ -0,0 +1,111 @@ +import pkg/unittest2 + +import pkg/merkletree + +proc testGenericTree*[H, K, U]( + name: string, + data: openArray[H], + zero: H, + compress: proc(z, y: H, key: K): H, + makeTree: proc(data: seq[H]): U, +) = + let data = @data + + suite "Correctness tests - " & name: + test "Should build correct tree for even bottom layer": + let expectedRoot = compress( + compress( + compress(data[0], data[1], K.KeyBottomLayer), + compress(data[2], data[3], K.KeyBottomLayer), + K.KeyNone, + ), + compress( + compress(data[4], data[5], K.KeyBottomLayer), + compress(data[6], data[7], K.KeyBottomLayer), + K.KeyNone, + ), + K.KeyNone, + ) + + let tree = makeTree(data[0 .. 7]) + + check: + tree.root.tryGet == expectedRoot + + test "Should build correct tree for odd bottom layer": + let expectedRoot = compress( + compress( + compress(data[0], data[1], K.KeyBottomLayer), + compress(data[2], data[3], K.KeyBottomLayer), + K.KeyNone, + ), + compress( + compress(data[4], data[5], K.KeyBottomLayer), + compress(data[6], zero, K.KeyOddAndBottomLayer), + K.KeyNone, + ), + K.KeyNone, + ) + + let tree = makeTree(data[0 .. 
6])
+
+      check:
+        tree.root.tryGet == expectedRoot
+
+    test "Should build correct tree for even bottom and odd upper layers":
+      let expectedRoot = compress(
+        compress(
+          compress(
+            compress(data[0], data[1], K.KeyBottomLayer),
+            compress(data[2], data[3], K.KeyBottomLayer),
+            K.KeyNone,
+          ),
+          compress(
+            compress(data[4], data[5], K.KeyBottomLayer),
+            compress(data[6], data[7], K.KeyBottomLayer),
+            K.KeyNone,
+          ),
+          K.KeyNone,
+        ),
+        compress(
+          compress(compress(data[8], data[9], K.KeyBottomLayer), zero, K.KeyOdd),
+          zero,
+          K.KeyOdd,
+        ),
+        K.KeyNone,
+      )
+
+      let tree = makeTree(data[0 .. 9])
+
+      check:
+        tree.root.tryGet == expectedRoot
+
+    test "Should get and validate correct proofs":
+      let expectedRoot = compress(
+        compress(
+          compress(
+            compress(data[0], data[1], K.KeyBottomLayer),
+            compress(data[2], data[3], K.KeyBottomLayer),
+            K.KeyNone,
+          ),
+          compress(
+            compress(data[4], data[5], K.KeyBottomLayer),
+            compress(data[6], data[7], K.KeyBottomLayer),
+            K.KeyNone,
+          ),
+          K.KeyNone,
+        ),
+        compress(
+          compress(compress(data[8], data[9], K.KeyBottomLayer), zero, K.KeyOdd),
+          zero,
+          K.KeyOdd,
+        ),
+        K.KeyNone,
+      )
+
+      let tree = makeTree(data)
+
+      for i in 0 ..< data.len:
+        let proof = tree.getProof(i).tryGet
+        check:
+          proof.verify(tree.leaves[i], expectedRoot).tryGet # must be true, not merely Ok