From dee521f7117c83a213bf301bd9ec82ca2b481b17 Mon Sep 17 00:00:00 2001
From: E M <5089238+emizzle@users.noreply.github.com>
Date: Fri, 16 Jan 2026 22:03:42 +1100
Subject: [PATCH] WIP: Add entire merkletree module from codex

This commit does not compile.

There is a circular dependency importing the MerkleTree type. The interface needs to be defined in such a way that we can avoid this circular dependency.
---
 .gitignore                                  |   1 +
 merkletree.nimble                           |   5 +
 src/merkletree.nim                          |   9 +-
 src/merkletree/{submodule.nim => codex.nim} |   9 +-
 src/merkletree/codex/coders.nim             | 115 ++++++
 src/merkletree/codex/codex.nim              | 241 ++++++++++++
 src/merkletree/merkletree.nim               | 394 ++++++++++++++++++++
 src/merkletree/utils/digest.nim             |   7 +
 src/merkletree/utils/sharedbuf.nim          |  24 ++
 tests/helpers.nim                           |  11 +
 tests/test1.nim                             |  12 -
 tests/testcodexcoders.nim                   |  44 +++
 tests/testcodextree.nim                     |  93 +++++
 tests/testgenerictree.nim                   | 111 ++++++
 14 files changed, 1052 insertions(+), 24 deletions(-)
 create mode 100644 .gitignore
 rename src/merkletree/{submodule.nim => codex.nim} (62%)
 create mode 100644 src/merkletree/codex/coders.nim
 create mode 100644 src/merkletree/codex/codex.nim
 create mode 100644 src/merkletree/merkletree.nim
 create mode 100644 src/merkletree/utils/digest.nim
 create mode 100644 src/merkletree/utils/sharedbuf.nim
 create mode 100644 tests/helpers.nim
 delete mode 100644 tests/test1.nim
 create mode 100644 tests/testcodexcoders.nim
 create mode 100644 tests/testcodextree.nim
 create mode 100644 tests/testgenerictree.nim

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0e0cc33
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+nimble
diff --git a/merkletree.nimble b/merkletree.nimble
index dc02e2f..76ef1b1 100644
--- a/merkletree.nimble
+++ b/merkletree.nimble
@@ -10,3 +10,8 @@ srcDir        = "src"
 # Dependencies
 
 requires "nim >= 2.2.6"
+requires "questionable ~= 0.10.15"
+requires "stew ~= 0.4.2"
+requires "unittest2 ~= 0.2.5"
+requires "libp2p ~= 1.14.3"
+requires "taskpools ~= 0.1.0"
diff --git a/src/merkletree.nim b/src/merkletree.nim
index b7a2480..d6b4be7 100644
--- a/src/merkletree.nim
+++ b/src/merkletree.nim
@@ -1,7 +1,4 @@
-# This is just an example to get you started. A typical library package
-# exports the main API in this file. Note that you cannot rename this file
-# but you can remove it if you wish.
+import pkg/merkletree/merkletree
+import pkg/merkletree/codex
 
-proc add*(x, y: int): int =
-  ## Adds two numbers together.
-  return x + y
+export codex, merkletree
\ No newline at end of file
diff --git a/src/merkletree/submodule.nim b/src/merkletree/codex.nim
similarity index 62%
rename from src/merkletree/submodule.nim
rename to src/merkletree/codex.nim
index a70ab64..f3240e0 100644
--- a/src/merkletree/submodule.nim
+++ b/src/merkletree/codex.nim
@@ -3,10 +3,7 @@
 # remove this file altogether. You may create additional modules alongside
 # this file as required.
 
-type
-  Submodule* = object
-    name*: string
+import ./codex/codex
+import ./codex/coders
 
-proc initSubmodule*(): Submodule =
-  ## Initialises a new ``Submodule`` object.
-  Submodule(name: "Anonymous")
+export codex, coders
diff --git a/src/merkletree/codex/coders.nim b/src/merkletree/codex/coders.nim
new file mode 100644
index 0000000..f465ff4
--- /dev/null
+++ b/src/merkletree/codex/coders.nim
@@ -0,0 +1,115 @@
+## Logos Storage
+## Copyright (c) 2023 Status Research & Development GmbH
+## Licensed under either of
+##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
+##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
+## at your option.
+## This file may not be copied, modified, or distributed except according to
+## those terms.
+
+{.push raises: [], gcsafe.}
+
+import pkg/libp2p
+import pkg/questionable
+import pkg/questionable/results
+import pkg/stew/byteutils
+import pkg/serde/json
+
+# import ../../units
+# import ../../errors
+
+# import ./codex
+
+const MaxMerkleTreeSize = 100.MiBs.uint
+const MaxMerkleProofSize = 1.MiBs.uint
+
+proc encode*(self: CodexTree): seq[byte] =
+  var pb = initProtoBuffer()
+  pb.write(1, self.mcodec.uint64)
+  pb.write(2, self.leavesCount.uint64)
+  for node in self.nodes:
+    var nodesPb = initProtoBuffer()
+    nodesPb.write(1, node)
+    nodesPb.finish()
+    pb.write(3, nodesPb)
+
+  pb.finish
+  pb.buffer
+
+proc decode*(_: type CodexTree, data: seq[byte]): ?!CodexTree =
+  ## Rebuild a tree from protobuf `data`: codec (1), leaf count (2), nodes (3).
+  var pb = initProtoBuffer(data)
+  var mcodecCode, leavesCount: uint64
+  discard ?pb.getField(1, mcodecCode).mapFailure
+  discard ?pb.getField(2, leavesCount).mapFailure
+  if max(mcodecCode, leavesCount) > int.high.uint64: # `.int` below would trap
+    return failure("Field value out of range")
+  let mcodec = MultiCodec.codec(mcodecCode.int)
+  if mcodec == InvalidMultiCodec:
+    return failure("Invalid MultiCodec code " & $mcodecCode)
+  var
+    nodesBuff: seq[seq[byte]]
+    nodes: seq[ByteHash]
+
+  if ?pb.getRepeatedField(3, nodesBuff).mapFailure:
+    for nodeBuff in nodesBuff:
+      var node: ByteHash
+      discard ?initProtoBuffer(nodeBuff).getField(1, node).mapFailure
+      nodes.add node
+
+  CodexTree.fromNodes(mcodec, nodes, leavesCount.int)
+
+proc encode*(self: CodexProof): seq[byte] =
+  var pb = initProtoBuffer()
+  pb.write(1, self.mcodec.uint64)
+  pb.write(2, self.index.uint64)
+  pb.write(3, self.nleaves.uint64)
+
+  for node in self.path:
+    var nodesPb = initProtoBuffer()
+    nodesPb.write(1, node)
+    nodesPb.finish()
+    pb.write(4, nodesPb)
+
+  pb.finish
+  pb.buffer
+
+proc decode*(_: type CodexProof, data: seq[byte]): ?!CodexProof =
+  ## Rebuild a proof from protobuf `data`: codec (1), index (2), leaves (3), path (4).
+  var pb = initProtoBuffer(data)
+  var mcodecCode, index, nleaves: uint64
+  discard ?pb.getField(1, mcodecCode).mapFailure
+  discard ?pb.getField(2, index).mapFailure
+  discard ?pb.getField(3, nleaves).mapFailure
+  if max(mcodecCode, max(index, nleaves)) > int.high.uint64: # `.int` would trap
+    return failure("Field value out of range")
+
+  let mcodec = MultiCodec.codec(mcodecCode.int)
+  if mcodec == InvalidMultiCodec:
+    return failure("Invalid MultiCodec code " & $mcodecCode)
+
+  var
+    nodesBuff: seq[seq[byte]]
+    nodes: seq[ByteHash]
+
+  if ?pb.getRepeatedField(4, nodesBuff).mapFailure:
+    for nodeBuff in nodesBuff:
+      var node: ByteHash
+      let nodePb = initProtoBuffer(nodeBuff)
+      discard ?nodePb.getField(1, node).mapFailure
+      nodes.add node
+
+  CodexProof.init(mcodec, index.int, nleaves.int, nodes)
+
+proc fromJson*(_: type CodexProof, json: JsonNode): ?!CodexProof =
+  ## Decode a proof from a JSON string holding its hex-encoded protobuf form.
+  expectJsonKind(CodexProof, JString, json) # name the type actually expected
+  var bytes: seq[byte]
+  try:
+    bytes = hexToSeqByte(json.str)
+  except ValueError as err: # malformed hex becomes a failure result
+    return failure(err)
+  CodexProof.decode(bytes)
+
+func `%`*(proof: CodexProof): JsonNode =
+  %byteutils.toHex(proof.encode())
diff --git a/src/merkletree/codex/codex.nim b/src/merkletree/codex/codex.nim
new file mode 100644
index 0000000..29cf582
--- /dev/null
+++ b/src/merkletree/codex/codex.nim
@@ -0,0 +1,241 @@
+## Logos Storage
+## Copyright (c) 2023 Status Research & Development GmbH
+## Licensed under either of
+##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
+##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
+## at your option.
+## This file may not be copied, modified, or distributed except according to
+## those terms.
+
+{.push raises: [].}
+
+import std/bitops
+import std/sequtils
+
+import pkg/questionable
+import pkg/questionable/results
+import pkg/libp2p/[cid, multicodec, multihash]
+import pkg/merkletree
+# import ../../utils
+# import ../../rng
+# import ../../errors
+# import ../../blocktype
+import ../utils/digest
+
+export merkletree
+
+type
+  ByteTreeKey* {.pure.} = enum
+    KeyNone = 0x0.byte
+    KeyBottomLayer = 0x1.byte
+    KeyOdd = 0x2.byte
+    KeyOddAndBottomLayer = 0x3.byte
+
+  ByteHash* = seq[byte]
+  ByteTree* = MerkleTree[ByteHash, ByteTreeKey]
+  ByteProof* = MerkleProof[ByteHash, ByteTreeKey]
+
+  CodexTree* = ref object of ByteTree
+    mcodec*: MultiCodec
+
+  CodexProof* = ref object of ByteProof
+    mcodec*: MultiCodec
+
+func getProof*(self: CodexTree, index: int): ?!CodexProof =
+  var proof = CodexProof(mcodec: self.mcodec)
+
+  ?self.getProof(index, proof)
+
+  success proof
+
+func verify*(self: CodexProof, leaf: MultiHash, root: MultiHash): ?!bool =
+  ## Check that `leaf` hashes up to `root` along this proof's path.
+  ## Fails when either hash uses another codec or has a wrong digest length.
+
+  let
+    rootBytes = root.digestBytes
+    leafBytes = leaf.digestBytes
+
+  if self.mcodec != root.mcodec or self.mcodec != leaf.mcodec:
+    return failure "Hash codec mismatch"
+
+  # Either digest being malformed is enough to reject the pair.
+  if rootBytes.len != root.size or leafBytes.len != leaf.size:
+    return failure "Invalid hash length"
+  self.verify(leafBytes, rootBytes)
+
+func verify*(self: CodexProof, leaf: Cid, root: Cid): ?!bool =
+  self.verify(?leaf.mhash.mapFailure, ?root.mhash.mapFailure) # leaf vs. root hash
+
+proc rootCid*(self: CodexTree, version = CIDv1, dataCodec = DatasetRootCodec): ?!Cid =
+  ## CID of the tree root, built from the requested `version` and `dataCodec`.
+  let rootHash = ?self.root # copy the root out once instead of twice
+  if rootHash.len == 0:
+    return failure "Empty root"
+  let mhash = ?MultiHash.init(self.mcodec, rootHash).mapFailure
+  Cid.init(version, dataCodec, mhash).mapFailure
+
+func getLeafCid*(
+    self: CodexTree, i: Natural, version = CIDv1, dataCodec = BlockCodec
+): ?!Cid =
+  if i >= self.leavesCount:
+    return failure "Invalid leaf index " & $i
+
+  let
+    leaf = self.leaves[i]
+    mhash = ?MultiHash.init($self.mcodec, leaf).mapFailure
+
+  Cid.init(version, dataCodec, mhash).mapFailure
+
+proc `$`*(self: CodexTree): string =
+  let root =
+    if self.root.isOk:
+      byteutils.toHex(self.root.get)
+    else:
+      "none"
+  "CodexTree(" & " root: " & root & ", leavesCount: " & $self.leavesCount & ", levels: " &
+    $self.levels & ", mcodec: " & $self.mcodec & " )"
+
+proc `$`*(self: CodexProof): string =
+  "CodexProof(" & " nleaves: " & $self.nleaves & ", index: " & $self.index & ", path: " &
+    $self.path.mapIt(byteutils.toHex(it)) & ", mcodec: " & $self.mcodec & " )"
+
+func compress*(x, y: openArray[byte], key: ByteTreeKey, codec: MultiCodec): ?!ByteHash =
+  ## Compress two hashes
+  ##
+  let input = @x & @y & @[key.byte]
+  let digest = ?MultiHash.digest(codec, input).mapFailure
+  success digest.digestBytes
+
+func initTree(mcodec: MultiCodec, leaves: openArray[ByteHash]): ?!CodexTree =
+  if leaves.len == 0:
+    return failure "Empty leaves"
+
+  let
+    compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} =
+      compress(x, y, key, mcodec)
+    digestSize = ?mcodec.digestSize.mapFailure
+    Zero: ByteHash = newSeq[byte](digestSize)
+
+  if digestSize != leaves[0].len:
+    return failure "Invalid hash length"
+
+  var self = CodexTree(mcodec: mcodec)
+  ?self.prepare(compressor, Zero, leaves)
+  success self
+
+func init*(
+    _: type CodexTree, mcodec: MultiCodec = Sha256HashCodec, leaves: openArray[ByteHash]
+): ?!CodexTree =
+  let tree = ?initTree(mcodec, leaves)
+  ?tree.compute()
+  success tree
+
+proc init*(
+    _: type CodexTree,
+    tp: Taskpool,
+    mcodec: MultiCodec = Sha256HashCodec,
+    leaves: seq[ByteHash],
+): Future[?!CodexTree] {.async: (raises: [CancelledError]).} =
+  let tree = ?initTree(mcodec, leaves)
+  ?await tree.compute(tp)
+  success tree
+
+func init*(_: type CodexTree, leaves: openArray[MultiHash]): ?!CodexTree =
+  if leaves.len == 0:
+    return failure "Empty leaves"
+
+  let
+    mcodec = leaves[0].mcodec
+    leaves = leaves.mapIt(it.digestBytes)
+
+  CodexTree.init(mcodec, leaves)
+
+proc init*(
+    _: type CodexTree, tp: Taskpool, leaves: seq[MultiHash]
+): Future[?!CodexTree] {.async: (raises: [CancelledError]).} =
+  if leaves.len == 0:
+    return failure "Empty leaves"
+
+  let
+    mcodec = leaves[0].mcodec
+    leaves = leaves.mapIt(it.digestBytes)
+
+  await CodexTree.init(tp, mcodec, leaves)
+
+func init*(_: type CodexTree, leaves: openArray[Cid]): ?!CodexTree =
+  if leaves.len == 0:
+    return failure "Empty leaves"
+
+  let
+    mcodec = (?leaves[0].mhash.mapFailure).mcodec
+    leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes)
+
+  CodexTree.init(mcodec, leaves)
+
+proc init*(
+    _: type CodexTree, tp: Taskpool, leaves: seq[Cid]
+): Future[?!CodexTree] {.async: (raises: [CancelledError]).} =
+  if leaves.len == 0:
+    return failure("Empty leaves")
+
+  let
+    mcodec = (?leaves[0].mhash.mapFailure).mcodec
+    leaves = leaves.mapIt((?it.mhash.mapFailure).digestBytes)
+
+  await CodexTree.init(tp, mcodec, leaves)
+
+proc fromNodes*(
+    _: type CodexTree,
+    mcodec: MultiCodec = Sha256HashCodec,
+    nodes: openArray[ByteHash],
+    nleaves: int,
+): ?!CodexTree =
+  if nodes.len == 0:
+    return failure "Empty nodes"
+
+  let
+    digestSize = ?mcodec.digestSize.mapFailure
+    Zero = newSeq[byte](digestSize)
+    compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} =
+      compress(x, y, key, mcodec)
+
+  if digestSize != nodes[0].len:
+    return failure "Invalid hash length"
+
+  var self = CodexTree(mcodec: mcodec)
+  ?self.fromNodes(compressor, Zero, nodes, nleaves)
+
+  let
+    index = Rng.instance.rand(nleaves - 1)
+    proof = ?self.getProof(index)
+
+  if not ?proof.verify(self.leaves[index], ?self.root): # sanity check
+    return failure "Unable to verify tree built from nodes"
+
+  success self
+
+func init*(
+    _: type CodexProof,
+    mcodec: MultiCodec = Sha256HashCodec,
+    index: int,
+    nleaves: int,
+    nodes: openArray[ByteHash],
+): ?!CodexProof =
+  if nodes.len == 0:
+    return failure "Empty nodes"
+
+  let
+    digestSize = ?mcodec.digestSize.mapFailure
+    Zero = newSeq[byte](digestSize)
+    compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!seq[byte] {.noSideEffect.} =
+      compress(x, y, key, mcodec)
+
+  success CodexProof(
+    compress: compressor,
+    zero: Zero,
+    mcodec: mcodec,
+    index: index,
+    nleaves: nleaves,
+    path: @nodes,
+  )
diff --git a/src/merkletree/merkletree.nim b/src/merkletree/merkletree.nim
new file mode 100644
index 0000000..a3cc7af
--- /dev/null
+++ b/src/merkletree/merkletree.nim
@@ -0,0 +1,394 @@
+## Logos Storage
+## Copyright (c) 2023 Status Research & Development GmbH
+## Licensed under either of
+##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
+##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
+## at your option.
+## This file may not be copied, modified, or distributed except according to
+## those terms.
+
+{.push raises: [].}
+
+import std/[bitops, atomics, sequtils]
+import pkg/taskpools
+import pkg/chronos
+import pkg/chronos/threadsync
+# import pkg/libp2p/[multicodec]
+import pkg/stew/assign2
+import pkg/questionable/results
+import pkg/merkletree/utils/sharedbuf
+
+export sharedbuf
+
+template nodeData(
+    data: openArray[byte], offsets: openArray[int], nodeSize, i, j: int
+): openArray[byte] =
+  ## Bytes of the j'th entry of the i'th level in the tree, starting with the
+  ## leaves (at level 0).
+  let start = (offsets[i] + j) * nodeSize
+  data.toOpenArray(start, start + nodeSize - 1)
+
+type
+  # TODO hash functions don't fail - removing the ?! from this function would
+  #      significantly simplify the flow below
+  CompressFn*[H, K] = proc(x, y: H, key: K): ?!H {.noSideEffect, raises: [].}
+
+  CompressData[H, K] = object
+    fn: CompressFn[H, K]
+    nodeSize: int
+    zero: H
+
+  MerkleTreeObj*[H, K] = object of RootObj
+    store*: seq[byte]
+      ## Flattened merkle tree where hashes are assumed to be trivial bytes and
+      ## uniform in size.
+      ##
+      ## Each layer of the tree is stored serially starting with the leaves and
+      ## ending with the root.
+      ##
+      ## Because the tree might not be balanced, `layerOffsets` contains the
+      ## index of the starting point of each level, for easy lookup.
+    layerOffsets*: seq[int]
+      ## Starting point of each level in the tree, starting from the leaves -
+      ## multiplied by the entry size, this is the offset in the payload where
+      ## the entries of that level start
+      ##
+      ## For example, a tree with 4 leaves will have [0, 4, 6] stored here.
+      ##
+      ## See nodesPerLevel function, from which this sequence is derived
+    compress*: CompressData[H, K]
+
+  MerkleTree*[H, K] = ref MerkleTreeObj[H, K]
+
+  MerkleProof*[H, K] = ref object of RootObj
+    index*: int # linear index of the leaf, starting from 0
+    path*: seq[H] # order: from the bottom to the top
+    nleaves*: int # number of leaves in the tree (=size of input)
+    compress*: CompressFn[H, K] # compress function
+    zero*: H # zero value
+
+func levels*[H, K](self: MerkleTree[H, K]): int =
+  return self.layerOffsets.len
+
+func depth*[H, K](self: MerkleTree[H, K]): int =
+  return self.levels() - 1
+
+func nodesInLayer(offsets: openArray[int], layer: int): int =
+  if layer == offsets.high:
+    1
+  else:
+    offsets[layer + 1] - offsets[layer]
+
+func nodesInLayer(self: MerkleTree | MerkleTreeObj, layer: int): int =
+  self.layerOffsets.nodesInLayer(layer)
+
+func leavesCount*[H, K](self: MerkleTree[H, K]): int =
+  return self.nodesInLayer(0)
+
+func nodesPerLevel(nleaves: int): seq[int] =
+  ## Given a number of leaves, return a seq with the number of nodes at each
+  ## layer of the tree (from the bottom/leaves to the root)
+  ##
+  ## Ie For a tree of 4 leaves, return `[4, 2, 1]`
+  if nleaves <= 0:
+    return @[]
+  elif nleaves == 1:
+    return @[1, 1] # leaf and root
+
+  var nodes: seq[int] = @[]
+  var m = nleaves
+  while true:
+    nodes.add(m)
+    if m == 1:
+      break
+    # Next layer size is ceil(m/2)
+    m = (m + 1) shr 1
+
+  nodes
+
+func layerOffsets(nleaves: int): seq[int] =
+  ## Given a number of leaves, return a seq of the starting offsets of each
+  ## layer in the node store that results from flattening the binary tree
+  ##
+  ## Ie For a tree of 4 leaves, return `[0, 4, 6]`
+  let nodes = nodesPerLevel(nleaves)
+  var tot = 0
+  let offsets = nodes.mapIt:
+    let cur = tot
+    tot += it
+    cur
+  offsets
+
+template nodeData(self: MerkleTreeObj, i, j: int): openArray[byte] =
+  ## Bytes of the j'th node of the i'th level in the tree, starting with the
+  ## leaves (at level 0).
+  self.store.nodeData(self.layerOffsets, self.compress.nodeSize, i, j)
+
+func layer*[H, K](
+    self: MerkleTree[H, K], layer: int
+): seq[H] {.deprecated: "Expensive".} =
+  var nodes = newSeq[H](self.nodesInLayer(layer))
+  for i, h in nodes.mpairs:
+    assign(h, self[].nodeData(layer, i))
+  return nodes
+
+func leaves*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} =
+  self.layer(0)
+
+iterator layers*[H, K](self: MerkleTree[H, K]): seq[H] {.deprecated: "Expensive".} =
+  for i in 0 ..< self.layerOffsets.len:
+    yield self.layer(i)
+
+proc layers*[H, K](self: MerkleTree[H, K]): seq[seq[H]] {.deprecated: "Expensive".} =
+  for l in self.layers():
+    result.add l
+
+iterator nodes*[H, K](self: MerkleTree[H, K]): H =
+  ## Iterate over the nodes of each layer starting with the leaves
+  var node: H
+  for i in 0 ..< self.layerOffsets.len:
+    let nodesInLayer = self.nodesInLayer(i)
+    for j in 0 ..< nodesInLayer:
+      assign(node, self[].nodeData(i, j))
+      yield node
+
+func root*[H, K](self: MerkleTree[H, K]): ?!H =
+  mixin assign
+  if self.layerOffsets.len == 0:
+    return failure "invalid tree"
+
+  var h: H
+  assign(h, self[].nodeData(self.layerOffsets.high(), 0))
+  return success h
+
+func getProof*[H, K](
+    self: MerkleTree[H, K], index: int, proof: MerkleProof[H, K]
+): ?!void =
+  let depth = self.depth
+  let nleaves = self.leavesCount
+
+  if not (index >= 0 and index < nleaves):
+    return failure "index out of bounds"
+
+  var path: seq[H] = newSeq[H](depth)
+  var k = index
+  var m = nleaves
+  for i in 0 ..< depth:
+    let j = k xor 1
+
+    if (j < m):
+      assign(path[i], self[].nodeData(i, j))
+    else:
+      path[i] = self.compress.zero
+
+    k = k shr 1
+    m = (m + 1) shr 1
+
+  proof.index = index
+  proof.path = path
+  proof.nleaves = nleaves
+  proof.compress = self.compress.fn
+
+  success()
+
+func getProof*[H, K](self: MerkleTree[H, K], index: int): ?!MerkleProof[H, K] =
+  var proof = MerkleProof[H, K]()
+
+  ?self.getProof(index, proof)
+
+  success proof
+
+func reconstructRoot*[H, K](proof: MerkleProof[H, K], leaf: H): ?!H =
+  var
+    m = proof.nleaves
+    j = proof.index
+    h = leaf
+    bottomFlag = K.KeyBottomLayer
+
+  for p in proof.path:
+    let oddIndex: bool = (bitand(j, 1) != 0)
+    if oddIndex:
+      # the index of the child is odd, so the node itself can't be odd (a bit counterintuitive, yeah :)
+      h = ?proof.compress(p, h, bottomFlag)
+    else:
+      if j == m - 1:
+        # single child => odd node
+        h = ?proof.compress(h, p, K(bottomFlag.ord + 2))
+      else:
+        # even node
+        h = ?proof.compress(h, p, bottomFlag)
+    bottomFlag = K.KeyNone
+    j = j shr 1
+    m = (m + 1) shr 1
+
+  return success h
+
+func verify*[H, K](proof: MerkleProof[H, K], leaf: H, root: H): ?!bool =
+  success bool(root == ?proof.reconstructRoot(leaf))
+
+func fromNodes*[H, K](
+    self: MerkleTree[H, K],
+    compressor: CompressFn,
+    zero: H,
+    nodes: openArray[H],
+    nleaves: int,
+): ?!void =
+  ## Load an already computed tree from `nodes`, flattened leaves first.
+  mixin assign
+  if nodes.len < 2: # At least leaf and root
+    return failure "Not enough nodes"
+
+  if nleaves <= 0: # a negative count would leave `layerOffsets` empty below
+    return failure "No leaves"
+
+  self.compress = CompressData[H, K](fn: compressor, nodeSize: nodes[0].len, zero: zero)
+  self.layerOffsets = layerOffsets(nleaves)
+
+  if self.layerOffsets[^1] + 1 != nodes.len:
+    return failure "bad node count"
+
+  self.store = newSeqUninit[byte](nodes.len * self.compress.nodeSize)
+
+  for i in 0 ..< nodes.len:
+    assign(
+      self[].store.toOpenArray(
+        i * self.compress.nodeSize, (i + 1) * self.compress.nodeSize - 1
+      ),
+      nodes[i],
+    )
+
+  success()
+
+func merkleTreeWorker[H, K](
+    store: var openArray[byte],
+    offsets: openArray[int],
+    compress: CompressData[H, K],
+    layer: int,
+    isBottomLayer: static bool,
+): ?!void =
+  ## Worker used to compute the merkle tree from the leaves that are assumed to
+  ## already be stored at the beginning of the `store`, as done by `prepare`.
+
+  # Throughout, we use `assign` to convert from H to bytes and back, assuming
+  # this assignment can be done somewhat efficiently (ie memcpy) - because
+  # the code must work with multihash where len(H) can differ, we cannot
+  # simply use a fixed-size array here.
+  mixin assign
+
+  template nodeData(i, j: int): openArray[byte] =
+    # Pick out the bytes of node j in layer i
+    store.nodeData(offsets, compress.nodeSize, i, j)
+
+  let m = offsets.nodesInLayer(layer)
+
+  when not isBottomLayer:
+    if m == 1:
+      return success()
+
+  let halfn: int = m div 2
+  let n: int = 2 * halfn
+  let isOdd: bool = (n != m)
+
+  # Because the compression function we work with works with H and not bytes,
+  # we need to extract H from the raw data - a little abstraction tax that
+  # ensures that properties like alignment of H are respected.
+  var a, b, tmp: H
+
+  for i in 0 ..< halfn:
+    const key = when isBottomLayer: K.KeyBottomLayer else: K.KeyNone
+
+    assign(a, nodeData(layer, i * 2))
+    assign(b, nodeData(layer, i * 2 + 1))
+
+    tmp = ?compress.fn(a, b, key = key)
+
+    assign(nodeData(layer + 1, i), tmp)
+
+  if isOdd:
+    const key = when isBottomLayer: K.KeyOddAndBottomLayer else: K.KeyOdd
+
+    assign(a, nodeData(layer, n))
+
+    tmp = ?compress.fn(a, compress.zero, key = key)
+
+    assign(nodeData(layer + 1, halfn), tmp)
+
+  merkleTreeWorker(store, offsets, compress, layer + 1, false)
+
+proc merkleTreeWorker[H, K](
+    store: SharedBuf[byte],
+    offsets: SharedBuf[int],
+    compress: ptr CompressData[H, K],
+    signal: ThreadSignalPtr,
+): bool =
+  defer:
+    discard signal.fireSync()
+
+  let res = merkleTreeWorker(
+    store.toOpenArray(), offsets.toOpenArray(), compress[], 0, isBottomLayer = true
+  )
+
+  return res.isOk()
+
+func prepare*[H, K](
+    self: MerkleTree[H, K], compressor: CompressFn, zero: H, leaves: openArray[H]
+): ?!void =
+  ## Prepare the instance for computing the merkle tree of the given leaves using
+  ## the given compression function. After preparation, `compute` should be
+  ## called to perform the actual computation. `leaves` will be copied into the
+  ## tree so they can be freed after the call.
+
+  if leaves.len == 0:
+    return failure "No leaves"
+
+  self.compress =
+    CompressData[H, K](fn: compressor, nodeSize: leaves[0].len, zero: zero)
+  self.layerOffsets = layerOffsets(leaves.len)
+
+  self.store = newSeqUninit[byte]((self.layerOffsets[^1] + 1) * self.compress.nodeSize)
+
+  for j in 0 ..< leaves.len:
+    assign(self[].nodeData(0, j), leaves[j])
+
+  return success()
+
+proc compute*[H, K](self: MerkleTree[H, K]): ?!void =
+  merkleTreeWorker(
+    self.store, self.layerOffsets, self.compress, 0, isBottomLayer = true
+  )
+
+proc compute*[H, K](
+    self: MerkleTree[H, K], tp: Taskpool
+): Future[?!void] {.async: (raises: []).} =
+  if tp.numThreads == 1:
+    # With a single thread, there's no point creating a separate task
+    return self.compute()
+
+  # TODO this signal would benefit from reuse across computations
+  without signal =? ThreadSignalPtr.new():
+    return failure("Unable to create thread signal")
+
+  defer:
+    signal.close().expect("closing once works")
+
+  let res = tp.spawn merkleTreeWorker(
+    SharedBuf.view(self.store),
+    SharedBuf.view(self.layerOffsets),
+    addr self.compress,
+    signal,
+  )
+
+  # To support cancellation, we'd have to ensure the task we posted to taskpools
+  # exits early - since we're not doing that, block cancellation attempts
+  try:
+    await noCancel signal.wait()
+  except AsyncError as exc:
+    # Since we initialized the signal, the OS or chronos is misbehaving. In any
+    # case, it would mean the task is still running which would cause a
+    # memory violation if we let it run - panic instead
+    raiseAssert "Could not wait for signal, was it initialized? " & exc.msg
+
+  if not res.sync():
+    return failure("merkle tree task failed")
+
+  return success()
diff --git a/src/merkletree/utils/digest.nim b/src/merkletree/utils/digest.nim
new file mode 100644
index 0000000..c756da6
--- /dev/null
+++ b/src/merkletree/utils/digest.nim
@@ -0,0 +1,7 @@
+from pkg/libp2p import MultiHash
+
+func digestBytes*(mhash: MultiHash): seq[byte] =
+  ## Extract hash digestBytes
+  ##
+
+  mhash.data.buffer[mhash.dpos ..< mhash.dpos + mhash.size]
\ No newline at end of file
diff --git a/src/merkletree/utils/sharedbuf.nim b/src/merkletree/utils/sharedbuf.nim
new file mode 100644
index 0000000..186d712
--- /dev/null
+++ b/src/merkletree/utils/sharedbuf.nim
@@ -0,0 +1,24 @@
+import stew/ptrops
+
+type SharedBuf*[T] = object
+  payload*: ptr UncheckedArray[T]
+  len*: int
+
+proc view*[T](_: type SharedBuf, v: openArray[T]): SharedBuf[T] =
+  if v.len > 0:
+    SharedBuf[T](payload: makeUncheckedArray(addr v[0]), len: v.len)
+  else:
+    default(SharedBuf[T])
+
+template checkIdx(v: SharedBuf, i: int) =
+  doAssert i >= 0 and i < v.len # valid indices are 0 .. len - 1
+
+proc `[]`*[T](v: SharedBuf[T], i: int): var T =
+  v.checkIdx(i)
+  v.payload[i]
+
+template toOpenArray*[T](v: SharedBuf[T]): var openArray[T] =
+  v.payload.toOpenArray(0, v.len - 1)
+
+template toOpenArray*[T](v: SharedBuf[T], s, e: int): var openArray[T] =
+  v.toOpenArray().toOpenArray(s, e)
diff --git a/tests/helpers.nim b/tests/helpers.nim
new file mode 100644
index 0000000..3715ae1
--- /dev/null
+++ b/tests/helpers.nim
@@ -0,0 +1,11 @@
+import pkg/merkletree
+# import ./helpers
+
+export merkletree#, helpers
+
+proc `==`*(a, b: CodexTree): bool = # same codec, same shape, same node bytes
+  (a.mcodec == b.mcodec) and (a.layerOffsets == b.layerOffsets) and (a.store == b.store)
+
+proc `==`*(a, b: CodexProof): bool =
+  (a.mcodec == b.mcodec) and (a.nleaves == b.nleaves) and (a.path == b.path) and
+    (a.index == b.index)
diff --git a/tests/test1.nim b/tests/test1.nim
deleted file mode 100644
index 4bf04d6..0000000
--- a/tests/test1.nim
+++ /dev/null
@@ -1,12 +0,0 @@
-# This is just an example to get you started. You may wish to put all of your
-# tests into a single file, or separate them into multiple `test1`, `test2`
-# etc. files (better names are recommended, just make sure the name starts with
-# the letter 't').
-#
-# To run these tests, simply execute `nimble test`.
-
-import unittest
-
-import merkletree
-test "can add":
-  check add(5, 5) == 10
diff --git a/tests/testcodexcoders.nim b/tests/testcodexcoders.nim
new file mode 100644
index 0000000..2c5ebfc
--- /dev/null
+++ b/tests/testcodexcoders.nim
@@ -0,0 +1,44 @@
+import pkg/unittest2
+
+import pkg/questionable/results
+import pkg/stew/byteutils
+
+import pkg/merkletree
+import ./helpers
+
+const data = [
+  "00000000000000000000000000000001".toBytes,
+  "00000000000000000000000000000002".toBytes,
+  "00000000000000000000000000000003".toBytes,
+  "00000000000000000000000000000004".toBytes,
+  "00000000000000000000000000000005".toBytes,
+  "00000000000000000000000000000006".toBytes,
+  "00000000000000000000000000000007".toBytes,
+  "00000000000000000000000000000008".toBytes,
+  "00000000000000000000000000000009".toBytes, "00000000000000000000000000000010".toBytes,
+]
+
+suite "merkletree - coders":
+  test "encoding and decoding a tree yields the same tree":
+    let
+      tree = CodexTree.init(Sha256HashCodec, data).tryGet()
+      encodedBytes = tree.encode()
+      decodedTree = CodexTree.decode(encodedBytes).tryGet()
+
+    check:
+      tree == decodedTree
+
+  test "encoding and decoding a proof yields the same proof":
+    let
+      tree = CodexTree.init(Sha256HashCodec, data).tryGet()
+      proof = tree.getProof(4).tryGet()
+
+    check: # `isOk` alone would also accept a proof that verified to `false`
+      proof.verify(tree.leaves[4], tree.root.tryGet).tryGet
+
+    let
+      encodedBytes = proof.encode()
+      decodedProof = CodexProof.decode(encodedBytes).tryGet()
+
+    check:
+      proof == decodedProof
diff --git a/tests/testcodextree.nim b/tests/testcodextree.nim
new file mode 100644
index 0000000..91ccd7f
--- /dev/null
+++ b/tests/testcodextree.nim
@@ -0,0 +1,93 @@
+import std/sequtils
+
+import pkg/unittest2
+import pkg/questionable/results
+import pkg/stew/byteutils
+import pkg/libp2p
+
+import pkg/codex/codextypes
+import pkg/codex/merkletree
+import pkg/codex/utils/digest
+
+import ./helpers
+import ./generictreetests
+
+# TODO: Generalize to other hashes
+
+const
+  data = [
+    "00000000000000000000000000000001".toBytes,
+    "00000000000000000000000000000002".toBytes,
+    "00000000000000000000000000000003".toBytes,
+    "00000000000000000000000000000004".toBytes,
+    "00000000000000000000000000000005".toBytes,
+    "00000000000000000000000000000006".toBytes,
+    "00000000000000000000000000000007".toBytes,
+    "00000000000000000000000000000008".toBytes,
+    "00000000000000000000000000000009".toBytes,
+    "00000000000000000000000000000010".toBytes,
+  ]
+  sha256 = Sha256HashCodec
+
+suite "Test CodexTree":
+  test "Cannot init tree without any multihash leaves":
+    check:
+      CodexTree.init(leaves = newSeq[MultiHash]()).isErr
+
+  test "Cannot init tree without any cid leaves":
+    check:
+      CodexTree.init(leaves = newSeq[Cid]()).isErr
+
+  test "Cannot init tree without any byte leaves":
+    check:
+      CodexTree.init(sha256, leaves = newSeq[ByteHash]()).isErr
+
+  test "Should build tree from multihash leaves":
+    var expectedLeaves = data.mapIt(MultiHash.digest($sha256, it).tryGet())
+
+    var tree = CodexTree.init(leaves = expectedLeaves)
+    check:
+      tree.isOk
+      tree.get().leaves == expectedLeaves.mapIt(it.digestBytes)
+      tree.get().mcodec == sha256
+
+  test "Should build tree from cid leaves":
+    var expectedLeaves = data.mapIt(
+      Cid.init(CidVersion.CIDv1, BlockCodec, MultiHash.digest($sha256, it).tryGet).tryGet
+    )
+
+    let tree = CodexTree.init(leaves = expectedLeaves)
+
+    check:
+      tree.isOk
+      tree.get().leaves == expectedLeaves.mapIt(it.mhash.tryGet.digestBytes)
+      tree.get().mcodec == sha256
+
+  test "Should build from raw digestbytes (should not hash leaves)":
+    let tree = CodexTree.init(sha256, leaves = data).tryGet
+
+    check:
+      tree.mcodec == sha256
+      tree.leaves == data
+
+  test "Should build from nodes":
+    let
+      tree = CodexTree.init(sha256, leaves = data).tryGet
+      fromNodes = CodexTree.fromNodes(
+        nodes = toSeq(tree.nodes), nleaves = tree.leavesCount
+      ).tryGet
+
+    check:
+      tree.mcodec == sha256
+      tree == fromNodes
+
+let
+  digestSize = sha256.digestSize.get
+  zero: seq[byte] = newSeq[byte](digestSize)
+  compress = proc(x, y: seq[byte], key: ByteTreeKey): seq[byte] =
+    compress(x, y, key, sha256).tryGet
+
+  makeTree = proc(data: seq[seq[byte]]): CodexTree =
+    CodexTree.init(sha256, leaves = data).tryGet
+
+testGenericTree("CodexTree", @data, zero, compress, makeTree)
diff --git a/tests/testgenerictree.nim b/tests/testgenerictree.nim
new file mode 100644
index 0000000..9d39864
--- /dev/null
+++ b/tests/testgenerictree.nim
@@ -0,0 +1,111 @@
+import pkg/unittest2
+
+import pkg/merkletree
+
+proc testGenericTree*[H, K, U](
+    name: string,
+    data: openArray[H],
+    zero: H,
+    compress: proc(z, y: H, key: K): H,
+    makeTree: proc(data: seq[H]): U,
+) =
+  ## Registers the suite "Correctness tests - <name>", which compares roots and
+  ## proofs of the tree built by `makeTree` with roots folded by hand.
+  ##
+  ## - `data`: leaf fixture; indices 0 .. 9 are read, so at least 10 entries.
+  ## - `zero`: value paired with a node that has no right sibling.
+  ## - `compress`: combines two nodes under a layer key of type `K`.
+  ## - `makeTree`: builds the tree under test from a seq of leaves.
+  ##
+  ## `K` must expose `KeyNone`, `KeyBottomLayer`, `KeyOdd` and
+  ## `KeyOddAndBottomLayer`; `U` must expose `root`, `getProof` and `leaves`.
+  let data = @data # shadow the openArray parameter with a seq copy
+
+  proc tenLeafRoot(): H =
+    ## Expected root over data[0 .. 9]: a full 8-leaf subtree on the left, and
+    ## the lone (8, 9) pair padded with `zero` on the two layers above it.
+    compress(
+      compress(
+        compress(
+          compress(data[0], data[1], K.KeyBottomLayer),
+          compress(data[2], data[3], K.KeyBottomLayer),
+          K.KeyNone,
+        ),
+        compress(
+          compress(data[4], data[5], K.KeyBottomLayer),
+          compress(data[6], data[7], K.KeyBottomLayer),
+          K.KeyNone,
+        ),
+        K.KeyNone,
+      ),
+      compress(
+        compress(compress(data[8], data[9], K.KeyBottomLayer), zero, K.KeyOdd),
+        zero,
+        K.KeyOdd,
+      ),
+      K.KeyNone,
+    )
+
+  suite "Correctness tests - " & name:
+    test "Should build correct tree for even bottom layer":
+      # Eight leaves: every node has a sibling, so `zero` is never needed.
+      let expectedRoot = compress(
+        compress(
+          compress(data[0], data[1], K.KeyBottomLayer),
+          compress(data[2], data[3], K.KeyBottomLayer),
+          K.KeyNone,
+        ),
+        compress(
+          compress(data[4], data[5], K.KeyBottomLayer),
+          compress(data[6], data[7], K.KeyBottomLayer),
+          K.KeyNone,
+        ),
+        K.KeyNone,
+      )
+
+      let tree = makeTree(data[0 .. 7])
+
+      check:
+        tree.root.tryGet == expectedRoot
+
+    test "Should build correct tree for odd bottom layer":
+      # Seven leaves: data[6] has no sibling and is paired with `zero`.
+      let expectedRoot = compress(
+        compress(
+          compress(data[0], data[1], K.KeyBottomLayer),
+          compress(data[2], data[3], K.KeyBottomLayer),
+          K.KeyNone,
+        ),
+        compress(
+          compress(data[4], data[5], K.KeyBottomLayer),
+          compress(data[6], zero, K.KeyOddAndBottomLayer),
+          K.KeyNone,
+        ),
+        K.KeyNone,
+      )
+
+      let tree = makeTree(data[0 .. 6])
+
+      check:
+        tree.root.tryGet == expectedRoot
+
+    test "Should build correct tree for even bottom and odd upper layers":
+      let expectedRoot = tenLeafRoot()
+
+      let tree = makeTree(data[0 .. 9])
+
+      check:
+        tree.root.tryGet == expectedRoot
+
+    test "Should get and validate correct proofs":
+      # Build from exactly the ten leaves that `tenLeafRoot` covers, so the
+      # check does not silently depend on the fixture having no extra entries.
+      let
+        leaves = data[0 .. 9]
+        expectedRoot = tenLeafRoot()
+        tree = makeTree(leaves)
+
+      for i in 0 ..< leaves.len:
+        let proof = tree.getProof(i).tryGet
+        check:
+          proof.verify(tree.leaves[i], expectedRoot).isOk