diff --git a/codex/merkletree/codexmerkletree.nim b/codex/merkletree/codexmerkletree.nim
new file mode 100644
index 00000000..0a45f3d4
--- /dev/null
+++ b/codex/merkletree/codexmerkletree.nim
@@ -0,0 +1,4 @@
+import ./codexmerkletree/codexmerkletree
+import ./codexmerkletree/coders
+
+export codexmerkletree, coders
diff --git a/codex/merkletree/codexmerkletree/coders.nim b/codex/merkletree/codexmerkletree/coders.nim
new file mode 100644
index 00000000..72ffcd08
--- /dev/null
+++ b/codex/merkletree/codexmerkletree/coders.nim
@@ -0,0 +1,110 @@
+## Nim-Codex
+## Copyright (c) 2023 Status Research & Development GmbH
+## Licensed under either of
+## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
+## * MIT license ([LICENSE-MIT](LICENSE-MIT))
+## at your option.
+## This file may not be copied, modified, or distributed except according to
+## those terms.
+
+import pkg/upraises
+
+push: {.upraises: [].}
+
+import std/sequtils
+
+import pkg/libp2p
+import pkg/questionable
+import pkg/questionable/results
+
+import ../../units
+import ../../errors
+
+import ./codexmerkletree
+
+const MaxMerkleTreeSize = 100.MiBs.uint
+const MaxMerkleProofSize = 1.MiBs.uint
+
+proc encode*(self: CodexMerkleTree): seq[byte] =  # serialize the tree into a ProtoBuffer
+  var pb = initProtoBuffer(maxSize = MaxMerkleTreeSize)
+  pb.write(1, self.mcodec.uint64)  # field 1: multicodec code
+  pb.write(2, self.digestSize.uint64)  # field 2: digest size
+  pb.write(3, self.leavesCount.uint64)  # field 3: leaves count
+  for node in self.nodes:  # decode reads field 4 as repeated, one node per entry
+    var nodesPb = initProtoBuffer(maxSize = MaxMerkleTreeSize)  # fresh sub-message per node
+    nodesPb.write(1, node)
+    nodesPb.finish()
+    pb.write(4, nodesPb)
+
+  pb.finish
+  pb.buffer
+
+proc decode*(_: type CodexMerkleTree, data: seq[byte]): ?!CodexMerkleTree =
+  var pb = initProtoBuffer(data, maxSize = MaxMerkleTreeSize)
+  var mcodecCode: uint64
+  var digestSize: uint64
+  var leavesCount: uint64
+  discard ? pb.getField(1, mcodecCode).mapFailure
+  discard ? pb.getField(2, digestSize).mapFailure
+  discard ?
pb.getField(3, leavesCount).mapFailure
+
+  let mcodec = MultiCodec.codec(mcodecCode.int)
+  if mcodec == InvalidMultiCodec:
+    return failure("Invalid MultiCodec code " & $mcodecCode)
+
+  var
+    nodesBuff: seq[seq[byte]]
+    nodes: seq[ByteHash]
+
+  if ? pb.getRepeatedField(4, nodesBuff).mapFailure:
+    for nodeBuff in nodesBuff:  # each field-4 entry is a sub-message holding one node
+      var node: ByteHash
+      let nodePb = initProtoBuffer(nodeBuff)
+      discard ? nodePb.getField(1, node).mapFailure
+      nodes.add node
+
+  let tree = ? CodexMerkleTree.fromNodes(mcodec, nodes, leavesCount.int)  # was undefined `nodesBuffer`; order follows fromNodes(mcodec, nodes, nleaves)
+  success(tree)
+
+proc encode*(self: CodexMerkleProof): seq[byte] =  # serialize the proof into a ProtoBuffer
+  var pb = initProtoBuffer(maxSize = MaxMerkleProofSize)
+  pb.write(1, self.mcodec.uint64)  # field 1: multicodec code
+  pb.write(2, self.digestSize.uint64)  # field 2: digest size
+  pb.write(3, self.index.uint64)  # field 3: leaf index
+  for node in self.path:  # decode reads field 4 as repeated, one node per entry
+    var nodesPb = initProtoBuffer(maxSize = MaxMerkleTreeSize)  # fresh sub-message per node
+    nodesPb.write(1, node)
+    nodesPb.finish()
+    pb.write(4, nodesPb)
+  pb.finish
+  pb.buffer
+
+proc decode*(_: type CodexMerkleProof, data: seq[byte]): ?!CodexMerkleProof =
+  var pb = initProtoBuffer(data, maxSize = MaxMerkleProofSize)
+  var mcodecCode: uint64
+  var digestSize: uint64
+  var index: uint64
+  discard ? pb.getField(1, mcodecCode).mapFailure
+
+  let mcodec = MultiCodec.codec(mcodecCode.int)
+  if mcodec == InvalidMultiCodec:
+    return failure("Invalid MultiCodec code " & $mcodecCode)
+
+  discard ? pb.getField(2, digestSize).mapFailure
+  discard ? pb.getField(3, index).mapFailure
+
+  var
+    nodesBuff: seq[seq[byte]]
+    nodes: seq[ByteHash]
+
+  if ? pb.getRepeatedField(4, nodesBuff).mapFailure:
+    for nodeBuff in nodesBuff:
+      var node: ByteHash
+      let nodePb = initProtoBuffer(nodeBuff)
+      discard ? nodePb.getField(1, node).mapFailure
+      nodes.add node
+
+  let
+    proof = ?
CodexMerkleProof.init(mcodec, index.int, nodes)
+
+  success(proof)
diff --git a/codex/merkletree/codexmerkletree/codexmerkletree.nim b/codex/merkletree/codexmerkletree/codexmerkletree.nim
new file mode 100644
index 00000000..ae3e98d4
--- /dev/null
+++ b/codex/merkletree/codexmerkletree/codexmerkletree.nim
@@ -0,0 +1,245 @@
+## Nim-Codex
+## Copyright (c) 2023 Status Research & Development GmbH
+## Licensed under either of
+## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
+## * MIT license ([LICENSE-MIT](LICENSE-MIT))
+## at your option.
+## This file may not be copied, modified, or distributed except according to
+## those terms.
+
+import pkg/upraises
+
+push: {.upraises: [].}
+
+import std/math
+import std/bitops
+import std/sequtils
+import std/sugar
+import std/algorithm
+import std/tables
+
+import pkg/chronicles
+import pkg/questionable
+import pkg/questionable/results
+import pkg/libp2p/[cid, multicodec, multihash]
+import pkg/stew/byteutils
+
+import ../../errors
+import ../../blocktype
+
+import ../merkletree
+
+export merkletree
+
+logScope:
+  topics = "codex merkletree"
+
+type
+  ByteTreeKey* {.pure.} = enum  # passed to the compressor and appended as one byte to the hashed input
+    KeyNone = 0x0.byte
+    KeyBottomLayer = 0x1.byte
+    KeyOdd = 0x2.byte
+    KeyOddAndBottomLayer = 0x3.byte
+
+  ByteHash* = seq[byte]
+  ByteTree* = MerkleTree[ByteHash, ByteTreeKey]
+  ByteTreeProof* = MerkleProof[ByteHash, ByteTreeKey]
+
+  CodexMerkleTree* = object of ByteTree
+    mhash: MHash  # hash descriptor; read by `digestSize` and `mcodec`
+
+  CodexMerkleProof* = object of ByteTreeProof
+    mhash: MHash  # hash descriptor; read by `digestSize` and `mcodec`
+
+func getMhash*(mcodec: MultiCodec): ?!MHash =  # look up the MHash entry for `mcodec` in CodeHashes
+  let
+    mhash = CodeHashes.getOrDefault(mcodec)
+
+  if isNil(mhash.coder):  # an entry without a coder is reported as a failure
+    return failure "Invalid multihash codec"
+
+  success mhash
+
+func digestSize*(self: (CodexMerkleTree or CodexMerkleProof)): int =
+  ## Digest size of the underlying hash (`mhash.size`)
+  ##
+
+  self.mhash.size
+
+func mcodec*(self: (CodexMerkleTree or CodexMerkleProof)): MultiCodec =
+  ## Multicodec of the underlying hash (`mhash.mcodec`)
+  ##
+
+  self.mhash.mcodec
+
+func bytes*(mhash: MultiHash): seq[byte] =
+  ## Extract hash bytes
+ ## + + mhash.data.buffer[mhash.dpos..= self.leavesCount: + return failure "Invalid leaf index " & $i + + let + leaf = self.leaves[i] + + Cid.init( + CidVersion.CIDv1, + dataCodec, + ? MultiHash.init(self.mcodec, self.root).mapFailure).mapFailure + +func compress*( + x, y: openArray[byte], + key: ByteTreeKey, + mhash: MHash): ?!ByteHash = + ## Compress two hashes + ## + + var digest = newSeq[byte](mhash.size) + mhash.coder(@x & @y & @[ key.byte ], digest) + success digest + +func init*( + _: type CodexMerkleTree, + mcodec: MultiCodec, + leaves: openArray[ByteHash]): ?!CodexMerkleTree = + + if leaves.len == 0: + return failure "Empty leaves" + + let + mhash = ? mcodec.getMhash() + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mhash) + Zero: ByteHash = newSeq[byte](mhash.size) + + if mhash.size != leaves[0].len: + return failure "Invalid hash length" + + var + self = CodexMerkleTree(mhash: mhash, compress: compressor, zero: Zero) + + self.layers = ? merkleTreeWorker(self, leaves, isBottomLayer = true) + success self + +func init*( + _: type CodexMerkleTree, + leaves: openArray[MultiHash]): ?!CodexMerkleTree = + + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = leaves[0].mcodec + leaves = leaves.mapIt( it.bytes ) + + CodexMerkleTree.init(mcodec, leaves) + +func init*( + _: type CodexMerkleTree, + leaves: openArray[Cid]): ?!CodexMerkleTree = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = (? leaves[0].mhash.mapFailure).mcodec + leaves = leaves.mapIt( (? it.mhash.mapFailure).bytes ) + + CodexMerkleTree.init(mcodec, leaves) + +func fromNodes*( + _: type CodexMerkleTree, + mcodec: MultiCodec, + nodes: openArray[seq[ByteHash]], + nleaves: int): ?!CodexMerkleTree = + + if nodes.len == 0: + return failure "Empty nodes" + + let + mhash = ? 
mcodec.getMhash() + Zero = newSeq[ByteHash](mhash.size) + compressor = proc(x, y: openArray[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mhash) + + if mhash.size != nodes[0].len: + return failure "Invalid hash length" + + let + self = CodexMerkleTree(compress: compressor, zero: Zero, mhash: mhash) + + var + layer = nleaves + pos = 0 + + while layer > 0: + self.layers.add( nodes[pos.. dst.len: + return failure("Not enough space in a destination buffer") + dst[dstPos.. self.nodeBufferToMultiHash(i)) + +proc mcodec*(self: (MerkleTree | MerkleProof)): MultiCodec = + self.mcodec + +proc digestSize*(self: (MerkleTree | MerkleProof)): Natural = + self.digestSize + +proc root*(self: MerkleTree): MultiHash = + let rootIndex = self.len - 1 + self.nodeBufferToMultiHash(rootIndex) + +proc rootCid*(self: MerkleTree, version = CIDv1, dataCodec = multiCodec("raw")): ?!Cid = + Cid.init(version, dataCodec, self.root).mapFailure + +iterator leaves*(self: MerkleTree): MultiHash = + for i in 0..= self.leavesCount: + return failure("Index " & $index & " out of range [0.." & $(self.leavesCount - 1) & "]" ) + + success(self.nodeBufferToMultiHash(index)) + +proc getLeafCid*(self: MerkleTree, index: Natural, version = CIDv1, dataCodec = multiCodec("raw")): ?!Cid = + let leaf = ? self.getLeaf(index) + Cid.init(version, dataCodec, leaf).mapFailure + +proc height*(self: MerkleTree): Natural = + computeTreeHeight(self.leavesCount) + +proc getProof*(self: MerkleTree, index: Natural): ?!MerkleProof = + ## Extracts proof from a tree for a given index + ## + ## Given a tree built from data blocks A, B and C + ## H5 + ## / \ + ## H3 H4 + ## / \ / + ## H0 H1 H2 + ## | | | + ## A B C + ## + ## Proofs of inclusion (index and path) are + ## - 0,[H1, H4] for data block A + ## - 1,[H0, H4] for data block B + ## - 2,[0x00, H3] for data block C + ## + if index >= self.leavesCount: + return failure("Index " & $index & " out of range [0.." 
& $(self.leavesCount - 1) & "]" )
+
+  var zero = newSeq[byte](self.digestSize)  # digest-sized buffer of zero bytes
+  var one = newSeq[byte](self.digestSize)  # digest-sized buffer whose last byte is 1
+  one[^1] = 0x01
+
+  let levels = computeLevels(self.leavesCount)
+  var proofNodesBuffer = newSeq[byte]((levels.len - 1) * self.digestSize)  # one digest slot per level except the last
+  for level in levels[0..^2]:  # every level except the last one
+    let lr = index shr level.index  # leaf index shifted right once per level
+    let siblingIndex = if lr mod 2 == 0:  # even position: sibling is the next node, odd: the previous one
+      level.offset + lr + 1
+    else:
+      level.offset + lr - 1
+
+    var dummyValue = if level.index == 0: zero else: one  # filler used when the level has no sibling node
+
+    if siblingIndex < level.offset + level.width:  # sibling lies inside this level: copy its digest
+      proofNodesBuffer[level.index * self.digestSize..<(level.index + 1) * self.digestSize] =
+        self.nodesBuffer[siblingIndex * self.digestSize..<(siblingIndex + 1) * self.digestSize]
+    else:
+      proofNodesBuffer[level.index * self.digestSize..<(level.index + 1) * self.digestSize] = dummyValue
+
+  success(MerkleProof(mcodec: self.mcodec, digestSize: self.digestSize, index: index, nodesBuffer: proofNodesBuffer))
+
+proc `$`*(self: MerkleTree): string {.noSideEffect.} =  # text form: mcodec, digestSize, leavesCount and nodes
+  "mcodec:" & $self.mcodec &
+    ", digestSize: " & $self.digestSize &
+    ", leavesCount: " & $self.leavesCount &
+    ", nodes: " & $self.nodes
+
+proc `==`*(a, b: MerkleTree): bool =  # equal when codec, digest size, leaves count and node buffer all match
+  (a.mcodec == b.mcodec) and
+    (a.digestSize == b.digestSize) and
+    (a.leavesCount == b.leavesCount) and
+    (a.nodesBuffer == b.nodesBuffer)
+
+proc init*(
+  T: type MerkleTree,
+  mcodec: MultiCodec,
+  digestSize: Natural,
+  leavesCount: Natural,
+  nodesBuffer: seq[byte]
+): ?!MerkleTree =
+  let levels = computeLevels(leavesCount)
+  let totalNodes = levels[^1].offset + 1  # node count taken from the last level's offset
+  if totalNodes * digestSize == nodesBuffer.len:  # buffer must hold exactly totalNodes digests
+    success(
+      MerkleTree(
+        mcodec: mcodec,
+        digestSize: digestSize,
+        leavesCount: leavesCount,
+        nodesBuffer: nodesBuffer
+      )
+    )
+  else:
+    failure("Expected nodesBuffer len to be " & $(totalNodes * digestSize) & " but was " & $nodesBuffer.len)
+
+proc init*(
+  T: type MerkleTree,
+  leaves: openArray[MultiHash]
+): ?!MerkleTree =
+  without leaf =?
leaves.?[0]:
+    return failure("At least one leaf is required")
+
+  var builder = ? MerkleTreeBuilder.init(mcodec = leaf.mcodec)  # builder uses the first leaf's codec
+
+  for l in leaves:
+    let res = builder.addLeaf(l)
+    if res.isErr:  # stop at the first leaf the builder rejects
+      return failure(res.error)
+
+  builder.build()
+
+proc init*(
+  T: type MerkleTree,
+  cids: openArray[Cid]
+): ?!MerkleTree =
+  var leaves = newSeq[MultiHash]()
+
+  for cid in cids:  # collect each cid's multihash, failing on the first error
+    let res = cid.mhash.mapFailure
+    if res.isErr:
+      return failure(res.error)
+    else:
+      leaves.add(res.value)
+
+  MerkleTree.init(leaves)
+
+###########################################################
+# MerkleProof
+###########################################################
+
+proc verifyLeaf*(self: MerkleProof, leaf: MultiHash, treeRoot: MultiHash): ?!bool =  # fails early on a codec mismatch
+  if leaf.mcodec != self.mcodec:
+    return failure("Leaf mcodec was " & $leaf.mcodec & ", but " & $self.mcodec & " expected")
+
+  if treeRoot.mcodec != self.mcodec:  # was a repeat of the leaf check, so the root codec was never validated
+    return failure("Tree root mcodec was " & $treeRoot.mcodec & ", but " & $self.mcodec & " expected")
+
+  var digestBuf = newSeq[byte](self.digestSize)
+  digestBuf[0..^1] = leaf.data.buffer[leaf.dpos..<(leaf.dpos + self.digestSize)]  # start from the leaf's digest bytes
+
+  let proofLen = self.nodesBuffer.len div self.digestSize  # number of digests stored in the proof
+  var concatBuf = newSeq[byte](2 * self.digestSize)
+  for i in 0..