From 52c5578c46f90462379e44845087a8c94d3316a1 Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Thu, 21 Dec 2023 00:41:43 -0600 Subject: [PATCH] Rework merkle tree (#654) * rework merkle tree support * deps * rename merkletree -> codexmerkletree * treed and proof encoding/decoding * small change to invoke proof verification * rename merkletree to codexmerkletree * style * adding codex merkle and coders tests * fixup imports * remove new codecs for now * bump deps * adding trace statement * properly serde of manifest block codecs * use default hash codec * add more trace logging to aid debugging * misc * remove double import * revert un-needded change * proof size changed * bump poseidon2 * add from nodes test * shorte file names * remove upraises * wip poseidon tree * adjust file names * misc * shorten file names * fix bad `elements` iter * don't do asserts * add fromNodes and converters * root and getProof now return result * add poseidon2 tree tests * root now returns result * misc * had to make merkletree a ref, because nim blows up otherwise * root returns a result * root returns a result * import poseidon tests * bump * merkle poseidon2 digest * misc * add merkle digest tests * bump * don't use checksuite * Update tests/codex/merkletree/generictreetests.nim Co-authored-by: markspanbroek Signed-off-by: Dmitriy Ryajov * Update codex/merkletree/merkletree.nim Co-authored-by: markspanbroek Signed-off-by: Dmitriy Ryajov * Update codex/merkletree/merkletree.nim Co-authored-by: markspanbroek Signed-off-by: Dmitriy Ryajov * Update tests/codex/merkletree/generictreetests.nim Co-authored-by: markspanbroek Signed-off-by: Dmitriy Ryajov * missing return * make toBool private (it's still needed otherwise comparison won't work) * added `digestTree` that returns a tree and `digest` for root * test against both poseidon trees - codex and poseidon2 * shorten merkle tree names * don't compare trees - it's going to be too slow * move comparison to mekrle helper * remove merkle utils --------- Signed-off-by: Dmitriy Ryajov Co-authored-by: markspanbroek --- codex/blockexchange/engine/engine.nim | 8 +- codex/blockexchange/engine/pendingblocks.nim | 2 - codex/blockexchange/peers/peercontext.nim | 1 - codex/blockexchange/protobuf/blockexc.nim | 1 - codex/blockexchange/protobuf/message.nim | 12 +- codex/blocktype.nim | 8 + codex/conf.nim | 7 +- codex/erasure/erasure.nim | 9 +- codex/manifest/coders.nim | 40 +- codex/manifest/manifest.nim | 58 +- codex/manifest/types.nim | 2 - codex/merkletree.nim | 5 +- codex/merkletree/coders.nim | 75 --- codex/merkletree/codex.nim | 4 + codex/merkletree/codex/coders.nim | 102 ++++ codex/merkletree/codex/codex.nim | 270 ++++++++++ codex/merkletree/merkletree.nim | 505 +++++------------- codex/merkletree/poseidon2.nim | 104 ++++ codex/node.nim | 2 +- codex/sales/states/cancelled.nim | 1 - codex/stores.nim | 9 +- codex/stores/blockstore.nim | 4 +- codex/stores/cachestore.nim | 10 +- codex/stores/keyutils.nim | 1 - codex/stores/networkstore.nim | 4 +- codex/stores/repostore.nim | 34 +- codex/stores/treehelper.nim | 10 +- codex/utils.nim | 9 +- codex/utils/asynciter.nim | 1 - codex/utils/digest.nim | 40 ++ codex/utils/timer.nim | 5 +- .../blockexchange/discovery/testdiscovery.nim | 2 +- tests/codex/helpers.nim | 18 +- tests/codex/merkletree/generictreetests.nim | 130 +++++ tests/codex/merkletree/helpers.nim | 31 ++ tests/codex/merkletree/testcoders.nim | 42 -- tests/codex/merkletree/testcodexcoders.nim | 48 ++ tests/codex/merkletree/testcodextree.nim | 106 ++++ tests/codex/merkletree/testmerkledigest.nim | 62 +++ tests/codex/merkletree/testmerkletree.nim | 245 --------- tests/codex/merkletree/testposeidon2tree.nim | 88 +++ tests/codex/stores/commonstoretests.nim | 2 +- tests/codex/testmerkletree.nim | 6 +- tests/integration/testIntegration.nim | 2 +- vendor/nim-libp2p | 2 +- vendor/nim-poseidon2 | 2 +- 46 files changed, 1264 insertions(+), 865 deletions(-) delete mode 100644 codex/merkletree/coders.nim create mode 100644 codex/merkletree/codex.nim create mode 100644 codex/merkletree/codex/coders.nim create mode 100644 codex/merkletree/codex/codex.nim create mode 100644 codex/merkletree/poseidon2.nim create mode 100644 codex/utils/digest.nim create mode 100644 tests/codex/merkletree/generictreetests.nim create mode 100644 tests/codex/merkletree/helpers.nim delete mode 100644 tests/codex/merkletree/testcoders.nim create mode 100644 tests/codex/merkletree/testcodexcoders.nim create mode 100644 tests/codex/merkletree/testcodextree.nim create mode 100644 tests/codex/merkletree/testmerkledigest.nim delete mode 100644 tests/codex/merkletree/testmerkletree.nim create mode 100644 tests/codex/merkletree/testposeidon2tree.nim diff --git a/codex/blockexchange/engine/engine.nim b/codex/blockexchange/engine/engine.nim index ddff3c49..23683718 100644 --- a/codex/blockexchange/engine/engine.nim +++ b/codex/blockexchange/engine/engine.nim @@ -328,11 +328,9 @@ proc validateBlockDelivery( without treeRoot =? bd.address.treeCid.mhash.mapFailure, err: return failure("Unable to get mhash from treeCid for block, nested err: " & err.msg) - without verifyOutcome =? proof.verifyLeaf(leaf, treeRoot), err: + if err =? proof.verify(leaf, treeRoot).errorOption: return failure("Unable to verify proof for block, nested err: " & err.msg) - if not verifyOutcome: - return failure("Provided inclusion proof is invalid") else: # not leaf if bd.address.cid != bd.blk.cid: return failure("Delivery cid " & $bd.address.cid & " doesn't match block cid " & $bd.blk.cid) @@ -537,12 +535,12 @@ proc taskHandler*(b: BlockExcEngine, task: BlockExcPeerCtx) {.gcsafe, async.} = trace "Handling lookup for entry", address = e.address if e.address.leaf: (await b.localStore.getBlockAndProof(e.address.treeCid, e.address.index)).map( - (blkAndProof: (Block, MerkleProof)) => + (blkAndProof: (Block, CodexProof)) => BlockDelivery(address: e.address, blk: blkAndProof[0], proof: blkAndProof[1].some) ) else: (await b.localStore.getBlock(e.address)).map( - (blk: Block) => BlockDelivery(address: e.address, blk: blk, proof: MerkleProof.none) + (blk: Block) => BlockDelivery(address: e.address, blk: blk, proof: CodexProof.none) ) let diff --git a/codex/blockexchange/engine/pendingblocks.nim b/codex/blockexchange/engine/pendingblocks.nim index c4763998..ba92da97 100644 --- a/codex/blockexchange/engine/pendingblocks.nim +++ b/codex/blockexchange/engine/pendingblocks.nim @@ -18,11 +18,9 @@ import pkg/chronicles import pkg/chronos import pkg/libp2p import pkg/metrics -import pkg/questionable/results import ../protobuf/blockexc import ../../blocktype -import ../../merkletree logScope: topics = "codex pendingblocks" diff --git a/codex/blockexchange/peers/peercontext.nim b/codex/blockexchange/peers/peercontext.nim index 66418ddd..556de434 100644 --- a/codex/blockexchange/peers/peercontext.nim +++ b/codex/blockexchange/peers/peercontext.nim @@ -9,7 +9,6 @@ import std/sequtils import std/tables -import std/sugar import std/sets import pkg/chronicles diff --git a/codex/blockexchange/protobuf/blockexc.nim b/codex/blockexchange/protobuf/blockexc.nim index d511ea82..12049853 100644 --- a/codex/blockexchange/protobuf/blockexc.nim +++ b/codex/blockexchange/protobuf/blockexc.nim @@ -9,7 +9,6 @@ import std/hashes import std/sequtils -import pkg/libp2p import pkg/stew/endians2 import message diff --git a/codex/blockexchange/protobuf/message.nim b/codex/blockexchange/protobuf/message.nim index ffec0fcf..722bc016 100644 --- a/codex/blockexchange/protobuf/message.nim +++ b/codex/blockexchange/protobuf/message.nim @@ -37,7 +37,7 @@ type BlockDelivery* = object blk*: Block address*: BlockAddress - proof*: ?MerkleProof # Present only if `address.leaf` is true + proof*: ?CodexProof # Present only if `address.leaf` is true BlockPresenceType* = enum Have = 0, @@ -152,7 +152,7 @@ proc decode*(_: type BlockAddress, pb: ProtoBuffer): ProtoResult[BlockAddress] = if ? pb.getField(1, field): leaf = bool(field) - + if leaf: var treeCid: Cid @@ -215,16 +215,16 @@ proc decode*(_: type BlockDelivery, pb: ProtoBuffer): ProtoResult[BlockDelivery] value.blk = ? Block.new(cid, dataBuf, verify = true).mapErr(x => ProtoError.IncorrectBlob) if ? pb.getField(3, ipb): value.address = ? BlockAddress.decode(ipb) - + if value.address.leaf: var proofBuf = newSeq[byte]() if ? pb.getField(4, proofBuf): - let proof = ? MerkleProof.decode(proofBuf).mapErr(x => ProtoError.IncorrectBlob) + let proof = ? CodexProof.decode(proofBuf).mapErr(x => ProtoError.IncorrectBlob) value.proof = proof.some else: - value.proof = MerkleProof.none + value.proof = CodexProof.none else: - value.proof = MerkleProof.none + value.proof = CodexProof.none ok(value) diff --git a/codex/blocktype.nim b/codex/blocktype.nim index a26a3157..4c4e500e 100644 --- a/codex/blocktype.nim +++ b/codex/blocktype.nim @@ -34,6 +34,14 @@ const # should be divisible by 31 for PoR and by 64 for Leopard ECC DefaultBlockSize* = NBytes 31 * 64 * 33 + # hashes + Sha256Hash* = multiCodec("sha2-256") + + # CIDs + Raw = multiCodec("raw") + DagPB* = multiCodec("dag-pb") + DagJson* = multiCodec("dag-json") + type Block* = ref object of RootObj cid*: Cid diff --git a/codex/conf.nim b/codex/conf.nim index 4a0e70f4..e693a4f4 100644 --- a/codex/conf.nim +++ b/codex/conf.nim @@ -40,7 +40,12 @@ import ./units import ./utils export units -export net, DefaultQuotaBytes, DefaultBlockTtl, DefaultBlockMaintenanceInterval, DefaultNumberOfBlocksToMaintainPerInterval +export net +export + DefaultQuotaBytes, + DefaultBlockTtl, + DefaultBlockMaintenanceInterval, + DefaultNumberOfBlocksToMaintainPerInterval const codex_enable_api_debug_peers* {.booldefine.} = false diff --git a/codex/erasure/erasure.nim b/codex/erasure/erasure.nim index 1bab089a..79cab956 100644 --- a/codex/erasure/erasure.nim +++ b/codex/erasure/erasure.nim @@ -97,7 +97,9 @@ proc getPendingBlocks( var # request blocks from the store pendingBlocks = indicies.map( (i: int) => - self.store.getBlock(BlockAddress.init(manifest.treeCid, i)).map((r: ?!bt.Block) => (r, i)) # Get the data blocks (first K) + self.store.getBlock( + BlockAddress.init(manifest.treeCid, i) + ).map((r: ?!bt.Block) => (r, i)) # Get the data blocks (first K) ) proc isFinished(): bool = pendingBlocks.len == 0 @@ -291,7 +293,7 @@ proc encodeData( return failure("Unable to store block!") idx.inc(params.steps) - without tree =? MerkleTree.init(cids[]), err: + without tree =? CodexTree.init(cids[]), err: return failure(err) without treeCid =? tree.rootCid, err: @@ -308,6 +310,7 @@ proc encodeData( ecM = params.ecM ) + trace "Encoded data successfully", treeCid, blocksCount = params.blocksCount return encodedManifest.success except CancelledError as exc: trace "Erasure coding encoding cancelled" @@ -415,7 +418,7 @@ proc decode*( finally: decoder.release() - without tree =? MerkleTree.init(cids[0.. 0 @@ -155,23 +173,21 @@ proc decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest = treeCid = treeCid, datasetSize = datasetSize.NBytes, blockSize = blockSize.NBytes, - version = treeCid.cidver, - hcodec = (? treeCid.mhash.mapFailure).mcodec, - codec = treeCid.mcodec, + version = CidVersion(version), + hcodec = hcodec.MultiCodec, + codec = codec.MultiCodec, ecK = ecK.int, ecM = ecM.int, originalTreeCid = ? Cid.init(originalTreeCid).mapFailure, - originalDatasetSize = originalDatasetSize.NBytes - ) + originalDatasetSize = originalDatasetSize.NBytes) else: Manifest.new( treeCid = treeCid, datasetSize = datasetSize.NBytes, blockSize = blockSize.NBytes, - version = treeCid.cidver, - hcodec = (? treeCid.mhash.mapFailure).mcodec, - codec = treeCid.mcodec - ) + version = CidVersion(version), + hcodec = hcodec.MultiCodec, + codec = codec.MultiCodec) ? self.verify() diff --git a/codex/manifest/manifest.nim b/codex/manifest/manifest.nim index 8f8df55c..203cc2bc 100644 --- a/codex/manifest/manifest.nim +++ b/codex/manifest/manifest.nim @@ -33,9 +33,9 @@ type treeCid {.serialize.}: Cid # Root of the merkle tree datasetSize {.serialize.}: NBytes # Total size of all blocks blockSize {.serialize.}: NBytes # Size of each contained block (might not be needed if blocks are len-prefixed) - version: CidVersion # Cid version + codec: MultiCodec # Dataset codec hcodec: MultiCodec # Multihash codec - codec: MultiCodec # Data set codec + version: CidVersion # Cid version case protected {.serialize.}: bool # Protected datasets have erasure coded info of true: ecK: int # Number of blocks to encode @@ -194,15 +194,14 @@ proc `$`*(self: Manifest): string = ############################################################ proc new*( - T: type Manifest, - treeCid: Cid, - blockSize: NBytes, - datasetSize: NBytes, - version: CidVersion = CIDv1, - hcodec = multiCodec("sha2-256"), - codec = multiCodec("raw"), - protected = false, -): Manifest = + T: type Manifest, + treeCid: Cid, + blockSize: NBytes, + datasetSize: NBytes, + version: CidVersion = CIDv1, + hcodec = multiCodec("sha2-256"), + codec = multiCodec("raw"), + protected = false): Manifest = T( treeCid: treeCid, @@ -214,15 +213,15 @@ proc new*( protected: protected) proc new*( - T: type Manifest, - manifest: Manifest, - treeCid: Cid, - datasetSize: NBytes, - ecK, ecM: int -): Manifest = + T: type Manifest, + manifest: Manifest, + treeCid: Cid, + datasetSize: NBytes, + ecK, ecM: int): Manifest = ## Create an erasure protected dataset from an ## unprotected one ## + Manifest( treeCid: treeCid, datasetSize: datasetSize, @@ -236,9 +235,8 @@ proc new*( originalDatasetSize: manifest.datasetSize) proc new*( - T: type Manifest, - manifest: Manifest -): Manifest = + T: type Manifest, + manifest: Manifest): Manifest = ## Create an unprotected dataset from an ## erasure protected one ## @@ -254,10 +252,10 @@ proc new*( proc new*( T: type Manifest, data: openArray[byte], - decoder = ManifestContainers[$DagPBCodec] -): ?!Manifest = + decoder = ManifestContainers[$DagPBCodec]): ?!Manifest = ## Create a manifest instance from given data ## + Manifest.decode(data, decoder) proc new*( @@ -271,8 +269,8 @@ proc new*( ecK: int, ecM: int, originalTreeCid: Cid, - originalDatasetSize: NBytes -): Manifest = + originalDatasetSize: NBytes): Manifest = + Manifest( treeCid: treeCid, datasetSize: datasetSize, @@ -288,11 +286,10 @@ proc new*( ) proc new*( - T: type Manifest, - manifest: Manifest, - verificationRoot: Cid, - slotRoots: seq[Cid] -): ?!Manifest = + T: type Manifest, + manifest: Manifest, + verificationRoot: Cid, + slotRoots: seq[Cid]): ?!Manifest = ## Create a verifiable dataset from an ## protected one ## @@ -313,5 +310,4 @@ proc new*( originalDatasetSize: manifest.originalDatasetSize, verifiable: true, verificationRoot: verificationRoot, - slotRoots: slotRoots - )) + slotRoots: slotRoots)) diff --git a/codex/manifest/types.nim b/codex/manifest/types.nim index 613de2d7..ef4c464e 100644 --- a/codex/manifest/types.nim +++ b/codex/manifest/types.nim @@ -10,14 +10,12 @@ # This module defines Manifest and all related types import std/tables -import std/strutils import pkg/libp2p import ../units export units const - BlockCodec* = multiCodec("raw") DagPBCodec* = multiCodec("dag-pb") type diff --git a/codex/merkletree.nim b/codex/merkletree.nim index 366af992..d7f6b1d7 100644 --- a/codex/merkletree.nim +++ b/codex/merkletree.nim @@ -1,4 +1,5 @@ import ./merkletree/merkletree -import ./merkletree/coders +import ./merkletree/codex +import ./merkletree/poseidon2 -export merkletree, coders +export codex, poseidon2, merkletree diff --git a/codex/merkletree/coders.nim b/codex/merkletree/coders.nim deleted file mode 100644 index 2d9fc86d..00000000 --- a/codex/merkletree/coders.nim +++ /dev/null @@ -1,75 +0,0 @@ -## Nim-Codex -## Copyright (c) 2023 Status Research & Development GmbH -## Licensed under either of -## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) -## * MIT license ([LICENSE-MIT](LICENSE-MIT)) -## at your option. -## This file may not be copied, modified, or distributed except according to -## those terms. - -import pkg/libp2p -import pkg/questionable -import pkg/questionable/results - -import ./merkletree -import ../units -import ../errors - -const MaxMerkleTreeSize = 100.MiBs.uint -const MaxMerkleProofSize = 1.MiBs.uint - -proc encode*(self: MerkleTree): seq[byte] = - var pb = initProtoBuffer(maxSize = MaxMerkleTreeSize) - pb.write(1, self.mcodec.uint64) - pb.write(2, self.digestSize.uint64) - pb.write(3, self.leavesCount.uint64) - pb.write(4, self.nodesBuffer) - pb.finish - pb.buffer - -proc decode*(_: type MerkleTree, data: seq[byte]): ?!MerkleTree = - var pb = initProtoBuffer(data, maxSize = MaxMerkleTreeSize) - var mcodecCode: uint64 - var digestSize: uint64 - var leavesCount: uint64 - discard ? pb.getField(1, mcodecCode).mapFailure - discard ? pb.getField(2, digestSize).mapFailure - discard ? pb.getField(3, leavesCount).mapFailure - - let mcodec = MultiCodec.codec(cast[int](mcodecCode)) - if mcodec == InvalidMultiCodec: - return failure("Invalid MultiCodec code " & $cast[int](mcodec)) - - var nodesBuffer = newSeq[byte]() - discard ? pb.getField(4, nodesBuffer).mapFailure - - let tree = ? MerkleTree.init(mcodec, digestSize, leavesCount, nodesBuffer) - success(tree) - -proc encode*(self: MerkleProof): seq[byte] = - var pb = initProtoBuffer(maxSize = MaxMerkleProofSize) - pb.write(1, self.mcodec.uint64) - pb.write(2, self.digestSize.uint64) - pb.write(3, self.index.uint64) - pb.write(4, self.nodesBuffer) - pb.finish - pb.buffer - -proc decode*(_: type MerkleProof, data: seq[byte]): ?!MerkleProof = - var pb = initProtoBuffer(data, maxSize = MaxMerkleProofSize) - var mcodecCode: uint64 - var digestSize: uint64 - var index: uint64 - discard ? pb.getField(1, mcodecCode).mapFailure - discard ? pb.getField(2, digestSize).mapFailure - discard ? pb.getField(3, index).mapFailure - - let mcodec = MultiCodec.codec(cast[int](mcodecCode)) - if mcodec == InvalidMultiCodec: - return failure("Invalid MultiCodec code " & $cast[int](mcodec)) - - var nodesBuffer = newSeq[byte]() - discard ? pb.getField(4, nodesBuffer).mapFailure - - let proof = ? MerkleProof.init(mcodec, digestSize, index, nodesBuffer) - success(proof) \ No newline at end of file diff --git a/codex/merkletree/codex.nim b/codex/merkletree/codex.nim new file mode 100644 index 00000000..0e9a5874 --- /dev/null +++ b/codex/merkletree/codex.nim @@ -0,0 +1,4 @@ +import ./codex/codex +import ./codex/coders + +export codex, coders diff --git a/codex/merkletree/codex/coders.nim b/codex/merkletree/codex/coders.nim new file mode 100644 index 00000000..62e4f75b --- /dev/null +++ b/codex/merkletree/codex/coders.nim @@ -0,0 +1,102 @@ +## Nim-Codex +## Copyright (c) 2023 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +import pkg/upraises + +push: {.upraises: [].} + +import pkg/libp2p +import pkg/questionable +import pkg/questionable/results + +import ../../units +import ../../errors + +import ./codex + +const MaxMerkleTreeSize = 100.MiBs.uint +const MaxMerkleProofSize = 1.MiBs.uint + +proc encode*(self: CodexTree): seq[byte] = + var pb = initProtoBuffer(maxSize = MaxMerkleTreeSize) + pb.write(1, self.mcodec.uint64) + pb.write(2, self.leavesCount.uint64) + for node in self.nodes: + var nodesPb = initProtoBuffer(maxSize = MaxMerkleTreeSize) + nodesPb.write(1, node) + nodesPb.finish() + pb.write(3, nodesPb) + + pb.finish + pb.buffer + +proc decode*(_: type CodexTree, data: seq[byte]): ?!CodexTree = + var pb = initProtoBuffer(data, maxSize = MaxMerkleTreeSize) + var mcodecCode: uint64 + var leavesCount: uint64 + discard ? pb.getField(1, mcodecCode).mapFailure + discard ? pb.getField(2, leavesCount).mapFailure + + let mcodec = MultiCodec.codec(mcodecCode.int) + if mcodec == InvalidMultiCodec: + return failure("Invalid MultiCodec code " & $mcodecCode) + + var + nodesBuff: seq[seq[byte]] + nodes: seq[ByteHash] + + if ? pb.getRepeatedField(3, nodesBuff).mapFailure: + for nodeBuff in nodesBuff: + var node: ByteHash + discard ? initProtoBuffer(nodeBuff).getField(1, node).mapFailure + nodes.add node + + CodexTree.fromNodes(mcodec, nodes, leavesCount.int) + +proc encode*(self: CodexProof): seq[byte] = + var pb = initProtoBuffer(maxSize = MaxMerkleProofSize) + pb.write(1, self.mcodec.uint64) + pb.write(2, self.index.uint64) + pb.write(3, self.nleaves.uint64) + + for node in self.path: + var nodesPb = initProtoBuffer(maxSize = MaxMerkleTreeSize) + nodesPb.write(1, node) + nodesPb.finish() + pb.write(4, nodesPb) + + pb.finish + pb.buffer + +proc decode*(_: type CodexProof, data: seq[byte]): ?!CodexProof = + var pb = initProtoBuffer(data, maxSize = MaxMerkleProofSize) + var mcodecCode: uint64 + var index: uint64 + var nleaves: uint64 + discard ? pb.getField(1, mcodecCode).mapFailure + + let mcodec = MultiCodec.codec(mcodecCode.int) + if mcodec == InvalidMultiCodec: + return failure("Invalid MultiCodec code " & $mcodecCode) + + discard ? pb.getField(2, index).mapFailure + discard ? pb.getField(3, nleaves).mapFailure + + var + nodesBuff: seq[seq[byte]] + nodes: seq[ByteHash] + + if ? pb.getRepeatedField(4, nodesBuff).mapFailure: + for nodeBuff in nodesBuff: + var node: ByteHash + let nodePb = initProtoBuffer(nodeBuff) + discard ? nodePb.getField(1, node).mapFailure + nodes.add node + + CodexProof.init(mcodec, index.int, nleaves.int, nodes) diff --git a/codex/merkletree/codex/codex.nim b/codex/merkletree/codex/codex.nim new file mode 100644 index 00000000..4266fa44 --- /dev/null +++ b/codex/merkletree/codex/codex.nim @@ -0,0 +1,270 @@ +## Nim-Codex +## Copyright (c) 2023 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +import pkg/upraises + +push: {.upraises: [].} + +import std/bitops +import std/sequtils + +import pkg/questionable +import pkg/questionable/results +import pkg/libp2p/[cid, multicodec, multihash] +import pkg/stew/byteutils + +import ../../utils +import ../../rng +import ../../errors +import ../../blocktype + +import ../merkletree + +export merkletree + +logScope: + topics = "codex merkletree" + +const + DatasetRootCodec* = multiCodec("codex-root") # TODO: move to blocktype + BlockCodec* = multiCodec("raw") # TODO: fix multicodec to `codex-block` and move to blocktype + +type + ByteTreeKey* {.pure.} = enum + KeyNone = 0x0.byte + KeyBottomLayer = 0x1.byte + KeyOdd = 0x2.byte + KeyOddAndBottomLayer = 0x3.byte + + ByteHash* = seq[byte] + ByteTree* = MerkleTree[ByteHash, ByteTreeKey] + ByteTreeProof* = MerkleProof[ByteHash, ByteTreeKey] + + CodexTree* = ref object of ByteTree + mhash: MHash + + CodexProof* = ref object of ByteTreeProof + mhash: MHash + +func getMhash*(mcodec: MultiCodec): ?!MHash = + let + mhash = CodeHashes.getOrDefault(mcodec) + + if isNil(mhash.coder): + return failure "Invalid multihash codec" + + success mhash + +func digestSize*(self: (CodexTree or CodexProof)): int = + ## Number of leaves + ## + + self.mhash.size + +func mcodec*(self: (CodexTree or CodexProof)): MultiCodec = + ## Multicodec + ## + + self.mhash.mcodec + +func bytes*(mhash: MultiHash): seq[byte] = + ## Extract hash bytes + ## + + mhash.data.buffer[mhash.dpos..= self.leavesCount: + return failure "Invalid leaf index " & $i + + let + leaf = self.leaves[i] + mhash = ? MultiHash.init($self.mcodec, leaf).mapFailure + + Cid.init(version, dataCodec, mhash).mapFailure + +proc `$`*(self: CodexTree): string = + "CodexTree( mcodec: " & + $self.mcodec & + ", leavesCount: " & + $self.leavesCount & " )" + +proc `$`*(self: CodexProof): string = + "CodexProof( mcodec: " & + $self.mcodec & ", nleaves: " & + $self.nleaves & ", index: " & + $self.index & " )" + +func compress*( + x, y: openArray[byte], + key: ByteTreeKey, + mhash: MHash): ?!ByteHash = + ## Compress two hashes + ## + + var digest = newSeq[byte](mhash.size) + mhash.coder(@x & @y & @[ key.byte ], digest) + success digest + +func init*( + _: type CodexTree, + mcodec: MultiCodec = multiCodec("sha2-256"), + leaves: openArray[ByteHash]): ?!CodexTree = + + if leaves.len == 0: + return failure "Empty leaves" + + let + mhash = ? mcodec.getMhash() + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mhash) + Zero: ByteHash = newSeq[byte](mhash.size) + + if mhash.size != leaves[0].len: + return failure "Invalid hash length" + + var + self = CodexTree(mhash: mhash, compress: compressor, zero: Zero) + + self.layers = ? merkleTreeWorker(self, leaves, isBottomLayer = true) + success self + +func init*( + _: type CodexTree, + leaves: openArray[MultiHash]): ?!CodexTree = + + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = leaves[0].mcodec + leaves = leaves.mapIt( it.bytes ) + + CodexTree.init(mcodec, leaves) + +func init*( + _: type CodexTree, + leaves: openArray[Cid]): ?!CodexTree = + if leaves.len == 0: + return failure "Empty leaves" + + let + mcodec = (? leaves[0].mhash.mapFailure).mcodec + leaves = leaves.mapIt( (? it.mhash.mapFailure).bytes ) + + CodexTree.init(mcodec, leaves) + +proc fromNodes*( + _: type CodexTree, + mcodec: MultiCodec = multiCodec("sha2-256"), + nodes: openArray[ByteHash], + nleaves: int): ?!CodexTree = + + if nodes.len == 0: + return failure "Empty nodes" + + let + mhash = ? mcodec.getMhash() + Zero = newSeq[byte](mhash.size) + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!ByteHash {.noSideEffect.} = + compress(x, y, key, mhash) + + if mhash.size != nodes[0].len: + return failure "Invalid hash length" + + var + self = CodexTree(compress: compressor, zero: Zero, mhash: mhash) + layer = nleaves + pos = 0 + + while pos < nodes.len: + self.layers.add( nodes[pos..<(pos + layer)] ) + pos += layer + layer = divUp(layer, 2) + + let + index = Rng.instance.rand(nleaves - 1) + proof = ? self.getProof(index) + + ? proof.verify(self.leaves[index], ? self.root) # sanity check + success self + +func init*( + _: type CodexProof, + mcodec: MultiCodec = multiCodec("sha2-256"), + index: int, + nleaves: int, + nodes: openArray[ByteHash]): ?!CodexProof = + + if nodes.len == 0: + return failure "Empty nodes" + + let + mhash = ? mcodec.getMhash() + Zero = newSeq[byte](mhash.size) + compressor = proc(x, y: seq[byte], key: ByteTreeKey): ?!seq[byte] {.noSideEffect.} = + compress(x, y, key, mhash) + + + success CodexProof( + compress: compressor, + zero: Zero, + mhash: mhash, + index: index, + nleaves: nleaves, + path: @nodes) diff --git a/codex/merkletree/merkletree.nim b/codex/merkletree/merkletree.nim index 74ddba6d..46e53ba5 100644 --- a/codex/merkletree/merkletree.nim +++ b/codex/merkletree/merkletree.nim @@ -7,412 +7,157 @@ ## This file may not be copied, modified, or distributed except according to ## those terms. -import std/math -import std/bitops -import std/sequtils -import std/sugar -import std/algorithm +{.push raises: [].} + +import std/bitops -import pkg/chronicles -import pkg/questionable import pkg/questionable/results -import pkg/nimcrypto/sha2 -import pkg/libp2p/[cid, multicodec, multihash, vbuffer] -import pkg/stew/byteutils import ../errors -logScope: - topics = "codex merkletree" - type - MerkleTree* = object - mcodec: MultiCodec - digestSize: Natural - leavesCount: Natural - nodesBuffer*: seq[byte] - MerkleProof* = object - mcodec: MultiCodec - digestSize: Natural - index: Natural - nodesBuffer*: seq[byte] - MerkleTreeBuilder* = object - mcodec: MultiCodec - digestSize: Natural - buffer: seq[byte] + PutFn*[H] = proc(i: Natural, x: H): ?!void {.noSideEffect, raises: [].} + GetFn*[H] = proc(i: Natural): ?!H {.noSideEffect, raises: [].} -########################################################### -# Helper functions -########################################################### + StoreBackend*[H] = object + put: PutFn[H] + get: GetFn[H] -func computeTreeHeight(leavesCount: int): int = - if isPowerOfTwo(leavesCount): - fastLog2(leavesCount) + 1 - else: - fastLog2(leavesCount) + 2 + CompressFn*[H, K] = proc (x, y: H, key: K): ?!H {.noSideEffect, raises: [].} -func computeLevels(leavesCount: int): seq[tuple[offset: int, width: int, index: int]] = - let height = computeTreeHeight(leavesCount) - var levels = newSeq[tuple[offset: int, width: int, index: int]](height) + MerkleTree*[H, K] = ref object of RootObj + layers* : seq[seq[H]] + compress*: CompressFn[H, K] + zero* : H - levels[0].offset = 0 - levels[0].width = leavesCount - levels[0].index = 0 - for i in 1..= 0 and index < nleaves): + return failure "index out of bounds" + + var path : seq[H] = newSeq[H](depth) + var k = index + var m = nleaves + for i in 0.. dst.len: - return failure("Not enough space in a destination buffer") - dst[dstPos.. odd node + h = ? proof.compress( h, p, K(bottomFlag.ord + 2) ) else: - concatBuf[digestSize..^1] = dummyValue + # even node + h = ? proof.compress( h , p, bottomFlag ) + bottomFlag = K.KeyNone + j = j shr 1 + m = (m+1) shr 1 - ? digestFn(mcodec, tree.nodesBuffer, parentIndex * digestSize, concatBuf) - prevLevel = level + return success h - return success(tree) - -########################################################### -# MerkleTree -########################################################### - -proc nodeBufferToMultiHash(self: (MerkleTree | MerkleProof), index: int): MultiHash = - var buf = newSeq[byte](self.digestSize) - let offset = index * self.digestSize - buf[0..^1] = self.nodesBuffer[offset..<(offset + self.digestSize)] - - {.noSideEffect.}: - without mhash =? MultiHash.init($self.mcodec, buf).mapFailure, errx: - error "Error converting bytes to hash", msg = errx.msg - mhash - -proc len*(self: (MerkleTree | MerkleProof)): Natural = - self.nodesBuffer.len div self.digestSize - -proc nodes*(self: (MerkleTree | MerkleProof)): seq[MultiHash] {.noSideEffect.} = - toSeq(0.. self.nodeBufferToMultiHash(i)) - -proc mcodec*(self: (MerkleTree | MerkleProof)): MultiCodec = - self.mcodec - -proc digestSize*(self: (MerkleTree | MerkleProof)): Natural = - self.digestSize - -proc root*(self: MerkleTree): MultiHash = - let rootIndex = self.len - 1 - self.nodeBufferToMultiHash(rootIndex) - -proc rootCid*(self: MerkleTree, version = CIDv1, dataCodec = multiCodec("raw")): ?!Cid = - Cid.init(version, dataCodec, self.root).mapFailure - -iterator leaves*(self: MerkleTree): MultiHash = - for i in 0..= self.leavesCount: - return failure("Index " & $index & " out of range [0.." & $(self.leavesCount - 1) & "]" ) - - success(self.nodeBufferToMultiHash(index)) - -proc getLeafCid*(self: MerkleTree, index: Natural, version = CIDv1, dataCodec = multiCodec("raw")): ?!Cid = - let leaf = ? self.getLeaf(index) - Cid.init(version, dataCodec, leaf).mapFailure - -proc height*(self: MerkleTree): Natural = - computeTreeHeight(self.leavesCount) - -proc getProof*(self: MerkleTree, index: Natural): ?!MerkleProof = - ## Extracts proof from a tree for a given index - ## - ## Given a tree built from data blocks A, B and C - ## H5 - ## / \ - ## H3 H4 - ## / \ / - ## H0 H1 H2 - ## | | | - ## A B C - ## - ## Proofs of inclusion (index and path) are - ## - 0,[H1, H4] for data block A - ## - 1,[H0, H4] for data block B - ## - 2,[0x00, H3] for data block C - ## - if index >= self.leavesCount: - return failure("Index " & $index & " out of range [0.." & $(self.leavesCount - 1) & "]" ) - - var zero = newSeq[byte](self.digestSize) - var one = newSeq[byte](self.digestSize) - one[^1] = 0x01 - - let levels = computeLevels(self.leavesCount) - var proofNodesBuffer = newSeq[byte]((levels.len - 1) * self.digestSize) - for level in levels[0..^2]: - let lr = index shr level.index - let siblingIndex = if lr mod 2 == 0: - level.offset + lr + 1 +func verify*[H, K](proof: MerkleProof[H, K], leaf: H, root: H): ?!void = + return if bool(root == ? proof.reconstructRoot(leaf)): + success() else: - level.offset + lr - 1 + failure("invalid proof") - var dummyValue = if level.index == 0: zero else: one +func merkleTreeWorker*[H, K]( + self: MerkleTree[H, K], + xs: openArray[H], + isBottomLayer: static bool): ?!seq[seq[H]] = - if siblingIndex < level.offset + level.width: - proofNodesBuffer[level.index * self.digestSize..<(level.index + 1) * self.digestSize] = - self.nodesBuffer[siblingIndex * self.digestSize..<(siblingIndex + 1) * self.digestSize] - else: - proofNodesBuffer[level.index * self.digestSize..<(level.index + 1) * self.digestSize] = dummyValue + let a = low(xs) + let b = high(xs) + let m = b - a + 1 - success(MerkleProof(mcodec: self.mcodec, digestSize: self.digestSize, index: index, nodesBuffer: proofNodesBuffer)) + when not isBottomLayer: + if m == 1: + return success @[ @xs ] -proc `$`*(self: MerkleTree): string {.noSideEffect.} = - "mcodec:" & $self.mcodec & - ", digestSize: " & $self.digestSize & - ", leavesCount: " & $self.leavesCount & - ", nodes: " & $self.nodes + let halfn: int = m div 2 + let n : int = 2 * halfn + let isOdd: bool = (n != m) -proc `==`*(a, b: MerkleTree): bool = - (a.mcodec == b.mcodec) and - (a.digestSize == b.digestSize) and - (a.leavesCount == b.leavesCount) and - (a.nodesBuffer == b.nodesBuffer) - -proc init*( - T: type MerkleTree, - mcodec: MultiCodec, - digestSize: Natural, - leavesCount: Natural, - nodesBuffer: seq[byte] -): ?!MerkleTree = - let levels = computeLevels(leavesCount) - let totalNodes = levels[^1].offset + 1 - if totalNodes * digestSize == nodesBuffer.len: - success( - MerkleTree( - mcodec: mcodec, - digestSize: digestSize, - leavesCount: leavesCount, - nodesBuffer: nodesBuffer - ) - ) + var ys: seq[H] + if not isOdd: + ys = newSeq[H](halfn) else: - failure("Expected nodesBuffer len to be " & $(totalNodes * digestSize) & " but was " & $nodesBuffer.len) + ys = newSeq[H](halfn + 1) -proc init*( - T: type MerkleTree, - leaves: openArray[MultiHash] -): ?!MerkleTree = - without leaf =? leaves.?[0]: - return failure("At least one leaf is required") + for i in 0.. k / $index) - \ No newline at end of file diff --git a/codex/stores/networkstore.nim b/codex/stores/networkstore.nim index 6da1465c..f12e252f 100644 --- a/codex/stores/networkstore.nim +++ b/codex/stores/networkstore.nim @@ -11,8 +11,6 @@ import pkg/upraises push: {.upraises: [].} -import std/sugar - import pkg/chronicles import pkg/chronos import pkg/libp2p @@ -87,7 +85,7 @@ method putBlockCidAndProof*( treeCid: Cid, index: Natural, blockCid: Cid, - proof: MerkleProof): Future[?!void] = + proof: CodexProof): Future[?!void] = self.localStore.putBlockCidAndProof(treeCid, index, blockCid, proof) method ensureExpiry*( diff --git a/codex/stores/repostore.nim b/codex/stores/repostore.nim index db53ae6f..c91263b2 100644 --- a/codex/stores/repostore.nim +++ b/codex/stores/repostore.nim @@ -77,7 +77,7 @@ func available*(self: RepoStore): uint = func available*(self: RepoStore, bytes: uint): bool = return bytes < self.available() -proc encode(cidAndProof: (Cid, MerkleProof)): seq[byte] = +proc encode(cidAndProof: (Cid, CodexProof)): seq[byte] = ## Encodes a tuple of cid and merkle proof in a following format: ## | 8-bytes | n-bytes | remaining bytes | ## | n | cid | proof | @@ -93,14 +93,14 @@ proc encode(cidAndProof: (Cid, MerkleProof)): seq[byte] = @nBytes & cidBytes & proofBytes -proc decode(_: type (Cid, MerkleProof), data: seq[byte]): ?!(Cid, MerkleProof) = +proc decode(_: type (Cid, CodexProof), data: seq[byte]): ?!(Cid, CodexProof) = let n = uint64.fromBytesBE(data[0.. i.ord)) -proc putAllProofs*(store: BlockStore, tree: MerkleTree): Future[?!void] = +proc putAllProofs*(store: BlockStore, tree: CodexTree): Future[?!void] = store.putSomeProofs(tree, Iter.fromSlice(0..