From 5f48de6a44005e80e4c155e600acae3b16c5ceca Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Wed, 12 Jan 2022 19:55:51 -0600 Subject: [PATCH] cleanup manifest (#44) * cleanup manifest * Attach decode to `Manifest` * move manifest container to its own file * rebase main * update header license year --- dagger/blocksmanifest.nim | 158 ++++++++++++++++++++++++++++++ dagger/manifest.nim | 178 +++++----------------------------- dagger/node.nim | 13 ++- tests/dagger/testmanifest.nim | 10 +- tests/dagger/testnode.nim | 4 +- 5 files changed, 196 insertions(+), 167 deletions(-) create mode 100644 dagger/blocksmanifest.nim diff --git a/dagger/blocksmanifest.nim b/dagger/blocksmanifest.nim new file mode 100644 index 00000000..dfd6ee06 --- /dev/null +++ b/dagger/blocksmanifest.nim @@ -0,0 +1,158 @@ +## Nim-Dagger +## Copyright (c) 2022 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. 
+ +{.push raises: [Defect].} + +import std/tables + +import pkg/libp2p +import pkg/questionable +import pkg/questionable/results +import pkg/chronicles +import pkg/chronos + +import ./manifest +import ./errors + +export manifest + +const + ManifestCodec* = multiCodec("dag-pb") + +var + emptyDigests {.threadvar.}: array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]] + once {.threadvar.}: bool + +template EmptyDigests: untyped = + if not once: + emptyDigests = [ + CIDv0: { + multiCodec("sha2-256"): Cid + .init("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku") + .get() + .mhash + .get() + }.toTable, + CIDv1: { + multiCodec("sha2-256"): Cid.init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n") + .get() + .mhash + .get() + }.toTable, + ] + + once = true + emptyDigests + +type + BlocksManifest* = object + manifest: Manifest + version*: CidVersion + hcodec*: MultiCodec + codec*: MultiCodec + +proc len*(b: BlocksManifest): int = b.manifest.blocks.len + +iterator items*(b: BlocksManifest): Cid = + for b in b.manifest.blocks: + yield b + +template hashBytes(mh: MultiHash): seq[byte] = + ## get the hash bytes of a multihash object + ## + + mh.data.buffer[mh.dpos..(mh.dpos + mh.size - 1)] + +proc cid*(b: var BlocksManifest): ?!Cid = + ## Generate a root hash using the treehash algorithm + ## + + if htree =? b.manifest.cid: + return htree.success + + var + stack: seq[MultiHash] + + for cid in b.manifest.blocks: + stack.add(? cid.mhash.mapFailure) + + while stack.len > 1: + let + (b1, b2) = (stack.pop(), stack.pop()) + mh = ? MultiHash.digest( + $b.hcodec, + (b1.hashBytes() & b2.hashBytes())) + .mapFailure + stack.add(mh) + + if stack.len == 1: + let cid = ? Cid.init( + b.version, + b.codec, + (? 
EmptyDigests[b.version][b.hcodec].catch)) + .mapFailure + + b.manifest.cid = cid.some + return cid.success + +proc put*(b: var BlocksManifest, cid: Cid) = + b.manifest.cid = Cid.none + trace "Adding cid to manifest", cid + b.manifest.blocks.add(cid) + +proc contains*(b: BlocksManifest, cid: Cid): bool = + cid in b.manifest.blocks + +proc encode*(b: var BlocksManifest): ?!seq[byte] = + if b.manifest.cid.isNone: + b.manifest.cid = (? b.cid).some + + b.manifest.encode() + +proc init*( + T: type BlocksManifest, + blocks: openArray[Cid] = [], + version = CIDv1, + hcodec = multiCodec("sha2-256"), + codec = multiCodec("raw")): ?!T = + ## Create a manifest using array of `Cid`s + ## + + if hcodec notin EmptyDigests[version]: + return failure("Unsupported manifest hash codec!") + + T( + manifest: Manifest(blocks: @blocks), + version: version, + codec: codec, + hcodec: hcodec, + ).success + +proc init*( + T: type BlocksManifest, + data: openArray[byte]): ?!T = + ## Create manifest from a raw data blob + ## (in dag-pb for for now) + ## + + let + manifest = ? Manifest.decode(data) + cid = !manifest.cid + mhash = ? cid.mhash.mapFailure + + var blockManifest = ? BlocksManifest.init( + manifest.blocks, + cid.version, + mhash.mcodec, + cid.mcodec) + + if cid != ? blockManifest.cid: + return failure("Decoded content hash doesn't match!") + + blockManifest.success diff --git a/dagger/manifest.nim b/dagger/manifest.nim index bdcce58a..e032fbd0 100644 --- a/dagger/manifest.nim +++ b/dagger/manifest.nim @@ -1,5 +1,5 @@ ## Nim-Dagger -## Copyright (c) 2021 Status Research & Development GmbH +## Copyright (c) 2022 Status Research & Development GmbH ## Licensed under either of ## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) ## * MIT license ([LICENSE-MIT](LICENSE-MIT)) @@ -7,110 +7,23 @@ ## This file may not be copied, modified, or distributed except according to ## those terms. 
-{.push raises: [Defect].} - -import std/tables - -import pkg/libp2p import pkg/libp2p/protobuf/minprotobuf +import pkg/libp2p import pkg/questionable import pkg/questionable/results -import pkg/chronicles -import pkg/chronos -import ./blocktype import ./errors -const - ManifestCodec* = multiCodec("dag-pb") - -var - emptyDigests {.threadvar.}: array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]] - once {.threadvar.}: bool - -template EmptyDigests: untyped = - if not once: - emptyDigests = [ - CIDv0: { - multiCodec("sha2-256"): Cid - .init("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku") - .get() - .mhash - .get() - }.toTable, - CIDv1: { - multiCodec("sha2-256"): Cid.init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n") - .get() - .mhash - .get() - }.toTable, - ] - - once = true - emptyDigests - type - BlocksManifest* = object - blocks: seq[Cid] - htree: ?Cid - version*: CidVersion - hcodec*: MultiCodec - codec*: MultiCodec + Manifest* = object + cid*: ?Cid + blocks*: seq[Cid] -proc len*(b: BlocksManifest): int = b.blocks.len - -iterator items*(b: BlocksManifest): Cid = - for b in b.blocks: - yield b - -proc hashBytes(mh: MultiHash): seq[byte] = - ## get the hash bytes of a multihash object - ## - - mh.data.buffer[mh.dpos..(mh.dpos + mh.size - 1)] - -proc cid*(b: var BlocksManifest): ?!Cid = - ## Generate a root hash using the treehash algorithm - ## - - if htree =? b.htree: - return htree.success - - var - stack: seq[MultiHash] - - for cid in b.blocks: - stack.add(? cid.mhash.mapFailure) - - while stack.len > 1: - let - (b1, b2) = (stack.pop(), stack.pop()) - mh = ? MultiHash.digest( - $b.hcodec, - (b1.hashBytes() & b2.hashBytes())) - .mapFailure - stack.add(mh) - - if stack.len == 1: - let cid = ? Cid.init( - b.version, - b.codec, - (? 
EmptyDigests[b.version][b.hcodec].catch)) - .mapFailure - b.htree = cid.some - return cid.success - -proc put*(b: var BlocksManifest, cid: Cid) = - b.htree = Cid.none - trace "Adding cid to manifest", cid - b.blocks.add(cid) - -proc contains*(b: BlocksManifest, cid: Cid): bool = - cid in b.blocks - -proc encode*(b: var BlocksManifest): ?!seq[byte] = +proc encode*(b: var Manifest): ?!seq[byte] = ## Encode the manifest into a ``ManifestCodec`` ## multicodec container (Dag-pb) for now + ## + var pbNode = initProtoBuffer() for c in b.blocks: @@ -119,75 +32,34 @@ proc encode*(b: var BlocksManifest): ?!seq[byte] = pbLink.finish() pbNode.write(2, pbLink) - let cid = ? b.cid + let cid = !b.cid pbNode.write(1, cid.data.buffer) # set the treeHash Cid as the data field pbNode.finish() return pbNode.buffer.success -proc decode*(_: type BlocksManifest, data: seq[byte]): ?!(Cid, seq[Cid]) = - ## Decode a manifest from a byte seq +proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest = + ## Decode a manifest from a data blob ## + var pbNode = initProtoBuffer(data) cidBuf: seq[byte] blocks: seq[Cid] - if pbNode.getField(1, cidBuf).isOk: - let cid = ? Cid.init(cidBuf).mapFailure - var linksBuf: seq[seq[byte]] - if pbNode.getRepeatedField(2, linksBuf).isOk: - for pbLinkBuf in linksBuf: - var - blocksBuf: seq[seq[byte]] - blockBuf: seq[byte] - pbLink = initProtoBuffer(pbLinkBuf) + if pbNode.getField(1, cidBuf).isErr: + return failure("Unable to decode Cid from manifest!") - if pbLink.getField(1, blockBuf).isOk: - let cidRes = Cid.init(blockBuf) - if cidRes.isOk: - blocks.add(cidRes.get()) + let cid = ? Cid.init(cidBuf).mapFailure + var linksBuf: seq[seq[byte]] + if pbNode.getRepeatedField(2, linksBuf).isOk: + for pbLinkBuf in linksBuf: + var + blocksBuf: seq[seq[byte]] + blockBuf: seq[byte] + pbLink = initProtoBuffer(pbLinkBuf) - return (cid, blocks).success + if pbLink.getField(1, blockBuf).isOk: + blocks.add(? 
Cid.init(blockBuf).mapFailure) -proc init*( - T: type BlocksManifest, - blocks: openArray[Cid] = [], - version = CIDv1, - hcodec = multiCodec("sha2-256"), - codec = multiCodec("raw")): ?!T = - ## Create a manifest using array of `Cid`s - ## - - if hcodec notin EmptyDigests[version]: - return failure("Unsuported manifest hash codec!") - - T( - blocks: @blocks, - version: version, - codec: codec, - hcodec: hcodec, - ).success - -proc init*( - T: type BlocksManifest, - blk: Block): ?!T = - ## Create manifest from a raw manifest block - ## (in dag-pb for for now) - ## - - let - (cid, blocks) = ? BlocksManifest.decode(blk.data) - mhash = ? cid.mhash.mapFailure - - var - manifest = ? BlocksManifest.init( - blocks, - cid.version, - mhash.mcodec, - cid.mcodec) - - if cid != (? manifest.cid): - return failure("Content hashes don't match!") - - return manifest.success + Manifest(cid: cid.some, blocks: blocks).success diff --git a/dagger/node.nim b/dagger/node.nim index f41051a4..6d1e84d2 100644 --- a/dagger/node.nim +++ b/dagger/node.nim @@ -21,7 +21,7 @@ import pkg/libp2p/signed_envelope import ./chunker import ./blocktype as bt -import ./manifest +import ./blocksmanifest import ./stores/blockstore import ./blockexchange @@ -99,11 +99,10 @@ proc retrieve*( if mc == ManifestCodec: trace "Retrieving data set", cid, mc - let res = BlocksManifest.init(blk) - if (res.isErr): - return failure(res.error.msg) + without blockManifest =? 
BlocksManifest.init(blk.data): + return failure("Unable to construct manifest!") - asyncSpawn node.streamBlocks(stream, res.get()) + asyncSpawn node.streamBlocks(stream, blockManifest) else: asyncSpawn (proc(): Future[void] {.async.} = try: @@ -168,8 +167,8 @@ proc store*( return failure(cid.error.msg) trace "Stored data", manifestCid = manifest.cid, - contentCid = !cid, - blocks = blockManifest.len + contentCid = !cid, + blocks = blockManifest.len return manifest.cid.success diff --git a/tests/dagger/testmanifest.nim b/tests/dagger/testmanifest.nim index c43f9d36..5bc1a64e 100644 --- a/tests/dagger/testmanifest.nim +++ b/tests/dagger/testmanifest.nim @@ -9,7 +9,7 @@ import pkg/stew/byteutils import pkg/dagger/chunker import pkg/dagger/blocktype as bt -import pkg/dagger/manifest +import pkg/dagger/blocksmanifest import ./helpers @@ -47,10 +47,10 @@ suite "Manifest": ) var - manifest = BlocksManifest.init(blocks).tryGet() + blocksManifest = BlocksManifest.init(blocks).tryGet() let - e = manifest.encode().tryGet() - (cid, decoded) = BlocksManifest.decode(e).tryGet() + e = blocksManifest.encode().tryGet() + manifest = Manifest.decode(e).tryGet() - check decoded == blocks + check manifest.blocks == blocks diff --git a/tests/dagger/testnode.nim b/tests/dagger/testnode.nim index 3a666a74..a03f93f5 100644 --- a/tests/dagger/testnode.nim +++ b/tests/dagger/testnode.nim @@ -12,7 +12,7 @@ import pkg/dagger/stores import pkg/dagger/blockexchange import pkg/dagger/chunker import pkg/dagger/node -import pkg/dagger/manifest +import pkg/dagger/blocksmanifest import pkg/dagger/blocktype as bt import ./helpers @@ -75,7 +75,7 @@ suite "Test Node": var manifestBlock = (await localStore.getBlock(manifestCid)).tryGet() - localManifest = BlocksManifest.init(manifestBlock).tryGet() + localManifest = BlocksManifest.init(manifestBlock.data).tryGet() check: manifest.len == localManifest.len