From 3222f436cc1c6cee897929f520f6a10b1d741fac Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Mon, 4 Apr 2022 18:46:13 -0600 Subject: [PATCH] Protected manifest (#69) * add protected manifest type * encode protected manifest * add empty block type handling * add tests for protected manifest --- dagger/blocktype.nim | 93 +++++++++++++++++++++++++++++- dagger/manifest/coders.nim | 72 ++++++++++++++++++++---- dagger/manifest/manifest.nim | 103 ++++++++++++++++++++++------------ tests/dagger/testmanifest.nim | 58 ++++++++++++++++--- 4 files changed, 269 insertions(+), 57 deletions(-) diff --git a/dagger/blocktype.nim b/dagger/blocktype.nim index 2318228d..ce48d497 100644 --- a/dagger/blocktype.nim +++ b/dagger/blocktype.nim @@ -7,6 +7,9 @@ ## This file may not be copied, modified, or distributed except according to ## those terms. +import std/tables +export tables + import pkg/upraises push: {.upraises: [].} @@ -18,14 +21,100 @@ import pkg/questionable/results import ./errors +template EmptyCid*: untyped = + var + emptyCid {.global, threadvar.}: + array[CIDv0..CIDv1, Table[MultiCodec, Cid]] + + once: + emptyCid = [ + CIDv0: { + multiCodec("sha2-256"): Cid + .init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n") + .get() + }.toTable, + CIDv1: { + multiCodec("sha2-256"): Cid + .init("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku") + .get() + }.toTable, + ] + + emptyCid + +template EmptyDigests*: untyped = + var + emptyDigests {.global, threadvar.}: + array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]] + + once: + emptyDigests = [ + CIDv0: { + multiCodec("sha2-256"): EmptyCid[CIDv0] + .catch + .get()[multiCodec("sha2-256")] + .catch + .get() + .mhash + .get() + }.toTable, + CIDv1: { + multiCodec("sha2-256"): EmptyCid[CIDv1] + .catch + .get()[multiCodec("sha2-256")] + .catch + .get() + .mhash + .get() + }.toTable, + ] + + emptyDigests + +template EmptyBlock*: untyped = + var + emptyBlock {.global, threadvar.}: + array[CIDv0..CIDv1, Table[MultiCodec, Block]] 
+ + once: + emptyBlock = [ + CIDv0: { + multiCodec("sha2-256"): Block( + cid: EmptyCid[CIDv0][multiCodec("sha2-256")]) + }.toTable, + CIDv1: { + multiCodec("sha2-256"): Block( + cid: EmptyCid[CIDv1][multiCodec("sha2-256")]) + }.toTable, + ] + + emptyBlock + const - BlockSize* = 31 * 64 * 4 # file chunk read size + BlockSize* = 31 * 64 * 4 # block size type Block* = ref object of RootObj cid*: Cid data*: seq[byte] +proc isEmpty*(cid: Cid): bool = + cid == EmptyCid[cid.cidver] + .catch + .get()[cid.mhash.get().mcodec] + .catch + .get() + +proc isEmpty*(blk: Block): bool = + blk.cid.isEmpty + +proc emptyBlock*(cid: Cid): Block = + EmptyBlock[cid.cidver] + .catch + .get()[cid.mhash.get().mcodec] + .catch + .get() + proc `$`*(b: Block): string = result &= "cid: " & $b.cid result &= "\ndata: " & string.fromBytes(b.data) @@ -62,6 +151,6 @@ func new*( mcodec = mhash.mcodec) if verify and cid != b.cid: - return "Cid's don't match!".failure + return "Cid and content don't match!".failure success b diff --git a/dagger/manifest/coders.nim b/dagger/manifest/coders.nim index 1189f677..d7ba0a2c 100644 --- a/dagger/manifest/coders.nim +++ b/dagger/manifest/coders.nim @@ -52,10 +52,17 @@ func encode*(_: DagPBCoder, manifest: Manifest): ?!seq[byte] = # contains the following protobuf `Message` # # ```protobuf + # Message ErasureInfo { + # optional uint32 K = 1; # number of encoded blocks + # optional uint32 M = 2; # number of parity blocks + # optional bytes cid = 3; # cid of the original dataset + # optional uint32 original = 4; # number of original blocks + # } # Message Header { - # optional bytes rootHash = 1; # the root (tree) hash - # optional uint32 blockSize = 2; # size of a single block - # optional uint32 blocksLen = 3; # total amount of blocks + # optional bytes rootHash = 1; # the root (tree) hash + # optional uint32 blockSize = 2; # size of a single block + # optional uint32 blocksLen = 3; # total amount of blocks + # optional ErasureInfo erasure = 4; # erasure coding 
info # } # ``` # @@ -65,8 +72,17 @@ func encode*(_: DagPBCoder, manifest: Manifest): ?!seq[byte] = header.write(1, cid.data.buffer) header.write(2, manifest.blockSize.uint32) header.write(3, manifest.len.uint32) + if manifest.protected: + var erasureInfo = initProtoBuffer() + erasureInfo.write(1, manifest.K.uint32) + erasureInfo.write(2, manifest.M.uint32) + erasureInfo.write(3, manifest.originalCid.data.buffer) + erasureInfo.write(4, manifest.originalLen.uint32) + erasureInfo.finish() - pbNode.write(1, header.buffer) # set the rootHash Cid as the data field + header.write(4, erasureInfo) + + pbNode.write(1, header) # set the rootHash Cid as the data field pbNode.finish() return pbNode.buffer.success @@ -78,9 +94,13 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest = var pbNode = initProtoBuffer(data) pbHeader: ProtoBuffer + pbErasureInfo: ProtoBuffer rootHash: seq[byte] + originalCid: seq[byte] blockSize: uint32 blocksLen: uint32 + originalLen: uint32 + K, M: uint32 blocks: seq[Cid] # Decode `Header` message @@ -97,6 +117,22 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest = if pbHeader.getField(3, blocksLen).isErr: return failure("Unable to decode `blocksLen` from manifest!") + if pbHeader.getField(4, pbErasureInfo).isErr: + return failure("Unable to decode `erasureInfo` from manifest!") + + if pbErasureInfo.buffer.len > 0: + if pbErasureInfo.getField(1, K).isErr: + return failure("Unable to decode `K` from manifest!") + + if pbErasureInfo.getField(2, M).isErr: + return failure("Unable to decode `M` from manifest!") + + if pbErasureInfo.getField(3, originalCid).isErr: + return failure("Unable to decode `originalCid` from manifest!") + + if pbErasureInfo.getField(4, originalLen).isErr: + return failure("Unable to decode `originalLen` from manifest!") + let rootHashCid = ? 
Cid.init(rootHash).mapFailure var linksBuf: seq[seq[byte]] if pbNode.getRepeatedField(2, linksBuf).isOk: @@ -112,15 +148,27 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest = if blocksLen.int != blocks.len: return failure("Total blocks and length of blocks in header don't match!") - Manifest( - rootHash: rootHashCid.some, - blockSize: blockSize.int, - blocks: blocks, - hcodec: (? rootHashCid.mhash.mapFailure).mcodec, - codec: rootHashCid.mcodec, - version: rootHashCid.cidver).success + var + self = Manifest( + rootHash: rootHashCid.some, + blockSize: blockSize.int, + blocks: blocks, + hcodec: (? rootHashCid.mhash.mapFailure).mcodec, + codec: rootHashCid.mcodec, + version: rootHashCid.cidver, + protected: pbErasureInfo.buffer.len > 0) -proc encode*(self: var Manifest, encoder = ManifestContainers[$DagPBCodec]): ?!seq[byte] = + if self.protected: + self.K = K.int + self.M = M.int + self.originalCid = ? Cid.init(originalCid).mapFailure + self.originalLen = originalLen.int + + self.success + +proc encode*( + self: Manifest, + encoder = ManifestContainers[$DagPBCodec]): ?!seq[byte] = ## Encode a manifest using `encoder` ## diff --git a/dagger/manifest/manifest.nim b/dagger/manifest/manifest.nim index aa8e5a49..5903c52e 100644 --- a/dagger/manifest/manifest.nim +++ b/dagger/manifest/manifest.nim @@ -11,7 +11,6 @@ import pkg/upraises push: {.upraises: [].} -import std/tables import pkg/libp2p/protobuf/minprotobuf import pkg/libp2p import pkg/questionable @@ -21,39 +20,22 @@ import pkg/chronicles import ../errors import ../blocktype -template EmptyDigests: untyped = - var - emptyDigests {.global, threadvar.}: - array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]] - - once: - emptyDigests = [ - CIDv0: { - multiCodec("sha2-256"): Cid - .init("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku") - .get() - .mhash - .get() - }.toTable, - CIDv1: { - multiCodec("sha2-256"): Cid - .init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n") - .get() - .mhash - 
.get() - }.toTable, - ] - - emptyDigests - type Manifest* = ref object of RootObj - rootHash*: ?Cid # root (tree) hash of the contained data set - blockSize*: int # size of each contained block (might not be needed if blocks are len-prefixed) - blocks*: seq[Cid] # block Cid - version*: CidVersion # Cid version - hcodec*: MultiCodec # Multihash codec - codec*: MultiCodec # Data set codec + rootHash*: ?Cid # root (tree) hash of the contained data set + blockSize*: int # size of each contained block (might not be needed if blocks are len-prefixed) + blocks*: seq[Cid] # block Cid + version*: CidVersion # Cid version + hcodec*: MultiCodec # Multihash codec + codec*: MultiCodec # Data set codec + case protected*: bool # Protected datasets have erasure coded info + of true: + K*: int # Number of blocks to encode + M*: int # Number of resulting parity blocks + originalCid*: Cid # The original Cid of the dataset being erasure coded + originalLen*: int # The length of the original manifest + else: + discard func len*(self: Manifest): int = self.blocks.len @@ -84,6 +66,10 @@ iterator items*(self: Manifest): Cid = for b in self.blocks: yield b +iterator pairs*(self: Manifest): tuple[key: int, val: Cid] = + for pair in self.blocks.pairs(): + yield pair + func contains*(self: Manifest, cid: Cid): bool = cid in self.blocks @@ -122,7 +108,16 @@ proc makeRoot*(self: Manifest): ?!void = self.rootHash = cid.some - ok() + success() + +func rounded*(self: Manifest): int = + if (self.originalLen mod self.K) != 0: + return self.originalLen + (self.K - (self.originalLen mod self.K)) + + self.originalLen + +func steps*(self: Manifest): int = + self.rounded div self.K # number of blocks per row proc cid*(self: Manifest): ?!Cid = ## Generate a root hash using the treehash algorithm @@ -136,6 +131,7 @@ proc cid*(self: Manifest): ?!Cid = proc new*( T: type Manifest, blocks: openArray[Cid] = [], + protected = false, version = CIDv1, hcodec = multiCodec("sha2-256"), codec = multiCodec("raw"), @@ 
-151,8 +147,45 @@ proc new*( version: version, codec: codec, hcodec: hcodec, - blockSize: blockSize - ).success + blockSize: blockSize, + protected: protected).success + +proc new*( + T: type Manifest, + manifest: Manifest, + K, M: int): ?!Manifest = + ## Create an erasure protected dataset from an + ## un-protected one + ## + + var + self = Manifest( + version: manifest.version, + codec: manifest.codec, + hcodec: manifest.hcodec, + blockSize: manifest.blockSize, + protected: true, + K: K, M: M, + originalCid: ? manifest.cid, + originalLen: manifest.len) + + let + encodedLen = self.rounded + (self.steps * M) + + self.blocks = newSeq[Cid](encodedLen) + + # copy original manifest blocks + for i in 0.. 0 + check MultiHash.decode(encoded, mh).tryGet() > 0 - let checkSumCid = Cid.init(manifest.version, manifest.codec, mh).tryGet() - check checkSumCid == manifest.cid.tryGet() + let encodedCid = Cid.init(manifest.version, manifest.codec, mh).tryGet() + check: + encodedCid == manifest.cid.tryGet() test "Should encode/decode to/from manifest": let @@ -46,10 +47,51 @@ suite "Manifest": ) var - blocksManifest = Manifest.new(blocks).tryGet() + manifest = Manifest.new(blocks).tryGet() let - e = blocksManifest.encode().tryGet() - manifest = Manifest.decode(e).tryGet() + e = manifest.encode().tryGet() + decoded = Manifest.decode(e).tryGet() - check manifest.blocks == blocks + check: + decoded.blocks == blocks + decoded.protected == false + + test "Should produce a protected manifest": + let + blocks = (0..<333).mapIt( + Block.new(("Block " & $it).toBytes).tryGet().cid + ) + manifest = Manifest.new(blocks).tryGet() + protected = Manifest.new(manifest, 2, 2).tryGet() + + check: + protected.originalCid == manifest.cid.tryGet() + protected.blocks[0..<333] == manifest.blocks + protected.protected == true + protected.originalLen == manifest.len + + # fill up with empty Cid's + for i in protected.rounded..