Protected manifest (#69)

* add protected manifest type

* encode protected manifest

* add empty block type handling

* add tests for protected manifest
This commit is contained in:
Dmitriy Ryajov 2022-04-04 18:46:13 -06:00 committed by GitHub
parent 8146198869
commit 3222f436cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 269 additions and 57 deletions

View File

@ -7,6 +7,9 @@
## This file may not be copied, modified, or distributed except according to ## This file may not be copied, modified, or distributed except according to
## those terms. ## those terms.
import std/tables
export tables
import pkg/upraises import pkg/upraises
push: {.upraises: [].} push: {.upraises: [].}
@ -18,14 +21,100 @@ import pkg/questionable/results
import ./errors import ./errors
template EmptyCid*: untyped =
  ## Lookup of the well-known Cid's that identify empty content.
  ##
  ## Evaluates to an array indexed by Cid version (`CIDv0`..`CIDv1`) whose
  ## elements are tables keyed by multihash codec. Only `sha2-256` is
  ## registered. The array is filled in inside a `once` block, i.e. the
  ## first time the expansion is executed.
  ##
  ## NOTE(review): the storage is declared `threadvar` while `once` is a
  ## separate guard - confirm that threads other than the first one to run
  ## this also observe an initialised table.
  var
    emptyCid {.global, threadvar.}:
      array[CIDv0..CIDv1, Table[MultiCodec, Cid]]

  once:
    emptyCid = [
      CIDv0: toTable({
        multiCodec("sha2-256"):
          Cid.init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n").get()
      }),
      CIDv1: toTable({
        multiCodec("sha2-256"):
          Cid.init(
            "bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku").get()
      }),
    ]

  emptyCid
template EmptyDigests*: untyped =
  ## Lookup of the multihashes of the empty Cid's.
  ##
  ## Evaluates to an array indexed by Cid version (`CIDv0`..`CIDv1`) whose
  ## elements are tables keyed by multihash codec. Every entry is obtained
  ## by taking `.mhash` of the matching `EmptyCid` entry, so the two
  ## lookups cannot drift apart. Only `sha2-256` is registered. The array
  ## is filled in inside a `once` block.
  var
    emptyDigests {.global, threadvar.}:
      array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]]

  once:
    emptyDigests = [
      CIDv0: {
        multiCodec("sha2-256"): EmptyCid[CIDv0]
          .catch
          .get()[multiCodec("sha2-256")]
          .catch
          .get()
          .mhash
          .get()
      }.toTable,
      CIDv1: {
        # Each version derives its digest from its *own* empty Cid; this
        # slot previously read the CIDv0 entry.
        multiCodec("sha2-256"): EmptyCid[CIDv1]
          .catch
          .get()[multiCodec("sha2-256")]
          .catch
          .get()
          .mhash
          .get()
      }.toTable,
    ]

  emptyDigests
template EmptyBlock*: untyped =
  ## Lookup of the canonical empty blocks.
  ##
  ## Evaluates to an array indexed by Cid version (`CIDv0`..`CIDv1`) whose
  ## elements are tables keyed by multihash codec. Each entry is a `Block`
  ## whose `cid` is the `EmptyCid` entry for the same version and codec;
  ## `data` is not set and therefore stays at its default value. Only
  ## `sha2-256` is registered. The array is filled in inside a `once` block.
  var
    emptyBlock {.global, threadvar.}:
      array[CIDv0..CIDv1, Table[MultiCodec, Block]]

  once:
    emptyBlock = [
      CIDv0: {
        # The version of the block's Cid must match the slot it is stored
        # in; this slot previously carried the CIDv1 empty Cid.
        multiCodec("sha2-256"): Block(
          cid: EmptyCid[CIDv0][multiCodec("sha2-256")])
      }.toTable,
      CIDv1: {
        multiCodec("sha2-256"): Block(
          cid: EmptyCid[CIDv1][multiCodec("sha2-256")])
      }.toTable,
    ]

  emptyBlock
const const
BlockSize* = 31 * 64 * 4 # file chunk read size BlockSize* = 31 * 64 * 4 # block size
type type
Block* = ref object of RootObj Block* = ref object of RootObj
cid*: Cid cid*: Cid
data*: seq[byte] data*: seq[byte]
proc isEmpty*(cid: Cid): bool =
  ## True when `cid` equals the `EmptyCid` entry registered for its own
  ## Cid version and multihash codec.
  let
    cidsForVersion = EmptyCid[cid.cidver].catch.get()
    hashCodec = cid.mhash.get().mcodec
    knownEmpty = cidsForVersion[hashCodec].catch.get()

  cid == knownEmpty
proc isEmpty*(blk: Block): bool =
  ## A block is empty when its Cid is an empty Cid.
  isEmpty(blk.cid)
proc emptyBlock*(cid: Cid): Block =
  ## Fetch the `EmptyBlock` entry registered for the Cid version and
  ## multihash codec of `cid`.
  let
    blocksForVersion = EmptyBlock[cid.cidver].catch.get()
    hashCodec = cid.mhash.get().mcodec

  blocksForVersion[hashCodec].catch.get()
proc `$`*(b: Block): string = proc `$`*(b: Block): string =
result &= "cid: " & $b.cid result &= "cid: " & $b.cid
result &= "\ndata: " & string.fromBytes(b.data) result &= "\ndata: " & string.fromBytes(b.data)
@ -62,6 +151,6 @@ func new*(
mcodec = mhash.mcodec) mcodec = mhash.mcodec)
if verify and cid != b.cid: if verify and cid != b.cid:
return "Cid's don't match!".failure return "Cid and content don't match!".failure
success b success b

View File

@ -52,10 +52,17 @@ func encode*(_: DagPBCoder, manifest: Manifest): ?!seq[byte] =
# contains the following protobuf `Message` # contains the following protobuf `Message`
# #
# ```protobuf # ```protobuf
# Message ErasureInfo {
# optional uint32 K = 1; # number of encoded blocks
# optional uint32 M = 2; # number of parity blocks
# optional bytes cid = 3; # cid of the original dataset
# optional uint32 original = 4; # number of original blocks
# }
# Message Header { # Message Header {
# optional bytes rootHash = 1; # the root (tree) hash # optional bytes rootHash = 1; # the root (tree) hash
# optional uint32 blockSize = 2; # size of a single block # optional uint32 blockSize = 2; # size of a single block
# optional uint32 blocksLen = 3; # total amount of blocks # optional uint32 blocksLen = 3; # total amount of blocks
# optional ErasureInfo erasure = 4; # erasure coding info
# } # }
# ``` # ```
# #
@ -65,8 +72,17 @@ func encode*(_: DagPBCoder, manifest: Manifest): ?!seq[byte] =
header.write(1, cid.data.buffer) header.write(1, cid.data.buffer)
header.write(2, manifest.blockSize.uint32) header.write(2, manifest.blockSize.uint32)
header.write(3, manifest.len.uint32) header.write(3, manifest.len.uint32)
if manifest.protected:
var erasureInfo = initProtoBuffer()
erasureInfo.write(1, manifest.K.uint32)
erasureInfo.write(2, manifest.M.uint32)
erasureInfo.write(3, manifest.originalCid.data.buffer)
erasureInfo.write(4, manifest.originalLen.uint32)
erasureInfo.finish()
pbNode.write(1, header.buffer) # set the rootHash Cid as the data field header.write(4, erasureInfo)
pbNode.write(1, header) # set the rootHash Cid as the data field
pbNode.finish() pbNode.finish()
return pbNode.buffer.success return pbNode.buffer.success
@ -78,9 +94,13 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest =
var var
pbNode = initProtoBuffer(data) pbNode = initProtoBuffer(data)
pbHeader: ProtoBuffer pbHeader: ProtoBuffer
pbErasureInfo: ProtoBuffer
rootHash: seq[byte] rootHash: seq[byte]
originalCid: seq[byte]
blockSize: uint32 blockSize: uint32
blocksLen: uint32 blocksLen: uint32
originalLen: uint32
K, M: uint32
blocks: seq[Cid] blocks: seq[Cid]
# Decode `Header` message # Decode `Header` message
@ -97,6 +117,22 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest =
if pbHeader.getField(3, blocksLen).isErr: if pbHeader.getField(3, blocksLen).isErr:
return failure("Unable to decode `blocksLen` from manifest!") return failure("Unable to decode `blocksLen` from manifest!")
if pbHeader.getField(4, pbErasureInfo).isErr:
return failure("Unable to decode `erasureInfo` from manifest!")
if pbErasureInfo.buffer.len > 0:
if pbErasureInfo.getField(1, K).isErr:
return failure("Unable to decode `K` from manifest!")
if pbErasureInfo.getField(2, M).isErr:
return failure("Unable to decode `M` from manifest!")
if pbErasureInfo.getField(3, originalCid).isErr:
return failure("Unable to decode `originalCid` from manifest!")
if pbErasureInfo.getField(4, originalLen).isErr:
return failure("Unable to decode `originalLen` from manifest!")
let rootHashCid = ? Cid.init(rootHash).mapFailure let rootHashCid = ? Cid.init(rootHash).mapFailure
var linksBuf: seq[seq[byte]] var linksBuf: seq[seq[byte]]
if pbNode.getRepeatedField(2, linksBuf).isOk: if pbNode.getRepeatedField(2, linksBuf).isOk:
@ -112,15 +148,27 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest =
if blocksLen.int != blocks.len: if blocksLen.int != blocks.len:
return failure("Total blocks and length of blocks in header don't match!") return failure("Total blocks and length of blocks in header don't match!")
Manifest( var
rootHash: rootHashCid.some, self = Manifest(
blockSize: blockSize.int, rootHash: rootHashCid.some,
blocks: blocks, blockSize: blockSize.int,
hcodec: (? rootHashCid.mhash.mapFailure).mcodec, blocks: blocks,
codec: rootHashCid.mcodec, hcodec: (? rootHashCid.mhash.mapFailure).mcodec,
version: rootHashCid.cidver).success codec: rootHashCid.mcodec,
version: rootHashCid.cidver,
protected: pbErasureInfo.buffer.len > 0)
proc encode*(self: var Manifest, encoder = ManifestContainers[$DagPBCodec]): ?!seq[byte] = if self.protected:
self.K = K.int
self.M = M.int
self.originalCid = ? Cid.init(originalCid).mapFailure
self.originalLen = originalLen.int
self.success
proc encode*(
self: Manifest,
encoder = ManifestContainers[$DagPBCodec]): ?!seq[byte] =
## Encode a manifest using `encoder` ## Encode a manifest using `encoder`
## ##

View File

@ -11,7 +11,6 @@ import pkg/upraises
push: {.upraises: [].} push: {.upraises: [].}
import std/tables
import pkg/libp2p/protobuf/minprotobuf import pkg/libp2p/protobuf/minprotobuf
import pkg/libp2p import pkg/libp2p
import pkg/questionable import pkg/questionable
@ -21,39 +20,22 @@ import pkg/chronicles
import ../errors import ../errors
import ../blocktype import ../blocktype
template EmptyDigests: untyped =
var
emptyDigests {.global, threadvar.}:
array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]]
once:
emptyDigests = [
CIDv0: {
multiCodec("sha2-256"): Cid
.init("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku")
.get()
.mhash
.get()
}.toTable,
CIDv1: {
multiCodec("sha2-256"): Cid
.init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n")
.get()
.mhash
.get()
}.toTable,
]
emptyDigests
type type
Manifest* = ref object of RootObj Manifest* = ref object of RootObj
rootHash*: ?Cid # root (tree) hash of the contained data set rootHash*: ?Cid # root (tree) hash of the contained data set
blockSize*: int # size of each contained block (might not be needed if blocks are len-prefixed) blockSize*: int # size of each contained block (might not be needed if blocks are len-prefixed)
blocks*: seq[Cid] # block Cid blocks*: seq[Cid] # block Cid
version*: CidVersion # Cid version version*: CidVersion # Cid version
hcodec*: MultiCodec # Multihash codec hcodec*: MultiCodec # Multihash codec
codec*: MultiCodec # Data set codec codec*: MultiCodec # Data set codec
case protected*: bool # Protected datasets have erasure coded info
of true:
K*: int # Number of blocks to encode
M*: int # Number of resulting parity blocks
originalCid*: Cid # The original Cid of the dataset being erasure coded
originalLen*: int # The length of the original manifest
else:
discard
func len*(self: Manifest): int = func len*(self: Manifest): int =
self.blocks.len self.blocks.len
@ -84,6 +66,10 @@ iterator items*(self: Manifest): Cid =
for b in self.blocks: for b in self.blocks:
yield b yield b
iterator pairs*(self: Manifest): tuple[key: int, val: Cid] =
  ## Walk `self.blocks`, yielding every Cid together with its index.
  for index, blockCid in self.blocks:
    yield (key: index, val: blockCid)
func contains*(self: Manifest, cid: Cid): bool = func contains*(self: Manifest, cid: Cid): bool =
cid in self.blocks cid in self.blocks
@ -122,7 +108,16 @@ proc makeRoot*(self: Manifest): ?!void =
self.rootHash = cid.some self.rootHash = cid.some
ok() success()
func rounded*(self: Manifest): int =
  ## `originalLen` rounded up to the next multiple of `K`; returned
  ## unchanged when it already is a multiple of `K`.
  let remainder = self.originalLen mod self.K
  if remainder == 0:
    self.originalLen
  else:
    self.originalLen + (self.K - remainder)
func steps*(self: Manifest): int =
  ## Number of blocks per row: the rounded length divided by `K`.
  let paddedLen = self.rounded
  paddedLen div self.K
proc cid*(self: Manifest): ?!Cid = proc cid*(self: Manifest): ?!Cid =
## Generate a root hash using the treehash algorithm ## Generate a root hash using the treehash algorithm
@ -136,6 +131,7 @@ proc cid*(self: Manifest): ?!Cid =
proc new*( proc new*(
T: type Manifest, T: type Manifest,
blocks: openArray[Cid] = [], blocks: openArray[Cid] = [],
protected = false,
version = CIDv1, version = CIDv1,
hcodec = multiCodec("sha2-256"), hcodec = multiCodec("sha2-256"),
codec = multiCodec("raw"), codec = multiCodec("raw"),
@ -151,8 +147,45 @@ proc new*(
version: version, version: version,
codec: codec, codec: codec,
hcodec: hcodec, hcodec: hcodec,
blockSize: blockSize blockSize: blockSize,
).success protected: protected).success
proc new*(
  T: type Manifest,
  manifest: Manifest,
  K, M: int): ?!Manifest =
  ## Create an erasure protected dataset from an
  ## un-protected one
  ##
  ## The result copies `version`, `codec`, `hcodec` and `blockSize` from
  ## `manifest`, records `manifest.cid` / `manifest.len` as
  ## `originalCid` / `originalLen`, and sizes `blocks` to
  ## `rounded + steps * M` entries:
  ##
  ## * `[0, manifest.len)`       - the Cid's of `manifest`
  ## * `[manifest.len, rounded)` - the empty Cid for `manifest.version` /
  ##                               `manifest.hcodec` (padding)
  ## * `[rounded, len)`          - left at the default `Cid` value
  ##
  ## Returns a failure when `K` is not positive, when `M` is negative,
  ## when the Cid of `manifest` can't be computed, or when padding is
  ## needed and no empty Cid is registered for `manifest.hcodec`.
  ##

  # `rounded` and `steps` divide by `K`, and the block count is derived
  # from `M`; reject values that would otherwise end in a Defect.
  if K <= 0:
    return failure("K must be greater than zero!")

  if M < 0:
    return failure("M must not be negative!")

  let
    self = Manifest(
      version: manifest.version,
      codec: manifest.codec,
      hcodec: manifest.hcodec,
      blockSize: manifest.blockSize,
      protected: true,
      K: K, M: M,
      originalCid: ? manifest.cid,
      originalLen: manifest.len)
    rounded = self.rounded
    encodedLen = rounded + (self.steps * M)

  self.blocks = newSeq[Cid](encodedLen)

  # copy original manifest blocks
  for i in 0..<manifest.len:
    self.blocks[i] = manifest[i]

  # pad up to a multiple of K; the empty Cid is the same for every
  # padding slot, so it is looked up once and only when actually needed
  if rounded > manifest.len:
    let emptyCid = ? EmptyCid[manifest.version]
      .catch
      .get()[manifest.hcodec]
      .catch

    for i in manifest.len..<rounded:
      self.blocks[i] = emptyCid

  self.success
proc new*( proc new*(
T: type Manifest, T: type Manifest,

View File

@ -27,17 +27,18 @@ suite "Manifest":
]).tryGet() ]).tryGet()
let let
checksum = @[18.byte, 32, 227, 176, 196, 66, 152, encoded = @[byte 18, 32, 227, 176, 196, 66, 152,
252, 28, 20, 154, 251, 244, 200, 153, 252, 28, 20, 154, 251, 244, 200, 153,
111, 185, 36, 39, 174, 65, 228, 100, 111, 185, 36, 39, 174, 65, 228, 100,
155, 147, 76, 164, 149, 153, 27, 120, 155, 147, 76, 164, 149, 153, 27, 120,
82, 184, 85] 82, 184, 85]
var mh: MultiHash var mh: MultiHash
check MultiHash.decode(checksum, mh).tryGet() > 0 check MultiHash.decode(encoded, mh).tryGet() > 0
let checkSumCid = Cid.init(manifest.version, manifest.codec, mh).tryGet() let encodedCid = Cid.init(manifest.version, manifest.codec, mh).tryGet()
check checkSumCid == manifest.cid.tryGet() check:
encodedCid == manifest.cid.tryGet()
test "Should encode/decode to/from manifest": test "Should encode/decode to/from manifest":
let let
@ -46,10 +47,51 @@ suite "Manifest":
) )
var var
blocksManifest = Manifest.new(blocks).tryGet() manifest = Manifest.new(blocks).tryGet()
let let
e = blocksManifest.encode().tryGet() e = manifest.encode().tryGet()
manifest = Manifest.decode(e).tryGet() decoded = Manifest.decode(e).tryGet()
check manifest.blocks == blocks check:
decoded.blocks == blocks
decoded.protected == false
test "Should produce a protected manifest":
  # Build an un-protected manifest of 333 blocks and protect it (K = M = 2)
  let
    cids = (0..<333).mapIt(
      Block.new(("Block " & $it).toBytes).tryGet().cid
    )
    original = Manifest.new(cids).tryGet()
    protectedManifest = Manifest.new(original, 2, 2).tryGet()

  check:
    protectedManifest.originalCid == original.cid.tryGet()
    protectedManifest.blocks[0..<333] == original.blocks
    protectedManifest.protected == true
    protectedManifest.originalLen == original.len

  # fill up with empty Cid's
  for slot in protectedManifest.rounded..<protectedManifest.len:
    protectedManifest.blocks[slot] =
      EmptyCid[original.version].catch.get()[original.hcodec].catch.get()

  # round-trip through the encoder and compare against both manifests
  let
    encoded = protectedManifest.encode().tryGet()
    decoded = Manifest.decode(encoded).tryGet()

  check:
    decoded.protected == true
    decoded.originalLen == original.len

    decoded.K == protectedManifest.K
    decoded.M == protectedManifest.M

    decoded.originalCid == protectedManifest.originalCid
    decoded.originalCid == original.cid.tryGet()

    decoded.blocks == protectedManifest.blocks
    decoded.blocks[0..<333] == original.blocks