Protected manifest (#69)

* add protected manifest type

* encode protected manifest

* add empty block type handling

* add tests for protected manifest
This commit is contained in:
Dmitriy Ryajov 2022-04-04 18:46:13 -06:00 committed by GitHub
parent 8146198869
commit 3222f436cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 269 additions and 57 deletions

View File

@ -7,6 +7,9 @@
## This file may not be copied, modified, or distributed except according to
## those terms.
import std/tables
export tables
import pkg/upraises
push: {.upraises: [].}
@ -18,14 +21,100 @@ import pkg/questionable/results
import ./errors
template EmptyCid*: untyped =
  ## Lookup of the empty `Cid`s, indexed first by Cid version
  ## (`CIDv0`/`CIDv1`) and then by multihash codec.
  ##
  ## Only a `sha2-256` entry is registered per version. The backing
  ## array is declared `{.global, threadvar.}` and is filled in inside
  ## a `once` block; the template evaluates to that array.
  var
    emptyCid {.global, threadvar.}:
      array[CIDv0..CIDv1, Table[MultiCodec, Cid]]

  once:
    let
      sha256 = multiCodec("sha2-256")
      v0Cid = Cid
        .init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n")
        .get()
      v1Cid = Cid
        .init("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku")
        .get()

    emptyCid = [
      CIDv0: {sha256: v0Cid}.toTable,
      CIDv1: {sha256: v1Cid}.toTable,
    ]

  emptyCid
template EmptyDigests*: untyped =
  ## Lookup of the multihashes of the empty `Cid`s, indexed first by
  ## Cid version (`CIDv0`/`CIDv1`) and then by multihash codec.
  ##
  ## Each entry is the `mhash` of the matching `EmptyCid` entry, so the
  ## slot for a version is always derived from the Cid of that same
  ## version. The backing array is declared `{.global, threadvar.}` and
  ## is filled in inside a `once` block; the template evaluates to that
  ## array.
  var
    emptyDigests {.global, threadvar.}:
      array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]]

  once:
    emptyDigests = [
      CIDv0: {
        multiCodec("sha2-256"): EmptyCid[CIDv0]
          .catch
          .get()[multiCodec("sha2-256")]
          .catch
          .get()
          .mhash
          .get()
      }.toTable,
      CIDv1: {
        # derive the v1 digest from the v1 Cid (was read from CIDv0)
        multiCodec("sha2-256"): EmptyCid[CIDv1]
          .catch
          .get()[multiCodec("sha2-256")]
          .catch
          .get()
          .mhash
          .get()
      }.toTable,
    ]

  emptyDigests
template EmptyBlock*: untyped =
  ## Lookup of the empty `Block`s, indexed first by Cid version
  ## (`CIDv0`/`CIDv1`) and then by multihash codec.
  ##
  ## Every block is constructed with only its `cid` set, taken from the
  ## `EmptyCid` entry of the same version, so the block stored under a
  ## version carries a Cid of that version. The backing array is
  ## declared `{.global, threadvar.}` and is filled in inside a `once`
  ## block; the template evaluates to that array.
  var
    emptyBlock {.global, threadvar.}:
      array[CIDv0..CIDv1, Table[MultiCodec, Block]]

  once:
    emptyBlock = [
      CIDv0: {
        # the v0 slot must hold the v0 Cid (was built from CIDv1)
        multiCodec("sha2-256"): Block(
          cid: EmptyCid[CIDv0][multiCodec("sha2-256")])
      }.toTable,
      CIDv1: {
        multiCodec("sha2-256"): Block(
          cid: EmptyCid[CIDv1][multiCodec("sha2-256")])
      }.toTable,
    ]

  emptyBlock
const
BlockSize* = 31 * 64 * 4 # file chunk read size
BlockSize* = 31 * 64 * 4 # block size
type
# A block: a `Cid` together with the bytes stored under it.
Block* = ref object of RootObj
# Cid of this block
cid*: Cid
# bytes carried by this block
data*: seq[byte]
proc isEmpty*(cid: Cid): bool =
  ## Check whether `cid` equals the `EmptyCid` entry registered for its
  ## Cid version and multihash codec.
  ##
  ## Both lookups go through `.catch.get()`, and the multihash is read
  ## with `.get()`.
  let
    perCodec = EmptyCid[cid.cidver].catch.get()
    knownEmpty = perCodec[cid.mhash.get().mcodec].catch.get()

  cid == knownEmpty
proc isEmpty*(blk: Block): bool =
  ## Check whether the Cid of `blk` is an empty Cid
  isEmpty(blk.cid)
proc emptyBlock*(cid: Cid): Block =
  ## Return the `EmptyBlock` entry registered for the Cid version and
  ## multihash codec of `cid`.
  ##
  ## Both lookups go through `.catch.get()`, and the multihash is read
  ## with `.get()`.
  let
    perCodec = EmptyBlock[cid.cidver].catch.get()

  perCodec[cid.mhash.get().mcodec].catch.get()
proc `$`*(b: Block): string =
result &= "cid: " & $b.cid
result &= "\ndata: " & string.fromBytes(b.data)
@ -62,6 +151,6 @@ func new*(
mcodec = mhash.mcodec)
if verify and cid != b.cid:
return "Cid's don't match!".failure
return "Cid and content don't match!".failure
success b

View File

@ -52,10 +52,17 @@ func encode*(_: DagPBCoder, manifest: Manifest): ?!seq[byte] =
# contains the following protobuf `Message`
#
# ```protobuf
# Message ErasureInfo {
# optional uint32 K = 1; # number of encoded blocks
# optional uint32 M = 2; # number of parity blocks
# optional bytes cid = 3; # cid of the original dataset
# optional uint32 original = 4; # number of original blocks
# }
# Message Header {
# optional bytes rootHash = 1; # the root (tree) hash
# optional uint32 blockSize = 2; # size of a single block
# optional uint32 blocksLen = 3; # total amount of blocks
# optional bytes rootHash = 1; # the root (tree) hash
# optional uint32 blockSize = 2; # size of a single block
# optional uint32 blocksLen = 3; # total amount of blocks
# optional ErasureInfo erasure = 4; # erasure coding info
# }
# ```
#
@ -65,8 +72,17 @@ func encode*(_: DagPBCoder, manifest: Manifest): ?!seq[byte] =
header.write(1, cid.data.buffer)
header.write(2, manifest.blockSize.uint32)
header.write(3, manifest.len.uint32)
if manifest.protected:
var erasureInfo = initProtoBuffer()
erasureInfo.write(1, manifest.K.uint32)
erasureInfo.write(2, manifest.M.uint32)
erasureInfo.write(3, manifest.originalCid.data.buffer)
erasureInfo.write(4, manifest.originalLen.uint32)
erasureInfo.finish()
pbNode.write(1, header.buffer) # set the rootHash Cid as the data field
header.write(4, erasureInfo)
pbNode.write(1, header) # set the rootHash Cid as the data field
pbNode.finish()
return pbNode.buffer.success
@ -78,9 +94,13 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest =
var
pbNode = initProtoBuffer(data)
pbHeader: ProtoBuffer
pbErasureInfo: ProtoBuffer
rootHash: seq[byte]
originalCid: seq[byte]
blockSize: uint32
blocksLen: uint32
originalLen: uint32
K, M: uint32
blocks: seq[Cid]
# Decode `Header` message
@ -97,6 +117,22 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest =
if pbHeader.getField(3, blocksLen).isErr:
return failure("Unable to decode `blocksLen` from manifest!")
if pbHeader.getField(4, pbErasureInfo).isErr:
return failure("Unable to decode `erasureInfo` from manifest!")
if pbErasureInfo.buffer.len > 0:
if pbErasureInfo.getField(1, K).isErr:
return failure("Unable to decode `K` from manifest!")
if pbErasureInfo.getField(2, M).isErr:
return failure("Unable to decode `M` from manifest!")
if pbErasureInfo.getField(3, originalCid).isErr:
return failure("Unable to decode `originalCid` from manifest!")
if pbErasureInfo.getField(4, originalLen).isErr:
return failure("Unable to decode `originalLen` from manifest!")
let rootHashCid = ? Cid.init(rootHash).mapFailure
var linksBuf: seq[seq[byte]]
if pbNode.getRepeatedField(2, linksBuf).isOk:
@ -112,15 +148,27 @@ func decode*(_: DagPBCoder, data: openArray[byte]): ?!Manifest =
if blocksLen.int != blocks.len:
return failure("Total blocks and length of blocks in header don't match!")
Manifest(
rootHash: rootHashCid.some,
blockSize: blockSize.int,
blocks: blocks,
hcodec: (? rootHashCid.mhash.mapFailure).mcodec,
codec: rootHashCid.mcodec,
version: rootHashCid.cidver).success
var
self = Manifest(
rootHash: rootHashCid.some,
blockSize: blockSize.int,
blocks: blocks,
hcodec: (? rootHashCid.mhash.mapFailure).mcodec,
codec: rootHashCid.mcodec,
version: rootHashCid.cidver,
protected: pbErasureInfo.buffer.len > 0)
proc encode*(self: var Manifest, encoder = ManifestContainers[$DagPBCodec]): ?!seq[byte] =
if self.protected:
self.K = K.int
self.M = M.int
self.originalCid = ? Cid.init(originalCid).mapFailure
self.originalLen = originalLen.int
self.success
proc encode*(
self: Manifest,
encoder = ManifestContainers[$DagPBCodec]): ?!seq[byte] =
## Encode a manifest using `encoder`
##

View File

@ -11,7 +11,6 @@ import pkg/upraises
push: {.upraises: [].}
import std/tables
import pkg/libp2p/protobuf/minprotobuf
import pkg/libp2p
import pkg/questionable
@ -21,39 +20,22 @@ import pkg/chronicles
import ../errors
import ../blocktype
template EmptyDigests: untyped =
  ## Lookup of the multihashes of the empty `Cid`s, indexed first by
  ## Cid version (`CIDv0`/`CIDv1`) and then by multihash codec.
  ##
  ## Only the `mhash` of each parsed Cid is kept. The backing array is
  ## declared `{.global, threadvar.}` and is filled in inside a `once`
  ## block; the template evaluates to that array.
  var
    emptyDigests {.global, threadvar.}:
      array[CIDv0..CIDv1, Table[MultiCodec, MultiHash]]

  once:
    emptyDigests = [
      # each slot parses the Cid string of its own version
      # (the two strings were previously swapped)
      CIDv0: {
        multiCodec("sha2-256"): Cid
          .init("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n")
          .get()
          .mhash
          .get()
      }.toTable,
      CIDv1: {
        multiCodec("sha2-256"): Cid
          .init("bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku")
          .get()
          .mhash
          .get()
      }.toTable,
    ]

  emptyDigests
type
Manifest* = ref object of RootObj
rootHash*: ?Cid # root (tree) hash of the contained data set
blockSize*: int # size of each contained block (might not be needed if blocks are len-prefixed)
blocks*: seq[Cid] # block Cid
version*: CidVersion # Cid version
hcodec*: MultiCodec # Multihash codec
codec*: MultiCodec # Data set codec
rootHash*: ?Cid # root (tree) hash of the contained data set
blockSize*: int # size of each contained block (might not be needed if blocks are len-prefixed)
blocks*: seq[Cid] # block Cid
version*: CidVersion # Cid version
hcodec*: MultiCodec # Multihash codec
codec*: MultiCodec # Data set codec
case protected*: bool # Protected datasets have erasure coded info
of true:
K*: int # Number of blocks to encode
M*: int # Number of resulting parity blocks
originalCid*: Cid # The original Cid of the dataset being erasure coded
originalLen*: int # The length of the original manifest
else:
discard
func len*(self: Manifest): int =
self.blocks.len
@ -84,6 +66,10 @@ iterator items*(self: Manifest): Cid =
for b in self.blocks:
yield b
iterator pairs*(self: Manifest): tuple[key: int, val: Cid] =
  ## Yield each entry of `self.blocks` together with its index
  for idx, blockCid in self.blocks:
    yield (key: idx, val: blockCid)
func contains*(self: Manifest, cid: Cid): bool =
cid in self.blocks
@ -122,7 +108,16 @@ proc makeRoot*(self: Manifest): ?!void =
self.rootHash = cid.some
ok()
success()
func rounded*(self: Manifest): int =
  ## `originalLen` rounded up to the next multiple of `K`; returned
  ## unchanged when it already is a multiple of `K`.
  let remainder = self.originalLen mod self.K

  if remainder == 0:
    self.originalLen
  else:
    self.originalLen + (self.K - remainder)
func steps*(self: Manifest): int =
  ## Number of blocks per row: the rounded length divided by `K`
  let paddedLen = self.rounded
  paddedLen div self.K
proc cid*(self: Manifest): ?!Cid =
## Generate a root hash using the treehash algorithm
@ -136,6 +131,7 @@ proc cid*(self: Manifest): ?!Cid =
proc new*(
T: type Manifest,
blocks: openArray[Cid] = [],
protected = false,
version = CIDv1,
hcodec = multiCodec("sha2-256"),
codec = multiCodec("raw"),
@ -151,8 +147,45 @@ proc new*(
version: version,
codec: codec,
hcodec: hcodec,
blockSize: blockSize
).success
blockSize: blockSize,
protected: protected).success
proc new*(
  T: type Manifest,
  manifest: Manifest,
  K, M: int): ?!Manifest =
  ## Create an erasure protected dataset from an
  ## un-protected one
  ##
  ## `K` and `M` are stored on the new manifest, together with the Cid
  ## and the length of `manifest`. The block list is sized to
  ## `rounded + steps * M`: the first `manifest.len` entries are copied
  ## from `manifest`, entries up to `rounded` are filled with the empty
  ## Cid, and the remaining entries are left at their default value.
  ##
  ## Returns a failure when `K` is not positive, when `M` is negative,
  ## or when the Cid of `manifest` cannot be computed.
  ##

  # `rounded` and `steps` divide by `K`, and the block list length is
  # derived from `M`; reject values that would abort with a Defect
  if K <= 0:
    return failure("K should be greater than zero!")

  if M < 0:
    return failure("M should not be negative!")

  let
    self = Manifest(
      version: manifest.version,
      codec: manifest.codec,
      hcodec: manifest.hcodec,
      blockSize: manifest.blockSize,
      protected: true,
      K: K, M: M,
      originalCid: ? manifest.cid,
      originalLen: manifest.len)

  let
    rounded = self.rounded
    encodedLen = rounded + (self.steps * M)

  self.blocks = newSeq[Cid](encodedLen)

  # copy original manifest blocks
  for i in 0..<manifest.len:
    self.blocks[i] = manifest[i]

  # pad up to `rounded` with the empty Cid; it is the same for every
  # padded slot, so look it up once and only when padding is needed
  if rounded > manifest.len:
    let
      padding = EmptyCid[manifest.version]
        .catch
        .get()[manifest.hcodec]
        .catch
        .get()

    for i in manifest.len..<rounded:
      self.blocks[i] = padding

  self.success
proc new*(
T: type Manifest,

View File

@ -27,17 +27,18 @@ suite "Manifest":
]).tryGet()
let
checksum = @[18.byte, 32, 227, 176, 196, 66, 152,
encoded = @[byte 18, 32, 227, 176, 196, 66, 152,
252, 28, 20, 154, 251, 244, 200, 153,
111, 185, 36, 39, 174, 65, 228, 100,
155, 147, 76, 164, 149, 153, 27, 120,
82, 184, 85]
var mh: MultiHash
check MultiHash.decode(checksum, mh).tryGet() > 0
check MultiHash.decode(encoded, mh).tryGet() > 0
let checkSumCid = Cid.init(manifest.version, manifest.codec, mh).tryGet()
check checkSumCid == manifest.cid.tryGet()
let encodedCid = Cid.init(manifest.version, manifest.codec, mh).tryGet()
check:
encodedCid == manifest.cid.tryGet()
test "Should encode/decode to/from manifest":
let
@ -46,10 +47,51 @@ suite "Manifest":
)
var
blocksManifest = Manifest.new(blocks).tryGet()
manifest = Manifest.new(blocks).tryGet()
let
e = blocksManifest.encode().tryGet()
manifest = Manifest.decode(e).tryGet()
e = manifest.encode().tryGet()
decoded = Manifest.decode(e).tryGet()
check manifest.blocks == blocks
check:
decoded.blocks == blocks
decoded.protected == false
test "Should produce a protected manifest":
let
blocks = (0..<333).mapIt(
Block.new(("Block " & $it).toBytes).tryGet().cid
)
manifest = Manifest.new(blocks).tryGet()
protected = Manifest.new(manifest, 2, 2).tryGet()
check:
protected.originalCid == manifest.cid.tryGet()
protected.blocks[0..<333] == manifest.blocks
protected.protected == true
protected.originalLen == manifest.len
# fill up with empty Cid's
for i in protected.rounded..<protected.len:
protected.blocks[i] = EmptyCid[manifest.version]
.catch
.get()[manifest.hcodec]
.catch
.get()
var
encoded = protected.encode().tryGet()
decoded = Manifest.decode(encoded).tryGet()
check:
decoded.protected == true
decoded.originalLen == manifest.len
decoded.K == protected.K
decoded.M == protected.M
decoded.originalCid == protected.originalCid
decoded.originalCid == manifest.cid.tryGet()
decoded.blocks == protected.blocks
decoded.blocks[0..<333] == manifest.blocks