From afec86b3cfb1fe897f75bebf5cbbc03504b61456 Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Wed, 7 Feb 2024 14:54:57 -0600 Subject: [PATCH] cleanup manifest and wire in strategy (#696) --- codex/erasure/backend.nim | 14 +-- codex/erasure/erasure.nim | 39 +++++---- codex/manifest/coders.nim | 27 +++++- codex/manifest/manifest.nim | 120 ++++++++++++++++---------- tests/codex/slots/provingtestenv.nim | 3 +- tests/codex/slots/testslotbuilder.nim | 9 +- tests/codex/testerasure.nim | 1 + tests/codex/testmanifest.nim | 4 +- 8 files changed, 136 insertions(+), 81 deletions(-) diff --git a/codex/erasure/backend.nim b/codex/erasure/backend.nim index 2fa53d68..728c8bed 100644 --- a/codex/erasure/backend.nim +++ b/codex/erasure/backend.nim @@ -14,17 +14,17 @@ push: {.upraises: [].} import ../stores type - Backend* = ref object of RootObj + ErasureBackend* = ref object of RootObj blockSize*: int # block size in bytes buffers*: int # number of original pieces parity*: int # number of redundancy pieces - EncoderBackend* = ref object of Backend - DecoderBackend* = ref object of Backend + EncoderBackend* = ref object of ErasureBackend + DecoderBackend* = ref object of ErasureBackend -method release*(self: Backend) {.base.} = +method release*(self: ErasureBackend) {.base.} = ## release the backend - ## + ## raiseAssert("not implemented!") method encode*( @@ -33,7 +33,7 @@ method encode*( parity: var openArray[seq[byte]] ): Result[void, cstring] {.base.} = ## encode buffers using a backend - ## + ## raiseAssert("not implemented!") method decode*( @@ -43,5 +43,5 @@ method decode*( recovered: var openArray[seq[byte]] ): Result[void, cstring] {.base.} = ## decode buffers using a backend - ## + ## raiseAssert("not implemented!") diff --git a/codex/erasure/erasure.nim b/codex/erasure/erasure.nim index 6ef6e4ee..92b227aa 100644 --- a/codex/erasure/erasure.nim +++ b/codex/erasure/erasure.nim @@ -77,6 +77,7 @@ type rounded: Natural steps: Natural blocksCount: Natural + strategy: StrategyType func indexToPos(steps, idx, step: int): int {.inline.} = ## Convert an index to a position in the encoded @@ -130,7 +131,7 @@ proc prepareEncodingData( ## let - strategy = SteppedStrategy.init( + strategy = params.strategy.init( firstIndex = 0, lastIndex = params.rounded - 1, iterations = params.steps @@ -179,7 +180,7 @@ proc prepareDecodingData( ## let - strategy = SteppedStrategy.init( + strategy = encoded.protectedStrategy.init( firstIndex = 0, lastIndex = encoded.blocksCount - 1, iterations = encoded.steps @@ -229,7 +230,8 @@ proc prepareDecodingData( proc init*( _: type EncodingParams, manifest: Manifest, - ecK: Natural, ecM: Natural): ?!EncodingParams = + ecK: Natural, ecM: Natural, + strategy: StrategyType): ?!EncodingParams = if ecK > manifest.blocksCount: return failure( "Unable to encode manifest, not enough blocks, ecK = " & @@ -242,13 +244,14 @@ proc init*( steps = divUp(manifest.blocksCount, ecK) blocksCount = rounded + (steps * ecM) - EncodingParams( + success EncodingParams( ecK: ecK, ecM: ecM, rounded: rounded, steps: steps, - blocksCount: blocksCount - ).success + blocksCount: blocksCount, + strategy: strategy + ) proc encodeData( self: Erasure, @@ -327,11 +330,12 @@ proc encodeData( treeCid = treeCid, datasetSize = (manifest.blockSize.int * params.blocksCount).NBytes, ecK = params.ecK, - ecM = params.ecM + ecM = params.ecM, + strategy = params.strategy ) trace "Encoded data successfully", treeCid, blocksCount = params.blocksCount - return encodedManifest.success + success encodedManifest except CancelledError as exc: trace "Erasure coding encoding cancelled" raise exc # cancellation needs to be propagated @@ -345,7 +349,8 @@ proc encode*( self: Erasure, manifest: Manifest, blocks: Natural, - parity: Natural): Future[?!Manifest] {.async.} = + parity: Natural, + strategy = SteppedStrategy): Future[?!Manifest] {.async.} = ## Encode a manifest into one that is erasure protected. ## ## `manifest` - the original manifest to be encoded @@ -353,7 +358,7 @@ proc encode*( ## `parity` - the number of parity blocks to generate - M ## - without params =? EncodingParams.init(manifest, blocks.int, parity.int), err: + without params =? EncodingParams.init(manifest, blocks.int, parity.int, strategy), err: return failure(err) without encodedManifest =? await self.encodeData(manifest, params), err: @@ -362,9 +367,8 @@ proc encode*( return success encodedManifest proc decode*( - self: Erasure, - encoded: Manifest -): Future[?!Manifest] {.async.} = + self: Erasure, + encoded: Manifest): Future[?!Manifest] {.async.} = ## Decode a protected manifest into it's original ## manifest ## @@ -465,11 +469,10 @@ proc stop*(self: Erasure) {.async.} = return proc new*( - T: type Erasure, - store: BlockStore, - encoderProvider: EncoderProvider, - decoderProvider: DecoderProvider -): Erasure = + T: type Erasure, + store: BlockStore, + encoderProvider: EncoderProvider, + decoderProvider: DecoderProvider): Erasure = ## Create a new Erasure instance for encoding and decoding manifests Erasure( diff --git a/codex/manifest/coders.nim b/codex/manifest/coders.nim index 1106c9bc..2e824e4d 100644 --- a/codex/manifest/coders.nim +++ b/codex/manifest/coders.nim @@ -25,6 +25,7 @@ import ./manifest import ../errors import ../blocktype import ../logutils +import ../indexingstrategy proc encode*(manifest: Manifest): ?!seq[byte] = ## Encode the manifest into a ``ManifestCodec`` @@ -75,13 +76,16 @@ proc encode*(manifest: Manifest): ?!seq[byte] = erasureInfo.write(2, manifest.ecM.uint32) erasureInfo.write(3, manifest.originalTreeCid.data.buffer) erasureInfo.write(4, manifest.originalDatasetSize.uint32) + erasureInfo.write(5, manifest.protectedStrategy.uint32) if manifest.verifiable: var verificationInfo = initProtoBuffer() verificationInfo.write(1, manifest.verifyRoot.data.buffer) for slotRoot in manifest.slotRoots: verificationInfo.write(2, slotRoot.data.buffer) - erasureInfo.write(5, verificationInfo) + verificationInfo.write(3, manifest.cellSize.uint32) + verificationInfo.write(4, manifest.verifiableStrategy.uint32) + erasureInfo.write(6, verificationInfo) erasureInfo.finish() header.write(7, erasureInfo) @@ -109,8 +113,11 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest = blockSize: uint32 originalDatasetSize: uint32 ecK, ecM: uint32 + protectedStrategy: uint32 verifyRoot: seq[byte] slotRoots: seq[seq[byte]] + cellSize: uint32 + verifiableStrategy: uint32 # Decode `Header` message if pbNode.getField(1, pbHeader).isErr: @@ -153,7 +160,10 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest = if pbErasureInfo.getField(4, originalDatasetSize).isErr: return failure("Unable to decode `originalDatasetSize` from manifest!") - if pbErasureInfo.getField(5, pbVerificationInfo).isErr: + if pbErasureInfo.getField(5, protectedStrategy).isErr: + return failure("Unable to decode `protectedStrategy` from manifest!") + + if pbErasureInfo.getField(6, pbVerificationInfo).isErr: return failure("Unable to decode `verificationInfo` from manifest!") verifiable = pbVerificationInfo.buffer.len > 0 @@ -164,6 +174,12 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest = if pbVerificationInfo.getRequiredRepeatedField(2, slotRoots).isErr: return failure("Unable to decode `slotRoots` from manifest!") + if pbVerificationInfo.getField(3, cellSize).isErr: + return failure("Unable to decode `cellSize` from manifest!") + + if pbVerificationInfo.getField(4, verifiableStrategy).isErr: + return failure("Unable to decode `verifiableStrategy` from manifest!") + let treeCid = ? Cid.init(treeCidBuf).mapFailure @@ -179,7 +195,8 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest = ecK = ecK.int, ecM = ecM.int, originalTreeCid = ? Cid.init(originalTreeCid).mapFailure, - originalDatasetSize = originalDatasetSize.NBytes) + originalDatasetSize = originalDatasetSize.NBytes, + strategy = StrategyType(protectedStrategy)) else: Manifest.new( treeCid = treeCid, @@ -199,7 +216,9 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest = return Manifest.new( manifest = self, verifyRoot = verifyRootCid, - slotRoots = slotRootCids + slotRoots = slotRootCids, + cellSize = cellSize.NBytes, + strategy = StrategyType(verifiableStrategy) ) self.success diff --git a/codex/manifest/manifest.nim b/codex/manifest/manifest.nim index 8b491a17..922a1062 100644 --- a/codex/manifest/manifest.nim +++ b/codex/manifest/manifest.nim @@ -22,12 +22,16 @@ import ../utils import ../utils/json import ../units import ../blocktype +import ../indexingstrategy import ../logutils + +# TODO: Manifest should be reworked to more concrete types, +# perhaps using inheritance type Manifest* = ref object of RootObj treeCid {.serialize.}: Cid # Root of the merkle tree - datasetSize {.serialize.}: NBytes # Total size of all blocks + datasetSize {.serialize.}: NBytes # Total size of all blocks blockSize {.serialize.}: NBytes # Size of each contained block (might not be needed if blocks are len-prefixed) codec: MultiCodec # Dataset codec hcodec: MultiCodec # Multihash codec @@ -38,10 +42,13 @@ type ecM: int # Number of resulting parity blocks originalTreeCid: Cid # The original root of the dataset being erasure coded originalDatasetSize: NBytes + protectedStrategy: StrategyType # Indexing strategy used to build the slot roots case verifiable {.serialize.}: bool # Verifiable datasets can be used to generate storage proofs of true: verifyRoot: Cid # Root of the top level merkle tree built from slot roots slotRoots: seq[Cid] # Individual slot root built from the original dataset blocks + cellSize: NBytes # Size of each slot cell + verifiableStrategy: StrategyType # Indexing strategy used to build the slot roots else: discard else: @@ -51,59 +58,68 @@ type # Accessors ############################################################ -proc blockSize*(self: Manifest): NBytes = +func blockSize*(self: Manifest): NBytes = self.blockSize -proc datasetSize*(self: Manifest): NBytes = +func datasetSize*(self: Manifest): NBytes = self.datasetSize -proc version*(self: Manifest): CidVersion = +func version*(self: Manifest): CidVersion = self.version -proc hcodec*(self: Manifest): MultiCodec = +func hcodec*(self: Manifest): MultiCodec = self.hcodec -proc codec*(self: Manifest): MultiCodec = +func codec*(self: Manifest): MultiCodec = self.codec -proc protected*(self: Manifest): bool = +func protected*(self: Manifest): bool = self.protected -proc ecK*(self: Manifest): int = +func ecK*(self: Manifest): int = self.ecK -proc ecM*(self: Manifest): int = +func ecM*(self: Manifest): int = self.ecM -proc originalTreeCid*(self: Manifest): Cid = +func originalTreeCid*(self: Manifest): Cid = self.originalTreeCid -proc originalBlocksCount*(self: Manifest): int = +func originalBlocksCount*(self: Manifest): int = divUp(self.originalDatasetSize.int, self.blockSize.int) -proc originalDatasetSize*(self: Manifest): NBytes = +func originalDatasetSize*(self: Manifest): NBytes = self.originalDatasetSize -proc treeCid*(self: Manifest): Cid = +func treeCid*(self: Manifest): Cid = self.treeCid -proc blocksCount*(self: Manifest): int = +func blocksCount*(self: Manifest): int = divUp(self.datasetSize.int, self.blockSize.int) -proc verifiable*(self: Manifest): bool = - self.verifiable +func verifiable*(self: Manifest): bool = + bool (self.protected and self.verifiable) -proc verifyRoot*(self: Manifest): Cid = +func verifyRoot*(self: Manifest): Cid = self.verifyRoot -proc slotRoots*(self: Manifest): seq[Cid] = +func slotRoots*(self: Manifest): seq[Cid] = self.slotRoots -proc numSlots*(self: Manifest): int = - if not self.protected: - 0 - else: - self.ecK + self.ecM +func numSlots*(self: Manifest): int = + self.ecK + self.ecM + +func cellSize*(self: Manifest): NBytes = + self.cellSize + +func protectedStrategy*(self: Manifest): StrategyType = + self.protectedStrategy + +func verifiableStrategy*(self: Manifest): StrategyType = + self.verifiableStrategy + +func numSlotBlocks*(self: Manifest): int = + divUp(self.blocksCount, self.numSlots) ############################################################ # Operations on block list @@ -143,10 +159,10 @@ func verify*(self: Manifest): ?!void = return success() -proc cid*(self: Manifest): ?!Cid {.deprecated: "use treeCid instead".} = +func cid*(self: Manifest): ?!Cid {.deprecated: "use treeCid instead".} = self.treeCid.success -proc `==`*(a, b: Manifest): bool = +func `==`*(a, b: Manifest): bool = (a.treeCid == b.treeCid) and (a.datasetSize == b.datasetSize) and (a.blockSize == b.blockSize) and @@ -159,16 +175,19 @@ proc `==`*(a, b: Manifest): bool = (a.ecM == b.ecM) and (a.originalTreeCid == b.originalTreeCid) and (a.originalDatasetSize == b.originalDatasetSize) and + (a.protectedStrategy == b.protectedStrategy) and (a.verifiable == b.verifiable) and (if a.verifiable: (a.verifyRoot == b.verifyRoot) and - (a.slotRoots == b.slotRoots) + (a.slotRoots == b.slotRoots) and + (a.cellSize == b.cellSize) and + (a.verifiableStrategy == b.verifiableStrategy) else: true) else: true) -proc `$`*(self: Manifest): string = +func `$`*(self: Manifest): string = "treeCid: " & $self.treeCid & ", datasetSize: " & $self.datasetSize & ", blockSize: " & $self.blockSize & @@ -194,7 +213,7 @@ proc `$`*(self: Manifest): string = # Constructors ############################################################ -proc new*( +func new*( T: type Manifest, treeCid: Cid, blockSize: NBytes, @@ -213,12 +232,13 @@ proc new*( hcodec: hcodec, protected: protected) -proc new*( +func new*( T: type Manifest, manifest: Manifest, treeCid: Cid, datasetSize: NBytes, - ecK, ecM: int): Manifest = + ecK, ecM: int, + strategy: StrategyType): Manifest = ## Create an erasure protected dataset from an ## unprotected one ## @@ -233,9 +253,10 @@ proc new*( protected: true, ecK: ecK, ecM: ecM, originalTreeCid: manifest.treeCid, - originalDatasetSize: manifest.datasetSize) + originalDatasetSize: manifest.datasetSize, + protectedStrategy: strategy) -proc new*( +func new*( T: type Manifest, manifest: Manifest): Manifest = ## Create an unprotected dataset from an @@ -251,15 +272,7 @@ proc new*( blockSize: manifest.blockSize, protected: false) -proc new*( - T: type Manifest, - data: openArray[byte]): ?!Manifest = - ## Create a manifest instance from given data - ## - - Manifest.decode(data) - -proc new*( +func new*( T: type Manifest, treeCid: Cid, datasetSize: NBytes, @@ -270,7 +283,8 @@ proc new*( ecK: int, ecM: int, originalTreeCid: Cid, - originalDatasetSize: NBytes): Manifest = + originalDatasetSize: NBytes, + strategy: StrategyType): Manifest = Manifest( treeCid: treeCid, @@ -283,14 +297,16 @@ proc new*( ecK: ecK, ecM: ecM, originalTreeCid: originalTreeCid, - originalDatasetSize: originalDatasetSize - ) + originalDatasetSize: originalDatasetSize, + protectedStrategy: strategy) -proc new*( +func new*( T: type Manifest, manifest: Manifest, verifyRoot: Cid, - slotRoots: openArray[Cid]): ?!Manifest = + slotRoots: openArray[Cid], + cellSize = DefaultCellSize, + strategy = SteppedStrategy): ?!Manifest = ## Create a verifiable dataset from an ## protected one ## @@ -317,4 +333,14 @@ proc new*( originalDatasetSize: manifest.originalDatasetSize, verifiable: true, verifyRoot: verifyRoot, - slotRoots: @slotRoots) + slotRoots: @slotRoots, + cellSize: cellSize, + verifiableStrategy: strategy) + +func new*( + T: type Manifest, + data: openArray[byte]): ?!Manifest = + ## Create a manifest instance from given data + ## + + Manifest.decode(data) diff --git a/tests/codex/slots/provingtestenv.nim b/tests/codex/slots/provingtestenv.nim index 7581a3da..a40a89b8 100644 --- a/tests/codex/slots/provingtestenv.nim +++ b/tests/codex/slots/provingtestenv.nim @@ -142,7 +142,8 @@ proc createManifest(self: ProvingTestEnvironment): Future[void] {.async.} = treeCid = treeCid, datasetSize = self.manifest.datasetSize, ecK = totalNumberOfSlots, - ecM = 0 + ecM = 0, + strategy = StrategyType.SteppedStrategy ) # Verifiable manifest: diff --git a/tests/codex/slots/testslotbuilder.nim b/tests/codex/slots/testslotbuilder.nim index d8e71b1b..ffc6dd4c 100644 --- a/tests/codex/slots/testslotbuilder.nim +++ b/tests/codex/slots/testslotbuilder.nim @@ -100,7 +100,8 @@ suite "Slot builder": treeCid = protectedTreeCid, datasetSize = totalDatasetSize.NBytes, ecK = ecK, - ecM = ecM) + ecM = ecM, + strategy = StrategyType.SteppedStrategy) let manifestBlock = bt.Block.new( @@ -166,7 +167,8 @@ suite "Slot builder": treeCid = Cid.example, datasetSize = totalDatasetSize.NBytes, ecK = ecK - 1, - ecM = ecM) + ecM = ecM, + strategy = StrategyType.SteppedStrategy) check: SlotsBuilder.new(localStore, mismatchManifest, cellSize = cellSize) @@ -182,7 +184,8 @@ suite "Slot builder": treeCid = Cid.example, datasetSize = (totalDatasetSize - 1).NBytes, ecK = ecK, - ecM = ecM) + ecM = ecM, + strategy = StrategyType.SteppedStrategy) check: SlotsBuilder.new(localStore, mismatchManifest, cellSize = cellSize) diff --git a/tests/codex/testerasure.nim b/tests/codex/testerasure.nim index dbb6e5f1..27775180 100644 --- a/tests/codex/testerasure.nim +++ b/tests/codex/testerasure.nim @@ -11,6 +11,7 @@ import pkg/codex/stores import pkg/codex/blocktype as bt import pkg/codex/rng import pkg/codex/utils +import pkg/codex/indexingstrategy import ../asynctest import ./helpers diff --git a/tests/codex/testmanifest.nim b/tests/codex/testmanifest.nim index 4fe39f47..10fb7772 100644 --- a/tests/codex/testmanifest.nim +++ b/tests/codex/testmanifest.nim @@ -9,6 +9,7 @@ import pkg/poseidon2 import pkg/codex/slots import pkg/codex/merkletree +import pkg/codex/indexingstrategy import ../asynctest import ./helpers @@ -27,7 +28,8 @@ checksuite "Manifest": treeCid = Cid.example, datasetSize = 200.MiBs, eck = 2, - ecM = 2 + ecM = 2, + strategy = SteppedStrategy ) leaves = [