## Nim-Codex
## Copyright (c) 2022 Status Research & Development GmbH
## Licensed under either of
##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.

import pkg/upraises

push: {.upraises: [].}

import std/sequtils
import std/sugar

import pkg/chronos
import pkg/libp2p/[multicodec, cid, multihash]
import pkg/libp2p/protobuf/minprotobuf
import pkg/taskpools

import ../logutils
import ../manifest
import ../merkletree
import ../stores
import ../blocktype as bt
import ../utils
import ../utils/asynciter
import ../indexingstrategy
import ../errors

import pkg/stew/byteutils
import pkg/stew/io2

import ./backend
import ./asyncbackend

export backend

logScope:
  topics = "codex erasure"

type
  ## Encode a manifest into one that is erasure protected.
  ##
  ## The new manifest has K `blocks` that are encoded into
  ## additional M `parity` blocks. The resulting dataset
  ## is padded with empty blocks if it doesn't have a square
  ## shape.
  ##
  ## NOTE: The padding blocks could be excluded
  ## from transmission, but they aren't for now.
  ##
  ## The resulting dataset is logically divided into rows
  ## where a row is made up of B blocks. There are then,
  ## K + M = N rows in total, each of length B blocks. Rows
  ## are assumed to be of the same number of (B) blocks.
  ##
  ## The encoding is systematic and the rows can be
  ## read sequentially by any node without decoding.
  ##
  ## Decoding is possible with any K rows or partial K
  ## columns (with up to M blocks missing per column),
  ## or any combination thereof.
  ##

  EncoderProvider* = proc(size, blocks, parity: int): EncoderBackend
    {.raises: [Defect], noSideEffect.}

  DecoderProvider* = proc(size, blocks, parity: int): DecoderBackend
    {.raises: [Defect], noSideEffect.}

  Erasure* = ref object
    encoderProvider*: EncoderProvider
    decoderProvider*: DecoderProvider
    store*: BlockStore
    taskpool: Taskpool

  EncodingParams = object
    ecK: Natural
    ecM: Natural
    rounded: Natural
    steps: Natural
    blocksCount: Natural
    strategy: StrategyType

  ErasureError* = object of CodexError
  InsufficientBlocksError* = object of ErasureError
    # Minimum size, in bytes, that the dataset must have had
    # for the encoding request to have succeeded with the parameters
    # provided.
    minSize*: NBytes

func indexToPos(steps, idx, step: int): int {.inline.} =
  ## Convert a dataset index to its position within a step.
  ##
  ## `steps` - the total number of steps
  ## `idx`   - the index to convert
  ## `step`  - the current step
  ##
  ## Returns `(idx - step) div steps`.
  ##

  (idx - step) div steps

proc getPendingBlocks(
  self: Erasure,
  manifest: Manifest,
  indicies: seq[int]): AsyncIter[(?!bt.Block, int)] =
  ## Get pending blocks iterator
  ##
  ## Maps every index in `indicies` to a `getBlock` request against
  ## `self.store` (addressed by `manifest.treeCid` and the index) and
  ## pairs each result with the index it was requested for. Items are
  ## produced in the order of `indicies`.
  ##

  var
    indiciesIter = Iter[int].new(indicies)
    pendingBlocks = mapAsync[int, (?!bt.Block, int)](indiciesIter, (i: int) =>
      self.store.getBlock(
        BlockAddress.init(manifest.treeCid, i)
      ).map((r: ?!bt.Block) => (r, i)) # Get the data blocks (first K)
    )

  pendingBlocks

proc getPendingBlocks2(
  self: Erasure,
  manifest: Manifest,
  indicies: seq[int]): AsyncIter[(?!bt.Block, int)] =
  ## Get pending blocks iterator
  ##
  ## Requests exactly the blocks named in `indicies`.
  ##
  ## This proc previously ignored `indicies` (apart from `indicies[0]`,
  ## which it used as an offset) and requested a hard-coded list of
  ## indices instead. That raised an `IndexDefect` for an empty
  ## `indicies` and only made sense for one particular dataset layout.
  ## It now simply forwards to `getPendingBlocks`.
  ##

  self.getPendingBlocks(manifest, indicies)

proc getPendingBlocks3(
  self: Erasure,
  manifest: Manifest,
  indicies: seq[int]): AsyncIter[(?!bt.Block, int)] =
  ## Get pending blocks iterator
  ##
  ## All `getBlock` requests are created up front; every call to the
  ## iterator's `genNext` awaits `one(...)` of the still pending
  ## futures, removes it from the pending list and returns its result.
  ## The iterator is finished once no pending future is left.
  ##

  var
    # request blocks from the store
    pendingBlocks = indicies.map( (i: int) =>
      self.store.getBlock(
        BlockAddress.init(manifest.treeCid, i)
      ).map((r: ?!bt.Block) => (r, i)) # Get the data blocks (first K)
    )

  proc isFinished(): bool = pendingBlocks.len == 0

  proc genNext(): Future[(?!bt.Block, int)] {.async.} =
    let completedFut = await one(pendingBlocks)
    if (let i = pendingBlocks.find(completedFut); i >= 0):
      pendingBlocks.del(i)
      return await completedFut
    else:
      # The completed future is not one of ours; report which block
      # it belonged to.
      let (_, index) = await completedFut
      raise newException(
        CatchableError,
        "Future for block id not found, tree cid: " & $manifest.treeCid & ", index: " & $index)

  AsyncIter[(?!bt.Block, int)].new(genNext, isFinished)

proc prepareEncodingData(
  self: Erasure,
  manifest: Manifest,
  params: EncodingParams,
  step: Natural,
  data: ref seq[seq[byte]],
  cids: ref seq[Cid],
  emptyBlock: seq[byte]): Future[?!Natural] {.async.} =
  ## Prepare data for encoding
  ##
  ## `manifest`   - the manifest whose blocks are being encoded
  ## `params`     - the encoding parameters
  ## `step`       - the current step
  ## `data`       - receives the block payloads for this step
  ## `cids`       - receives the cid of every block of this step
  ## `emptyBlock` - the empty block to be used for padding
  ##
  ## Returns the number of blocks that were retrieved from the store.
  ## Blocks that fail to be retrieved are logged and skipped.
  ##

  let
    strategy = params.strategy.init(
      firstIndex = 0,
      lastIndex = params.rounded - 1,
      iterations = params.steps
    )
    indicies = toSeq(strategy.getIndicies(step))
    # Only indices below `manifest.blocksCount` are fetched from the store.
    pendingBlocksIter = self.getPendingBlocks(
      manifest, indicies.filterIt(it < manifest.blocksCount))

  var resolved = 0
  for fut in pendingBlocksIter:
    let (blkOrErr, idx) = await fut
    without blk =? blkOrErr, err:
      warn "Failed retreiving a block", treeCid = manifest.treeCid, idx, msg = err.msg
      continue

    let pos = indexToPos(params.steps, idx, step)
    shallowCopy(data[pos], if blk.isEmpty: emptyBlock else: blk.data)
    cids[idx] = blk.cid

    resolved.inc()

  # Indices at or past `manifest.blocksCount` are filled with the
  # empty block.
  let paddingIndicies = indicies.filterIt(it >= manifest.blocksCount)
  if paddingIndicies.len > 0:
    # The empty block cid does not depend on the index, look it up once.
    without emptyBlockCid =? emptyCid(manifest.version, manifest.hcodec, manifest.codec), err:
      return failure(err)

    for idx in paddingIndicies:
      let pos = indexToPos(params.steps, idx, step)
      trace "Padding with empty block", idx
      shallowCopy(data[pos], emptyBlock)
      cids[idx] = emptyBlockCid

  success(resolved.Natural)

proc prepareDecodingData(
  self: Erasure,
  encoded: Manifest,
  step: Natural,
  data: ref seq[seq[byte]],
  parityData: ref seq[seq[byte]],
  cids: ref seq[Cid],
  emptyBlock: seq[byte]): Future[?!(Natural, Natural)] {.async.} =
  ## Prepare data for decoding
  ## `encoded`    - the encoded manifest
  ## `step`       - the current step
  ## `data`       - the data to be prepared
  ## `parityData` - the parityData to be prepared
  ## `cids`       - cids of prepared data
  ## `emptyBlock` - the empty block to be used for padding
  ##
  ## Returns the number of data pieces and parity pieces retrieved.
  ##

  # Indices of the blocks that were actually retrieved, in arrival order.
  var recIndicies = newSeq[int]()

  let
    strategy = encoded.protectedStrategy.init(
      firstIndex = 0,
      lastIndex = encoded.blocksCount - 1,
      iterations = encoded.steps
    )
    indicies = toSeq(strategy.getIndicies(step))
    pendingBlocksIter = self.getPendingBlocks2(encoded, indicies)

  var
    dataPieces = 0
    parityPieces = 0
    resolved = 0
  for fut in pendingBlocksIter:
    # Continue to receive blocks until we have just enough for decoding
    # or no more blocks can arrive
    if resolved >= encoded.ecK:
      break

    let (blkOrErr, idx) = await fut
    without blk =? blkOrErr, err:
      trace "Failed retreiving a block", idx, treeCid = encoded.treeCid, msg = err.msg
      continue

    recIndicies.add(idx)

    let
      pos = indexToPos(encoded.steps, idx, step)

    logScope:
      cid   = blk.cid
      idx   = idx
      pos   = pos
      step  = step
      empty = blk.isEmpty

    cids[idx] = blk.cid
    if idx >= encoded.rounded:
      # Indices at or past `rounded` are parity blocks; their slot in
      # `parityData` is offset by the `ecK` data positions.
      trace "Retrieved parity block"
      shallowCopy(parityData[pos - encoded.ecK], if blk.isEmpty: emptyBlock else: blk.data)
      parityPieces.inc
    else:
      trace "Retrieved data block"
      shallowCopy(data[pos], if blk.isEmpty: emptyBlock else: blk.data)
      dataPieces.inc

    resolved.inc

  let
    recCids = collect:
      for i in recIndicies: cids[i]

  # The operand of this `without` continues on the following line of
  # the file.
  without recTree =?
CodexTree.init(recCids), err: return failure(err) return success (dataPieces.Natural, parityPieces.Natural) proc init*( _: type EncodingParams, manifest: Manifest, ecK: Natural, ecM: Natural, strategy: StrategyType): ?!EncodingParams = if ecK > manifest.blocksCount: let exc = (ref InsufficientBlocksError)( msg: "Unable to encode manifest, not enough blocks, ecK = " & $ecK & ", blocksCount = " & $manifest.blocksCount, minSize: ecK.NBytes * manifest.blockSize) return failure(exc) let rounded = roundUp(manifest.blocksCount, ecK) steps = divUp(rounded, ecK) blocksCount = rounded + (steps * ecM) success EncodingParams( ecK: ecK, ecM: ecM, rounded: rounded, steps: steps, blocksCount: blocksCount, strategy: strategy ) proc encodeData( self: Erasure, manifest: Manifest, params: EncodingParams ): Future[?!Manifest] {.async.} = ## Encode blocks pointed to by the protected manifest ## ## `manifest` - the manifest to encode ## logScope: steps = params.steps rounded_blocks = params.rounded blocks_count = params.blocksCount ecK = params.ecK ecM = params.ecM var cids = seq[Cid].new() encoder = self.encoderProvider(manifest.blockSize.int, params.ecK, params.ecM) emptyBlock = newSeq[byte](manifest.blockSize.int) cids[].setLen(params.blocksCount) try: for step in 0.. 0: copyMem(addr buf[offset], addr bytes[i][0], bytes[i].len) offset = offset + bytes[i].len let mhash = MultiHash.digest("sha2-256", buf).mapFailure() return mhash.get().hex # without mh =? 
mhash, err: # return "error " & err.msg # return mh.hex # proc unsafeHashOf(bytes: seq[pointer], lens: seq[int]): string = # var totalLen = 0 # for l in lens: # totalLen = totalLen + l # var buf = newSeq[byte]() # buf.setLen(totalLen) # var offset = 0 # for l in lens: # if l > 0: # copyMem(addr buf[offset], bytes[i], l) # offset = offset + l # let mhash = MultiHash.digest("sha2-256", buf) # return $mhash proc decode*( self: Erasure, encoded: Manifest): Future[?!Manifest] {.async.} = ## Decode a protected manifest into it's original ## manifest ## ## `encoded` - the encoded (protected) manifest to ## be recovered ## logScope: steps = encoded.steps rounded_blocks = encoded.rounded new_manifest = encoded.blocksCount var cids = seq[Cid].new() recoveredIndices = newSeq[Natural]() decoder = self.decoderProvider(encoded.blockSize.int, encoded.ecK, encoded.ecM) emptyBlock = newSeq[byte](encoded.blockSize.int) cids[].setLen(encoded.blocksCount) try: for step in 0..= encoded.ecK: trace "Retrieved all the required data blocks" continue trace "Erasure decoding data" without recovered =? await asyncDecode(self.taskpool, decoder, data, parity, encoded.blockSize.int), err: trace "Error decoding data", err = err.msg return failure(err) echo "hash of recovered " & hashOf(recovered) # GC_fullCollect() for i in 0.. i < tree.leavesCount) if err =? (await self.store.putSomeProofs(tree, idxIter)).errorOption: return failure(err) let decoded = Manifest.new(encoded) return decoded.success proc start*(self: Erasure) {.async.} = return proc stop*(self: Erasure) {.async.} = return proc new*( T: type Erasure, store: BlockStore, encoderProvider: EncoderProvider, decoderProvider: DecoderProvider, taskpool: Taskpool): Erasure = ## Create a new Erasure instance for encoding and decoding manifests ## Erasure( store: store, encoderProvider: encoderProvider, decoderProvider: decoderProvider, taskpool: taskpool)