This commit is contained in:
Dmitriy Ryajov 2023-12-19 20:36:23 -06:00
parent 9f05f6d16f
commit 98f60411a6
No known key found for this signature in database
GPG Key ID: DA8C680CE7C657A4
4 changed files with 61 additions and 86 deletions

View File

@ -25,22 +25,22 @@ import ../blocktype
type
Manifest* = ref object of RootObj
treeCid {.serialize.}: Cid # Root of the merkle tree
datasetSize {.serialize.}: NBytes # Total size of all blocks
blockSize {.serialize.}: NBytes # Size of each contained block (might not be needed if blocks are len-prefixed)
codec: MultiCodec # Dataset codec
hcodec: MultiCodec # Multihash codec
version: CidVersion # Cid version
case protected {.serialize.}: bool # Protected datasets have erasure coded info
treeCid* {.serialize.}: Cid # Root of the merkle tree
datasetSize* {.serialize.}: NBytes # Total size of all blocks
blockSize* {.serialize.}: NBytes # Size of each contained block (might not be needed if blocks are len-prefixed)
codec*: MultiCodec # Dataset codec
hcodec*: MultiCodec # Multihash codec
version*: CidVersion # Cid version
case protected* {.serialize.}: bool # Protected datasets have erasure coded info
of true:
ecK: int # Number of blocks to encode
ecM: int # Number of resulting parity blocks
originalTreeCid: Cid # The original root of the dataset being erasure coded
originalDatasetSize: NBytes
case verifiable {.serialize.}: bool # Verifiable datasets can be used to generate storage proofs
ecK*: int # Number of blocks to encode
ecM*: int # Number of resulting parity blocks
originalTreeCid*: Cid # The original root of the dataset being erasure coded
originalDatasetSize*: NBytes
case verifiable* {.serialize.}: bool # Verifiable datasets can be used to generate storage proofs
of true:
verificationRoot: Cid
slotRoots: seq[Cid]
verificationRoot*: Cid
slotRoots*: seq[Cid]
else:
discard
else:
@ -50,54 +50,12 @@ type
# Accessors
############################################################
proc blockSize*(self: Manifest): NBytes =
  ## Returns the `blockSize` field of the manifest.
  result = self.blockSize
proc datasetSize*(self: Manifest): NBytes =
  ## Returns the `datasetSize` field of the manifest.
  result = self.datasetSize
proc version*(self: Manifest): CidVersion =
  ## Returns the Cid version stored in the manifest.
  result = self.version
proc hcodec*(self: Manifest): MultiCodec =
  ## Returns the multihash codec stored in the manifest.
  result = self.hcodec
proc codec*(self: Manifest): MultiCodec =
  ## Returns the dataset codec stored in the manifest.
  result = self.codec
proc protected*(self: Manifest): bool =
  ## Returns the `protected` discriminator of the manifest.
  result = self.protected
proc ecK*(self: Manifest): int =
  ## Returns the `ecK` field. The field lives in the `protected == true`
  ## branch of the manifest's variant part.
  result = self.ecK
proc ecM*(self: Manifest): int =
  ## Returns the `ecM` field. The field lives in the `protected == true`
  ## branch of the manifest's variant part.
  result = self.ecM
proc originalTreeCid*(self: Manifest): Cid =
  ## Returns the `originalTreeCid` field. The field lives in the
  ## `protected == true` branch of the manifest's variant part.
  result = self.originalTreeCid
proc originalBlocksCount*(self: Manifest): int =
  ## Block count of the original dataset: `divUp` of the original dataset
  ## size over the block size, both taken as `int`.
  let
    totalBytes = self.originalDatasetSize.int
    bytesPerBlock = self.blockSize.int
  divUp(totalBytes, bytesPerBlock)
proc originalDatasetSize*(self: Manifest): NBytes =
  ## Returns the `originalDatasetSize` field. The field lives in the
  ## `protected == true` branch of the manifest's variant part.
  result = self.originalDatasetSize
proc treeCid*(self: Manifest): Cid =
  ## Returns the `treeCid` field of the manifest.
  result = self.treeCid
proc blocksCount*(self: Manifest): int =
  ## Block count of the dataset: `divUp` of the dataset size over the
  ## block size, both taken as `int`.
  let
    totalBytes = self.datasetSize.int
    bytesPerBlock = self.blockSize.int
  divUp(totalBytes, bytesPerBlock)
proc verifiable*(self: Manifest): bool =
  ## Returns the `verifiable` discriminator. It is declared inside the
  ## `protected == true` branch of the manifest's variant part.
  result = self.verifiable
proc verificationRoot*(self: Manifest): Cid =
  ## Returns the `verificationRoot` field. The field lives in the
  ## `verifiable == true` branch of the manifest's variant part.
  result = self.verificationRoot
proc slotRoots*(self: Manifest): seq[Cid] =
  ## Returns the `slotRoots` field. The field lives in the
  ## `verifiable == true` branch of the manifest's variant part.
  result = self.slotRoots
proc numberOfSlots*(self: Manifest): int =
if not self.protected:
0

View File

@ -39,12 +39,12 @@ type
ByteHash* = seq[byte]
ByteTree* = MerkleTree[ByteHash, ByteTreeKey]
ByteTreeProof* = MerkleProof[ByteHash, ByteTreeKey]
ByteProof* = MerkleProof[ByteHash, ByteTreeKey]
CodexTree* = ref object of ByteTree
mhash: MHash
CodexProof* = ref object of ByteTreeProof
CodexProof* = ref object of ByteProof
mhash: MHash
func getMhash*(mcodec: MultiCodec): ?!MHash =

View File

@ -8,8 +8,9 @@ import ../merkletree
import ../stores
import ../manifest
import ../utils
import ../utils/digest
let
const
# TODO: Unify with the CellSize defined in the "data-sampler" branch.
# Number of bytes in a cell. A cell is the smallest unit of data used
# in the proving circuit.
@ -19,7 +20,7 @@ type
SlotBuilder* = object of RootObj
blockStore: BlockStore
manifest: Manifest
numberOfSlotBlocks: int
slotBlocks: int
proc new*(
T: type SlotBuilder,
@ -36,57 +37,73 @@ proc new*(
if (manifest.blockSize.int mod CellSize) != 0:
return failure("Block size must be divisable by cell size.")
let numberOfSlotBlocks = manifest.blocksCount div manifest.ecK
success(SlotBuilder(
let slotBlocks = manifest.blocksCount div manifest.numberOfSlots
success SlotBuilder(
blockStore: blockStore,
manifest: manifest,
numberOfSlotBlocks: numberOfSlotBlocks
))
slotBlocks: slotBlocks)
proc cellsPerBlock(self: SlotBuilder): int =
  ## Number of cells in one block: the manifest's block size (as `int`)
  ## integer-divided by `CellSize`.
  let blockBytes = self.manifest.blockSize.int
  result = blockBytes div CellSize
proc selectSlotBlocks*(self: SlotBuilder, datasetSlotIndex: int): Future[?!seq[Cid]] {.async.} =
var cids = newSeq[Cid]()
proc selectSlotBlocks*(
self: SlotBuilder,
slotIndex: int): Future[?!seq[Poseidon2Hash]] {.async.} =
let
datasetTreeCid = self.manifest.treeCid
treeCid = self.manifest.treeCid
blockCount = self.manifest.blocksCount
numberOfSlots = self.manifest.numberOfSlots
strategy = SteppedIndexingStrategy.new(0, blockCount - 1, numberOfSlots)
for datasetBlockIndex in strategy.getIndicies(datasetSlotIndex):
without slotBlockCid =? await self.blockStore.getCid(datasetTreeCid, datasetBlockIndex), err:
error "Failed to get block CID for tree at index", index=datasetBlockIndex, tree=datasetTreeCid
logScope:
treeCid = treeCid
blockCount = blockCount
numberOfSlots = numberOfSlots
index = blockIndex
var blocks = newSeq[Poseidon2Hash]()
for blockIndex in strategy.getIndicies(slotIndex):
without blk =? await self.blockStore.getBlock(treeCid, blockIndex), err:
error "Failed to get block CID for tree at index"
return failure(err)
cids.add(slotBlockCid)
without digestTree =? Poseidon2Tree.digest(blk.data, CellSize) and
blockDigest =? digestTree.root, err:
error "Failed to create digest for block"
return failure(err)
blocks.add(blockDigest)
# TODO: Remove this sleep. It's here to prevent us from locking up the thread.
await sleepAsync(10.millis)
return success(cids)
success blocks
proc calculateNumberOfPaddingCells*(self: SlotBuilder, numberOfSlotBlocks: int): int =
proc numPaddingCells*(self: SlotBuilder, slotBlocks: int): int =
let
numberOfCells = numberOfSlotBlocks * self.cellsPerBlock
numberOfCells = slotBlocks * self.cellsPerBlock
nextPowerOfTwo = nextPowerOfTwo(numberOfCells)
return nextPowerOfTwo - numberOfCells
proc buildSlotTree*(self: SlotBuilder, slotBlocks: seq[Cid], numberOfPaddingCells: int): ?!MerkleTree =
proc buildSlotTree*(self: SlotBuilder, slotBlocks: seq[Cid], paddingCells: int): ?!Poseidon2Tree =
without emptyCid =? emptyCid(self.manifest.version, self.manifest.hcodec, self.manifest.codec), err:
error "Unable to initialize empty cid"
return failure(err)
let numberOfPadBlocks = divUp(numberOfPaddingCells, self.cellsPerBlock)
let padding = newSeqWith(numberOfPadBlocks, emptyCid)
let paddingBlocks = divUp(paddingCells, self.cellsPerBlock)
let padding = newSeqWith(paddingBlocks, emptyCid)
MerkleTree.init(slotBlocks & padding)
Poseidon2Tree.init(slotBlocks & padding)
proc createSlotTree*(self: SlotBuilder, datasetSlotIndex: int): Future[?!MerkleTree] {.async.} =
without slotBlocks =? await self.selectSlotBlocks(datasetSlotIndex), err:
proc createSlots*(self: SlotBuilder, slotIndex: int): Future[?!Manifest] {.async.} =
without slotBlocks =? await self.selectSlotBlocks(slotIndex), err:
error "Failed to select slot blocks"
return failure(err)
let numberOfPaddingCells = self.calculateNumberOfPaddingCells(slotBlocks.len)
let paddingCells = self.numPaddingCells(slotBlocks.len)
trace "Creating slot tree", datasetSlotIndex=datasetSlotIndex, nSlotBlocks=slotBlocks.len, nPaddingCells=numberOfPaddingCells
return self.buildSlotTree(slotBlocks, numberOfPaddingCells)
trace "Creating slot tree", slotIndex, nSlotBlocks = slotBlocks.len, paddingCells
return self.buildSlotTree(slotBlocks, paddingCells)

View File

@ -13,8 +13,8 @@ import pkg/codex/utils
import ../helpers
import ../examples
import codex/manifest/indexingstrategy
import codex/slotbuilder/slotbuilder
import pkg/codex/manifest/indexingstrategy
import pkg/codex/slotbuilder/slotbuilder
asyncchecksuite "Slot builder":
let
@ -45,7 +45,7 @@ asyncchecksuite "Slot builder":
proc createProtectedManifest(): Future[void] {.async.} =
let
cids = datasetBlocks.mapIt(it.cid)
tree = MerkleTree.init(cids).tryGet()
tree = Poseidon2Tree.init(cids).tryGet()
treeCid = tree.rootCid().tryGet()
for index, cid in cids: