logos-storage-nim/tests/codex/proof/testdatasampler.nim
2024-01-15 10:50:40 -06:00

326 lines
10 KiB
Nim

import std/sequtils
import std/sugar
import std/random
import pkg/questionable/results
import pkg/constantine/math/arithmetic
import pkg/constantine/math/io/io_fields
import pkg/poseidon2/types
import pkg/poseidon2/io
import pkg/poseidon2
import pkg/chronos
import pkg/asynctest
import pkg/codex/stores/cachestore
import pkg/codex/chunker
import pkg/codex/stores
import pkg/codex/blocktype as bt
import pkg/codex/contracts/requests
import pkg/codex/contracts
import pkg/codex/merkletree
import pkg/codex/stores/cachestore
import pkg/codex/proof/datasampler
import pkg/codex/proof/misc
import pkg/codex/proof/types
import ../helpers
import ../examples
import testdatasampler_expected
# Size in bytes of every dataset block built by the suites in this file.
let bytesPerBlock = 64 * 1024

# Fixed field-element inputs shared by the suites below, so that the sampled
# indices can be compared against hard-coded expectations.
let challenge: FieldElement = 12345.toF
let datasetRootHash: FieldElement = 6789.toF
asyncchecksuite "Test proof datasampler - components":
  # Slot fixture shared by the tests in this suite: 16 blocks of
  # `bytesPerBlock` bytes, at slot index 3 of a request asking for 10 slots.
  let
    numberOfSlotBlocks = 16
    slotAsk = StorageAsk(
      slots: 10,
      slotSize: u256(bytesPerBlock * numberOfSlotBlocks))
    slotContent = StorageContent(cid: $Cid.example)
    slot = Slot(
      request: StorageRequest(ask: slotAsk, content: slotContent),
      slotIndex: u256(3))

  test "Number of cells is a power of two":
    # Sanity check on the test data itself rather than on the sampler.
    proc isPow2(value: int): bool =
      (1 shl ceilingLog2(value)) == value

    let cellCount = getNumberOfCellsInSlot(slot).int

    check isPow2(cellCount)

  test "Extract low bits":
    proc extract(value: uint64, nBits: int): uint64 =
      ## Runs `extractLowBits` on the big-integer form of `value`.
      extractLowBits(toF(value).toBig(), nBits)

    # Each row is (input value, number of bits, expected result).
    const vectors = [
      (0x88'u64, 4, 0x8'u64),
      (0x88'u64, 7, 0x8'u64),
      (0x9A'u64, 5, 0x1A'u64),
      (0x9A'u64, 7, 0x1A'u64),
      (0x1248'u64, 10, 0x248'u64),
      (0x1248'u64, 12, 0x248'u64),
      (0x1248306A560C9AC0'u64, 10, 0x2C0'u64),
      (0x1248306A560C9AC0'u64, 12, 0xAC0'u64),
      (0x1248306A560C9AC0'u64, 50, 0x306A560C9AC0'u64),
      (0x1248306A560C9AC0'u64, 52, 0x8306A560C9AC0'u64)
    ]

    for (value, nBits, expected) in vectors:
      check extract(value, nBits) == expected

  test "Should calculate total number of cells in Slot":
    let
      slotBytes = slot.request.ask.slotSize.truncate(uint64)
      expectedCells = slotBytes div CellSize

    check:
      expectedCells == 512
      expectedCells == getNumberOfCellsInSlot(slot)
asyncchecksuite "Test proof datasampler - main":
  let
    # The number of slot blocks and number of slots, combined with
    # the bytes per block, make it so that there are exactly 256 cells
    # in the dataset.
    numberOfSlotBlocks = 4
    totalNumberOfSlots = 2
    datasetSlotIndex = 1
    localStore = CacheStore.new()
    datasetToSlotProof = MerkleProof.example

  # Fixture state; (re)built by `setup` before every test.
  var
    manifest: Manifest
    manifestBlock: bt.Block
    slot: Slot
    datasetBlocks: seq[bt.Block]
    slotPoseidonTree: MerkleTree
    dataSampler: DataSampler

  proc createDatasetBlocks(): Future[void] {.async.} =
    ## Builds the dataset bytes, chunks them into blocks of `bytesPerBlock`
    ## bytes, and stores every block in `localStore` and `datasetBlocks`.
    let
      datasetSize = numberOfSlotBlocks * totalNumberOfSlots * bytesPerBlock
      numberOfCellsNeeded = datasetSize.uint64 div CellSize

    # `setup` runs before every test while `datasetBlocks` lives at suite
    # level: start from an empty list so blocks do not pile up across tests.
    datasetBlocks.setLen(0)

    # This generates a number of blocks that have different data, such that
    # each cell in each block is unique, but nothing is random.
    var data = newSeqOfCap[byte](datasetSize)
    for i in 0 ..< numberOfCellsNeeded:
      data.add((i.byte).repeat(CellSize))

    let chunker = MockChunker.new(
      dataset = data,
      chunkSize = bytesPerBlock)

    while true:
      let chunk = await chunker.getBytes()
      if chunk.len <= 0:
        break
      let b = bt.Block.new(chunk).tryGet()
      datasetBlocks.add(b)
      # Fail the setup loudly if the store rejects the block.
      (await localStore.putBlock(b)).tryGet()

  proc createManifest(): Future[void] {.async.} =
    ## Builds a merkle tree over the CIDs of `datasetBlocks`, stores each
    ## block's CID and proof under the tree CID, and sets `manifest` and
    ## `manifestBlock`.
    let
      cids = datasetBlocks.mapIt(it.cid)
      tree = MerkleTree.init(cids).tryGet()
      treeCid = tree.rootCid().tryGet()

    for index, cid in cids:
      let proof = tree.getProof(index).tryGet()
      (await localStore.putBlockCidAndProof(
        treeCid, index, cid, proof)).tryGet()

    manifest = Manifest.new(
      treeCid = treeCid,
      blockSize = bytesPerBlock.NBytes,
      datasetSize =
        (bytesPerBlock * numberOfSlotBlocks * totalNumberOfSlots).NBytes)
    manifestBlock = bt.Block.new(
      manifest.encode().tryGet(), codec = DagPBCodec).tryGet()

  proc createSlot(): void =
    ## Sets `slot` to a slot of `numberOfSlotBlocks` blocks at index
    ## `datasetSlotIndex`, whose content CID is the manifest block's CID.
    slot = Slot(
      request: StorageRequest(
        ask: StorageAsk(
          slotSize: u256(bytesPerBlock * numberOfSlotBlocks)
        ),
        content: StorageContent(
          cid: $manifestBlock.cid
        ),
      ),
      slotIndex: u256(datasetSlotIndex)
    )

  proc createSlotPoseidonTree(): void =
    ## Sets `slotPoseidonTree` to a merkle tree over the CIDs of the dataset
    ## blocks that fall inside `slot`.
    let
      slotSize = slot.request.ask.slotSize.truncate(uint64)
      blocksInSlot = slotSize div bytesPerBlock.uint64
      slotIndex = slot.slotIndex.truncate(uint64)
      datasetBlockIndexFirst = slotIndex * blocksInSlot
      datasetBlockIndexLast = datasetBlockIndexFirst + blocksInSlot
      slotBlocks =
        datasetBlocks[datasetBlockIndexFirst ..< datasetBlockIndexLast]
      slotBlockCids = slotBlocks.mapIt(it.cid)
    slotPoseidonTree = MerkleTree.init(slotBlockCids).tryGet()

  proc createDataSampler(): Future[void] {.async.} =
    ## Sets `dataSampler` to a sampler built from the fixtures above.
    dataSampler = (await DataSampler.new(
      slot,
      localStore,
      datasetRootHash,
      slotPoseidonTree,
      datasetToSlotProof
    )).tryGet()

  setup:
    await createDatasetBlocks()
    await createManifest()
    createSlot()
    (await localStore.putBlock(manifestBlock)).tryGet()
    createSlotPoseidonTree()
    await createDataSampler()

  test "Number of cells is a power of two":
    # This is to check that the data used for testing is sane.
    proc isPow2(value: int): bool =
      let log2 = ceilingLog2(value)
      return (1 shl log2) == value

    let numberOfCells = getNumberOfCellsInSlot(slot).int

    check:
      isPow2(numberOfCells)

  # Hard-coded slot-cell indices expected for counters 1, 2 and 3 with the
  # fixed `challenge` and `datasetRootHash`.
  let knownIndices = @[50.uint64, 21.uint64, 110.uint64]

  test "Can find single slot-cell index":
    proc slotCellIndex(i: int): uint64 =
      let counter: FieldElement = toF(i)
      return dataSampler.findSlotCellIndex(challenge, counter)

    proc getExpectedIndex(i: int): uint64 =
      ## Recomputes the index in the test: the low bits of the sponge digest
      ## of (dataset root, challenge, counter).
      let
        numberOfCellsInSlot =
          (bytesPerBlock * numberOfSlotBlocks) div CellSize.int
        hash = Sponge.digest(
          @[datasetRootHash, challenge, toF(i)], rate = 2)
      return extractLowBits(hash.toBig(), ceilingLog2(numberOfCellsInSlot))

    check:
      slotCellIndex(1) == getExpectedIndex(1)
      slotCellIndex(1) == knownIndices[0]
      slotCellIndex(2) == getExpectedIndex(2)
      slotCellIndex(2) == knownIndices[1]
      slotCellIndex(3) == getExpectedIndex(3)
      slotCellIndex(3) == knownIndices[2]

  test "Can find sequence of slot-cell indices":
    proc slotCellIndices(n: int): seq[uint64] =
      dataSampler.findSlotCellIndices(challenge, n)

    proc getExpectedIndices(n: int): seq[uint64] =
      return collect(newSeq, (for i in 1..n: dataSampler.findSlotCellIndex(challenge, toF(i))))

    check:
      slotCellIndices(3) == getExpectedIndices(3)
      slotCellIndices(3) == knownIndices

  # A standalone block of `rand`-generated bytes and its first three cells,
  # shared by the cell-extraction tests below.
  let
    bytes = newSeqWith(bytesPerBlock, rand(uint8))
    blk = bt.Block.new(bytes).tryGet()
    cell0Bytes = bytes[0..<CellSize]
    cell1Bytes = bytes[CellSize..<(CellSize*2)]
    cell2Bytes = bytes[(CellSize*2)..<(CellSize*3)]

  test "Can get cell from block":
    let
      sample0 = dataSampler.getCellFromBlock(blk, 0)
      sample1 = dataSampler.getCellFromBlock(blk, 1)
      sample2 = dataSampler.getCellFromBlock(blk, 2)

    check:
      sample0 == cell0Bytes
      sample1 == cell1Bytes
      sample2 == cell2Bytes

  test "Can convert block into cells":
    let cells = dataSampler.getBlockCells(blk)

    check:
      cells.len == (bytesPerBlock div CellSize.int)
      cells[0] == cell0Bytes
      cells[1] == cell1Bytes
      cells[2] == cell2Bytes

  test "Can create mini tree for block cells":
    let miniTree = dataSampler.getBlockCellMiniTree(blk).tryGet()

    let
      cell0Proof = miniTree.getProof(0).tryGet()
      cell1Proof = miniTree.getProof(1).tryGet()
      cell2Proof = miniTree.getProof(2).tryGet()

    check:
      cell0Proof.verifyDataBlock(cell0Bytes, miniTree.root).tryGet()
      cell1Proof.verifyDataBlock(cell1Bytes, miniTree.root).tryGet()
      cell2Proof.verifyDataBlock(cell2Bytes, miniTree.root).tryGet()

  test "Can gather proof input":
    # This is the main entry point for this module, and what it's all about.
    let
      nSamples = 3
      input = (await dataSampler.getProofInput(challenge, nSamples)).tryGet()

    # `toStr` and the expected fixtures below are only referenced by the
    # disabled comparisons at the end of the `check` block.
    proc toStr(proof: MerkleProof): string =
      toHex(proof.nodesBuffer)

    let
      expectedSlotToBlockProofs = getExpectedSlotToBlockProofs()
      expectedBlockToCellProofs = getExpectedBlockToCellProofs()
      expectedSampleData = getExpectedSampleData()

    proc equal(a: FieldElement, b: FieldElement): bool =
      ## Compares two field elements through their decimal representation.
      a.toDecimal() == b.toDecimal()

    check:
      # datasetRoot*: FieldElement
      equal(input.datasetRoot, datasetRootHash)

      # entropy*: FieldElement
      equal(input.entropy, challenge)

      # numberOfCellsInSlot*: uint64
      input.numberOfCellsInSlot ==
        (bytesPerBlock * numberOfSlotBlocks).uint64 div CellSize

      # numberOfSlots*: uint64
      input.numberOfSlots == slot.request.ask.slots

      # datasetSlotIndex*: uint64
      input.datasetSlotIndex == slot.slotIndex.truncate(uint64)

      # slotRoot*: FieldElement
      equal(input.slotRoot, toF(1234)) # TODO - when slotPoseidonTree is a poseidon tree, its root should be a FieldElement.

      # datasetToSlotProof*: MerkleProof
      input.datasetToSlotProof == datasetToSlotProof

      # proofSamples*: seq[ProofSample]
      # Not verified yet: the fixture comparisons below are disabled.
      # input.slotToBlockProofs.mapIt(toStr(it)) == expectedSlotToBlockProofs
      # input.blockToCellProofs.mapIt(toStr(it)) == expectedBlockToCellProofs
      # toHex(input.sampleData) == expectedSampleData

  # One generated test per (slot-cell index -> expected slot-block index).
  for (input, expected) in [(10, 0), (31, 0), (32, 1), (63, 1), (64, 2)]:
    test "Can get slotBlockIndex from slotCellIndex (" & $input & " -> " & $expected & ")":
      let
        slotCellIndex = input.uint64
        slotBlockIndex =
          dataSampler.getSlotBlockIndexForSlotCellIndex(slotCellIndex)

      check:
        slotBlockIndex == expected.uint64

  # One generated test per (slot-cell index -> expected cell index in block).
  for (input, expected) in [(10, 10), (31, 31), (32, 0), (63, 31), (64, 0)]:
    test "Can get blockCellIndex from slotCellIndex (" & $input & " -> " & $expected & ")":
      let
        slotCellIndex = input.uint64
        blockCellIndex =
          dataSampler.getBlockCellIndexForSlotCellIndex(slotCellIndex)

      check:
        blockCellIndex == expected.uint64