## tests/codex/testerasure.nim
import std/sequtils
import std/sugar
import std/times
import pkg/chronos
import pkg/questionable/results
import pkg/codex/erasure
import pkg/codex/manifest
import pkg/codex/stores
import pkg/codex/blocktype as bt
import pkg/codex/rng
import pkg/codex/utils
import pkg/codex/indexingstrategy
import pkg/taskpools
import pkg/codex/utils/arrayutils
import ../asynctest
import ./helpers
import ./examples
suite "Erasure encode/decode":
  ## Roundtrip tests for the erasure coder: encode a stored dataset, drop
  ## blocks in various patterns, and check whether decode can (or must fail
  ## to) restore the originals.
  const BlockSize = 1024'nb
  const dataSetSize = BlockSize * 123 # weird geometry

  # Per-test state, (re)initialized in `setup`.
  var rng: Rng
  var chunker: Chunker
  var manifest: Manifest
  var store: BlockStore
  var erasure: Erasure
  # Temporary LevelDB instances backing the repo/metadata datastores;
  # destroyed in `teardown`.
  let repoTmp = TempLevelDb.new()
  let metaTmp = TempLevelDb.new()
  var taskpool: Taskpool
  setup:
    ## Fresh datastores, RNG, chunker, store, taskpool and erasure codec for
    ## every test; also stores a random dataset and keeps its manifest.
    let
      repoDs = repoTmp.newDb()
      metaDs = metaTmp.newDb()

    rng = Rng.instance()
    chunker = RandomChunker.new(rng, size = dataSetSize, chunkSize = BlockSize)
    store = RepoStore.new(repoDs, metaDs)
    taskpool = Taskpool.new()
    erasure = Erasure.new(store, leoEncoderProvider, leoDecoderProvider, taskpool)
    manifest = await storeDataGetManifest(store, chunker)

  teardown:
    ## Destroy the temporary databases and stop the worker pool.
    await repoTmp.destroyDb()
    await metaTmp.destroyDb()
    taskpool.shutdown()
proc encode(buffers, parity: int): Future[Manifest] {.async.} =
let encoded =
(await erasure.encode(manifest, buffers.Natural, parity.Natural)).tryGet()
check:
encoded.blocksCount mod (buffers + parity) == 0
encoded.rounded == roundUp(manifest.blocksCount, buffers)
encoded.steps == encoded.rounded div buffers
2022-05-10 14:10:17 +02:00
return encoded
Speed up development (#303) * [ci] Extract setup of nimbus build system into separate file * [ci] Cleanup NodeJS setup * [ci] Use amd64 by default * [ci] Separate steps for unit, contract and integration tests * [ci] Separate build job for coverage that is allowed to fail * [ci] Separate build job for Nim 1.2 * [ci] Calculate coverage on unit tests only This enables faster development cycles at the expense of code coverage accuracy. * [PoR] Test proofs of retrievability with a small block size To get a reasonable test execution time. * [ci] Set NIM_COMMIT environment variable To prevent subsequent `make` command to build a different version of Nim. * [ci] Checkout submodules recursively So that nimbus-build-system doesn't have to do it afterwards. * [ci] Update caches on every run Will automatically update caches when a new minor version of Nim is released. See also: https://github.com/actions/cache/blob/main/workarounds.md#update-a-cache * [ci] Set cache_nonce for all jobs, not just those in the matrix * [build] update to latest nimbus-build-system Requires an update to latest nim-nat-traversal * [erasure] Test erasure coding with a small block size To get a reasonable test execution time. * [erasure] fix typo * [PoR] Test PoR store with a small block size To get a reasonable test execution time. * [PoR] Test PoR network with a small block size To get a reasonable test execution time. * [ci] Ensure that unit tests are not dependent on Ethereum Start Ethereum node *after* the unit tests are run. * [ci] Cancel all other jobs when one fails Allows for faster feedback loops; a next attempt to run the CI will start sooner.
2022-11-14 08:50:00 -05:00
test "Should tolerate losing M data blocks in a single random column":
2022-05-10 14:10:17 +02:00
const
buffers = 20
parity = 10
let encoded = await encode(buffers, parity)
var
column = rng.rand((encoded.blocksCount div encoded.steps) - 1) # random column
dropped: seq[int]
for _ in 0 ..< encoded.ecM:
dropped.add(column)
(await store.delBlock(encoded.treeCid, column)).tryGet()
(await store.delBlock(manifest.treeCid, column)).tryGet()
column = (column + encoded.steps) mod encoded.blocksCount # wrap around
var decoded = (await erasure.decode(encoded)).tryGet()
check:
decoded.treeCid == manifest.treeCid
decoded.treeCid == encoded.originalTreeCid
decoded.blocksCount == encoded.originalBlocksCount
for d in dropped:
if d < manifest.blocksCount: # we don't support returning parity blocks yet
let present = await store.hasBlock(manifest.treeCid, d)
check present.tryGet()
Speed up development (#303) * [ci] Extract setup of nimbus build system into separate file * [ci] Cleanup NodeJS setup * [ci] Use amd64 by default * [ci] Separate steps for unit, contract and integration tests * [ci] Separate build job for coverage that is allowed to fail * [ci] Separate build job for Nim 1.2 * [ci] Calculate coverage on unit tests only This enables faster development cycles at the expense of code coverage accuracy. * [PoR] Test proofs of retrievability with a small block size To get a reasonable test execution time. * [ci] Set NIM_COMMIT environment variable To prevent subsequent `make` command to build a different version of Nim. * [ci] Checkout submodules recursively So that nimbus-build-system doesn't have to do it afterwards. * [ci] Update caches on every run Will automatically update caches when a new minor version of Nim is released. See also: https://github.com/actions/cache/blob/main/workarounds.md#update-a-cache * [ci] Set cache_nonce for all jobs, not just those in the matrix * [build] update to latest nimbus-build-system Requires an update to latest nim-nat-traversal * [erasure] Test erasure coding with a small block size To get a reasonable test execution time. * [erasure] fix typo * [PoR] Test PoR store with a small block size To get a reasonable test execution time. * [PoR] Test PoR network with a small block size To get a reasonable test execution time. * [ci] Ensure that unit tests are not dependent on Ethereum Start Ethereum node *after* the unit tests are run. * [ci] Cancel all other jobs when one fails Allows for faster feedback loops; a next attempt to run the CI will start sooner.
2022-11-14 08:50:00 -05:00
test "Should not tolerate losing more than M data blocks in a single random column":
const
buffers = 20
parity = 10
2022-05-10 14:10:17 +02:00
let encoded = await encode(buffers, parity)
var
column = rng.rand((encoded.blocksCount div encoded.steps) - 1) # random column
dropped: seq[int]
for _ in 0 ..< encoded.ecM + 1:
dropped.add(column)
(await store.delBlock(encoded.treeCid, column)).tryGet()
(await store.delBlock(manifest.treeCid, column)).tryGet()
column = (column + encoded.steps) mod encoded.blocksCount # wrap around
var decoded: Manifest
expect ResultFailure:
decoded = (await erasure.decode(encoded)).tryGet()
for d in dropped:
let present = await store.hasBlock(manifest.treeCid, d)
check not present.tryGet()
Speed up development (#303) * [ci] Extract setup of nimbus build system into separate file * [ci] Cleanup NodeJS setup * [ci] Use amd64 by default * [ci] Separate steps for unit, contract and integration tests * [ci] Separate build job for coverage that is allowed to fail * [ci] Separate build job for Nim 1.2 * [ci] Calculate coverage on unit tests only This enables faster development cycles at the expense of code coverage accuracy. * [PoR] Test proofs of retrievability with a small block size To get a reasonable test execution time. * [ci] Set NIM_COMMIT environment variable To prevent subsequent `make` command to build a different version of Nim. * [ci] Checkout submodules recursively So that nimbus-build-system doesn't have to do it afterwards. * [ci] Update caches on every run Will automatically update caches when a new minor version of Nim is released. See also: https://github.com/actions/cache/blob/main/workarounds.md#update-a-cache * [ci] Set cache_nonce for all jobs, not just those in the matrix * [build] update to latest nimbus-build-system Requires an update to latest nim-nat-traversal * [erasure] Test erasure coding with a small block size To get a reasonable test execution time. * [erasure] fix typo * [PoR] Test PoR store with a small block size To get a reasonable test execution time. * [PoR] Test PoR network with a small block size To get a reasonable test execution time. * [ci] Ensure that unit tests are not dependent on Ethereum Start Ethereum node *after* the unit tests are run. * [ci] Cancel all other jobs when one fails Allows for faster feedback loops; a next attempt to run the CI will start sooner.
2022-11-14 08:50:00 -05:00
test "Should tolerate losing M data blocks in M random columns":
const
buffers = 20
parity = 10
2022-05-10 14:10:17 +02:00
let encoded = await encode(buffers, parity)
var
blocks: seq[int]
offset = 0
while offset < encoded.steps - 1:
let blockIdx = toSeq(countup(offset, encoded.blocksCount - 1, encoded.steps))
for _ in 0 ..< encoded.ecM:
blocks.add(rng.sample(blockIdx, blocks))
offset.inc
for idx in blocks:
(await store.delBlock(encoded.treeCid, idx)).tryGet()
(await store.delBlock(manifest.treeCid, idx)).tryGet()
discard
2022-05-10 14:10:17 +02:00
discard (await erasure.decode(encoded)).tryGet()
for d in 0 ..< manifest.blocksCount:
let present = await store.hasBlock(manifest.treeCid, d)
check present.tryGet()
Speed up development (#303) * [ci] Extract setup of nimbus build system into separate file * [ci] Cleanup NodeJS setup * [ci] Use amd64 by default * [ci] Separate steps for unit, contract and integration tests * [ci] Separate build job for coverage that is allowed to fail * [ci] Separate build job for Nim 1.2 * [ci] Calculate coverage on unit tests only This enables faster development cycles at the expense of code coverage accuracy. * [PoR] Test proofs of retrievability with a small block size To get a reasonable test execution time. * [ci] Set NIM_COMMIT environment variable To prevent subsequent `make` command to build a different version of Nim. * [ci] Checkout submodules recursively So that nimbus-build-system doesn't have to do it afterwards. * [ci] Update caches on every run Will automatically update caches when a new minor version of Nim is released. See also: https://github.com/actions/cache/blob/main/workarounds.md#update-a-cache * [ci] Set cache_nonce for all jobs, not just those in the matrix * [build] update to latest nimbus-build-system Requires an update to latest nim-nat-traversal * [erasure] Test erasure coding with a small block size To get a reasonable test execution time. * [erasure] fix typo * [PoR] Test PoR store with a small block size To get a reasonable test execution time. * [PoR] Test PoR network with a small block size To get a reasonable test execution time. * [ci] Ensure that unit tests are not dependent on Ethereum Start Ethereum node *after* the unit tests are run. * [ci] Cancel all other jobs when one fails Allows for faster feedback loops; a next attempt to run the CI will start sooner.
2022-11-14 08:50:00 -05:00
  test "Should not tolerate losing more than M data blocks in M random columns":
    ## Pick ecM + 1 distinct, still-present blocks from every column and
    ## delete them — that exceeds the parity budget, so decoding must fail.
    const
      buffers = 20
      parity = 10

    let encoded = await encode(buffers, parity)

    var
      blocks: seq[int]
      offset = 0

    while offset < encoded.steps:
      # indices making up this column
      let blockIdx = toSeq(countup(offset, encoded.blocksCount - 1, encoded.steps))

      for _ in 0 ..< encoded.ecM + 1: # NOTE: the +1
        var idx: int
        while true:
          # resample until we hit a block that is actually present in the
          # store (avoids double-counting deletions)
          idx = rng.sample(blockIdx, blocks)
          let blk = (await store.getBlock(encoded.treeCid, idx)).tryGet()
          if not blk.isEmpty:
            break

        blocks.add(idx)
      offset.inc

    for idx in blocks:
      (await store.delBlock(encoded.treeCid, idx)).tryGet()
      (await store.delBlock(manifest.treeCid, idx)).tryGet()
      discard

    var decoded: Manifest
    expect ResultFailure:
      decoded = (await erasure.decode(encoded)).tryGet()
test "Should tolerate losing M (a.k.a row) contiguous data blocks":
const
buffers = 20
parity = 10
2022-05-10 14:10:17 +02:00
let encoded = await encode(buffers, parity)
# loose M original (systematic) symbols/blocks
for b in 0 ..< (encoded.steps * encoded.ecM):
(await store.delBlock(encoded.treeCid, b)).tryGet()
(await store.delBlock(manifest.treeCid, b)).tryGet()
2022-05-10 14:10:17 +02:00
discard (await erasure.decode(encoded)).tryGet()
for d in 0 ..< manifest.blocksCount:
let present = await store.hasBlock(manifest.treeCid, d)
check present.tryGet()
Speed up development (#303) * [ci] Extract setup of nimbus build system into separate file * [ci] Cleanup NodeJS setup * [ci] Use amd64 by default * [ci] Separate steps for unit, contract and integration tests * [ci] Separate build job for coverage that is allowed to fail * [ci] Separate build job for Nim 1.2 * [ci] Calculate coverage on unit tests only This enables faster development cycles at the expense of code coverage accuracy. * [PoR] Test proofs of retrievability with a small block size To get a reasonable test execution time. * [ci] Set NIM_COMMIT environment variable To prevent subsequent `make` command to build a different version of Nim. * [ci] Checkout submodules recursively So that nimbus-build-system doesn't have to do it afterwards. * [ci] Update caches on every run Will automatically update caches when a new minor version of Nim is released. See also: https://github.com/actions/cache/blob/main/workarounds.md#update-a-cache * [ci] Set cache_nonce for all jobs, not just those in the matrix * [build] update to latest nimbus-build-system Requires an update to latest nim-nat-traversal * [erasure] Test erasure coding with a small block size To get a reasonable test execution time. * [erasure] fix typo * [PoR] Test PoR store with a small block size To get a reasonable test execution time. * [PoR] Test PoR network with a small block size To get a reasonable test execution time. * [ci] Ensure that unit tests are not dependent on Ethereum Start Ethereum node *after* the unit tests are run. * [ci] Cancel all other jobs when one fails Allows for faster feedback loops; a next attempt to run the CI will start sooner.
2022-11-14 08:50:00 -05:00
test "Should tolerate losing M (a.k.a row) contiguous parity blocks":
const
buffers = 20
parity = 10
let
encoded = await encode(buffers, parity)
blocks = collect:
for i in 0 .. encoded.blocksCount:
i
# loose M parity (all!) symbols/blocks from the dataset
for b in blocks[^(encoded.steps * encoded.ecM) ..^ 1]:
(await store.delBlock(encoded.treeCid, b)).tryGet()
(await store.delBlock(manifest.treeCid, b)).tryGet()
2022-05-10 14:10:17 +02:00
discard (await erasure.decode(encoded)).tryGet()
for d in 0 ..< manifest.blocksCount:
let present = await store.hasBlock(manifest.treeCid, d)
check present.tryGet()
test "Handles edge case of 0 parity blocks":
const
buffers = 20
parity = 0
2022-05-10 14:10:17 +02:00
let encoded = await encode(buffers, parity)
2022-05-10 14:10:17 +02:00
discard (await erasure.decode(encoded)).tryGet()
  test "Should concurrently encode/decode multiple datasets":
    ## Launch several encodes (then decodes) in flight at once and verify
    ## each decoded manifest matches its source dataset.
    const iterations = 2

    let
      datasetSize = 1.MiBs
      ecK = 10.Natural
      ecM = 10.Natural

    var encodeTasks = newSeq[Future[?!Manifest]]()
    var decodeTasks = newSeq[Future[?!Manifest]]()
    var manifests = newSeq[Manifest]()

    for i in 0 ..< iterations:
      let
        # create random data and store it
        blockSize = rng.sample(@[1, 2, 4, 8, 16, 32, 64].mapIt(it.KiBs))
        chunker = RandomChunker.new(rng, size = datasetSize, chunkSize = blockSize)
        manifest = await storeDataGetManifest(store, chunker)

      manifests.add(manifest)
      # encode the data concurrently (futures started, not awaited yet)
      encodeTasks.add(erasure.encode(manifest, ecK, ecM))

    # wait for all encoding tasks to finish
    let encodeResults = await allFinished(encodeTasks)

    # decode the data concurrently
    for i in 0 ..< encodeResults.len:
      decodeTasks.add(erasure.decode(encodeResults[i].read().tryGet()))

    # wait for all decoding tasks to finish
    let decodeResults = await allFinished(decodeTasks) # TODO: use allFutures

    for j in 0 ..< decodeTasks.len:
      let
        decoded = decodeResults[j].read().tryGet()
        encoded = encodeResults[j].read().tryGet()
      check:
        decoded.treeCid == manifests[j].treeCid
        decoded.treeCid == encoded.originalTreeCid
        decoded.blocksCount == encoded.originalBlocksCount
test "Should handle verifiable manifests":
const
buffers = 20
parity = 10
let
encoded = await encode(buffers, parity)
slotCids = collect(newSeq):
for i in 0 ..< encoded.numSlots:
Cid.example
verifiable = Manifest.new(encoded, Cid.example, slotCids).tryGet()
decoded = (await erasure.decode(verifiable)).tryGet()
check:
decoded.treeCid == manifest.treeCid
decoded.treeCid == verifiable.originalTreeCid
decoded.blocksCount == verifiable.originalBlocksCount
for i in 1 .. 5:
test "Should encode/decode using various parameters " & $i & "/5":
let
blockSize = rng.sample(@[1, 2, 4, 8, 16, 32, 64].mapIt(it.KiBs))
datasetSize = 1.MiBs
ecK = 10.Natural
ecM = 10.Natural
let
chunker = RandomChunker.new(rng, size = datasetSize, chunkSize = blockSize)
manifest = await storeDataGetManifest(store, chunker)
encoded = (await erasure.encode(manifest, ecK, ecM)).tryGet()
decoded = (await erasure.decode(encoded)).tryGet()
check:
decoded.treeCid == manifest.treeCid
decoded.treeCid == encoded.originalTreeCid
decoded.blocksCount == encoded.originalBlocksCount
  test "Should complete encode/decode task when cancelled":
    ## Cancel in-flight encode/decode futures and verify the underlying
    ## taskpool work still produced the same output buffers as the
    ## uncancelled runs (compared byte-for-byte with equalMem).
    let
      blocksLen = 10000
      parityLen = 10
      data = seq[seq[byte]].new()
      chunker = RandomChunker.new(
        rng, size = (blocksLen * BlockSize.int), chunkSize = BlockSize
      )

    data[].setLen(blocksLen)
    for i in 0 ..< blocksLen:
      let chunk = await chunker.getBytes()
      shallowCopy(data[i], @(chunk))

    let
      # raw double arrays for the C-level codec; freed in `defer` below
      parity = createDoubleArray(parityLen, BlockSize.int)
      paritySeq = seq[seq[byte]].new()
      recovered = createDoubleArray(blocksLen, BlockSize.int)
      cancelledTaskParity = createDoubleArray(parityLen, BlockSize.int)
      cancelledTaskRecovered = createDoubleArray(blocksLen, BlockSize.int)
    paritySeq[].setLen(parityLen)

    defer:
      freeDoubleArray(parity, parityLen)
      freeDoubleArray(cancelledTaskParity, parityLen)
      freeDoubleArray(recovered, blocksLen)
      freeDoubleArray(cancelledTaskRecovered, blocksLen)

    # view the raw parity buffers as seqs for decodeAsync
    # NOTE(review): relies on cast of raw pointer to seq[byte] — verify
    # against createDoubleArray's representation
    for i in 0 ..< parityLen:
      paritySeq[i] = cast[seq[byte]](parity[i])

    # call encodeAsync to get the parity
    let encFut =
      await erasure.encodeAsync(BlockSize.int, blocksLen, parityLen, data, parity)
    check encFut.isOk

    let decFut = await erasure.decodeAsync(
      BlockSize.int, blocksLen, parityLen, data, paritySeq, recovered
    )
    check decFut.isOk

    # call encodeAsync and cancel the task
    let encodeFut = erasure.encodeAsync(
      BlockSize.int, blocksLen, parityLen, data, cancelledTaskParity
    )
    encodeFut.cancel()
    try:
      discard await encodeFut
    except CatchableError as exc:
      check exc of CancelledError
    finally:
      # cancelled run must have produced the same parity bytes
      for i in 0 ..< parityLen:
        check equalMem(parity[i], cancelledTaskParity[i], BlockSize.int)

    # call decodeAsync and cancel the task
    let decodeFut = erasure.decodeAsync(
      BlockSize.int, blocksLen, parityLen, data, paritySeq, cancelledTaskRecovered
    )
    decodeFut.cancel()
    try:
      discard await decodeFut
    except CatchableError as exc:
      check exc of CancelledError
    finally:
      # cancelled run must have recovered the same data bytes
      for i in 0 ..< blocksLen:
        check equalMem(recovered[i], cancelledTaskRecovered[i], BlockSize.int)