2022-04-05 13:12:59 -06:00
|
|
|
import std/sequtils
|
2023-11-14 10:53:06 -06:00
|
|
|
import std/sugar
|
2024-03-23 10:56:35 +01:00
|
|
|
import std/cpuinfo
|
2022-04-05 13:12:59 -06:00
|
|
|
|
|
|
|
import pkg/chronos
|
2023-08-21 12:51:04 +10:00
|
|
|
import pkg/datastore
|
2022-04-05 13:12:59 -06:00
|
|
|
import pkg/questionable/results
|
|
|
|
|
2022-05-19 14:56:03 -05:00
|
|
|
import pkg/codex/erasure
|
|
|
|
import pkg/codex/manifest
|
|
|
|
import pkg/codex/stores
|
|
|
|
import pkg/codex/blocktype as bt
|
|
|
|
import pkg/codex/rng
|
2023-11-14 10:53:06 -06:00
|
|
|
import pkg/codex/utils
|
2024-02-07 14:54:57 -06:00
|
|
|
import pkg/codex/indexingstrategy
|
2024-03-23 10:56:35 +01:00
|
|
|
import pkg/taskpools
|
2022-04-05 13:12:59 -06:00
|
|
|
|
2024-01-29 21:03:51 +01:00
|
|
|
import ../asynctest
|
2022-04-05 13:12:59 -06:00
|
|
|
import ./helpers
|
2024-06-26 22:02:39 +02:00
|
|
|
import ./examples
|
2022-04-05 13:12:59 -06:00
|
|
|
|
2024-01-08 16:52:46 -06:00
|
|
|
suite "Erasure encode/decode":
|
2023-07-06 16:23:27 -07:00
|
|
|
# Dataset geometry handed to the random chunker in `setup`.
const
  BlockSize = 1024'nb
  dataSetSize = BlockSize * 123 # weird geometry

# Per-test fixtures; every one of them is (re)assigned in `setup`.
var
  rng: Rng
  chunker: Chunker
  manifest: Manifest
  store: BlockStore
  erasure: Erasure
  taskpool: Taskpool

# Temporary LevelDB helpers: `setup` opens datastores from them and
# `teardown` destroys them again.
let
  repoTmp = TempLevelDb.new()
  metaTmp = TempLevelDb.new()
|
2022-05-10 14:10:17 +02:00
|
|
|
|
|
|
|
setup:
  # Open the repo and metadata datastores from the temporary databases.
  let repoDs = newDb(repoTmp)
  let metaDs = newDb(metaTmp)

  # Random chunker configured with the suite's dataset and block sizes.
  rng = Rng.instance()
  chunker = RandomChunker.new(rng, size = dataSetSize, chunkSize = BlockSize)

  # Block store on top of both datastores.
  store = RepoStore.new(repoDs, metaDs)

  # Erasure coder backed by a task pool with one thread per processor
  # reported by `countProcessors`.
  taskpool = Taskpool.new(num_threads = countProcessors())
  erasure = Erasure.new(store, leoEncoderProvider, leoDecoderProvider, taskpool)

  # Manifest produced by the `storeDataGetManifest` helper (defined
  # elsewhere) from the store and the chunker; the tests encode this one.
  manifest = await storeDataGetManifest(store, chunker)
|
2022-04-05 13:12:59 -06:00
|
|
|
|
2024-05-30 08:57:10 +02:00
|
|
|
teardown:
  # Destroy the temporary databases whose datastores were opened in `setup`.
  await repoTmp.destroyDb()
  await metaTmp.destroyDb()
  # `setup` creates a brand new Taskpool for every test; shut it down here
  # so its worker threads do not pile up over the course of the suite.
  taskpool.shutdown()
|
|
|
|
|
2022-05-10 14:10:17 +02:00
|
|
|
proc encode(buffers, parity: int): Future[Manifest] {.async.} =
  ## Erasure-codes the suite-level `manifest`, passing `buffers` and
  ## `parity` (converted to `Natural`) to `erasure.encode`, checks the
  ## geometry of the resulting manifest and returns it.
  ## `tryGet` raises when `erasure.encode` reports a failure.
  let
    outcome = await erasure.encode(manifest, Natural(buffers), Natural(parity))
    encoded = outcome.tryGet()

  # The encoded block count is a multiple of (buffers + parity).
  check encoded.blocksCount mod (buffers + parity) == 0
  # `rounded` is the original block count rounded up via `roundUp` ...
  check encoded.rounded == roundUp(manifest.blocksCount, buffers)
  # ... and `steps` is that rounded count divided by `buffers`.
  check encoded.steps == encoded.rounded div buffers

  return encoded
|
|
|
|
|
2022-11-14 08:50:00 -05:00
|
|
|
test "Should tolerate losing M data blocks in a single random column":
  # Deletes `ecM` blocks spaced `steps` apart, then expects decoding to
  # succeed and the deleted original blocks to be present in the store again.
  const
    buffers = 20
    parity = 10

  let encoded = await encode(buffers, parity)

  var removed: seq[int]
  # random column
  var cursor = rng.rand((encoded.blocksCount div encoded.steps) - 1)

  for _ in 0 ..< encoded.ecM:
    removed.add(cursor)
    (await store.delBlock(encoded.treeCid, cursor)).tryGet()
    (await store.delBlock(manifest.treeCid, cursor)).tryGet()
    # move one stride further, wrapping around the encoded block count
    cursor = (cursor + encoded.steps) mod encoded.blocksCount

  let decoded = (await erasure.decode(encoded)).tryGet()

  check decoded.treeCid == manifest.treeCid
  check decoded.treeCid == encoded.originalTreeCid
  check decoded.blocksCount == encoded.originalBlocksCount

  for d in removed:
    # we don't support returning parity blocks yet
    if d >= manifest.blocksCount:
      continue
    let present = await store.hasBlock(manifest.treeCid, d)
    check present.tryGet()
|
2022-04-05 13:12:59 -06:00
|
|
|
|
2022-11-14 08:50:00 -05:00
|
|
|
test "Should not tolerate losing more than M data blocks in a single random column":
  # Deletes `ecM + 1` blocks spaced `steps` apart and expects decoding to
  # fail, leaving the deleted blocks absent from the store.
  const
    buffers = 20
    parity = 10

  let encoded = await encode(buffers, parity)

  var removed: seq[int]
  # random column
  var cursor = rng.rand((encoded.blocksCount div encoded.steps) - 1)

  for _ in 0 ..< (encoded.ecM + 1):
    removed.add(cursor)
    (await store.delBlock(encoded.treeCid, cursor)).tryGet()
    (await store.delBlock(manifest.treeCid, cursor)).tryGet()
    # move one stride further, wrapping around the encoded block count
    cursor = (cursor + encoded.steps) mod encoded.blocksCount

  # `tryGet` on the failed decode result must raise ResultFailure.
  expect ResultFailure:
    discard (await erasure.decode(encoded)).tryGet()

  for d in removed:
    let present = await store.hasBlock(manifest.treeCid, d)
    check not present.tryGet()
|
2022-04-05 13:12:59 -06:00
|
|
|
|
2022-11-14 08:50:00 -05:00
|
|
|
test "Should tolerate losing M data blocks in M random columns":
  # Picks `ecM` block indices from each strided index set, deletes them and
  # expects decoding to bring back every original block.
  const
    buffers = 20
    parity = 10

  let encoded = await encode(buffers, parity)

  var victims: seq[int]

  # NOTE: the bound is `steps - 1`, so the last offset is never sampled.
  for offset in 0 ..< (encoded.steps - 1):
    # indices offset, offset + steps, offset + 2 * steps, ...
    let candidates =
      toSeq(countup(offset, encoded.blocksCount - 1, encoded.steps))

    for _ in 0 ..< encoded.ecM:
      # `victims` is handed to `rng.sample` as its second argument;
      # presumably the already-picked indices are excluded - confirm
      # against the helper.
      victims.add(rng.sample(candidates, victims))

  for idx in victims:
    (await store.delBlock(encoded.treeCid, idx)).tryGet()
    (await store.delBlock(manifest.treeCid, idx)).tryGet()

  discard (await erasure.decode(encoded)).tryGet()

  for d in 0 ..< manifest.blocksCount:
    let present = await store.hasBlock(manifest.treeCid, d)
    check present.tryGet()
|
2022-04-05 13:12:59 -06:00
|
|
|
|
2022-11-14 08:50:00 -05:00
|
|
|
test "Should not tolerate losing more than M data blocks in M random columns":
  # Picks `ecM + 1` non-empty blocks from every strided index set, deletes
  # them and expects decoding to fail.
  const
    buffers = 20
    parity = 10

  let encoded = await encode(buffers, parity)

  var victims: seq[int]

  for offset in 0 ..< encoded.steps:
    # indices offset, offset + steps, offset + 2 * steps, ...
    let candidates =
      toSeq(countup(offset, encoded.blocksCount - 1, encoded.steps))

    for _ in 0 ..< (encoded.ecM + 1): # NOTE: the +1
      var pick: int
      # Resample until the chosen index refers to a non-empty block.
      while true:
        pick = rng.sample(candidates, victims)
        let blk = (await store.getBlock(encoded.treeCid, pick)).tryGet()
        if not blk.isEmpty:
          break

      victims.add(pick)

  for idx in victims:
    (await store.delBlock(encoded.treeCid, idx)).tryGet()
    (await store.delBlock(manifest.treeCid, idx)).tryGet()

  # `tryGet` on the failed decode result must raise ResultFailure.
  expect ResultFailure:
    discard (await erasure.decode(encoded)).tryGet()
|
|
|
|
|
2022-11-14 08:50:00 -05:00
|
|
|
test "Should tolerate losing M (a.k.a row) contiguous data blocks":
  # Deletes the first `steps * ecM` blocks and expects decoding to bring
  # back every original block.
  const
    buffers = 20
    parity = 10

  let encoded = await encode(buffers, parity)

  # lose M original (systematic) symbols/blocks
  let lostCount = encoded.steps * encoded.ecM
  for b in 0 ..< lostCount:
    (await store.delBlock(encoded.treeCid, b)).tryGet()
    (await store.delBlock(manifest.treeCid, b)).tryGet()

  discard (await erasure.decode(encoded)).tryGet()

  for d in 0 ..< manifest.blocksCount:
    let present = await store.hasBlock(manifest.treeCid, d)
    check present.tryGet()
|
2022-04-05 13:12:59 -06:00
|
|
|
|
2022-11-14 08:50:00 -05:00
|
|
|
test "Should tolerate losing M (a.k.a row) contiguous parity blocks":
  # Deletes the last `steps * ecM` blocks of the encoded dataset and expects
  # decoding to succeed with every original block still present.
  const
    buffers = 20
    parity = 10

  let
    encoded = await encode(buffers, parity)
    lostCount = encoded.steps * encoded.ecM
    # Block indices run over 0 ..< blocksCount, so the tail to delete is
    # firstLost ..< blocksCount. (Building the index list with the inclusive
    # range 0 .. blocksCount shifts that tail by one: it skips the first
    # tail block and touches the non-existent index `blocksCount`.)
    firstLost = encoded.blocksCount - lostCount

  # lose M parity (all!) symbols/blocks from the dataset
  for b in firstLost ..< encoded.blocksCount:
    (await store.delBlock(encoded.treeCid, b)).tryGet()
    (await store.delBlock(manifest.treeCid, b)).tryGet()

  discard (await erasure.decode(encoded)).tryGet()

  for d in 0 ..< manifest.blocksCount:
    let present = await store.hasBlock(manifest.treeCid, d)
    check present.tryGet()
|
2022-05-10 13:50:22 +02:00
|
|
|
|
|
|
|
test "handles edge case of 0 parity blocks":
  # Encoding with zero parity and decoding the result must both succeed;
  # `tryGet` raises if decoding reports a failure.
  const
    buffers = 20
    parity = 0

  let
    encoded = await encode(buffers, parity)
    outcome = await erasure.decode(encoded)

  discard outcome.tryGet()
|
2024-06-26 22:02:39 +02:00
|
|
|
|
|
|
|
test "Should handle verifiable manifests":
  # Wraps the encoded manifest in a verifiable manifest built from example
  # CIDs and checks that decoding it yields the original dataset identity.
  const
    buffers = 20
    parity = 10

  let encoded = await encode(buffers, parity)

  # one example CID per slot
  let slotCids = collect(newSeq):
    for _ in 0 ..< encoded.numSlots:
      Cid.example

  let
    verifiable = Manifest.new(encoded, Cid.example, slotCids).tryGet()
    decoded = (await erasure.decode(verifiable)).tryGet()

  check decoded.treeCid == manifest.treeCid
  check decoded.treeCid == verifiable.originalTreeCid
  check decoded.blocksCount == verifiable.originalBlocksCount
|
2024-07-03 16:44:00 +02:00
|
|
|
|
|
|
|
for i in 1..5:
  test "Should encode/decode using various parameters " & $i & "/5":
    # Encode/decode round trip of a `1.MiBs` random dataset with a randomly
    # sampled block size. The local `chunker` and `manifest` shadow the
    # suite-level fixtures of the same name.
    let
      blockSizes = @[1, 2, 4, 8, 16, 32, 64].mapIt(KiBs(it))
      blockSize = rng.sample(blockSizes)
      datasetSize = MiBs(1)
      ecK = Natural(10)
      ecM = Natural(10)
      chunker = RandomChunker.new(rng, size = datasetSize, chunkSize = blockSize)
      manifest = await storeDataGetManifest(store, chunker)
      encoded = (await erasure.encode(manifest, ecK, ecM)).tryGet()
      decoded = (await erasure.decode(encoded)).tryGet()

    check decoded.treeCid == manifest.treeCid
    check decoded.treeCid == encoded.originalTreeCid
    check decoded.blocksCount == encoded.originalBlocksCount
|