Fix erasure coding threading bug

parent 1e2ad95659
commit ffd71fa1e0

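Before this change, the async erasure backend never actually used the taskpool: the real tp.spawn calls were commented out as a workaround for https://github.com/codex-storage/nim-codex/issues/854, so encoding and decoding ran inline on the calling thread, with results marshalled through manually allocated SharedArrayHolder buffers. The diff below restructures the work into three phases so that only a stable object pointer crosses the thread boundary: the Leopard encoder/decoder is prepared with the data on the async side, a spawned task runs the pure compute step (encodePrepared/decodePrepared) on a taskpool thread, and the parity or recovered shards are read back after the thread signal fires.
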
@@ -8,18 +8,5 @@
 ## those terms.
 
 import ./erasure/erasure
-import ./erasure/backends/leopard
 
 export erasure
-
-func leoEncoderProvider*(
-  size, buffers, parity: int
-): EncoderBackend {.raises: [Defect].} =
-  ## create new Leo Encoder
-  LeoEncoderBackend.new(size, buffers, parity)
-
-func leoDecoderProvider*(
-  size, buffers, parity: int
-): DecoderBackend {.raises: [Defect].} =
-  ## create new Leo Decoder
-  LeoDecoderBackend.new(size, buffers, parity)

@@ -14,6 +14,7 @@ import pkg/taskpools/flowvars
 import pkg/chronos
 import pkg/chronos/threadsync
 import pkg/questionable/results
+import pkg/leopard
 
 import ./backend
 import ../errors

@@ -29,169 +30,109 @@ const
 type
-  EncoderBackendPtr = ptr EncoderBackend
-  DecoderBackendPtr = ptr DecoderBackend
+  DecoderPtr = ptr LeoDecoder
+  EncoderPtr = ptr LeoEncoder
 
-  # Args objects are missing seq[seq[byte]] field, to avoid unnecessary data copy
   EncodeTaskArgs = object
     signal: ThreadSignalPtr
-    backend: EncoderBackendPtr
-    blockSize: int
-    ecM: int
+    encoder: EncoderPtr
 
   DecodeTaskArgs = object
     signal: ThreadSignalPtr
-    backend: DecoderBackendPtr
-    blockSize: int
-    ecK: int
+    decoder: DecoderPtr
 
-  SharedArrayHolder*[T] = object
-    data: ptr UncheckedArray[T]
-    size: int
-
-  EncodeTaskResult = Result[SharedArrayHolder[byte], cstring]
-  DecodeTaskResult = Result[SharedArrayHolder[byte], cstring]
+  TaskResult = Result[void, cstring]
 
-proc encodeTask(args: EncodeTaskArgs, data: seq[seq[byte]]): EncodeTaskResult =
-  var
-    data = data.unsafeAddr
-    parity = newSeqWith[seq[byte]](args.ecM, newSeq[byte](args.blockSize))
-
+proc encodeTask(args: EncodeTaskArgs): TaskResult =
   try:
-    let res = args.backend[].encode(data[], parity)
-
-    if res.isOk:
-      let
-        resDataSize = parity.len * args.blockSize
-        resData = cast[ptr UncheckedArray[byte]](allocShared0(resDataSize))
-        arrHolder = SharedArrayHolder[byte](
-          data: resData,
-          size: resDataSize
-        )
-
-      for i in 0..<parity.len:
-        copyMem(addr resData[i * args.blockSize], addr parity[i][0], args.blockSize)
-
-      return ok(arrHolder)
-    else:
-      return err(res.error)
-  except CatchableError as exception:
-    return err(exception.msg.cstring)
+    return args.encoder[].encodePrepared()
   finally:
     if err =? args.signal.fireSync().mapFailure.errorOption():
      error "Error firing signal", msg = err.msg
 
-proc decodeTask(args: DecodeTaskArgs, data: seq[seq[byte]], parity: seq[seq[byte]]): DecodeTaskResult =
-  var
-    data = data.unsafeAddr
-    parity = parity.unsafeAddr
-    recovered = newSeqWith[seq[byte]](args.ecK, newSeq[byte](args.blockSize))
-
+proc decodeTask(args: DecodeTaskArgs): TaskResult =
   try:
-    let res = args.backend[].decode(data[], parity[], recovered)
-
-    if res.isOk:
-      let
-        resDataSize = recovered.len * args.blockSize
-        resData = cast[ptr UncheckedArray[byte]](allocShared0(resDataSize))
-        arrHolder = SharedArrayHolder[byte](
-          data: resData,
-          size: resDataSize
-        )
-
-      for i in 0..<recovered.len:
-        copyMem(addr resData[i * args.blockSize], addr recovered[i][0], args.blockSize)
-
-      return ok(arrHolder)
-    else:
-      return err(res.error)
-  except CatchableError as exception:
-    return err(exception.msg.cstring)
+    return args.decoder[].decodePrepared()
   finally:
     if err =? args.signal.fireSync().mapFailure.errorOption():
      error "Error firing signal", msg = err.msg
 
 proc proxySpawnEncodeTask(
     tp: Taskpool,
-    args: EncodeTaskArgs,
-    data: ref seq[seq[byte]]
-): Flowvar[EncodeTaskResult] =
-  # FIXME Uncomment the code below after addressing an issue:
-  # https://github.com/codex-storage/nim-codex/issues/854
-
-  # tp.spawn encodeTask(args, data[])
-
-  let fv = EncodeTaskResult.newFlowVar
-  fv.readyWith(encodeTask(args, data[]))
-  return fv
+    args: EncodeTaskArgs
+): Flowvar[TaskResult] =
+  tp.spawn encodeTask(args)
 
 proc proxySpawnDecodeTask(
     tp: Taskpool,
-    args: DecodeTaskArgs,
-    data: ref seq[seq[byte]],
-    parity: ref seq[seq[byte]]
-): Flowvar[DecodeTaskResult] =
-  # FIXME Uncomment the code below after addressing an issue:
-  # https://github.com/codex-storage/nim-codex/issues/854
-
-  # tp.spawn decodeTask(args, data[], parity[])
-
-  let fv = DecodeTaskResult.newFlowVar
-  fv.readyWith(decodeTask(args, data[], parity[]))
-  return fv
+    args: DecodeTaskArgs
+): Flowvar[TaskResult] =
+  tp.spawn decodeTask(args)
 
-proc awaitResult[T](signal: ThreadSignalPtr, handle: Flowvar[T]): Future[?!T] {.async.} =
+proc awaitTaskResult(signal: ThreadSignalPtr, handle: Flowvar[TaskResult]): Future[?!void] {.async.} =
   await wait(signal)
 
   var
-    res: T
+    res: TaskResult
     awaitTotal: Duration
   while awaitTotal < CompletitionTimeout:
-    if handle.tryComplete(res):
-      return success(res)
+    if handle.tryComplete(res):
+      if res.isOk:
+        return success()
+      else:
+        return failure($res.error)
     else:
       awaitTotal += CompletitionRetryDelay
       await sleepAsync(CompletitionRetryDelay)
 
   return failure("Task signaled finish but didn't return any result within " & $CompletitionRetryDelay)
 
 proc asyncEncode*(
     tp: Taskpool,
-    backend: EncoderBackend,
+    encoder: sink LeoEncoder,
     data: ref seq[seq[byte]],
     blockSize: int,
     ecM: int
 ): Future[?!ref seq[seq[byte]]] {.async.} =
   if ecM == 0:
     return success(seq[seq[byte]].new())
 
   without signal =? ThreadSignalPtr.new().mapFailure, err:
     return failure(err)
 
   try:
-    let
-      blockSize = data[0].len
-      args = EncodeTaskArgs(signal: signal, backend: unsafeAddr backend, blockSize: blockSize, ecM: ecM)
-      handle = proxySpawnEncodeTask(tp, args, data)
-
-    without res =? await awaitResult(signal, handle), err:
+    if err =? encoder.prepareEncode(data[]).mapFailure.errorOption():
       return failure(err)
 
-    if res.isOk:
-      var parity = seq[seq[byte]].new()
-      parity[].setLen(ecM)
-
-      for i in 0..<parity[].len:
-        parity[i] = newSeq[byte](blockSize)
-        copyMem(addr parity[i][0], addr res.value.data[i * blockSize], blockSize)
-
-      deallocShared(res.value.data)
-
-      return success(parity)
-    else:
-      return failure($res.error)
+    let
+      args = EncodeTaskArgs(signal: signal, encoder: addr encoder)
+      handle = proxySpawnEncodeTask(tp, args)
+
+    if err =? (await awaitTaskResult(signal, handle)).errorOption():
+      return failure(err)
+
+    var parity = seq[seq[byte]].new()
+    parity[].setLen(ecM)
+
+    for i in 0..<parity[].len:
+      parity[i] = newSeq[byte](blockSize)
+
+    if err =? encoder.readParity(parity[]).mapFailure.errorOption():
+      return failure(err)
+
+    return success(parity)
   finally:
     if err =? signal.close().mapFailure.errorOption():
       error "Error closing signal", msg = $err.msg
 
 proc asyncDecode*(
     tp: Taskpool,
-    backend: DecoderBackend,
+    decoder: sink LeoDecoder,
     data, parity: ref seq[seq[byte]],
     blockSize: int
 ): Future[?!ref seq[seq[byte]]] {.async.} =

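For orientation, here is a minimal, hypothetical driver for the reworked asyncEncode above. The shard geometry, proc name, and import paths are illustrative assumptions; LeoEncoder.init, mapFailure, and the asyncEncode signature are as they appear in this diff:

    import pkg/chronos
    import pkg/taskpools
    import pkg/leopard
    import pkg/questionable
    import pkg/questionable/results

    import ./errors                # assumed path: provides mapFailure as used above
    import ./erasure/asyncbackend  # assumed path: provides asyncEncode

    proc encodeExample(tp: Taskpool): Future[?!void] {.async.} =
      const
        blockSize = 4096  # illustrative shard size in bytes
        ecK = 4           # data shards
        ecM = 2           # parity shards

      # Build ecK data shards of blockSize bytes each (all zeroes here).
      var data = seq[seq[byte]].new()
      data[].setLen(ecK)
      for i in 0..<ecK:
        data[i] = newSeq[byte](blockSize)

      without var encoder =? LeoEncoder.init(blockSize, ecK, ecM).mapFailure, err:
        return failure(err)

      # prepareEncode, tp.spawn and readParity all happen inside asyncEncode.
      without parity =? (await asyncEncode(tp, encoder, data, blockSize, ecM)), err:
        return failure(err)

      doAssert parity[].len == ecM  # ecM parity shards come back
      return success()

The same shape applies to asyncDecode in the next hunk, with prepareDecode/readDecoded in place of prepareEncode/readParity.
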
@@ -199,27 +140,25 @@ proc asyncDecode*(
     return failure(err)
 
   try:
-    let
-      ecK = data[].len
-      args = DecodeTaskArgs(signal: signal, backend: unsafeAddr backend, blockSize: blockSize, ecK: ecK)
-      handle = proxySpawnDecodeTask(tp, args, data, parity)
-
-    without res =? await awaitResult(signal, handle), err:
+    if err =? decoder.prepareDecode(data[], parity[]).mapFailure.errorOption():
       return failure(err)
 
-    if res.isOk:
-      var recovered = seq[seq[byte]].new()
-      recovered[].setLen(ecK)
-
-      for i in 0..<recovered[].len:
-        recovered[i] = newSeq[byte](blockSize)
-        copyMem(addr recovered[i][0], addr res.value.data[i * blockSize], blockSize)
-
-      deallocShared(res.value.data)
-
-      return success(recovered)
-    else:
-      return failure($res.error)
+    let
+      args = DecodeTaskArgs(signal: signal, decoder: addr decoder)
+      handle = proxySpawnDecodeTask(tp, args)
+
+    if err =? (await awaitTaskResult(signal, handle)).errorOption():
+      return failure(err)
+
+    var recovered = seq[seq[byte]].new()
+    recovered[].setLen(data[].len)
+
+    for i in 0..<recovered[].len:
+      recovered[i] = newSeq[byte](blockSize)
+
+    if err =? decoder.readDecoded(recovered[]).mapFailure.errorOption():
+      return failure(err)
+
+    return success(recovered)
   finally:
     if err =? signal.close().mapFailure.errorOption():
       error "Error closing signal", msg = $err.msg

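A note on pointer lifetime in the two procs above: EncodeTaskArgs and DecodeTaskArgs now carry addr encoder / addr decoder, where the encoder and decoder are sink parameters of the async procs. As far as this diff shows, that is sound because asyncEncode and asyncDecode only return after awaitTaskResult has seen the thread signal fire and the Flowvar complete, so the spawned task cannot outlive the object it points into.
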
@@ -18,6 +18,7 @@ import pkg/chronos
 import pkg/libp2p/[multicodec, cid, multihash]
 import pkg/libp2p/protobuf/minprotobuf
 import pkg/taskpools
+import pkg/leopard
 
 import ../logutils
 import ../manifest

@@ -31,11 +32,8 @@ import ../errors
 
 import pkg/stew/byteutils
 
-import ./backend
 import ./asyncbackend
 
-export backend
-
 logScope:
   topics = "codex erasure"

@@ -63,15 +61,7 @@ type
   ## or any combination there of.
   ##
 
-  EncoderProvider* = proc(size, blocks, parity: int): EncoderBackend
-    {.raises: [Defect], noSideEffect.}
-
-  DecoderProvider* = proc(size, blocks, parity: int): DecoderBackend
-    {.raises: [Defect], noSideEffect.}
-
   Erasure* = ref object
-    encoderProvider*: EncoderProvider
-    decoderProvider*: DecoderProvider
     store*: BlockStore
     taskpool: Taskpool

@@ -285,11 +275,13 @@ proc encodeData(
 
   var
     cids = seq[Cid].new()
-    encoder = self.encoderProvider(manifest.blockSize.int, params.ecK, params.ecM)
     emptyBlock = newSeq[byte](manifest.blockSize.int)
 
   cids[].setLen(params.blocksCount)
 
+  without var encoder =? LeoEncoder.init(manifest.blockSize.int, params.ecK, params.ecM).mapFailure, err:
+    return failure(err)
+
   try:
     for step in 0..<params.steps:
       # TODO: Don't allocate a new seq every time, allocate once and zero out

@@ -349,7 +341,7 @@
     trace "Erasure coding encoding error", exc = exc.msg
     return failure(exc)
   finally:
-    encoder.release()
+    encoder.free()
 
 proc encode*(
   self: Erasure,

@@ -390,9 +382,11 @@ proc decode*(
   var
     cids = seq[Cid].new()
     recoveredIndices = newSeq[Natural]()
-    decoder = self.decoderProvider(encoded.blockSize.int, encoded.ecK, encoded.ecM)
     emptyBlock = newSeq[byte](encoded.blockSize.int)
 
+  without var decoder =? LeoDecoder.init(encoded.blockSize.int, encoded.ecK, encoded.ecM).mapFailure, err:
+    return failure(err)
+
   cids[].setLen(encoded.blocksCount)
   try:
     for step in 0..<encoded.steps:

@@ -439,7 +433,7 @@
     trace "Erasure coding decoding error", exc = exc.msg
     return failure(exc)
   finally:
-    decoder.release()
+    decoder.free()
 
   without tree =? CodexTree.init(cids[0..<encoded.originalBlocksCount]), err:
     return failure(err)

@@ -469,14 +463,10 @@ proc stop*(self: Erasure) {.async.} =
 proc new*(
     T: type Erasure,
     store: BlockStore,
-    encoderProvider: EncoderProvider,
-    decoderProvider: DecoderProvider,
     taskpool: Taskpool): Erasure =
   ## Create a new Erasure instance for encoding and decoding manifests
   ##
 
   Erasure(
     store: store,
-    encoderProvider: encoderProvider,
-    decoderProvider: decoderProvider,
     taskpool: taskpool)

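With the provider callbacks gone, constructing an Erasure takes just the store and the taskpool, as the updated call sites and tests below show. A minimal sketch (countProcessors is from std/cpuinfo, mirroring the test setup further down):

    import std/cpuinfo
    import pkg/taskpools

    let
      taskpool = Taskpool.new(num_threads = countProcessors())
      erasure = Erasure.new(store, taskpool)  # store: a BlockStore from the caller
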
@@ -253,8 +253,6 @@ proc streamEntireDataset(
   let
     erasure = Erasure.new(
       self.networkStore,
-      leoEncoderProvider,
-      leoDecoderProvider,
       self.taskpool)
   without _ =? (await erasure.decode(manifest)), error:
     error "Unable to erasure decode manifest", manifestCid, exc = error.msg

@@ -433,8 +431,6 @@ proc setupRequest(
   let
     erasure = Erasure.new(
       self.networkStore.localStore,
-      leoEncoderProvider,
-      leoDecoderProvider,
       self.taskpool)
 
   without encoded =? (await erasure.encode(manifest, ecK, ecM)), error:

@@ -82,7 +82,7 @@ asyncchecksuite "Test Node - Host contracts":
     manifestBlock = bt.Block.new(
       manifest.encode().tryGet(),
       codec = ManifestCodec).tryGet()
-    erasure = Erasure.new(store, leoEncoderProvider, leoDecoderProvider, taskpool)
+    erasure = Erasure.new(store, taskpool)
 
     manifestCid = manifestBlock.cid
     manifestCidStr = $(manifestCid)

@@ -141,7 +141,7 @@ asyncchecksuite "Test Node - Basic":
 
   test "Setup purchase request":
     let
-      erasure = Erasure.new(store, leoEncoderProvider, leoDecoderProvider, taskpool)
+      erasure = Erasure.new(store, taskpool)
       manifest = await storeDataGetManifest(localStore, chunker)
       manifestBlock = bt.Block.new(
         manifest.encode().tryGet(),

@@ -40,7 +40,7 @@ suite "Erasure encode/decode":
     chunker = RandomChunker.new(rng, size = dataSetSize, chunkSize = BlockSize)
     store = RepoStore.new(repoDs, metaDs)
     taskpool = Taskpool.new(num_threads = countProcessors())
-    erasure = Erasure.new(store, leoEncoderProvider, leoDecoderProvider, taskpool)
+    erasure = Erasure.new(store, taskpool)
     manifest = await storeDataGetManifest(store, chunker)
 
   teardown:

@@ -1 +1 @@
-Subproject commit 895ff24ca6615d577acfb11811cdd5465f596c97
+Subproject commit 68e691583e83e98f0e23d6b5e4df3354966aa33c

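The nim-leopard submodule bump (895ff24 to 68e6915) appears to be what supplies the prepare/compute/read API (prepareEncode, encodePrepared, readParity and their decode counterparts) that the reworked asyncbackend depends on.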