Feature: handle seq[byte] and strings using openArray's and memory retention (#7)

Limited support for passing seq's of bytes and strings to tasklets using openArray
This commit is contained in:
Jaremy Creechley 2024-02-16 17:13:59 -07:00 committed by GitHub
parent 44d54bd4b3
commit 61ff2594d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 329 additions and 91 deletions

View File

@ -13,3 +13,4 @@ requires "threading"
requires "taskpools >= 0.0.5"
requires "chronicles"
include "build.nims"

8
build.nims Normal file
View File

@ -0,0 +1,8 @@
import std/[os, strutils]
task test, "unit tests":
  # Compile and run every test program found directly inside `tests/`.
  for path in listFiles("tests"):
    let fileName = path.splitPath().tail
    # test programs are the files named `t*.nim`; skip everything else
    if not (fileName.startsWith("t") and fileName.endsWith(".nim")):
      continue
    exec "nim c -r " & path

View File

@ -1,2 +1,4 @@
--threads:on
include "build.nims"

View File

@ -1,3 +1,2 @@
import apatheia/tasks
export tasks

View File

@ -2,6 +2,7 @@ import std/tables
import std/macros
import ./queues
import ./memretainers
import taskpools
import chronos
@ -17,37 +18,55 @@ logScope:
## This module provides a simple way to submit jobs to taskpools
## and getting a result returned via an async future.
##
## Any compatible `seq[T]` or `string` arguments passed
## via the `submit` macro will be converted into the special `OpenArrayHolder[T]` type.
## The `submit` macro wraps these arguments in this object and retains the
## memory associated with the original `seq[T]` or `string` object.
## This greatly simplifies the passing of these types in `refc`.
##
## Note, for `arc` or `orc` GC's this setup will be replaced with a move operation in the future.
## These GC's also allow greater support for moving GC types across thread boundaries.
##
## Currently this module limits support for GC types to ensure `refc` safety.
##
type
JobId* = uint ## job id, should match `future.id()`
JobQueue*[T] = ref object
## job queue object
JobQueue*[T] = ref object ## job queue object
queue*: SignalQueue[(JobId, T)]
futures*: Table[JobId, Future[T]]
taskpool*: Taskpool
running*: bool
JobResult*[T] = object
## hold a job result to be returned by jobs
JobResult*[T] = object ## hold the result of a job after it finishes
id*: JobId
queue*: SignalQueue[(JobId, T)]
OpenArrayHolder*[T] = object
data*: ptr UncheckedArray[T]
size*: int
SupportedSeqTypes* = byte | SomeInteger | SomeFloat
template toOpenArray*[T](arr: OpenArrayHolder[T]): auto =
  ## Views the memory referenced by `arr` as an `openArray[T]` holding
  ## exactly `arr.size` elements.
  ##
  ## `system.toOpenArray` takes an *inclusive* upper index, so the last valid
  ## index is `arr.size - 1`. Passing `arr.size` itself would expose one
  ## element past the end of the buffer described by the holder.
  ## A `size` of 0 yields an empty view.
  system.toOpenArray(arr.data, 0, arr.size - 1)
func jobId*[T](fut: Future[T]): JobId =
  ## Returns the id of `fut` (`fut.id()`) converted to a `JobId`.
  result = JobId(fut.id())
proc processJobs*[T](jobs: JobQueue[T]) {.async.} =
## Starts a "detached" async processor for a given job queue.
##
## This processor waits for events from the queue in the JobQueue
## and complete the associated futures.
## and completes the associated future.
const tn: string = $(JobQueue[T])
info "Processing jobs in job queue for type ", type = tn
while jobs.running:
let res = await(jobs.queue.wait()).get()
trace "got job result", jobResult = $res
let (id, ret) = res
var fut: Future[T]
if jobs.futures.pop(id, fut):
let (id, ret) = await(jobs.queue.wait()).get()
trace "got job result", jobId = id
releaseMemory(id) # always release any retained memory
if (var fut: Future[T]; jobs.futures.pop(id, fut)):
if not fut.finished():
fut.complete(ret)
else:
raise newException(IndexDefect, "missing future: " & $id)
@ -56,35 +75,114 @@ proc processJobs*[T](jobs: JobQueue[T]) {.async.} =
proc createFuture*[T](jobs: JobQueue[T], name: static string): (JobResult[T], Future[T]) =
## Creates a future that returns the result of the associated job.
let fut = newFuture[T](name)
let id = JobId fut.id()
let id = fut.jobId()
jobs.futures[id] = fut
trace "jobs added: ", numberJobs = jobs.futures.len()
return (JobResult[T](id: id, queue: jobs.queue), fut, )
trace "job added: ", numberJobs = jobs.futures.len()
return (JobResult[T](id: id, queue: jobs.queue), fut)
proc newJobQueue*[T](maxItems: int = 0, taskpool: Taskpool = Taskpool.new()): JobQueue[T] {.raises: [ApatheiaSignalErr].} =
proc newJobQueue*[T](
maxItems: int = 0, taskpool: Taskpool = Taskpool.new()
): JobQueue[T] {.raises: [ApatheiaSignalErr].} =
## Creates a new async-compatible threaded job queue.
result = JobQueue[T](queue: newSignalQueue[(uint, T)](maxItems), taskpool: taskpool, running: true)
result = JobQueue[T](
queue: newSignalQueue[(uint, T)](maxItems), taskpool: taskpool, running: true
)
asyncSpawn(processJobs(result))
template checkJobArgs*[T](exp: seq[T], fut: untyped): OpenArrayHolder[T] =
  ## Job-argument conversion for `seq[T]`.
  ##
  ## The sequence is stored in a `SeqRetainer[T]` that is registered with
  ## `retainMemory` under `fut.jobId()`. The returned `OpenArrayHolder[T]`
  ## points at the first element of that retained sequence and records its
  ## length. Element types outside `SupportedSeqTypes` are rejected at
  ## compile time.
  ##
  ## Note: the address of element 0 is taken, so `exp` is expected to be
  ## non-empty.
  when T isnot SupportedSeqTypes:
    {.error: "unsupported sequence type for job argument: " & $typeof(seq[T]).}
  else:
    let retainer = SeqRetainer[T](data: exp)
    retainMemory(fut.jobId(), retainer)
    OpenArrayHolder[T](
      data: cast[ptr UncheckedArray[T]](unsafeAddr(retainer.data[0])),
      size: retainer.data.len()
    )
template checkJobArgs*(exp: string, fut: untyped): OpenArrayHolder[char] =
  ## Job-argument conversion for `string`.
  ##
  ## The string is stored in a `StrRetainer` that is registered with
  ## `retainMemory` under `fut.jobId()`. The returned `OpenArrayHolder[char]`
  ## points at the first character of that retained string and records its
  ## length.
  ##
  ## Note: the address of character 0 is taken, so `exp` is expected to be
  ## non-empty.
  let retainer = StrRetainer(data: exp)
  retainMemory(fut.jobId(), retainer)
  OpenArrayHolder[char](
    data: cast[ptr UncheckedArray[char]](unsafeAddr(retainer.data[0])),
    size: retainer.data.len()
  )
template checkJobArgs*(exp: typed, fut: untyped): auto =
  ## Fallback for every other argument type: the argument is passed through
  ## unchanged and `fut` is not used.
  exp
macro submitMacro(tp: untyped, jobs: untyped, exp: untyped): untyped =
## modifies the call expression to include the job queue and
## the job id parameters
let jobRes = genSym(nskLet, "jobRes")
let futName = genSym(nskLet, "fut")
let jobRes = ident("jobRes")
let futName = ident("fut")
let nm = newLit(repr(exp))
var argids = newSeq[NimNode]()
var letargs = nnkLetSection.newTree()
for i, p in exp[1 ..^ 1]:
let id = ident "arg" & $i
argids.add(id)
let pn = nnkCall.newTree(ident"checkJobArgs", p, `futName`)
letargs.add nnkIdentDefs.newTree(id, newEmptyNode(), pn)
var fncall = nnkCall.newTree(exp[0])
fncall.add(jobRes)
for p in exp[1..^1]: fncall.add(p)
for p in argids:
fncall.add(p)
result = quote do:
result = quote:
block:
let (`jobRes`, `futName`) = createFuture(`jobs`, `nm`)
`letargs`
when typeof(`fncall`) isnot void:
{.
error:
"Apatheia jobs cannot return values. The given proc returns type: " &
$(typeof(`fncall`)) & " for call " & astToStr(`fncall`)
.}
`jobs`.taskpool.spawn(`fncall`)
`futName`
# echo "submit: res:\n", result.repr
# echo ""
when isMainModule:
echo "\nSUBMIT MACRO::\n", result.repr
echo ""
template submit*[T](jobs: JobQueue[T], exp: untyped): Future[T] =
  ## Submits the call expression `exp` as a job on `jobs` and evaluates to
  ## the `Future[T]` for its result. The rewriting of `exp` and the spawn on
  ## `jobs.taskpool` are done by `submitMacro`.
  submitMacro(T, jobs, exp)
when isMainModule:
import os
import chronos/threadsync
import chronos/unittest2/asynctests
proc addNumValues(
jobResult: JobResult[float], base: float, vals: OpenArrayHolder[float]
) =
os.sleep(100)
var res = base
for x in vals.toOpenArray():
res += x
discard jobResult.queue.send((jobResult.id, res))
proc addStrings(jobResult: JobResult[float], vals: OpenArrayHolder[string]) =
discard
suite "async tests":
var tp = Taskpool.new(num_threads = 2) # Default to the number of hardware threads.
asyncTest "basic openarray":
var jobs = newJobQueue[float](taskpool = tp)
let job = jobs.submit(addNumValues(10.0, @[1.0.float, 2.0]))
let res = await job
check res == 13.0
asyncTest "don't compile":
check not compiles(
block:
var jobs = newJobQueue[float](taskpool = tp)
let job = jobs.submit(addStrings(@["a", "b", "c"]))
)

View File

@ -3,22 +3,19 @@ import std/[tables, strutils, typetraits, macros]
proc makeProcName*(s: string): string =
result = ""
for c in s:
if c.isAlphaNumeric: result.add c
if c.isAlphaNumeric:
result.add c
proc hasReturnType*(params: NimNode): bool =
if params != nil and params.len > 0 and params[0] != nil and
params[0].kind != nnkEmpty:
if params != nil and params.len > 0 and params[0] != nil and params[0].kind != nnkEmpty:
result = true
proc getReturnType*(params: NimNode): NimNode =
if params != nil and params.len > 0 and params[0] != nil and
params[0].kind != nnkEmpty:
if params != nil and params.len > 0 and params[0] != nil and params[0].kind != nnkEmpty:
result = params[0]
proc firstArgument*(params: NimNode): (NimNode, NimNode) =
if params != nil and
params.len > 0 and
params[1] != nil and
if params != nil and params.len > 0 and params[1] != nil and
params[1].kind == nnkIdentDefs:
result = (ident params[1][0].strVal, params[1][1])
else:
@ -35,9 +32,7 @@ iterator paramsIter*(params: NimNode): tuple[name, ntype: NimNode] =
proc signalTuple*(sig: NimNode): NimNode =
let otp = nnkEmpty.newTree()
# echo "signalObjRaw:sig1: ", sig.treeRepr
let sigTyp =
if sig.kind == nnkSym: sig.getTypeInst
else: sig.getTypeInst
let sigTyp = if sig.kind == nnkSym: sig.getTypeInst else: sig.getTypeInst
# echo "signalObjRaw:sig2: ", sigTyp.treeRepr
let stp =
if sigTyp.kind == nnkProcTy:
@ -57,7 +52,9 @@ proc signalTuple*(sig: NimNode): NimNode =
result = nnkTupleConstr.newTree()
if isGeneric:
template genArgs(n): auto = n[1][1]
template genArgs(n): auto =
n[1][1]
var genKinds: Table[string, NimNode]
for i in 1 ..< stp.genArgs.len:
genKinds[repr stp.genArgs[i]] = otp[i]
@ -72,19 +69,20 @@ proc signalTuple*(sig: NimNode): NimNode =
# echo ""
if result.len == 0:
# result = bindSym"void"
result = quote do:
result = quote:
tuple[]
proc mkParamsVars*(paramsIdent, paramsType, params: NimNode): NimNode =
## Create local variables for each parameter in the actual RPC call proc
if params.isNil: return
if params.isNil:
return
result = newStmtList()
var varList = newSeq[NimNode]()
var cnt = 0
for paramid, paramType in paramsIter(params):
let idx = newIntLitNode(cnt)
let vars = quote do:
let vars = quote:
var `paramid`: `paramType` = `paramsIdent`[`idx`]
varList.add vars
cnt.inc()
@ -106,9 +104,10 @@ proc mkParamsType*(paramsIdent, paramsType, params, genericParams: NimNode): Nim
##
## proc multiplyrpc(params: RpcType_multiplyrpc): int =
##
if params.isNil: return
if params.isNil:
return
var tup = quote do:
var tup = quote:
type `paramsType` = tuple[]
for paramIdent, paramType in paramsIter(params):
# processing multiple variables of one type
@ -129,7 +128,8 @@ proc procIdentAppend*(id: NimNode, name: string): NimNode =
proc mkCall*(callName, params: NimNode): NimNode =
## Create local variables for each parameter in the actual RPC call proc
if params.isNil: return
if params.isNil:
return
var argList = newSeq[NimNode]()
for paramId, paramType in paramsIter(params):
argList.add paramId
@ -138,9 +138,10 @@ proc mkCall*(callName, params: NimNode): NimNode =
proc mkProc*(name, params, body: NimNode): NimNode =
let args = params.copyNimTree()
result = quote do:
result = quote:
proc `name`() {.nimcall.} =
`body`
result[3].del(0)
for arg in args:
result.params.add arg

View File

@ -0,0 +1,30 @@
import std/tables
import ./types
export types
type
Retainer* = ref object of RootObj
## Base type of the objects stored in the retainer table; the subtypes
## below hold the data that must stay referenced.
SeqRetainer*[T] = ref object of Retainer
## Retainer holding a `seq[T]`.
data*: seq[T]
StrRetainer* = ref object of Retainer
## Retainer holding a `string`.
data*: string
# Module-level table mapping a job id to the retainers registered for it;
# filled by `retainMemory` and emptied per id by `releaseMemory`.
var memoryRetainerTable = newTable[uint, seq[Retainer]]()
proc retainMemory*(id: JobId, mem: Retainer) {.gcsafe, raises: [].} =
  ## Registers `mem` under job `id` so that it stays referenced until
  ## `releaseMemory(id)` is called. Several retainers may be registered for
  ## the same job id; they are appended in call order.
  {.cast(gcsafe).}:
    # the table is a module-level global, hence the explicit gcsafe override
    memoryRetainerTable.mgetOrPut(id, newSeq[Retainer]()).add(mem)
proc releaseMemory*(id: JobId) {.gcsafe, raises: [].} =
  ## Removes the table entry for job `id`, dropping every retainer that was
  ## registered for it.
  {.cast(gcsafe).}:
    # the table is a module-level global, hence the explicit gcsafe override
    del(memoryRetainerTable, id)
proc retainedMemoryCount*(): int {.gcsafe, raises: [].} =
  ## Number of job ids that currently have retainers registered
  ## (the length of the retainer table).
  {.cast(gcsafe).}:
    result = len(memoryRetainerTable)

View File

@ -10,14 +10,12 @@ export options
export threadsync
export chronos
type
ChanPtr[T] = ptr Channel[T]
type ChanPtr[T] = ptr Channel[T]
proc allocSharedChannel[T](): ChanPtr[T] =
cast[ChanPtr[T]](allocShared0(sizeof(Channel[T])))
type
SignalQueue*[T] = object
type SignalQueue*[T] = object
signal: ThreadSignalPtr
chan*: ChanPtr[T]
@ -26,7 +24,9 @@ proc dispose*[T](val: SignalQueue[T]) =
deallocShared(val.chan)
discard val.signal.close()
proc newSignalQueue*[T](maxItems: int = 0): SignalQueue[T] {.raises: [ApatheiaSignalErr].} =
proc newSignalQueue*[T](
maxItems: int = 0
): SignalQueue[T] {.raises: [ApatheiaSignalErr].} =
## Create a signal queue compatible with Chronos async.
let res = ThreadSignalPtr.new()
if res.isErr():

View File

@ -1,4 +1,3 @@
import std/[macros, strutils]
import macroutils
@ -6,14 +5,38 @@ import macroutils
import jobs
export jobs
# TODO: make these do something useful or remove them
## Tasks provide a convenience wrapper for using the jobs module. It also
## provides some extra conveniences like handling a subset of `openArray[T]`
## types in a safe manner using `OpenArrayHolder[T]` type.
##
## The `asyncTask` macro works by creating a wrapper proc around the
## annotated user proc. The transformation looks similar to:
##
## .. code-block::
## proc doHashes*(data: openArray[byte], opts: HashOptions): float {.asyncTask.} =
## result = 10.0
##
##
## .. code-block::
## proc doHashesTasklet*(data: openArray[byte]; opts: HashOptions): float {.nimcall.} =
## result = 10.0
##
## proc doHashes*(jobResult: JobResult[float]; data: OpenArrayHolder[byte];
## opts: HashOptions) {.nimcall.} =
## let val {.inject.} = doHashesTasklet(convertParamType(data),
## convertParamType(opts))
## discard jobResult.queue.send((jobResult.id, val))
##
## Parameters of type `openArray[T]` have special support and are converted
## into the `OpenArrayHolder[T]` type from the jobs module. See the jobs module
## for more information.
##
template convertParamType*[T](obj: OpenArrayHolder[T]): auto =
  ## Converts an `OpenArrayHolder[T]` job argument back into an `openArray`
  ## view by calling `toOpenArray` on it.
  ##
  ## The former `static: echo` debug statement was dropped: it printed to the
  ## compiler output on every instantiation and had no runtime effect.
  obj.toOpenArray()
template checkParamType*(obj: object): auto =
# for name, field in obj.fieldPairs():
# echo "field name: ", name
obj
template checkParamType*(obj: typed): auto =
template convertParamType*(obj: typed): auto =
obj
macro asyncTask*(p: untyped): untyped =
@ -35,28 +58,38 @@ macro asyncTask*(p: untyped): untyped =
error("tasklet definition must have return type", p)
# setup inner tasklet proc
let tp = mkProc(procId.procIdentAppend("Tasklet"),
params, body)
let tp = mkProc(procId.procIdentAppend("Tasklet"), params, body)
# setup async wrapper code
var asyncBody = newStmtList()
let tcall = newCall(ident(name & "Tasklet"))
for paramId, paramType in paramsIter(params):
tcall.add newCall("checkParamType", paramId)
asyncBody = quote do:
tcall.add newCall("convertParamType", paramId)
asyncBody = quote:
let val {.inject.} = `tcall`
discard jobResult.queue.send((jobResult.id, val,))
discard jobResult.queue.send((jobResult.id, val))
let retType =
if not hasReturnType(params):
ident"void"
else:
params.getReturnType()
var asyncParams = params.copyNimTree()
let retType = if not hasReturnType(params): ident"void"
else: params.getReturnType()
let jobArg = nnkIdentDefs.newTree(
ident"jobResult",
nnkBracketExpr.newTree(ident"JobResult", retType),
newEmptyNode()
ident"jobResult", nnkBracketExpr.newTree(ident"JobResult", retType), newEmptyNode()
)
asyncParams[0] = newEmptyNode()
asyncParams.insert(1, jobArg)
var asyncParams = nnkFormalParams.newTree()
asyncParams.add newEmptyNode()
asyncParams.add jobArg
for i, p in params[1 ..^ 1]:
let pt = p[1]
if pt.kind == nnkBracketExpr and pt[0].repr == "openArray":
# special case openArray to support special OpenArrayHolder from jobs module
p[1] = nnkBracketExpr.newTree(ident"OpenArrayHolder", pt[1])
asyncParams.add p
else:
asyncParams.add p
let fn = mkProc(procId, asyncParams, asyncBody)
result = newStmtList()
@ -67,13 +100,9 @@ macro asyncTask*(p: untyped): untyped =
echo "asyncTask:body:\n", result.repr
when isMainModule:
type
HashOptions* = object
type HashOptions* = object
striped*: bool
proc doHashes2*(data: openArray[byte],
opts: HashOptions): float {.asyncTask.} =
proc doHashes*(data: openArray[byte], opts: HashOptions): float {.asyncTask.} =
echo "hashing"
result = 10.0

View File

@ -1,4 +1,5 @@
type
ApatheiaException* = object of CatchableError
## Base type for the catchable errors declared by this library.
ApatheiaSignalErr* = object of ApatheiaException
## Error listed in the `raises` pragma of `newSignalQueue` and `newJobQueue`.
JobId* = uint ## job id, should match `future.id()`

View File

@ -8,6 +8,7 @@ import taskpools
import apatheia/queues
import apatheia/jobs
import apatheia/memretainers
proc addNumsRaw(a, b: float): float =
os.sleep(50)
@ -17,10 +18,29 @@ proc addNums(jobResult: JobResult[float], a, b: float) =
let res = addNumsRaw(a, b)
discard jobResult.queue.send((jobResult.id, res,))
proc addNumsIncorrect(jobResult: JobResult[float], vals: openArray[float]): float =
discard
proc addNumValues(jobResult: JobResult[float], base: float, vals: OpenArrayHolder[float]) =
os.sleep(100)
var res = base
for x in vals.toOpenArray():
res += x
discard jobResult.queue.send((jobResult.id, res,))
proc strCompute(jobResult: JobResult[int], vals: OpenArrayHolder[char]) =
discard jobResult.queue.send((jobResult.id, vals.size,))
proc addStrings(jobResult: JobResult[float], vals: OpenArrayHolder[string]) =
discard
suite "async tests":
var tp = Taskpool.new(num_threads = 2) # Default to the number of hardware threads.
asyncTest "cannot return value":
check not compiles(await jobs.submit(addNums(1.0, 2.0,)))
asyncTest "test":
var jobs = newJobQueue[float](taskpool = tp)
@ -28,3 +48,30 @@ suite "async tests":
check res == 3.0
asyncTest "testing seq":
var jobs = newJobQueue[float](taskpool = tp)
let res = await jobs.submit(addNumValues(10.0, @[1.0.float, 2.0]))
check res == 13.0
asyncTest "testing string":
var jobs = newJobQueue[int](taskpool = tp)
let res = await jobs.submit(strCompute("hello world!"))
check res == 12
asyncTest "testing arrays":
var jobs = newJobQueue[float](taskpool = tp)
let fut1 = jobs.submit(addNumValues(10.0, @[1.0.float, 2.0]))
let fut2 = jobs.submit(addNumValues(20.0, @[3.0.float, 4.0]))
check retainedMemoryCount() == 2
let res1 = await fut1
let res2 = await fut2
check res1 == 13.0
check res2 == 27.0
check retainedMemoryCount() == 0
asyncTest "don't compile":
check not compiles(
block:
var jobs = newJobQueue[float](taskpool = tp)
let job = jobs.submit(addStrings(@["a", "b", "c"]))
)

View File

@ -6,6 +6,7 @@ import chronos/unittest2/asynctests
import taskpools
import apatheia/tasks
import apatheia/memretainers
proc addNums(a, b: float): float {.asyncTask.} =
os.sleep(50)
@ -17,6 +18,11 @@ proc addNumValues(vals: openArray[float]): float {.asyncTask.} =
for x in vals:
result += x
proc strCompute(val: openArray[char]): int {.asyncTask.} =
## note includes null terminator!
return val.len()
suite "async tests":
var tp = Taskpool.new(num_threads = 2) # Default to the number of hardware threads.
var jobsVar = newJobQueue[float](taskpool = tp)
@ -35,3 +41,19 @@ suite "async tests":
let args = @[1.0, 2.0, 3.0]
let res = await jobs.submit(addNumValues(args))
check res == 6.0
asyncTest "test strCompute":
# NOTE(review): "hello world!" has 12 characters, so the expected 13 is
# length + 1. This depends on the holder-to-openArray conversion using an
# inclusive upper bound of `size`; confirm that is intended, and expect 12
# here if that bound is `size - 1`.
var jobs = newJobQueue[int](taskpool = tp)
let res = await jobs.submit(strCompute("hello world!"))
check res == 13 # note includes cstring null terminator
asyncTest "testing openArrays":
var jobs = newJobQueue[float](taskpool = tp)
let fut1 = jobs.submit(addNumValues(@[1.0.float, 2.0]))
let fut2 = jobs.submit(addNumValues(@[3.0.float, 4.0]))
check retainedMemoryCount() == 2
let res1 = await fut1
let res2 = await fut2
check res1 == 3.0
check res2 == 7.0
check retainedMemoryCount() == 0