Fix/clarify single mode for async sync scheduler (#1292)
why: Single mode here means that only one such (single mode) instance is activated at a time, while multi mode instances for other peers are still allowed. Erroneously, multi mode instances were held back waiting while some single mode instance was running, which reduced the number of parallel download peers.
parent e14fd4b96c
commit 21837546c3
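In scheduler terms the fix is about which lock gates which mode. The following is a minimal, self-contained Nim model of the intended rule (hypothetical names and helpers, not code from this repository): `singleRunLock` serialises single mode runners against each other and holds off new multi mode laps, but multi mode runners that are already active keep working in parallel.

type
  Sched = ref object
    singleRunLock: bool   # some single mode runner is activated
    activeMulti: int      # number of activated runners in multi-mode

proc tryRunMulti(dsc: Sched; work: proc ()) =
  # A new multi mode lap is skipped only while single mode holds the lock;
  # laps already under way are never interrupted.
  if not dsc.singleRunLock:
    dsc.activeMulti.inc
    work()
    dsc.activeMulti.dec

proc tryRunSingle(dsc: Sched; work: proc ()) =
  # Exclusive among single mode runners only: it does not wait for
  # `activeMulti` to drain, so parallel download peers keep going.
  if not dsc.singleRunLock:
    dsc.singleRunLock = true
    work()
    dsc.singleRunLock = false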
@@ -274,10 +274,10 @@ proc runSingle*(buddy: SnapBuddyRef) {.async.} =
   ## This peer worker is invoked if the peer-local flag `buddy.ctrl.multiOk`
   ## is set `false` which is the default mode. This flag is updated by the
   ## worker when deemed appropriate.
-  ## * For all workers, there can be only one `runSingle()` function active
-  ## simultaneously for all worker peers.
-  ## * There will be no `runMulti()` function active for the same worker peer
-  ## simultaneously
+  ## * For all worker peers, there can be only one `runSingle()` function
+  ##   active simultaneously.
+  ## * There will be no `runMulti()` function active for the very same worker
+  ##   peer that runs the `runSingle()` function.
   ## * There will be no `runPool()` iterator active simultaneously.
   ##
   ## Note that this function runs in `async` mode.
@@ -329,6 +329,8 @@ proc runPool*(buddy: SnapBuddyRef, last: bool) =

   # FIXME: This check might not be needed. It will visit *every* node
   # in the hexary trie for checking the account leaves.
+  #
+  # Note: This is insane on main net
   if buddy.checkAccountsTrieIsComplete(env):
     env.accountsState = HealerDone

@@ -50,11 +50,11 @@
   ## This worker peer method is invoked if the peer-local flag
   ## `buddy.ctrl.multiOk` is set `false` which is the default mode. This flag
   ## is updated by the worker peer when deemed appropriate.
-  ## + For all workers, there can be only one `runSingle()` function active
-  ## simultaneously for all worker peers.
-  ## + There will be no `runMulti()` function active for the same worker peer
-  ## simultaneously
-  ## + There will be no `runPool()` iterator active simultaneously.
+  ## + For all worker peers, there can be only one `runSingle()` function
+  ##   active simultaneously.
+  ## + There will be no `runMulti()` function active for the very same worker
+  ##   peer that runs the `runSingle()` function.
+  ## + There will be no `runPool()` iterator active.
   ##
   ## Note that this function runs in `async` mode.
   ##
@@ -99,15 +99,28 @@ type
     pool: PeerPool              ## For starting the system
     buddies: ActiveBuddies[S,W] ## LRU cache with worker descriptors
     tickerOk: bool              ## Ticker logger
-    singleRunLock: bool         ## For worker initialisation
-    monitorLock: bool           ## For worker monitor
-    activeMulti: int            ## Activated runners
+    singleRunLock: bool         ## Some single mode runner is activated
+    monitorLock: bool           ## Monitor mode is activated
+    activeMulti: int            ## Number of activated runners in multi-mode

   RunnerBuddyRef[S,W] = ref object
     ## Per worker peer descriptor
     dsc: RunnerSyncRef[S,W]     ## Scheduler descriptor
     worker: BuddyRef[S,W]       ## Worker peer data

+const
+  execLoopTimeElapsedMin = 50.milliseconds
+    ## Minimum elapsed time the event loop needs for a single lap. If it
+    ## is faster, asynchronous sleep seconds are added in order to avoid
+    ## CPU overload.
+
+  execLoopTaskSwitcher = 1.nanoseconds
+    ## Asynchronous waiting time at the end of the exec loop unless some
+    ## sleep seconds were added as described by `execLoopTimeElapsedMin`,
+    ## above.
+
+  execLoopPollingTime = 50.milliseconds
+    ## Single asynchronous time interval wait state for event polling
+
 # ------------------------------------------------------------------------------
 # Private helpers
 # ------------------------------------------------------------------------------
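To make the new timing constants concrete, here is a small worked example of the end-of-lap suspend arithmetic (plain chronos code, separate from the scheduler sources; `suspendFor` is an illustrative helper, not part of the repository): a lap faster than the 50ms minimum is padded up to it, while a slower lap only yields for the 1ns task switcher.

import chronos

const
  execLoopTimeElapsedMin = 50.milliseconds
  execLoopTaskSwitcher = 1.nanoseconds

proc suspendFor(elapsed: Duration): Duration =
  # Same branch as at the end of the `workerLoop()` body below
  if execLoopTimeElapsedMin <= elapsed: execLoopTaskSwitcher
  else: execLoopTimeElapsedMin - elapsed

echo suspendFor(20.milliseconds)   # pads the lap by 30 milliseconds
echo suspendFor(80.milliseconds)   # 1 nanosecond, i.e. just a task switch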
@@ -129,67 +142,65 @@ proc workerLoop[S,W](buddy: RunnerBuddyRef[S,W]) {.async.} =
     peer = worker.peer

   # Continue until stopped
-  while not worker.ctrl.stopped:
-    if dsc.monitorLock:
-      await sleepAsync(50.milliseconds)
-      continue
-
-    # Invoke `runPool()` over all buddies if requested
-    if ctx.poolMode:
-      # Grab `monitorLock` (was `false` as checked above) and wait until clear
-      # to run as the only activated instance.
-      dsc.monitorLock = true
-      block poolModeExec:
-        while 0 < dsc.activeMulti:
-          await sleepAsync(50.milliseconds)
-          if worker.ctrl.stopped:
-            break poolModeExec
-        while dsc.singleRunLock:
-          await sleepAsync(50.milliseconds)
-          if worker.ctrl.stopped:
-            break poolModeExec
-        var count = dsc.buddies.len
-        for w in dsc.buddies.nextValues:
-          count.dec
-          worker.runPool(count == 0)
-        # End `block poolModeExec`
-      dsc.monitorLock = false
-      continue
-
-    # Rotate connection table so the most used entry is at the top/right
-    # end. So zombies will end up leftish.
-    discard dsc.buddies.lruFetch(peer.hash)
-
-    # Allow task switch
-    await sleepAsync(1.milliseconds)
-    if worker.ctrl.stopped:
-      break
-
-    # Multi mode
-    if worker.ctrl.multiOk:
-      if not dsc.singleRunLock:
-        dsc.activeMulti.inc
-        # Continue doing something, work a bit
-        await worker.runMulti()
-        dsc.activeMulti.dec
-      continue
-
-    # Single mode as requested. The `multiOk` flag for this worker was just
-    # found `false` in the pervious clause.
-    if not dsc.singleRunLock:
-      # Lock single instance mode and wait for other workers to finish
-      dsc.singleRunLock = true
-      block singleModeExec:
-        while 0 < dsc.activeMulti:
-          await sleepAsync(50.milliseconds)
-          if worker.ctrl.stopped:
-            break singleModeExec
-        # Run single instance and release afterwards
-        await worker.runSingle()
-        # End `block singleModeExec`
-      dsc.singleRunLock = false
-
-  # End while
+  block taskExecLoop:
+    while worker.ctrl.running:
+      # Enforce minimum time spent on this loop
+      let startMoment = Moment.now()
+
+      if dsc.monitorLock:
+        discard # suspend some time at the end of loop body
+
+      # Invoke `runPool()` over all buddies if requested
+      elif ctx.poolMode:
+        # Grab `monitorLock` (was `false` as checked above) and wait until
+        # clear to run as the only activated instance.
+        dsc.monitorLock = true
+        while 0 < dsc.activeMulti or dsc.singleRunLock:
+          await sleepAsync execLoopPollingTime
+          if worker.ctrl.stopped:
+            break taskExecLoop
+        var count = dsc.buddies.len
+        for w in dsc.buddies.nextValues:
+          count.dec
+          worker.runPool(count == 0)
+        dsc.monitorLock = false
+
+      else:
+        # Rotate connection table so the most used entry is at the top/right
+        # end. So zombies will end up leftish.
+        discard dsc.buddies.lruFetch(peer.hash)
+
+        # Multi mode
+        if worker.ctrl.multiOk:
+          if not dsc.singleRunLock:
+            dsc.activeMulti.inc
+            # Continue doing something, work a bit
+            await worker.runMulti()
+            dsc.activeMulti.dec
+
+        elif dsc.singleRunLock:
+          # Some other process is running single mode
+          discard # suspend some time at the end of loop body
+
+        else:
+          # Start single instance mode by grabbing `singleRunLock` (was
+          # `false` as checked above).
+          dsc.singleRunLock = true
+          await worker.runSingle()
+          dsc.singleRunLock = false
+
+      if worker.ctrl.stopped:
+        break taskExecLoop
+
+      # Enforce minimum time spent on this loop so we never reach 100% CPU
+      # load caused by some empty sub-tasks which are out of this scheduler
+      # control.
+      let
+        elapsed = Moment.now() - startMoment
+        suspend = if execLoopTimeElapsedMin <= elapsed: execLoopTaskSwitcher
+                  else: execLoopTimeElapsedMin - elapsed
+      await sleepAsync suspend
+      # End while

   # Note that `runStart()` was dispatched in `onPeerConnected()`
   worker.runStop()
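One design note on the rewrite: the old loop needed the nested `poolModeExec`/`singleModeExec` blocks plus per-branch `continue` statements to get out of its wait states, while the new loop hangs everything off a single named outer block, so any stop condition can abandon the whole task in one `break taskExecLoop`. A generic illustration of that Nim pattern (toy code, not from the repository):

block taskExecLoop:
  var laps = 0
  while true:
    laps.inc
    if laps == 3:
      break taskExecLoop   # exits the enclosing named block, not just the loop
echo "stopped after 3 laps"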