Full sync peer negotiation control (#1390)
* Additional logging for scheduler * Fix duplicate occurrence of `bestNumber` why: Happened when the `block_queue` module was separated out of the `worker` module. Somehow testing was insufficient or skipped, at all. * Update `runPool()` mixin for scheduler details: Could be simplified * Dynamically adapt pivot header negotiation mode details: After accepting one peer and some timeout, do not search for more peers for start syncing but rather continue in relaxed mode with a single peer.
This commit is contained in:
parent
69204fffe9
commit
d55a72ae49
|
@ -23,33 +23,82 @@ logScope:
|
||||||
type
|
type
|
||||||
FullSyncRef* = RunnerSyncRef[CtxData,BuddyData]
|
FullSyncRef* = RunnerSyncRef[CtxData,BuddyData]
|
||||||
|
|
||||||
|
const
|
||||||
|
extraTraceMessages = false # or true
|
||||||
|
## Enable additional logging noise
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Private logging helpers
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template traceMsg(f, info: static[string]; args: varargs[untyped]) =
|
||||||
|
trace "Snap scheduler " & f & "() " & info, args
|
||||||
|
|
||||||
|
template traceMsgCtx(f, info: static[string]; c: FullCtxRef) =
|
||||||
|
when extraTraceMessages:
|
||||||
|
block:
|
||||||
|
let
|
||||||
|
poolMode {.inject.} = c.poolMode
|
||||||
|
daemon {.inject.} = c.daemon
|
||||||
|
f.traceMsg info, poolMode, daemon
|
||||||
|
|
||||||
|
template traceMsgBuddy(f, info: static[string]; b: FullBuddyRef) =
|
||||||
|
when extraTraceMessages:
|
||||||
|
block:
|
||||||
|
let
|
||||||
|
peer {.inject.} = b.peer
|
||||||
|
runState {.inject.} = b.ctrl.state
|
||||||
|
multiOk {.inject.} = b.ctrl.multiOk
|
||||||
|
poolMode {.inject.} = b.ctx.poolMode
|
||||||
|
daemon {.inject.} = b.ctx.daemon
|
||||||
|
f.traceMsg info, peer, runState, multiOk, poolMode, daemon
|
||||||
|
|
||||||
|
|
||||||
|
template tracerFrameCtx(f: static[string]; c: FullCtxRef; code: untyped) =
|
||||||
|
f.traceMsgCtx "begin", c
|
||||||
|
code
|
||||||
|
f.traceMsgCtx "end", c
|
||||||
|
|
||||||
|
template tracerFrameBuddy(f: static[string]; b: FullBuddyRef; code: untyped) =
|
||||||
|
f.traceMsgBuddy "begin", b
|
||||||
|
code
|
||||||
|
f.traceMsgBuddy "end", b
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# Virtual methods/interface, `mixin` functions
|
# Virtual methods/interface, `mixin` functions
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
proc runSetup(ctx: FullCtxRef; ticker: bool): bool =
|
proc runSetup(ctx: FullCtxRef; ticker: bool): bool =
|
||||||
worker.setup(ctx,ticker)
|
tracerFrameCtx("runSetup", ctx):
|
||||||
|
result = worker.setup(ctx,ticker)
|
||||||
|
|
||||||
proc runRelease(ctx: FullCtxRef) =
|
proc runRelease(ctx: FullCtxRef) =
|
||||||
worker.release(ctx)
|
tracerFrameCtx("runRelease", ctx):
|
||||||
|
worker.release(ctx)
|
||||||
|
|
||||||
proc runDaemon(ctx: FullCtxRef) {.async.} =
|
proc runDaemon(ctx: FullCtxRef) {.async.} =
|
||||||
discard
|
tracerFrameCtx("runDaemon", ctx):
|
||||||
|
await worker.runDaemon(ctx)
|
||||||
|
|
||||||
proc runStart(buddy: FullBuddyRef): bool =
|
proc runStart(buddy: FullBuddyRef): bool =
|
||||||
worker.start(buddy)
|
tracerFrameBuddy("runStart", buddy):
|
||||||
|
result = worker.start(buddy)
|
||||||
|
|
||||||
proc runStop(buddy: FullBuddyRef) =
|
proc runStop(buddy: FullBuddyRef) =
|
||||||
worker.stop(buddy)
|
tracerFrameBuddy("runStop", buddy):
|
||||||
|
worker.stop(buddy)
|
||||||
|
|
||||||
proc runPool(buddy: FullBuddyRef; last: bool): bool =
|
proc runPool(buddy: FullBuddyRef; last: bool): bool =
|
||||||
worker.runPool(buddy, last)
|
tracerFrameBuddy("runPool", buddy):
|
||||||
|
result = worker.runPool(buddy, last)
|
||||||
|
|
||||||
proc runSingle(buddy: FullBuddyRef) {.async.} =
|
proc runSingle(buddy: FullBuddyRef) {.async.} =
|
||||||
await worker.runSingle(buddy)
|
tracerFrameBuddy("runSingle", buddy):
|
||||||
|
await worker.runSingle(buddy)
|
||||||
|
|
||||||
proc runMulti(buddy: FullBuddyRef) {.async.} =
|
proc runMulti(buddy: FullBuddyRef) {.async.} =
|
||||||
await worker.runMulti(buddy)
|
tracerFrameBuddy("runMulti", buddy):
|
||||||
|
await worker.runMulti(buddy)
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# Public functions
|
# Public functions
|
||||||
|
|
|
@ -20,29 +20,48 @@ import
|
||||||
{.push raises:[Defect].}
|
{.push raises:[Defect].}
|
||||||
|
|
||||||
logScope:
|
logScope:
|
||||||
topics = "full-sync"
|
topics = "full-buddy"
|
||||||
|
|
||||||
type
|
type
|
||||||
|
PivotState = enum
|
||||||
|
PivotStateInitial, ## Initial state
|
||||||
|
FirstPivotSeen, ## Starting, first pivot seen
|
||||||
|
FirstPivotAccepted, ## Accepted, waiting for second
|
||||||
|
FirstPivotUseRegardless ## Force pivot if available
|
||||||
|
PivotRunMode ## SNAFU after some magic
|
||||||
|
|
||||||
BuddyData* = object
|
BuddyData* = object
|
||||||
## Local descriptor data extension
|
## Local descriptor data extension
|
||||||
pivot: BestPivotWorkerRef ## Local pivot worker descriptor
|
pivot: BestPivotWorkerRef ## Local pivot worker descriptor
|
||||||
bQueue: BlockQueueWorkerRef ## Block queue worker
|
bQueue: BlockQueueWorkerRef ## Block queue worker
|
||||||
bestNumber: Option[BlockNumber] ## Largest block number reported
|
|
||||||
|
|
||||||
CtxData* = object
|
CtxData* = object
|
||||||
## Globally shared data extension
|
## Globally shared data extension
|
||||||
rng*: ref HmacDrbgContext ## Random generator, pre-initialised
|
rng*: ref HmacDrbgContext ## Random generator, pre-initialised
|
||||||
pivot: BestPivotCtxRef ## Global pivot descriptor
|
pivot: BestPivotCtxRef ## Global pivot descriptor
|
||||||
|
pivotState: PivotState ## For initial pivot control
|
||||||
|
pivotStamp: Moment ## `PivotState` driven timing control
|
||||||
bCtx: BlockQueueCtxRef ## Global block queue descriptor
|
bCtx: BlockQueueCtxRef ## Global block queue descriptor
|
||||||
ticker: TickerRef ## Logger ticker
|
ticker: TickerRef ## Logger ticker
|
||||||
|
|
||||||
FullBuddyRef* = ##\
|
FullBuddyRef* = BuddyRef[CtxData,BuddyData]
|
||||||
## Extended worker peer descriptor
|
## Extended worker peer descriptor
|
||||||
BuddyRef[CtxData,BuddyData]
|
|
||||||
|
|
||||||
FullCtxRef* = ##\
|
FullCtxRef* = CtxRef[CtxData]
|
||||||
## Extended global descriptor
|
## Extended global descriptor
|
||||||
CtxRef[CtxData]
|
|
||||||
|
const
|
||||||
|
extraTraceMessages = false # or true
|
||||||
|
## Enabled additional logging noise
|
||||||
|
|
||||||
|
FirstPivotSeenTimeout = 3.minutes
|
||||||
|
## Turn on relaxed pivot negotiation after some waiting time when there
|
||||||
|
## was a `peer` seen but was rejected. This covers a rare event. Typically
|
||||||
|
## useless peers do not appear ready for negotiation.
|
||||||
|
|
||||||
|
FirstPivotAcceptedTimeout = 50.seconds
|
||||||
|
## Turn on relaxed pivot negotiation after some waiting time when there
|
||||||
|
## was a `peer` accepted but no second one yet.
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# Private helpers
|
# Private helpers
|
||||||
|
@ -210,6 +229,59 @@ proc stop*(buddy: FullBuddyRef) =
|
||||||
# Public functions
|
# Public functions
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
proc runDaemon*(ctx: FullCtxRef) {.async.} =
|
||||||
|
## Global background job that will be re-started as long as the variable
|
||||||
|
## `ctx.daemon` is set `true`. If that job was stopped due to re-setting
|
||||||
|
## `ctx.daemon` to `false`, it will be restarted next after it was reset
|
||||||
|
## as `true` not before there is some activity on the `runPool()`,
|
||||||
|
## `runSingle()`, or `runMulti()` functions.
|
||||||
|
##
|
||||||
|
case ctx.data.pivotState:
|
||||||
|
of FirstPivotSeen:
|
||||||
|
let elapsed = Moment.now() - ctx.data.pivotStamp
|
||||||
|
if FirstPivotSeenTimeout < elapsed:
|
||||||
|
# Switch to single peer pivot negotiation
|
||||||
|
ctx.data.pivot.pivotRelaxedMode(enable = true)
|
||||||
|
|
||||||
|
# Currently no need for other monitor tasks
|
||||||
|
ctx.daemon = false
|
||||||
|
|
||||||
|
when extraTraceMessages:
|
||||||
|
trace "First seen pivot timeout", elapsed,
|
||||||
|
pivotState=ctx.data.pivotState
|
||||||
|
return
|
||||||
|
# Otherwise delay for some time
|
||||||
|
|
||||||
|
of FirstPivotAccepted:
|
||||||
|
let elapsed = Moment.now() - ctx.data.pivotStamp
|
||||||
|
if FirstPivotAcceptedTimeout < elapsed:
|
||||||
|
# Switch to single peer pivot negotiation
|
||||||
|
ctx.data.pivot.pivotRelaxedMode(enable = true)
|
||||||
|
|
||||||
|
# Use currents pivot next time `runSingle()` is visited. This bent is
|
||||||
|
# necessary as there must be a peer initialising and syncing blocks. But
|
||||||
|
# this daemon has no peer assigned.
|
||||||
|
ctx.data.pivotState = FirstPivotUseRegardless
|
||||||
|
|
||||||
|
# Currently no need for other monitor tasks
|
||||||
|
ctx.daemon = false
|
||||||
|
|
||||||
|
when extraTraceMessages:
|
||||||
|
trace "First accepted pivot timeout", elapsed,
|
||||||
|
pivotState=ctx.data.pivotState
|
||||||
|
return
|
||||||
|
# Otherwise delay for some time
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Currently no need for other monitior tasks
|
||||||
|
ctx.daemon = false
|
||||||
|
return
|
||||||
|
|
||||||
|
# Without waiting, this function repeats every 50ms (as set with the constant
|
||||||
|
# `sync_sched.execLoopTimeElapsedMin`.) Larger waiting time cleans up logging.
|
||||||
|
await sleepAsync 300.milliseconds
|
||||||
|
|
||||||
|
|
||||||
proc runSingle*(buddy: FullBuddyRef) {.async.} =
|
proc runSingle*(buddy: FullBuddyRef) {.async.} =
|
||||||
## This peer worker is invoked if the peer-local flag `buddy.ctrl.multiOk`
|
## This peer worker is invoked if the peer-local flag `buddy.ctrl.multiOk`
|
||||||
## is set `false` which is the default mode. This flag is updated by the
|
## is set `false` which is the default mode. This flag is updated by the
|
||||||
|
@ -226,35 +298,99 @@ proc runSingle*(buddy: FullBuddyRef) {.async.} =
|
||||||
ctx = buddy.ctx
|
ctx = buddy.ctx
|
||||||
peer = buddy.peer
|
peer = buddy.peer
|
||||||
bq = buddy.data.bQueue
|
bq = buddy.data.bQueue
|
||||||
|
pv = buddy.data.pivot
|
||||||
|
|
||||||
if bq.blockQueueBacktrackOk:
|
when extraTraceMessages:
|
||||||
let rc = await bq.blockQueueBacktrackWorker()
|
trace "Single mode begin", peer, pivotState=ctx.data.pivotState
|
||||||
if rc.isOk:
|
|
||||||
|
|
||||||
# Update persistent database (may reset `multiOk`)
|
case ctx.data.pivotState:
|
||||||
buddy.ctrl.multiOk = true
|
of PivotStateInitial:
|
||||||
while buddy.processStaged() and not buddy.ctrl.stopped:
|
# Set initial state on first encounter
|
||||||
# Allow thread switch as `persistBlocks()` might be slow
|
ctx.data.pivotState = FirstPivotSeen
|
||||||
await sleepAsync(10.milliseconds)
|
ctx.data.pivotStamp = Moment.now()
|
||||||
return
|
ctx.daemon = true # Start monitor
|
||||||
|
|
||||||
buddy.ctrl.zombie = true
|
of FirstPivotSeen, FirstPivotAccepted:
|
||||||
|
discard
|
||||||
|
|
||||||
# Initialise/re-initialise this worker
|
of FirstPivotUseRegardless:
|
||||||
elif await buddy.data.pivot.pivotNegotiate(buddy.data.bestNumber):
|
# Magic case when we accept anything under the sun
|
||||||
|
let rc = pv.pivotHeader(relaxedMode=true)
|
||||||
|
if rc.isOK:
|
||||||
|
# Update/activate `bestNumber` from the pivot header
|
||||||
|
bq.bestNumber = some(rc.value.blockNumber)
|
||||||
|
ctx.data.pivotState = PivotRunMode
|
||||||
|
buddy.ctrl.multiOk = true
|
||||||
|
trace "Single pivot accepted", peer, pivot=('#' & $bq.bestNumber.get)
|
||||||
|
return # stop logging, otherwise unconditional return for this case
|
||||||
|
|
||||||
|
when extraTraceMessages:
|
||||||
|
trace "Single mode stopped", peer, pivotState=ctx.data.pivotState
|
||||||
|
return # unconditional return for this case
|
||||||
|
|
||||||
|
of PivotRunMode:
|
||||||
|
# Sync backtrack runs in single mode
|
||||||
|
if bq.blockQueueBacktrackOk:
|
||||||
|
let rc = await bq.blockQueueBacktrackWorker()
|
||||||
|
if rc.isOk:
|
||||||
|
# Update persistent database (may reset `multiOk`)
|
||||||
|
buddy.ctrl.multiOk = true
|
||||||
|
while buddy.processStaged() and not buddy.ctrl.stopped:
|
||||||
|
# Allow thread switch as `persistBlocks()` might be slow
|
||||||
|
await sleepAsync(10.milliseconds)
|
||||||
|
when extraTraceMessages:
|
||||||
|
trace "Single backtrack mode done", peer
|
||||||
|
return
|
||||||
|
|
||||||
|
buddy.ctrl.zombie = true
|
||||||
|
|
||||||
|
when extraTraceMessages:
|
||||||
|
trace "Single backtrack mode stopped", peer
|
||||||
|
return
|
||||||
|
# End case()
|
||||||
|
|
||||||
|
# Negotiate in order to derive the pivot header from this `peer`. This code
|
||||||
|
# location here is reached when there was no compelling reason for the
|
||||||
|
# `case()` handler to process and `return`.
|
||||||
|
if await pv.pivotNegotiate(buddy.data.bQueue.bestNumber):
|
||||||
|
# Update/activate `bestNumber` from the pivot header
|
||||||
|
bq.bestNumber = some(pv.pivotHeader.value.blockNumber)
|
||||||
|
ctx.data.pivotState = PivotRunMode
|
||||||
buddy.ctrl.multiOk = true
|
buddy.ctrl.multiOk = true
|
||||||
# Update/activate `bestNumber` for local use
|
trace "Pivot accepted", peer, pivot=('#' & $bq.bestNumber.get)
|
||||||
buddy.data.bestNumber =
|
return
|
||||||
some(buddy.data.pivot.pivotHeader.value.blockNumber)
|
|
||||||
|
|
||||||
elif not buddy.ctrl.stopped:
|
if buddy.ctrl.stopped:
|
||||||
await sleepAsync(2.seconds)
|
when extraTraceMessages:
|
||||||
|
trace "Single mode stopped", peer, pivotState=ctx.data.pivotState
|
||||||
|
return # done with this buddy
|
||||||
|
|
||||||
|
var napping = 2.seconds
|
||||||
|
case ctx.data.pivotState:
|
||||||
|
of FirstPivotSeen:
|
||||||
|
# Possible state transition
|
||||||
|
if pv.pivotHeader(relaxedMode=true).isOk:
|
||||||
|
ctx.data.pivotState = FirstPivotAccepted
|
||||||
|
ctx.data.pivotStamp = Moment.now()
|
||||||
|
napping = 300.milliseconds
|
||||||
|
of FirstPivotAccepted:
|
||||||
|
napping = 300.milliseconds
|
||||||
|
else:
|
||||||
|
discard
|
||||||
|
|
||||||
|
when extraTraceMessages:
|
||||||
|
trace "Single mode end", peer, pivotState=ctx.data.pivotState, napping
|
||||||
|
|
||||||
|
# Without waiting, this function repeats every 50ms (as set with the constant
|
||||||
|
# `sync_sched.execLoopTimeElapsedMin`.)
|
||||||
|
await sleepAsync napping
|
||||||
|
|
||||||
|
|
||||||
proc runPool*(buddy: FullBuddyRef; last: bool) =
|
proc runPool*(buddy: FullBuddyRef; last: bool): bool =
|
||||||
## Ocne started, the function `runPool()` is called for all worker peers in
|
## Once started, the function `runPool()` is called for all worker peers in
|
||||||
## a row (as the body of an iteration.) There will be no other worker peer
|
## sequence as the body of an iteration as long as the function returns
|
||||||
## functions activated simultaneously.
|
## `false`. There will be no other worker peer functions activated
|
||||||
|
## simultaneously.
|
||||||
##
|
##
|
||||||
## This procedure is started if the global flag `buddy.ctx.poolMode` is set
|
## This procedure is started if the global flag `buddy.ctx.poolMode` is set
|
||||||
## `true` (default is `false`.) It is the responsibility of the `runPool()`
|
## `true` (default is `false`.) It is the responsibility of the `runPool()`
|
||||||
|
@ -265,13 +401,12 @@ proc runPool*(buddy: FullBuddyRef; last: bool) =
|
||||||
##
|
##
|
||||||
## Note that this function does not run in `async` mode.
|
## Note that this function does not run in `async` mode.
|
||||||
##
|
##
|
||||||
let
|
# Mind the gap, fill in if necessary (function is peer independent)
|
||||||
ctx = buddy.ctx
|
buddy.data.bQueue.blockQueueGrout()
|
||||||
bq = buddy.data.bQueue
|
|
||||||
if ctx.poolMode:
|
# Stop after running once regardless of peer
|
||||||
# Mind the gap, fill in if necessary
|
buddy.ctx.poolMode = false
|
||||||
bq.blockQueueGrout()
|
true
|
||||||
ctx.poolMode = false
|
|
||||||
|
|
||||||
|
|
||||||
proc runMulti*(buddy: FullBuddyRef) {.async.} =
|
proc runMulti*(buddy: FullBuddyRef) {.async.} =
|
||||||
|
|
|
@ -279,6 +279,26 @@ proc pivotHeader*(bp: BestPivotWorkerRef): Result[BlockHeader,void] =
|
||||||
|
|
||||||
err()
|
err()
|
||||||
|
|
||||||
|
proc pivotHeader*(
|
||||||
|
bp: BestPivotWorkerRef; ## Worker peer
|
||||||
|
relaxedMode: bool; ## One time relaxed mode flag
|
||||||
|
): Result[BlockHeader,void] =
|
||||||
|
## Variant of `pivotHeader()` with `relaxedMode` flag as function argument.
|
||||||
|
if bp.header.isSome and
|
||||||
|
bp.peer notin bp.global.untrusted:
|
||||||
|
|
||||||
|
if pivotMinPeersToStartSync <= bp.global.trusted.len and
|
||||||
|
bp.peer in bp.global.trusted:
|
||||||
|
return ok(bp.header.unsafeGet)
|
||||||
|
|
||||||
|
if relaxedMode:
|
||||||
|
when extraTraceMessages:
|
||||||
|
trace "Returning not fully trusted pivot", peer=bp.peer,
|
||||||
|
trusted=bp.global.trusted.len, untrusted=bp.global.untrusted.len
|
||||||
|
return ok(bp.header.unsafeGet)
|
||||||
|
|
||||||
|
err()
|
||||||
|
|
||||||
proc pivotNegotiate*(
|
proc pivotNegotiate*(
|
||||||
bp: BestPivotWorkerRef; ## Worker peer
|
bp: BestPivotWorkerRef; ## Worker peer
|
||||||
minBlockNumber: Option[BlockNumber]; ## Minimum block number to expect
|
minBlockNumber: Option[BlockNumber]; ## Minimum block number to expect
|
||||||
|
|
|
@ -471,6 +471,19 @@ proc init*(
|
||||||
peer: peer,
|
peer: peer,
|
||||||
ctrl: ctrl)
|
ctrl: ctrl)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Public functions -- getter/setter
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
proc bestNumber*(qd: BlockQueueWorkerRef): Option[BlockNumber] =
|
||||||
|
## Getter
|
||||||
|
qd.bestNumber
|
||||||
|
|
||||||
|
proc `bestNumber=`*(qd: BlockQueueWorkerRef; val: Option[BlockNumber]) =
|
||||||
|
## Setter, needs to be set to something valid so that `blockQueueWorker()`
|
||||||
|
## does something useful.
|
||||||
|
qd.bestNumber = val
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# Public functions -- synchronous
|
# Public functions -- synchronous
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
Loading…
Reference in New Issue