Hotfix: Fix a race condition leading to a busy loop preventing progress in Eth1 syncing

This commit is contained in:
Zahary Karadjov 2022-02-15 21:10:04 +02:00 committed by zah
parent 496d0266ec
commit c672628be8
1 changed file with 11 additions and 11 deletions

View File

@ -947,19 +947,17 @@ proc detectPrimaryProviderComingOnline(m: Eth1Monitor) {.async.} =
continue continue
var tempProvider = tempProviderRes.get var tempProvider = tempProviderRes.get
var testRequest = tempProvider.web3.provider.net_version() let testRequest = tempProvider.web3.provider.net_version()
yield testRequest yield testRequest or sleepAsync(web3Timeouts)
try: await tempProvider.close() traceAsyncErrors tempProvider.close()
except CatchableError as err:
debug "Failed to close temp web3 provider", err = err.msg
if testRequest.failed: if testRequest.completed and m.state == Started:
await sleepAsync(checkInterval)
elif m.state == Started:
m.state = ReadyToRestartToPrimary m.state = ReadyToRestartToPrimary
return return
else:
await sleepAsync(checkInterval)
proc doStop(m: Eth1Monitor) {.async.} = proc doStop(m: Eth1Monitor) {.async.} =
safeCancel m.runFut safeCancel m.runFut
@ -1173,10 +1171,13 @@ func init(T: type FullBlockId, blk: Eth1BlockHeader|BlockObject): T =
FullBlockId(number: Eth1BlockNumber blk.number, hash: blk.hash) FullBlockId(number: Eth1BlockNumber blk.number, hash: blk.hash)
proc startEth1Syncing(m: Eth1Monitor, delayBeforeStart: Duration) {.async.} = proc startEth1Syncing(m: Eth1Monitor, delayBeforeStart: Duration) {.async.} =
if m.state in {Started, ReadyToRestartToPrimary}: if m.state == Started:
return return
let isFirstRun = m.state == Initialized let isFirstRun = m.state == Initialized
let needsReset = m.state in {Failed, ReadyToRestartToPrimary}
m.state = Started
if delayBeforeStart != ZeroDuration: if delayBeforeStart != ZeroDuration:
await sleepAsync(delayBeforeStart) await sleepAsync(delayBeforeStart)
@ -1184,14 +1185,13 @@ proc startEth1Syncing(m: Eth1Monitor, delayBeforeStart: Duration) {.async.} =
# If the monitor died with an exception, the web3 provider may be in # If the monitor died with an exception, the web3 provider may be in
# an arbitrary state, so we better reset it (not doing this has resulted # an arbitrary state, so we better reset it (not doing this has resulted
# in resource leaks historically). # in resource leaks historically).
if not m.dataProvider.isNil and m.state == Failed: if not m.dataProvider.isNil and needsReset:
# We introduce a local var to eliminate the risk of scheduling two # We introduce a local var to eliminate the risk of scheduling two
# competing calls to `close` below. # competing calls to `close` below.
let provider = m.dataProvider let provider = m.dataProvider
m.dataProvider = nil m.dataProvider = nil
await provider.close() await provider.close()
m.state = Started
await m.ensureDataProvider() await m.ensureDataProvider()
# We might need to reset the chain if the new provider disagrees # We might need to reset the chain if the new provider disagrees