remove macOS/aarch64 workaround from proposeBlockAux (#6138)
This commit is contained in:
parent
54d56995a3
commit
605bf99344
|
@ -1147,137 +1147,6 @@ proc proposeBlockAux(
|
||||||
blobsBundle.proofs, blobsBundle.blobs))
|
blobsBundle.proofs, blobsBundle.blobs))
|
||||||
else:
|
else:
|
||||||
Opt.none(seq[BlobSidecar])
|
Opt.none(seq[BlobSidecar])
|
||||||
|
|
||||||
# BIG BUG SOURCE: The `let` below cannot be combined with the others above!
|
|
||||||
# If combined, there are sometimes `SIGSEGV` during `test_keymanager_api`.
|
|
||||||
# This has only been observed on macOS (aarch64) in Jenkins, not on GitHub.
|
|
||||||
#
|
|
||||||
# - macOS 14.2.1 (23C71)
|
|
||||||
# - Xcode 15.1 (15C65)
|
|
||||||
# - Nim v1.6.18 (a749a8b742bd0a4272c26a65517275db4720e58a)
|
|
||||||
#
|
|
||||||
# Issue has started occuring around 12 Jan 2024, in a CI run for PR #5731.
|
|
||||||
# The PR did not change anything related to this, suggesting an environment
|
|
||||||
# or hardware change. The issue is flaky; could have been introduced earlier
|
|
||||||
# before surfacing in the aforementioned PR. About 30% to hit bug.
|
|
||||||
#
|
|
||||||
# [2024-01-12T11:54:21.011Z] Wrote test_keymanager_api/bootstrap_node.enr
|
|
||||||
# [2024-01-12T11:54:29.294Z] Serialization/deserialization [Beacon Node] [Preset: mainnet] . (0.00s)
|
|
||||||
# [2024-01-12T11:54:29.294Z] ListKeys requests [Beacon Node] [Preset: mainnet] .... (0.01s)
|
|
||||||
# [2024-01-12T11:54:34.870Z] ImportKeystores requests [Beacon Node] [Preset: mainnet] Traceback (most recent call last, using override)
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-libp2p/libp2p/protocols/rendezvous.nim(1016) main
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-libp2p/libp2p/protocols/rendezvous.nim(1006) NimMain
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-libp2p/libp2p/protocols/rendezvous.nim(997) PreMain
|
|
||||||
# [2024-01-12T11:54:34.870Z] tests/test_keymanager_api.nim(1502) atmtest_keymanager_apidotnim_Init000
|
|
||||||
# [2024-01-12T11:54:34.870Z] tests/test_keymanager_api.nim(1475) main
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(378) futureContinue
|
|
||||||
# [2024-01-12T11:54:34.870Z] tests/test_keymanager_api.nim(1481) main
|
|
||||||
# [2024-01-12T11:54:34.870Z] tests/test_keymanager_api.nim(307) startBeaconNode
|
|
||||||
# [2024-01-12T11:54:34.870Z] beacon_chain/nimbus_beacon_node.nim(1900) start
|
|
||||||
# [2024-01-12T11:54:34.870Z] beacon_chain/nimbus_beacon_node.nim(1847) run
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncengine.nim(150) poll
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(378) futureContinue
|
|
||||||
# [2024-01-12T11:54:34.870Z] tests/test_keymanager_api.nim(1465) delayedTests
|
|
||||||
# [2024-01-12T11:54:34.870Z] tests/test_keymanager_api.nim(392) runTests
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(378) futureContinue
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-unittest2/unittest2.nim(1147) runTests
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-unittest2/unittest2.nim(1086) runDirect
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-testutils/testutils/unittests.nim(16) runTestX60gensym2933
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(656) waitFor
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(631) pollFor
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncengine.nim(150) poll
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(378) futureContinue
|
|
||||||
# [2024-01-12T11:54:34.870Z] beacon_chain/validators/beacon_validators.nim(82) proposeBlockAux
|
|
||||||
# [2024-01-12T11:54:34.870Z] vendor/nimbus-build-system/vendor/Nim/lib/system/excpt.nim(631) signalHandler
|
|
||||||
# [2024-01-12T11:54:34.870Z] SIGSEGV: Illegal storage access. (Attempt to read from nil?)
|
|
||||||
#
|
|
||||||
# This appeared again around 25 Feb 2024, in a CI run for PR #5959,
|
|
||||||
# despite the extra `let` having been applied -- once more observed on
|
|
||||||
# macOS (aarch64) in Jenkins, and much rarer than before.
|
|
||||||
#
|
|
||||||
# [2024-02-25T23:21:24.533Z] Wrote test_keymanager_api/bootstrap_node.enr
|
|
||||||
# [2024-02-25T23:21:32.756Z] Serialization/deserialization [Beacon Node] [Preset: mainnet] . (0.00s)
|
|
||||||
# [2024-02-25T23:21:32.756Z] ListKeys requests [Beacon Node] [Preset: mainnet] .... (0.01s)
|
|
||||||
# [2024-02-25T23:21:37.219Z] ImportKeystores requests [Beacon Node] [Preset: mainnet] Traceback (most recent call last, using override)
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-libp2p/libp2p/protocols/pubsub/pubsub.nim(1068) main
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-libp2p/libp2p/protocols/pubsub/pubsub.nim(1058) NimMain
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-libp2p/libp2p/protocols/pubsub/pubsub.nim(1049) PreMain
|
|
||||||
# [2024-02-25T23:21:37.219Z] tests/test_keymanager_api.nim(1501) atmtest_keymanager_apidotnim_Init000
|
|
||||||
# [2024-02-25T23:21:37.219Z] tests/test_keymanager_api.nim(1474) main
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(382) futureContinue
|
|
||||||
# [2024-02-25T23:21:37.219Z] tests/test_keymanager_api.nim(1480) main
|
|
||||||
# [2024-02-25T23:21:37.219Z] tests/test_keymanager_api.nim(307) startBeaconNode
|
|
||||||
# [2024-02-25T23:21:37.219Z] beacon_chain/nimbus_beacon_node.nim(1916) start
|
|
||||||
# [2024-02-25T23:21:37.219Z] beacon_chain/nimbus_beacon_node.nim(1863) run
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncengine.nim(150) poll
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(382) futureContinue
|
|
||||||
# [2024-02-25T23:21:37.219Z] tests/test_keymanager_api.nim(1464) delayedTests
|
|
||||||
# [2024-02-25T23:21:37.219Z] tests/test_keymanager_api.nim(391) runTests
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(382) futureContinue
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-unittest2/unittest2.nim(1151) runTests
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-unittest2/unittest2.nim(1086) runDirect
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-testutils/testutils/unittests.nim(16) runTestX60gensym3188
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(660) waitFor
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(635) pollFor
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncengine.nim(150) poll
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronos/chronos/internal/asyncfutures.nim(382) futureContinue
|
|
||||||
# [2024-02-25T23:21:37.219Z] vendor/nim-chronicles/chronicles.nim(251) proposeBlockAux
|
|
||||||
# [2024-02-25T23:21:37.219Z] SIGSEGV: Illegal storage access. (Attempt to read from nil?)
|
|
||||||
#
|
|
||||||
# One theory is that PR #5946 may increase the frequency, as there were
|
|
||||||
# times where the Jenkins CI failed almost every time using a shorter trace.
|
|
||||||
# However, the problem was once more flaky, with some passes in-between.
|
|
||||||
# For now, PR #5946 was reverted (low priority), and the problem is gone,
|
|
||||||
# whether related or not.
|
|
||||||
#
|
|
||||||
# [2024-02-23T23:11:47.700Z] Wrote test_keymanager_api/bootstrap_node.enr
|
|
||||||
# [2024-02-23T23:11:54.728Z] Serialization/deserialization [Beacon Node] [Preset: mainnet] . (0.00s)
|
|
||||||
# [2024-02-23T23:11:54.728Z] ListKeys requests [Beacon Node] [Preset: mainnet] .... (0.01s)
|
|
||||||
# [2024-02-23T23:11:59.523Z] ImportKeystores requests [Beacon Node] [Preset: mainnet] Traceback (most recent call last, using override)
|
|
||||||
# [2024-02-23T23:11:59.523Z] vendor/nim-libp2p/libp2p/protocols/pubsub/pubsub.nim(1067) main
|
|
||||||
# [2024-02-23T23:11:59.523Z] vendor/nim-libp2p/libp2p/protocols/pubsub/pubsub.nim(1057) NimMain
|
|
||||||
# [2024-02-23T23:11:59.523Z] vendor/nim-chronos/chronos/internal/asyncengine.nim(150) poll
|
|
||||||
# [2024-02-23T23:11:59.523Z] vendor/nim-chronos/chronos/internal/asyncengine.nim(150) poll
|
|
||||||
# [2024-02-23T23:11:59.523Z] SIGSEGV: Illegal storage access. (Attempt to read from nil?)
|
|
||||||
#
|
|
||||||
# The generated `nimcache` differs slightly if the `let` are separated from
|
|
||||||
# a single block; separation introduces an additional state in closure iter.
|
|
||||||
# This change, maybe combined with some macOS specific compiler specifics,
|
|
||||||
# could this trigger the `SIGSEGV`? Maybe the extra state adds just enough
|
|
||||||
# complexity to the function to disable certain problematic optimizations?
|
|
||||||
# The change in size of the environment changes a number of things such as
|
|
||||||
# alignment and which parts of an environment contain pointers and so on,
|
|
||||||
# which in turn may have surprising behavioural effects, ie most likely this
|
|
||||||
# extra state masks some underlying issue. Furthermore, the combination of
|
|
||||||
# `(await xyz).valueOr: return` is not very commonly used with other `await`
|
|
||||||
# in the same `let` block, which could explain this not being more common.
|
|
||||||
#
|
|
||||||
# Note that when compiling for Wasm, there are similar bugs with `results`
|
|
||||||
# when inlining unwraps, e.g., in `eth2_rest_serialization.nim`.
|
|
||||||
# These have not been investigated thoroughly so far as that project uses
|
|
||||||
# Nim 2.0 with --mm:orc and is just a prototype for Wasm, no production use.
|
|
||||||
# But maybe there is something weird going on with `results` related to the
|
|
||||||
# random `SIGSEGV` that we are now observing here, related to doing too much
|
|
||||||
# inline logic without defining intermediate isolated `let` statements.
|
|
||||||
#
|
|
||||||
# if mediaType == ApplicationJsonMediaType:
|
|
||||||
# try:
|
|
||||||
# - ok RestJson.decode(value, T,
|
|
||||||
# - requireAllFields = true,
|
|
||||||
# - allowUnknownFields = true)
|
|
||||||
# + let r = RestJson.decode(value, T,
|
|
||||||
# + requireAllFields = true,
|
|
||||||
# + allowUnknownFields = true)
|
|
||||||
# + ok r
|
|
||||||
# except SerializationError as exc:
|
|
||||||
# debug "Failed to deserialize REST JSON data",
|
|
||||||
# err = exc.formatMsg("<data>"),
|
|
||||||
#
|
|
||||||
# At this time we can only speculate about the trigger of these issues.
|
|
||||||
# Until a shared pattern can be identified, it is better to apply
|
|
||||||
# workarounds that at least avoid the known to be reachable triggers.
|
|
||||||
# The solution is hacky and far from desirable; it is what it is.
|
|
||||||
let
|
|
||||||
newBlockRef = (
|
newBlockRef = (
|
||||||
await node.router.routeSignedBeaconBlock(signedBlock, blobsOpt)
|
await node.router.routeSignedBeaconBlock(signedBlock, blobsOpt)
|
||||||
).valueOr:
|
).valueOr:
|
||||||
|
|
Loading…
Reference in New Issue