Fix a potential segfault and various potential stalls (#4003)

* Fixes a segfault during block production when the Keymanager API
  is disabled. The Keymanager is now disabled on half of the local
  testnet nodes to catch such problems in the future.

* Fixes multiple potential stalls from REST requests being done
  without a timeout. From practice, we know that such requests
  can hang forever if not cancelled with a timeout. At best,
  this would be a resource leak, at worst, it may lead to a
  full stall of the client and missed validator duties.

* Changes some Options usages to Opt (for easier use of valueOr)
This commit is contained in:
zah 2022-08-20 00:51:30 +03:00 committed by GitHub
parent f537f263df
commit b1ac9c9fe4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 118 additions and 65 deletions

View File

@ -610,7 +610,7 @@ proc init*(T: type BeaconNode,
config.validatorMonitorAuto, config.validatorMonitorTotals))
for key in config.validatorMonitorPubkeys:
validatorMonitor[].addMonitor(key, none(ValidatorIndex))
validatorMonitor[].addMonitor(key, Opt.none(ValidatorIndex))
let
networkGenesisValidatorsRoot: Option[Eth2Digest] =
@ -672,7 +672,7 @@ proc init*(T: type BeaconNode,
info "Loading slashing protection database (v2)",
path = config.validatorsDir()
proc getValidatorIdx(pubkey: ValidatorPubKey): Option[ValidatorIndex] =
proc getValidatorIdx(pubkey: ValidatorPubKey): Opt[ValidatorIndex] =
withState(dag.headState):
findValidator(state().data.validators.asSeq(), pubkey)

View File

@ -266,8 +266,8 @@ proc asyncInit(vc: ValidatorClientRef): Future[ValidatorClientRef] {.async.} =
vc.syncCommitteeService = await SyncCommitteeServiceRef.init(vc)
vc.keymanagerServer = keymanagerInitResult.server
if vc.keymanagerServer != nil:
func getValidatorIdx(pubkey: ValidatorPubKey): Option[ValidatorIndex] =
none ValidatorIndex
func getValidatorIdx(pubkey: ValidatorPubKey): Opt[ValidatorIndex] =
Opt.none ValidatorIndex
vc.keymanagerHost = newClone KeymanagerHost.init(
validatorPool,

View File

@ -69,3 +69,17 @@ const
# https://github.com/ethereum/builder-specs/blob/v0.2.0/specs/validator.md#constants
EPOCHS_PER_VALIDATOR_REGISTRATION_SUBMISSION* = 1
BUILDER_PROPOSAL_DELAY_TOLERANCE* = 1.seconds
func shortLog*(v: BlindedBeaconBlock): auto =
(
slot: shortLog(v.slot),
proposer_index: v.proposer_index,
parent_root: shortLog(v.parent_root),
state_root: shortLog(v.state_root),
)
func shortLog*(v: SignedBlindedBeaconBlock): auto =
(
blck: shortLog(v.message),
signature: shortLog(v.signature)
)

View File

@ -416,7 +416,7 @@ proc addValidator*(vc: ValidatorClientRef, keystore: KeystoreData) =
let slot = vc.currentSlot()
case keystore.kind
of KeystoreKind.Local:
vc.attachedValidators[].addLocalValidator(keystore, none[ValidatorIndex](),
vc.attachedValidators[].addLocalValidator(keystore, Opt.none ValidatorIndex,
slot)
of KeystoreKind.Remote:
let
@ -442,7 +442,7 @@ proc addValidator*(vc: ValidatorClientRef, keystore: KeystoreData) =
res
if len(clients) > 0:
vc.attachedValidators[].addRemoteValidator(keystore, clients,
none[ValidatorIndex](), slot)
Opt.none ValidatorIndex, slot)
else:
warn "Unable to initialize remote validator",
validator = $keystore.pubkey

View File

@ -85,7 +85,7 @@ proc pollForValidatorIndices*(vc: ValidatorClientRef) {.async.} =
debug "Local validator updated with index",
pubkey = item.validator.pubkey, index = item.index
vc.attachedValidators[].updateValidator(item.validator.pubkey,
item.index)
item.index)
# Adding validator for doppelganger detection.
vc.addDoppelganger(
vc.attachedValidators[].getValidator(item.validator.pubkey))

View File

@ -68,7 +68,7 @@ type
ImportResult*[T] = Result[T, AddValidatorFailure]
ValidatorPubKeyToIdxFn* =
proc (pubkey: ValidatorPubKey): Option[ValidatorIndex]
proc (pubkey: ValidatorPubKey): Opt[ValidatorIndex]
{.raises: [Defect], gcsafe.}
KeymanagerHost* = object
@ -109,11 +109,11 @@ func init*(T: type KeymanagerHost,
getBeaconTimeFn: getBeaconTimeFn)
proc getValidatorIdx*(host: KeymanagerHost,
pubkey: ValidatorPubKey): Option[ValidatorIndex] =
pubkey: ValidatorPubKey): Opt[ValidatorIndex] =
if host.getValidatorIdxFn != nil:
host.getValidatorIdxFn(pubkey)
else:
none ValidatorIndex
Opt.none ValidatorIndex
proc addLocalValidator*(host: KeymanagerHost, keystore: KeystoreData) =
let

View File

@ -45,10 +45,15 @@ import
from eth/async_utils import awaitWithTimeout
# Metrics for tracking attestation and beacon block loss
const delayBuckets = [-Inf, -4.0, -2.0, -1.0, -0.5, -0.1, -0.05,
0.05, 0.1, 0.5, 1.0, 2.0, 4.0, 8.0, Inf]
const
delayBuckets = [-Inf, -4.0, -2.0, -1.0, -0.5, -0.1, -0.05,
0.05, 0.1, 0.5, 1.0, 2.0, 4.0, 8.0, Inf]
BUILDER_BLOCK_SUBMISSION_DELAY_TOLERANCE = 1.seconds
BUILDER_STATUS_DELAY_TOLERANCE = 3.seconds
BUILDER_VALIDATOR_REGISTRATION_DELAY_TOLERANCE = 3.seconds
# Metrics for tracking attestation and beacon block loss
declareCounter beacon_light_client_finality_updates_sent,
"Number of LC finality updates sent by this peer"
@ -70,16 +75,15 @@ logScope: topics = "beacval"
type
ForkedBlockResult* = Result[ForkedBeaconBlock, string]
proc findValidator*(validators: auto, pubkey: ValidatorPubKey):
Option[ValidatorIndex] =
proc findValidator*(validators: auto, pubkey: ValidatorPubKey): Opt[ValidatorIndex] =
let idx = validators.findIt(it.pubkey == pubkey)
if idx == -1:
# We allow adding a validator even if its key is not in the state registry:
# it might be that the deposit for this validator has not yet been processed
notice "Validator deposit not yet processed, monitoring", pubkey
none(ValidatorIndex)
Opt.none ValidatorIndex
else:
some(idx.ValidatorIndex)
Opt.some idx.ValidatorIndex
proc addLocalValidator(node: BeaconNode, validators: auto,
item: KeystoreData, slot: Slot) =
@ -90,7 +94,7 @@ proc addLocalValidator(node: BeaconNode, validators: auto,
# TODO: This should probably be moved to the validator_pool module
proc addRemoteValidator*(pool: var ValidatorPool,
index: Option[ValidatorIndex],
index: Opt[ValidatorIndex],
item: KeystoreData,
slot: Slot) =
var clients: seq[(RestClientRef, RemoteSignerInfo)]
@ -151,7 +155,7 @@ proc getAttachedValidator(node: BeaconNode,
if validator != nil and validator.index != some(idx):
# Update index, in case the validator was activated!
notice "Validator activated", pubkey = validator.pubkey, index = idx
validator.index = some(idx)
validator.index = Opt.some(idx)
validator
else:
warn "Validator index out of bounds",
@ -163,10 +167,10 @@ proc getAttachedValidator(node: BeaconNode,
let key = node.dag.validatorKey(idx)
if key.isSome():
let validator = node.getAttachedValidator(key.get().toPubKey())
if validator != nil and validator.index != some(idx):
if validator != nil and validator.index != Opt.some(idx):
# Update index, in case the validator was activated!
notice "Validator activated", pubkey = validator.pubkey, index = idx
validator.index = some(idx)
validator.index = Opt.some(idx)
validator
else:
warn "Validator key not found",
@ -341,6 +345,17 @@ proc get_execution_payload(
asConsensusExecutionPayload(
await execution_engine.getPayload(payload_id.get))
proc getFeeRecipient(node: BeaconNode,
pubkey: ValidatorPubKey,
validatorIdx: ValidatorIndex,
epoch: Epoch): Eth1Address =
node.dynamicFeeRecipientsStore.getDynamicFeeRecipient(validatorIdx, epoch).valueOr:
if node.keymanagerHost != nil:
node.keymanagerHost[].getSuggestedFeeRecipient(pubkey).valueOr:
node.config.defaultFeeRecipient
else:
node.config.defaultFeeRecipient
from web3/engine_api_types import PayloadExecutionStatus
proc getExecutionPayload(
@ -381,11 +396,7 @@ proc getExecutionPayload(
terminalBlockHash
latestFinalized =
node.dag.loadExecutionBlockRoot(node.dag.finalizedHead.blck)
dynamicFeeRecipient = node.dynamicFeeRecipientsStore.getDynamicFeeRecipient(
validator_index, epoch)
feeRecipient = dynamicFeeRecipient.valueOr:
node.keymanagerHost[].getSuggestedFeeRecipient(pubkey).valueOr:
node.config.defaultFeeRecipient
feeRecipient = node.getFeeRecipient(pubkey, validator_index, epoch)
payload_id = (await forkchoice_updated(
proposalState.bellatrixData.data, latestHead, latestFinalized,
feeRecipient,
@ -523,8 +534,10 @@ proc getBlindedExecutionPayload(
if node.payloadBuilderRestClient.isNil:
return err "getBlindedBeaconBlock: nil REST client"
let blindedHeader = await node.payloadBuilderRestClient.getHeader(
slot, executionBlockRoot, pubkey)
let blindedHeader = awaitWithTimeout(
node.payloadBuilderRestClient.getHeader(slot, executionBlockRoot, pubkey),
BUILDER_PROPOSAL_DELAY_TOLERANCE):
return err "Timeout when obtaining blinded header from builder"
const httpOk = 200
if blindedHeader.status != httpOk:
@ -659,17 +672,21 @@ proc proposeBlockMEV(
# protection check
let unblindedPayload =
try:
await node.payloadBuilderRestClient.submitBlindedBlock(
blindedBlock.get)
awaitWithTimeout(
node.payloadBuilderRestClient.submitBlindedBlock(blindedBlock.get),
BUILDER_BLOCK_SUBMISSION_DELAY_TOLERANCE):
error "Submitting blinded block timed out",
blk = shortLog(blindedBlock.get)
return Opt.some head
# From here on, including error paths, disallow local EL production by
# returning Opt.some, regardless of whether on head or newBlock.
except RestDecodingError as exc:
info "proposeBlockMEV: REST recoding error",
error "proposeBlockMEV: REST recoding error",
slot, head = shortLog(head), validator_index, blindedBlock,
error = exc.msg
return Opt.some head
except CatchableError as exc:
info "proposeBlockMEV: exception in submitBlindedBlock",
error "proposeBlockMEV: exception in submitBlindedBlock",
slot, head = shortLog(head), validator_index, blindedBlock,
error = exc.msg
return Opt.some head
@ -1177,14 +1194,17 @@ proc updateValidatorMetrics*(node: BeaconNode) =
from std/times import epochTime
proc getValidatorRegistration(
node: BeaconNode, validator: AttachedValidator):
node: BeaconNode, validator: AttachedValidator, epoch: Epoch):
Future[Result[SignedValidatorRegistrationV1, string]] {.async.} =
# Stand-in, reasonable default
const gasLimit = 30000000
let feeRecipient =
node.keymanagerHost[].getSuggestedFeeRecipient(validator.pubkey).valueOr:
node.config.defaultFeeRecipient
let validatorIdx = validator.index.valueOr:
# The validator index will be missing when the validator was not
# activated for duties yet. We can safely skip the registration then.
return
let feeRecipient = node.getFeeRecipient(validator.pubkey, validatorIdx, epoch)
var validatorRegistration = SignedValidatorRegistrationV1(
message: ValidatorRegistrationV1(
fee_recipient: ExecutionAddress(data: distinctBase(feeRecipient)),
@ -1205,7 +1225,7 @@ proc getValidatorRegistration(
return ok validatorRegistration
proc registerValidators(node: BeaconNode) {.async.} =
proc registerValidators(node: BeaconNode, epoch: Epoch) {.async.} =
try:
if (not node.config.payloadBuilderEnable) or
node.currentSlot.epoch < node.dag.cfg.BELLATRIX_FORK_EPOCH:
@ -1217,7 +1237,11 @@ proc registerValidators(node: BeaconNode) {.async.} =
const HttpOk = 200
let restBuilderStatus = await node.payloadBuilderRestClient.checkBuilderStatus
let restBuilderStatus = awaitWithTimeout(node.payloadBuilderRestClient.checkBuilderStatus(),
BUILDER_STATUS_DELAY_TOLERANCE):
debug "Timeout when obtaining builder status"
return
if restBuilderStatus.status != HttpOk:
warn "registerValidators: specified builder or relay not available",
builderUrl = node.config.payloadBuilderUrl,
@ -1243,17 +1267,19 @@ proc registerValidators(node: BeaconNode) {.async.} =
continue
let validatorRegistration =
await node.getValidatorRegistration(validator)
await node.getValidatorRegistration(validator, epoch)
if validatorRegistration.isErr:
debug "registerValidators: validatorRegistration failed",
validatorRegistration
error "registerValidators: validatorRegistration failed",
validatorRegistration
continue
validatorRegistrations.add validatorRegistration.get
let registerValidatorResult =
await node.payloadBuilderRestClient.registerValidator(
validatorRegistrations)
awaitWithTimeout(node.payloadBuilderRestClient.registerValidator(validatorRegistrations),
BUILDER_VALIDATOR_REGISTRATION_DELAY_TOLERANCE):
error "Timeout when registering validator with builder"
return
if HttpOk != registerValidatorResult.status:
warn "registerValidators: Couldn't register validator with MEV builder",
registerValidatorResult
@ -1324,7 +1350,7 @@ proc handleValidatorDuties*(node: BeaconNode, lastSlot, slot: Slot) {.async.} =
# `EPOCHS_PER_VALIDATOR_REGISTRATION_SUBMISSION` epochs.
if slot.is_epoch and
slot.epoch mod EPOCHS_PER_VALIDATOR_REGISTRATION_SUBMISSION == 0:
asyncSpawn node.registerValidators()
asyncSpawn node.registerValidators(slot.epoch)
let
newHead = await handleProposal(node, head, slot)

View File

@ -188,7 +188,7 @@ type
MonitoredValidator = object
id: string # A short id is used above all for metrics
pubkey: ValidatorPubKey
index: Option[ValidatorIndex]
index: Opt[ValidatorIndex]
summaries: array[2, EpochSummary] # We monitor the current and previous epochs
ValidatorMonitor* = object
@ -222,7 +222,7 @@ proc update_if_lt[T](current: var Option[T], val: T) =
proc addMonitor*(
self: var ValidatorMonitor, pubkey: ValidatorPubKey,
index: Option[ValidatorIndex]) =
index: Opt[ValidatorIndex]) =
if pubkey in self.monitors:
return
@ -249,7 +249,7 @@ proc addAutoMonitor*(
# automatic monitors must be registered with index - we don't look for them in
# the state
self.addMonitor(pubkey, some(index))
self.addMonitor(pubkey, Opt.some(index))
info "Started monitoring validator",
validator = shortLog(pubkey), pubkey, index
@ -308,7 +308,6 @@ proc updateEpoch(self: var ValidatorMonitor, epoch: Epoch) =
monitor.summaries[summaryIdx].name.get.toGaugeValue(),
[if self.totals: total else: monitor.id])
setAll(
validator_monitor_prev_epoch_attestations_total,
attestations)
@ -542,7 +541,7 @@ proc registerState*(self: var ValidatorMonitor, state: ForkyBeaconState) =
# Update indices for the validators we're monitoring
for v in self.knownValidators..<state.validators.len:
self.monitors.withValue(state.validators[v].pubkey, monitor):
monitor[][].index = some(ValidatorIndex(v))
monitor[][].index = Opt.some(ValidatorIndex(v))
self.indices[uint64(v)] = monitor[]
info "Started monitoring validator",

View File

@ -12,7 +12,7 @@ else:
import
std/[options, tables, json, streams, sequtils, uri],
chronos, chronicles, metrics,
chronos, chronicles, metrics, eth/async_utils,
json_serialization/std/net,
presto, presto/client,
@ -28,6 +28,9 @@ export
rest_types, eth2_rest_serialization, rest_remote_signer_calls,
slashing_protection
const
WEB3_SIGNER_DELAY_TOLERANCE = 3.seconds
declareGauge validators,
"Number of validators attached to the beacon node"
@ -51,7 +54,7 @@ type
# it does not change as long as there are no reorgs on eth1 - however, the
# index might not be valid in all eth2 histories, so it should not be
# assumed that a valid index is stored here!
index*: Option[ValidatorIndex]
index*: Opt[ValidatorIndex]
# Cache the latest slot signature - the slot signature is used to determine
# if the validator will be aggregating (in the near future)
@ -87,7 +90,7 @@ template count*(pool: ValidatorPool): int =
len(pool.validators)
proc addLocalValidator*(pool: var ValidatorPool, item: KeystoreData,
index: Option[ValidatorIndex], slot: Slot) =
index: Opt[ValidatorIndex], slot: Slot) =
doAssert item.kind == KeystoreKind.Local
let pubkey = item.pubkey
let v = AttachedValidator(kind: ValidatorKind.Local, pubkey: pubkey,
@ -99,11 +102,11 @@ proc addLocalValidator*(pool: var ValidatorPool, item: KeystoreData,
proc addLocalValidator*(pool: var ValidatorPool, item: KeystoreData,
slot: Slot) =
addLocalValidator(pool, item, none[ValidatorIndex](), slot)
addLocalValidator(pool, item, Opt.none ValidatorIndex, slot)
proc addRemoteValidator*(pool: var ValidatorPool, item: KeystoreData,
clients: seq[(RestClientRef, RemoteSignerInfo)],
index: Option[ValidatorIndex], slot: Slot) =
index: Opt[ValidatorIndex], slot: Slot) =
doAssert item.kind == KeystoreKind.Remote
let pubkey = item.pubkey
let v = AttachedValidator(kind: ValidatorKind.Remote, pubkey: pubkey,
@ -144,7 +147,7 @@ proc updateValidator*(pool: var ValidatorPool, pubkey: ValidatorPubKey,
## not present in the pool.
var v: AttachedValidator
if pool.validators.pop(pubkey, v):
v.index = some(index)
v.index = Opt.some(index)
pool.validators[pubkey] = v
proc close*(pool: var ValidatorPool) =
@ -173,8 +176,11 @@ proc signWithDistributedKey(v: AttachedValidator,
{.async.} =
doAssert v.data.threshold <= uint32(v.clients.len)
let signatureReqs = mapIt(v.clients, it[0].signData(it[1].pubkey, request))
await allFutures(signatureReqs)
let
signatureReqs = mapIt(v.clients, it[0].signData(it[1].pubkey, request))
deadline = sleepAsync(WEB3_SIGNER_DELAY_TOLERANCE)
await allFutures(signatureReqs) or deadline
var shares: seq[SignatureShare]
var neededShares = v.data.threshold
@ -200,7 +206,9 @@ proc signWithSingleKey(v: AttachedValidator,
{.async.} =
doAssert v.clients.len == 1
let (client, info) = v.clients[0]
let res = await client.signData(info.pubkey, request)
let res = awaitWithTimeout(client.signData(info.pubkey, request),
WEB3_SIGNER_DELAY_TOLERANCE):
return SignatureResult.err "Timeout"
if res.isErr:
return SignatureResult.err res.error
else:

View File

@ -32,12 +32,12 @@ template findIt*(s: openArray, predicate: untyped): int =
res
proc findValidator(validators: seq[Validator], pubKey: ValidatorPubKey):
Option[ValidatorIndex] =
Opt[ValidatorIndex] =
let idx = validators.findIt(it.pubkey == pubKey)
if idx == -1:
none(ValidatorIndex)
Opt.none ValidatorIndex
else:
some(idx.ValidatorIndex)
Opt.some idx.ValidatorIndex
cli do(validatorsDir: string, secretsDir: string,
startState: string, network: string):

View File

@ -966,6 +966,12 @@ for NUM_NODE in $(seq 0 $(( NUM_NODES - 1 ))); do
WEB3_ARG="--web3-url=http://127.0.0.1:${EL_RPC_PORTS[${NUM_NODE}]}"
fi
# We enabled the keymanager on half of the nodes
KEYMANAGER_FLAG=""
if [ $((NUM_NODE % 2)) -eq 0 ]; then
KEYMANAGER_FLAG="--keymanager"
fi
# TODO re-add --jwt-secret
${BEACON_NODE_COMMAND} \
--config-file="${CLI_CONF_FILE}" \
@ -976,9 +982,9 @@ for NUM_NODE in $(seq 0 $(( NUM_NODES - 1 ))); do
${BOOTSTRAP_ARG} \
${WEB3_ARG} \
${STOP_AT_EPOCH_FLAG} \
--rest-port="$(( BASE_REST_PORT + NUM_NODE ))" \
--keymanager \
${KEYMANAGER_FLAG} \
--keymanager-token-file="${DATA_DIR}/keymanager-token" \
--rest-port="$(( BASE_REST_PORT + NUM_NODE ))" \
--metrics-port="$(( BASE_METRICS_PORT + NUM_NODE ))" \
--light-client=on \
${EXTRA_ARGS} \
@ -1012,7 +1018,7 @@ for NUM_NODE in $(seq 0 $(( NUM_NODES - 1 ))); do
--log-level="${LOG_LEVEL}" \
${STOP_AT_EPOCH_FLAG} \
--data-dir="${VALIDATOR_DATA_DIR}" \
--keymanager \
${KEYMANAGER_FLAG} \
--keymanager-port=$((BASE_VC_KEYMANAGER_PORT + NUM_NODE)) \
--keymanager-token-file="${DATA_DIR}/keymanager-token" \
--beacon-node="http://127.0.0.1:$((BASE_REST_PORT + NUM_NODE))" \

View File

@ -222,7 +222,7 @@ suite "Gossip validation - Extra": # Not based on preset config
keystoreData = KeystoreData(kind: KeystoreKind.Local,
privateKey: MockPrivKeys[index])
validator = AttachedValidator(pubkey: pubkey,
kind: ValidatorKind.Local, data: keystoreData, index: some(index))
kind: ValidatorKind.Local, data: keystoreData, index: Opt.some index)
resMsg = waitFor getSyncCommitteeMessage(
validator, state[].data.fork, state[].data.genesis_validators_root, slot,
state[].root)