pre-emptive duplicate validator detection heuristic
parent 55ecb61c3a
commit a16f5afcd5
@@ -135,18 +135,18 @@ proc updateCurrent(pool: var AttestationPool, wallSlot: Slot) =

 func addToAggregates(pool: var AttestationPool, attestation: Attestation) =
   # do a lookup for the current slot and get it's associated htrs/attestations
-  var aggreated_attestation = pool.attestationAggregates.mgetOrPut(
+  var aggregated_attestation = pool.attestationAggregates.mgetOrPut(
     attestation.data.slot, Table[Eth2Digest, Attestation]()).
     # do a lookup for the same attestation data htr and get the attestation
     mgetOrPut(attestation.data.hash_tree_root, attestation)
   # if the aggregation bits differ (we didn't just insert it into the table)
   # and only if there is no overlap of the signatures ==> aggregate!
-  if not aggreated_attestation.aggregation_bits.overlaps(attestation.aggregation_bits):
+  if not aggregated_attestation.aggregation_bits.overlaps(attestation.aggregation_bits):
     var agg {.noInit.}: AggregateSignature
-    agg.init(aggreated_attestation.signature)
-    aggreated_attestation.aggregation_bits.combine(attestation.aggregation_bits)
+    agg.init(aggregated_attestation.signature)
+    aggregated_attestation.aggregation_bits.combine(attestation.aggregation_bits)
     agg.aggregate(attestation.signature)
-    aggreated_attestation.signature = agg.finish()
+    aggregated_attestation.signature = agg.finish()

 proc addAttestation*(pool: var AttestationPool,
                      attestation: Attestation,
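For intuition, the overlap rule above can be played out with simplified stand-in types: two attestations merge only when their aggregation bits are disjoint, because overlapping bits would fold the same validator's signature into the aggregate twice. A minimal sketch, where ToyAttestation and tryAggregate are illustrative names, not the pool's real BitList/BLS machinery:

    type ToyAttestation = object
      bits: set[uint8]    # stands in for aggregation_bits
      sigs: seq[string]   # stands in for the aggregate BLS signature

    proc tryAggregate(agg: var ToyAttestation, att: ToyAttestation): bool =
      if card(agg.bits * att.bits) > 0: # overlaps() in the real code
        return false                    # would double-count a validator
      agg.bits = agg.bits + att.bits    # combine() in the real code
      agg.sigs.add att.sigs             # init()/aggregate()/finish() in the real code
      true

    var agg = ToyAttestation(bits: {1'u8, 3'u8}, sigs: @["sig_1_3"])
    doAssert tryAggregate(agg, ToyAttestation(bits: {2'u8}, sigs: @["sig_2"]))
    doAssert not tryAggregate(agg, ToyAttestation(bits: {3'u8}, sigs: @["sig_3"]))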
@@ -51,6 +51,11 @@ type
     enabled = "Always enabled"
     disabled = "Always disabled"

+  GossipSlashingProtectionMode* {.pure.} = enum
+    dontcheck
+    warn
+    stop
+
   BeaconNodeConf* = object
     logLevel* {.
       defaultValue: "INFO"
@@ -255,6 +260,12 @@ type
       desc: "Write SSZ dumps of blocks, attestations and states to data dir"
       name: "dump" }: bool

+    gossipSlashingProtection* {.
+      defaultValue: GossipSlashingProtectionMode.warn
+      desc: "[=warn*|stop] What to do when another validator is detected to be running the same validator keys (default `warn`, will become `stop` in the future)"
+      name: "gossip-slashing-protection"
+    }: GossipSlashingProtectionMode
+
   of createTestnet:
     testnetDepositsFile* {.
       desc: "A LaunchPad deposits file for the genesis state validators"
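Usage note: like the other options in this object, the mode is selected on the command line via the name given above, e.g. --gossip-slashing-protection=stop; with no flag given the defaultValue of warn applies. The dontcheck value is deliberately absent from the help text, as the validator-duties hunk further down explains.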
@@ -13,7 +13,7 @@ import
   chronicles, chronos, metrics,
   ./spec/[crypto, datatypes, digest],
   ./block_pools/[clearance, chain_dag],
-  ./attestation_aggregation, ./exit_pool,
+  ./attestation_aggregation, ./exit_pool, ./validator_pool,
   ./beacon_node_types, ./attestation_pool,
   ./time, ./conf, ./sszdump

@@ -31,6 +31,9 @@ declareCounter beacon_proposer_slashings_received,
 declareCounter beacon_voluntary_exits_received,
   "Number of beacon chain voluntary exits received by this peer"

+declareCounter beacon_duplicate_validator_protection_activated,
+  "Number of times duplicate validator protection was activated"
+
 const delayBuckets = [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, Inf]

 declareHistogram beacon_attestation_delay,
@@ -67,6 +70,7 @@ type
     chainDag*: ChainDAGRef
     attestationPool*: ref AttestationPool
     exitPool: ref ExitPool
+    validatorPool: ref ValidatorPool
     quarantine*: QuarantineRef
     blockReceivedDuringSlot*: Future[void]

@@ -74,6 +78,8 @@ type
     attestationsQueue*: AsyncQueue[AttestationEntry]
     aggregatesQueue*: AsyncQueue[AggregateEntry]

+    gossipSlashingProtection*: DupProtection
+
 proc updateHead*(self: var Eth2Processor, wallSlot: Slot) =
   ## Trigger fork choice and returns the new head block.
   ## Can return `nil`
@@ -298,6 +304,42 @@ proc blockValidator*(

 {.push raises: [Defect].}

+proc checkForPotentialSelfSlashing(
+    self: var Eth2Processor, attestationData: AttestationData,
+    attesterIndices: HashSet[ValidatorIndex], wallSlot: Slot) =
+  # Attestations remain valid for 32 slots, so avoid confusing with one's own
+  # reflections, for a ATTESTATION_PROPAGATION_SLOT_RANGE div SLOTS_PER_EPOCH
+  # period after the attestation slot. For mainnet this can be one additional
+  # epoch, and for minimal, four epochs. Unlike in the attestation validation
+  # checks, use the spec version of the constant here.
+  const
+    # https://github.com/ethereum/eth2.0-specs/blob/v1.0.0/specs/phase0/p2p-interface.md#configuration
+    ATTESTATION_PROPAGATION_SLOT_RANGE = 32
+
+    GUARD_EPOCHS = ATTESTATION_PROPAGATION_SLOT_RANGE div SLOTS_PER_EPOCH
+
+  # If gossipSlashingProtection not dontcheck or stop, it's the default "warn".
+  let epoch = wallSlot.epoch
+  if epoch < self.gossipSlashingProtection.broadcastStartEpoch and
+      epoch >= self.gossipSlashingProtection.probeEpoch and
+      epoch <= self.gossipSlashingProtection.probeEpoch + GUARD_EPOCHS:
+    let tgtBlck = self.chainDag.getRef(attestationData.target.root)
+    doAssert not tgtBlck.isNil  # because attestation is valid above
+
+    let epochRef = self.chainDag.getEpochRef(
+      tgtBlck, attestationData.target.epoch)
+    for validatorIndex in attesterIndices:
+      let validatorPubkey = epochRef.validator_keys[validatorIndex]
+      if self.validatorPool[].getValidator(validatorPubkey) !=
+          default(AttachedValidator):
+        warn "Duplicate validator detected; would be slashed",
+          validatorIndex,
+          validatorPubkey
+        beacon_duplicate_validator_protection_activated.inc()
+        if self.config.gossipSlashingProtection == GossipSlashingProtectionMode.stop:
+          warn "We believe you are currently running another instance of the same validator. We've disconnected you from the network as this presents a significant slashing risk. Possible next steps are (a) making sure you've disconnected your validator from your old machine before restarting the client; and (b) running the client again with the gossip-slashing-protection option disabled, only if you are absolutely sure this is the only instance of your validator running, and reporting the issue at https://github.com/status-im/nimbus-eth2/issues."
+          quit QuitFailure
+
 proc attestationValidator*(
     self: var Eth2Processor,
     attestation: Attestation,
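The guard-window arithmetic in the comment above can be checked directly; the divisors below are the spec presets it cites (mainnet SLOTS_PER_EPOCH = 32, minimal SLOTS_PER_EPOCH = 8), giving the one- and four-epoch windows mentioned:

    const ATTESTATION_PROPAGATION_SLOT_RANGE = 32
    doAssert ATTESTATION_PROPAGATION_SLOT_RANGE div 32 == 1  # mainnet preset: one extra epoch
    doAssert ATTESTATION_PROPAGATION_SLOT_RANGE div 8 == 4   # minimal preset: four extra epochs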
@@ -329,6 +371,8 @@ proc attestationValidator*(
   beacon_attestations_received.inc()
   beacon_attestation_delay.observe(delay.toFloatSeconds())

+  self.checkForPotentialSelfSlashing(attestation.data, v.value, wallSlot)
+
   while self.attestationsQueue.full():
     try:
       notice "Queue full, dropping attestation",
@@ -381,6 +425,9 @@ proc aggregateValidator*(
   beacon_aggregates_received.inc()
   beacon_aggregate_delay.observe(delay.toFloatSeconds())

+  self.checkForPotentialSelfSlashing(
+    signedAggregateAndProof.message.aggregate.data, v.value, wallSlot)
+
   while self.aggregatesQueue.full():
     try:
       notice "Queue full, dropping aggregate",
@@ -500,6 +547,7 @@ proc new*(T: type Eth2Processor,
           chainDag: ChainDAGRef,
           attestationPool: ref AttestationPool,
           exitPool: ref ExitPool,
+          validatorPool: ref ValidatorPool,
           quarantine: QuarantineRef,
           getWallTime: GetWallTimeFn): ref Eth2Processor =
   (ref Eth2Processor)(
@@ -508,6 +556,7 @@ proc new*(T: type Eth2Processor,
     chainDag: chainDag,
     attestationPool: attestationPool,
     exitPool: exitPool,
+    validatorPool: validatorPool,
     quarantine: quarantine,
     blockReceivedDuringSlot: newFuture[void](),
     blocksQueue: newAsyncQueue[BlockEntry](1),
@@ -322,7 +322,8 @@ proc init*(T: type BeaconNode,
   proc getWallTime(): BeaconTime = res.beaconClock.now()

   res.processor = Eth2Processor.new(
-    conf, chainDag, attestationPool, exitPool, quarantine, getWallTime)
+    conf, chainDag, attestationPool, exitPool, newClone(res.attachedValidators),
+    quarantine, getWallTime)

   res.requestManager = RequestManager.init(
     network, res.processor.blocksQueue)
@@ -627,6 +628,45 @@ proc removeMessageHandlers(node: BeaconNode) =
   for subnet in 0'u64 ..< ATTESTATION_SUBNET_COUNT:
     node.network.unsubscribe(getAttestationTopic(node.forkDigest, subnet))

+proc setupSelfSlashingProtection(node: BeaconNode, slot: Slot) =
+  # When another client's already running, this is very likely to detect
+  # potential duplicate validators, which can trigger slashing. Assuming
+  # the most pessimal case of two validators started simultaneously, the
+  # probability of triggering a slashable condition is up to 1/n, with n
+  # being the number of epochs one waits before proposing or attesting.
+  #
+  # Every missed attestation costs approximately 3*get_base_reward(), which
+  # can be up to around 10,000 Wei. Thus, skipping attestations isn't cheap
+  # and one should gauge the likelihood of this simultaneous launch to tune
+  # the epoch delay to one's perceived risk.
+  #
+  # This approach catches both startup and network outage conditions.

+  const duplicateValidatorEpochs = 2

+  node.processor.gossipSlashingProtection.broadcastStartEpoch =
+    slot.epoch + duplicateValidatorEpochs
+  # randomize() already called; also, never probe on first epoch in guard
+  # period, so that existing, running validators can be picked up. Whilst
+  # this reduces entropy for overlapping-start cases, and increases their
+  # collision likelihood, that can be compensated for by increasing guard
+  # epoch periods by 1. As a corollary, 1 guard epoch won't detect when a
+  # duplicate pair overlaps exactly, only the running/starting case. Even
+  # 2 epochs is dangerous because it'll guarantee colliding probes in the
+  # overlapping case.

+  # So dPE == 2 -> epoch + 1, always; dPE == 3 -> epoch + (1 or 2), etc.
+  node.processor.gossipSlashingProtection.probeEpoch =
+    slot.epoch + 1 + rand(duplicateValidatorEpochs.int - 2).uint64
+  doAssert node.processor.gossipSlashingProtection.probeEpoch <
+    node.processor.gossipSlashingProtection.broadcastStartEpoch

+  debug "Setting up self-slashing protection",
+    epoch = slot.epoch,
+    probeEpoch = node.processor.gossipSlashingProtection.probeEpoch,
+    broadcastStartEpoch =
+      node.processor.gossipSlashingProtection.broadcastStartEpoch
+
 proc updateGossipStatus(node: BeaconNode, slot: Slot) =
   # Syncing tends to be ~1 block/s, and allow for an epoch of time for libp2p
   # subscribing to spin up. The faster the sync, the more wallSlot - headSlot
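To make the probe scheduling concrete: with duplicateValidatorEpochs = 2 the rand() term is always zero, so a node coming up in wall epoch E stays silent in E, probes in E + 1, and resumes unconditional gossip from E + 2. A standalone sketch of that arithmetic, with plain uint64 standing in for Epoch:

    import std/random

    const duplicateValidatorEpochs = 2'u64
    let e = 100'u64  # stand-in for slot.epoch at startup
    let broadcastStartEpoch = e + duplicateValidatorEpochs
    let probeEpoch = e + 1 + rand(duplicateValidatorEpochs.int - 2).uint64
    doAssert probeEpoch == 101'u64           # rand(0) is always 0
    doAssert broadcastStartEpoch == 102'u64  # normal gossip resumes here
    doAssert probeEpoch < broadcastStartEpoch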
@@ -660,6 +700,7 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) =
       headSlot = node.chainDag.head.slot,
       syncQueueLen

+    node.setupSelfSlashingProtection(slot)
     node.addMessageHandlers()
     doAssert node.getTopicSubscriptionEnabled()
   elif
@@ -978,6 +1019,7 @@ proc run*(node: BeaconNode) =
   node.startSyncManager()

   if not node.beaconClock.now().toSlot().afterGenesis:
+    node.setupSelfSlashingProtection(curSlot)
     node.addMessageHandlers()
     doAssert node.getTopicSubscriptionEnabled()

|
@ -534,6 +534,10 @@ type
|
|||||||
current_justified_checkpoint*: Checkpoint
|
current_justified_checkpoint*: Checkpoint
|
||||||
finalized_checkpoint*: Checkpoint
|
finalized_checkpoint*: Checkpoint
|
||||||
|
|
||||||
|
DupProtection* = object
|
||||||
|
broadcastStartEpoch*: Epoch
|
||||||
|
probeEpoch*: Epoch
|
||||||
|
|
||||||
func shortValidatorKey*(state: BeaconState, validatorIdx: int): string =
|
func shortValidatorKey*(state: BeaconState, validatorIdx: int): string =
|
||||||
($state.validators[validatorIdx].pubkey)[0..7]
|
($state.validators[validatorIdx].pubkey)[0..7]
|
||||||
|
|
||||||
|
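Reading the two fields together: probeEpoch is the single epoch before normal operation in which the node deliberately gossips to flush out a running duplicate, and broadcastStartEpoch is the epoch from which it broadcasts unconditionally. The code that sets and checks them keeps probeEpoch strictly below broadcastStartEpoch, as the doAsserts in the surrounding hunks enforce.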
@@ -615,6 +615,21 @@ proc handleValidatorDuties*(node: BeaconNode, lastSlot, slot: Slot) {.async.} =

   var curSlot = lastSlot + 1

+  # The dontcheck option's a deliberately undocumented escape hatch for the
+  # local testnets and similar development and testing use cases.
+  doAssert node.config.gossipSlashingProtection == GossipSlashingProtectionMode.dontcheck or (
+    node.processor[].gossipSlashingProtection.probeEpoch <
+    node.processor[].gossipSlashingProtection.broadcastStartEpoch)
+  if curSlot.epoch <
+      node.processor[].gossipSlashingProtection.broadcastStartEpoch and
+      curSlot.epoch != node.processor[].gossipSlashingProtection.probeEpoch and
+      node.config.gossipSlashingProtection == GossipSlashingProtectionMode.stop:
+    notice "Waiting to gossip out to detect potential duplicate validators",
+      broadcastStartEpoch =
+        node.processor[].gossipSlashingProtection.broadcastStartEpoch,
+      probeEpoch = node.processor[].gossipSlashingProtection.probeEpoch
+    return
+
   # Start by checking if there's work we should have done in the past that we
   # can still meaningfully do
   while curSlot < slot:
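The hold-back condition above reduces to a small predicate over the two epochs, worth spelling out since it is the behavioural core of stop mode. A simplified sketch; shouldHoldBack is a hypothetical name and plain uint64 stands in for Epoch:

    func shouldHoldBack(epoch, probeEpoch, broadcastStartEpoch: uint64,
                        stopMode: bool): bool =
      # Duties are skipped only in "stop" mode, only before the broadcast
      # start, and never during the probe epoch itself.
      stopMode and epoch < broadcastStartEpoch and epoch != probeEpoch

    doAssert shouldHoldBack(100, 101, 102, true)       # silent startup epoch
    doAssert not shouldHoldBack(101, 101, 102, true)   # probe epoch gossips
    doAssert not shouldHoldBack(102, 101, 102, true)   # normal operation resumes
    doAssert not shouldHoldBack(100, 101, 102, false)  # warn mode never blocks duties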
@@ -385,6 +385,7 @@ for NUM_NODE in $(seq 0 $(( NUM_NODES - 1 ))); do
     --metrics \
     --metrics-address="127.0.0.1" \
     --metrics-port="$(( BASE_METRICS_PORT + NUM_NODE ))" \
+    --gossip-slashing-protection=dontcheck \
     ${EXTRA_ARGS} \
     > "${DATA_DIR}/log${NUM_NODE}.txt" 2>&1 &

@@ -9,7 +9,7 @@ shift
 # shellcheck source=/dev/null
 source "$(dirname "$0")/vars.sh"

-if [[ ! -z "$1" ]]; then
+if [[ -n "$1" ]]; then
   ADDITIONAL_BEACON_NODE_ARGS=$1
   shift
 else
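([[ -n "$1" ]] is the direct positive form of [[ ! -z "$1" ]]; shellcheck flags the doubly-negated version as a style issue, hence the tidy-up in this and the following hunk.)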
@@ -18,7 +18,7 @@ fi

 BOOTSTRAP_ARG=""

-if [[ ! -z "$1" ]]; then
+if [[ -n "$1" ]]; then
   BOOTSTRAP_NODE_ID=$1
   shift
 else
@@ -105,5 +105,6 @@ $BEACON_NODE_BIN \
   --metrics \
   --metrics-address="127.0.0.1" \
   --metrics-port="$(( $BASE_METRICS_PORT + $NODE_ID ))" \
+  --gossip-slashing-protection=dontcheck \
   ${ADDITIONAL_BEACON_NODE_ARGS} \
   "$@"