nimbus-eth2/tests/test_gossip_validation.nim
Jacek Sieka b8a32419b8
async batch verification (+40% sig verification throughput) (#5176)
* async batch verification

When batch verification is performed, the main thread is blocked, reducing
concurrency.

With this PR, the new thread signalling primitive in chronos is used to
offload the full batch verification process to a separate thread, allowing
the main thread to continue async operations while the other threads verify
signatures.
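
To illustrate the pattern, here is a minimal sketch (not the actual
batch_validation.nim code): the VerifyTask layout, verifyBlocking and the
taskpool wiring are invented for the example, and the ThreadSignalPtr calls
are assumed from chronos' threadsync module.

import std/atomics
import chronos, chronos/threadsync, results, taskpools

type VerifyTask = object
  sig: ThreadSignalPtr # chronos thread-signalling primitive: worker -> event loop
  ok: Atomic[bool]     # verification outcome, written by the worker thread

proc verifyBlocking(task: ptr VerifyTask) =
  # Stand-in for the expensive batch signature verification that now runs
  # off the main thread
  task.ok.store(true)
  discard task.sig.fireSync() # wake up the awaiting async proc

proc verifyAsync(tp: Taskpool): Future[bool] {.async.} =
  var task = VerifyTask(sig: ThreadSignalPtr.new().expect("signal"))
  tp.spawn verifyBlocking(addr task)
  # The event loop keeps servicing other futures while the worker verifies
  await task.sig.wait()
  discard task.sig.close()
  return task.ok.load()

when isMainModule:
  let tp = Taskpool.new()
  echo waitFor verifyAsync(tp)
  tp.shutdown()

Only the cheap signalling crosses the thread boundary - the event loop never
blocks on the verification work itself.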

As before, the number of ongoing batch verifications is capped to prevent
runaway resource usage.

In addition to the asynchronous processing, 3 additional changes help
drive throughput (a rough sketch of the scheduling loop follows the list):

* A loop is used for batch accumulation: this prevents a stampede of
small batches in eager mode, where both the eager and the scheduled batch
runner would prematurely pick "fresh" batches off the queue
* An additional small wait is introduced for small batches - this helps
create slightly larger batches, which make better use of the increased
concurrency
* Up to 2 batches are scheduled to the threadpool during high pressure,
reducing startup latency for the threads
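
As a rough sketch of the scheduling idea (names such as pendingBatches,
smallBatchSize and processBatch are invented for the example and the timings
are illustrative; the real logic lives in batch_validation.nim):

import std/deques
import chronos

const
  smallBatchSize = 16 # illustrative threshold, not the real constant
  maxInFlight = 2     # schedule up to 2 batches while under pressure

type Batch = object
  items: int # stand-in for the accumulated signature sets

var
  pendingBatches = initDeque[Batch]() # filled as gossip messages arrive
  inFlight = 0

proc processBatch(batch: Batch) {.async.} =
  # Placeholder for handing the batch over to the verification thread
  await sleepAsync(1.milliseconds)

proc runOne(batch: Batch) {.async.} =
  await processBatch(batch)
  dec inFlight

proc batchRunner() {.async.} =
  while true:
    if pendingBatches.len == 0:
      await sleepAsync(10.milliseconds) # idle - nothing accumulated yet
      continue
    if pendingBatches.peekFirst().items < smallBatchSize:
      # a small extra wait lets small batches grow a little larger
      await sleepAsync(2.milliseconds)
    # drain in a loop so the eager and the scheduled runner don't race each
    # other for freshly queued, half-empty batches
    while pendingBatches.len > 0 and inFlight < maxInFlight:
      inc inFlight
      asyncSpawn runOne(pendingBatches.popFirst())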

Together, these changes increase attestation verification throughput
under load by up to 30%.

* fixup

* Update submodules

* fix blst build issues (and a PIC warning)

* bump

---------

Co-authored-by: Zahary Karadjov <zahary@gmail.com>
2023-08-03 11:36:45 +03:00


# beacon_chain
# Copyright (c) 2018-2023 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.used.}

import
  # Standard library
  std/sequtils,
  # Status lib
  unittest2,
  chronos,
  taskpools,
  # Internal
  ../beacon_chain/[beacon_clock],
  ../beacon_chain/gossip_processing/[gossip_validation, batch_validation],
  ../beacon_chain/fork_choice/fork_choice,
  ../beacon_chain/consensus_object_pools/[
    block_quarantine, blockchain_dag, block_clearance, attestation_pool,
    sync_committee_msg_pool],
  ../beacon_chain/spec/datatypes/[phase0, altair],
  ../beacon_chain/spec/[
    beaconstate, state_transition, helpers, network, validator],
  ../beacon_chain/validators/validator_pool,
  # Test utilities
  ./testutil, ./testdbutil, ./testblockutil

proc pruneAtFinalization(dag: ChainDAGRef, attPool: AttestationPool) =
  if dag.needStateCachesAndForkChoicePruning():
    dag.pruneStateCachesDAG()
    # pool[].prune() # We test logic without att_1_0 pool / fork choice pruning
suite "Gossip validation " & preset():
setup:
# Genesis state that results in 3 members per committee
let rng = HmacDrbgContext.new()
var
validatorMonitor = newClone(ValidatorMonitor.init())
dag = init(
ChainDAGRef, defaultRuntimeConfig, makeTestDB(SLOTS_PER_EPOCH * 3),
validatorMonitor, {})
taskpool = Taskpool.new()
verifier = BatchVerifier.init(rng, taskpool)
quarantine = newClone(Quarantine.init())
pool = newClone(AttestationPool.init(dag, quarantine))
state = newClone(dag.headState)
cache = StateCache()
info = ForkedEpochInfo()
batchCrypto = BatchCrypto.new(
rng, eager = proc(): bool = false,
genesis_validators_root = dag.genesis_validators_root, taskpool).expect(
"working batcher")
# Slot 0 is a finalized slot - won't be making attestations for it..
check:
process_slots(
defaultRuntimeConfig, state[], getStateField(state[], slot) + 1,
cache, info, {}).isOk()
test "Empty committee when no committee for slot":
template committee(idx: uint64): untyped =
get_beacon_committee(
dag.headState, dag.head.slot, idx.CommitteeIndex, cache)
template committeeLen(idx: uint64): untyped =
get_beacon_committee_len(
dag.headState, dag.head.slot, idx.CommitteeIndex, cache)
check:
committee(0).len > 0
committee(63).len == 0
check:
committeeLen(2) > 0
committeeLen(63) == 0
test "validateAttestation":
var cache: StateCache
for blck in makeTestBlocks(
dag.headState, cache, int(SLOTS_PER_EPOCH * 5), attested = false):
let added = dag.addHeadBlock(verifier, blck.phase0Data) do (
blckRef: BlockRef, signedBlock: phase0.TrustedSignedBeaconBlock,
epochRef: EpochRef, unrealized: FinalityCheckpoints):
# Callback add to fork choice if valid
pool[].addForkChoice(
epochRef, blckRef, unrealized, signedBlock.message,
blckRef.slot.start_beacon_time)
check: added.isOk()
dag.updateHead(added[], quarantine[], [])
pruneAtFinalization(dag, pool[])
var
# Create attestations for slot 1
beacon_committee = get_beacon_committee(
dag.headState, dag.head.slot, 0.CommitteeIndex, cache)
att_1_0 = makeAttestation(
dag.headState, dag.head.root, beacon_committee[0], cache)
att_1_1 = makeAttestation(
dag.headState, dag.head.root, beacon_committee[1], cache)
committees_per_slot =
get_committee_count_per_slot(
dag.headState, att_1_0.data.slot.epoch, cache)
subnet = compute_subnet_for_attestation(
committees_per_slot,
att_1_0.data.slot, att_1_0.data.index.CommitteeIndex)
beaconTime = att_1_0.data.slot.start_beacon_time()
check:
validateAttestation(pool, batchCrypto, att_1_0, beaconTime, subnet, true).waitFor().isOk
# Same validator again
validateAttestation(pool, batchCrypto, att_1_0, beaconTime, subnet, true).waitFor().error()[0] ==
ValidationResult.Ignore
pool[].nextAttestationEpoch.setLen(0) # reset for test
check:
# Wrong subnet
validateAttestation(
pool, batchCrypto, att_1_0, beaconTime, SubnetId(subnet.uint8 + 1), true).waitFor().isErr
pool[].nextAttestationEpoch.setLen(0) # reset for test
check:
# Too far in the future
validateAttestation(
pool, batchCrypto, att_1_0, beaconTime - 1.seconds, subnet, true).waitFor().isErr
pool[].nextAttestationEpoch.setLen(0) # reset for test
check:
# Too far in the past
validateAttestation(
pool, batchCrypto, att_1_0,
beaconTime - (SECONDS_PER_SLOT * SLOTS_PER_EPOCH - 1).int.seconds,
subnet, true).waitFor().isErr
block:
var broken = att_1_0
broken.signature.blob[0] += 1
pool[].nextAttestationEpoch.setLen(0) # reset for test
check:
# Invalid signature
validateAttestation(
pool, batchCrypto, broken, beaconTime, subnet, true).waitFor().
error()[0] == ValidationResult.Reject
block:
var broken = att_1_0
broken.signature.blob[5] += 1
pool[].nextAttestationEpoch.setLen(0) # reset for test
# One invalid, one valid (batched)
let
fut_1_0 = validateAttestation(
pool, batchCrypto, broken, beaconTime, subnet, true)
fut_1_1 = validateAttestation(
pool, batchCrypto, att_1_1, beaconTime, subnet, true)
check:
fut_1_0.waitFor().error()[0] == ValidationResult.Reject
fut_1_1.waitFor().isOk()
block:
var broken = att_1_0
# This shouldn't deserialize, which is a different way to break it
broken.signature.blob = default(type broken.signature.blob)
pool[].nextAttestationEpoch.setLen(0) # reset for test
# One invalid, one valid (batched)
let
fut_1_0 = validateAttestation(
pool, batchCrypto, broken, beaconTime, subnet, true)
fut_1_1 = validateAttestation(
pool, batchCrypto, att_1_1, beaconTime, subnet, true)
check:
fut_1_0.waitFor().error()[0] == ValidationResult.Reject
fut_1_1.waitFor().isOk()
suite "Gossip validation - Extra": # Not based on preset config
test "validateSyncCommitteeMessage":
const num_validators = SLOTS_PER_EPOCH
let
cfg = block:
var cfg = defaultRuntimeConfig
cfg.ALTAIR_FORK_EPOCH = (GENESIS_EPOCH + 1).Epoch
cfg
taskpool = Taskpool.new()
quarantine = newClone(Quarantine.init())
rng = HmacDrbgContext.new()
var
verifier = BatchVerifier.init(rng, taskpool)
dag = block:
let
validatorMonitor = newClone(ValidatorMonitor.init())
dag = ChainDAGRef.init(
cfg, makeTestDB(num_validators), validatorMonitor, {})
var cache = StateCache()
for blck in makeTestBlocks(
dag.headState, cache, int(SLOTS_PER_EPOCH),
attested = false, cfg = cfg):
let added =
case blck.kind
of ConsensusFork.Phase0:
const nilCallback = OnPhase0BlockAdded(nil)
dag.addHeadBlock(verifier, blck.phase0Data, nilCallback)
of ConsensusFork.Altair:
const nilCallback = OnAltairBlockAdded(nil)
dag.addHeadBlock(verifier, blck.altairData, nilCallback)
of ConsensusFork.Bellatrix:
const nilCallback = OnBellatrixBlockAdded(nil)
dag.addHeadBlock(verifier, blck.bellatrixData, nilCallback)
of ConsensusFork.Capella:
const nilCallback = OnCapellaBlockAdded(nil)
dag.addHeadBlock(verifier, blck.capellaData, nilCallback)
of ConsensusFork.Deneb:
const nilCallback = OnDenebBlockAdded(nil)
dag.addHeadBlock(verifier, blck.denebData, nilCallback)
check: added.isOk()
dag.updateHead(added[], quarantine[], [])
dag
let batchCrypto = BatchCrypto.new(
rng, eager = proc(): bool = false,
genesis_validators_root = dag.genesis_validators_root, taskpool).expect(
"working batcher")
var
state = assignClone(dag.headState.altairData)
slot = state[].data.slot
subcommitteeIdx = 0.SyncSubcommitteeIndex
syncCommittee = @(dag.syncCommitteeParticipants(slot))
subcommittee = toSeq(syncCommittee.syncSubcommittee(subcommitteeIdx))
index = subcommittee[0]
expectedCount = subcommittee.count(index)
pubkey = state[].data.validators.item(index).pubkey
keystoreData = KeystoreData(kind: KeystoreKind.Local,
pubkey: pubkey,
privateKey: MockPrivKeys[index])
validator = AttachedValidator(
kind: ValidatorKind.Local, data: keystoreData, index: Opt.some index)
resMsg = waitFor getSyncCommitteeMessage(
validator, state[].data.fork, state[].data.genesis_validators_root,
slot, state[].latest_block_root)
msg = resMsg.get()
syncCommitteePool = newClone(SyncCommitteeMsgPool.init(rng, cfg))
res = waitFor validateSyncCommitteeMessage(
dag, quarantine, batchCrypto, syncCommitteePool,
msg, subcommitteeIdx, slot.start_beacon_time(), true)
(bid, cookedSig, positions) = res.get()
syncCommitteePool[].addSyncCommitteeMessage(
msg.slot,
bid,
msg.validator_index,
cookedSig,
subcommitteeIdx,
positions)
let
contrib = block:
let contrib = (ref SignedContributionAndProof)()
check:
syncCommitteePool[].produceContribution(
slot, bid, subcommitteeIdx,
contrib.message.contribution)
syncCommitteePool[].addContribution(
contrib[], bid,
contrib.message.contribution.signature.load.get)
let signRes = waitFor validator.getContributionAndProofSignature(
state[].data.fork, state[].data.genesis_validators_root,
contrib[].message)
doAssert(signRes.isOk())
contrib[].signature = signRes.get()
contrib
aggregate = syncCommitteePool[].produceSyncAggregate(bid, slot + 1)
check:
expectedCount > 1 # Cover edge case
res.isOk
contrib.message.contribution.aggregation_bits.countOnes == expectedCount
aggregate.sync_committee_bits.countOnes == expectedCount
# Same message twice should be ignored
validateSyncCommitteeMessage(
dag, quarantine, batchCrypto, syncCommitteePool,
msg, subcommitteeIdx, state[].data.slot.start_beacon_time(), true
).waitFor().isErr()