Bugfix: The VC was producing invalid sync committee contributions (#4343)

Since the sync committee duties are no longer updated on every slot,
and the sync committee aggregator selection proofs were previously
generated during that duties update, the client ended up using stale
selection proofs (they must be generated anew for each slot).

The fix moves the selection proof generation into a separate function
that is executed on every slot.
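
For background on why the proofs go stale: a selection proof is a BLS
signature over data that includes the slot and the subcommittee index,
and the aggregator check hashes that signature. Below is a minimal
sketch of the Altair aggregator-selection rule, using the mainnet
preset constants; the helper name is illustrative and this is not the
actual nimbus-eth2 code:

    import nimcrypto

    const
      SYNC_COMMITTEE_SIZE = 512
      SYNC_COMMITTEE_SUBNET_COUNT = 4
      TARGET_AGGREGATORS_PER_SYNC_SUBCOMMITTEE = 16

    proc isSyncCommitteeAggregator(selectionProof: openArray[byte]): bool =
      # Per the Altair spec, a validator aggregates when the first 8 bytes
      # of SHA-256(selection_proof), read as a little-endian uint64, are
      # divisible by `modulo`.
      let modulo = max(1, SYNC_COMMITTEE_SIZE div
                          SYNC_COMMITTEE_SUBNET_COUNT div
                          TARGET_AGGREGATORS_PER_SYNC_SUBCOMMITTEE)
      let digest = sha256.digest(selectionProof)
      var prefix: uint64
      for i in 0 ..< 8:
        prefix = prefix or (uint64(digest.data[i]) shl (8 * i))
      prefix mod uint64(modulo) == 0

    # Since the signed data embeds the slot, a proof generated for slot N
    # neither verifies nor selects the right aggregators for slot N + 1.

With the mainnet constants this selects roughly one aggregator out of
every eight subcommittee members, and the outcome changes every slot
precisely because the proof must be recomputed per slot.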

Other changes:

* The logtrace tool has been enhanced with a framework for adding new,
  simpler log aggregation and analysis algorithms (see the sketch after
  this list).
* The default CI testnet simulation now ensures that the blocks in the
  network have reasonable sync committee participation.
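
As an illustration of the new framework, here is how a hypothetical
extra checker could plug in. The name `slotStartChecker`, its threshold
parameter, and the "Slot start" message are made up for this example;
the `LogVisitor`, `IssuesGroup`, `hasIssues`/`noIssues` API is the one
added in this commit:

    # A LogVisitorFactory returns a fresh LogVisitor per processed file;
    # the visitor accumulates state in its closure and turns it into a
    # FileReport at the end of the file.
    proc slotStartChecker(minExpectedSlots: int): LogVisitorFactory =
      return proc (): LogVisitor =
        var slotStarts = 0
        LogVisitor(
          visitLine: proc (msg, line: string) =
            if msg == "Slot start":
              inc slotStarts
          ,
          produceReport: proc (): FileReport =
            if slotStarts < minExpectedSlots:
              let issues = IssuesGroup.new "Slot Stats"
              issues.warning("Too few slot starts: " & $slotStarts)
              return hasIssues(issues)
            else:
              return noIssues()
        )

    # Enabled alongside the built-in checkers:
    #   tracer.enabledVisitors.add slotStartChecker(10)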
zah 2022-11-24 09:46:35 +02:00 committed by GitHub
parent 44cab3a4cc
commit d07113767d
7 changed files with 335 additions and 136 deletions


@@ -216,7 +216,7 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
           # state, it's not possible that it will sit on the sync committee.
           # Since this API must omit results for validators that don't have
           # duties, we can simply ingnore this requested index.
-          # (we won't bother to validate it agains a more recent state).
+          # (we won't bother to validate it against a more recent state).
           continue
         let requestedValidatorPubkey =
@@ -709,15 +709,15 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
         let res = slot.get()
         if res.isErr():
           return RestApiResponse.jsonError(Http400, InvalidSlotValueError,
-                                            $res.error())
+                                           $res.error())
         let rslot = res.get()
         if epoch(rslot) < node.dag.cfg.ALTAIR_FORK_EPOCH:
           return RestApiResponse.jsonError(Http400,
-                                            SlotFromTheIncorrectForkError)
+                                           SlotFromTheIncorrectForkError)
         rslot
     if qslot <= node.dag.finalizedHead.slot:
       return RestApiResponse.jsonError(Http400, InvalidSlotValueError,
-                                        "Slot already finalized")
+                                       "Slot already finalized")
     let qindex =
       if subcommittee_index.isNone():
         return RestApiResponse.jsonError(Http400,
@@ -726,8 +726,8 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
         let res = subcommittee_index.get()
         if res.isErr():
           return RestApiResponse.jsonError(Http400,
-                                            InvalidSubCommitteeIndexValueError,
-                                            $res.error())
+                                           InvalidSubCommitteeIndexValueError,
+                                           $res.error())
         res.get()
     let qroot =
       if beacon_block_root.isNone():


@@ -88,11 +88,6 @@ type
     validator_index*: ValidatorIndex
     validator_sync_committee_index*: IndexInSyncCommittee

-  SyncDutyAndProof* = object
-    epoch*: Epoch
-    data*: SyncCommitteeDuty
-    slotSig*: Option[ValidatorSig]
-
   SyncCommitteeSubscriptionInfo* = object
     validator_index*: ValidatorIndex
     validator_sync_committee_indices*: seq[IndexInSyncCommittee]
@@ -129,7 +124,7 @@ type
     duties*: Table[Epoch, DutyAndProof]

   EpochSyncDuties* = object
-    duties*: Table[Epoch, SyncDutyAndProof]
+    duties*: Table[Epoch, SyncCommitteeDuty]

   RestBeaconNodeStatus* {.pure.} = enum
     Uninitalized, Offline, Incompatible, NotSynced, Online
@@ -197,8 +192,6 @@ type
 const
   DefaultDutyAndProof* = DutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64))
-  DefaultSyncDutyAndProof* =
-    SyncDutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64))
   SlotDuration* = int64(SECONDS_PER_SLOT).seconds
   OneThirdDuration* = int64(SECONDS_PER_SLOT).seconds div INTERVALS_PER_SLOT
   AllBeaconNodeRoles* = {
@@ -308,9 +301,6 @@ proc stop*(csr: ClientServiceRef) {.async.} =
 proc isDefault*(dap: DutyAndProof): bool =
   dap.epoch == Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)

-proc isDefault*(sdap: SyncDutyAndProof): bool =
-  sdap.epoch == Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)
-
 proc isDefault*(prd: ProposedData): bool =
   prd.epoch == Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)
@@ -397,11 +387,6 @@ proc init*(t: typedesc[DutyAndProof], epoch: Epoch, dependentRoot: Eth2Digest,
   DutyAndProof(epoch: epoch, dependentRoot: dependentRoot, data: duty,
                slotSig: slotSig)

-proc init*(t: typedesc[SyncDutyAndProof], epoch: Epoch,
-           duty: SyncCommitteeDuty,
-           slotSig: Option[ValidatorSig]): SyncDutyAndProof =
-  SyncDutyAndProof(epoch: epoch, data: duty, slotSig: slotSig)
-
 proc init*(t: typedesc[ProposedData], epoch: Epoch, dependentRoot: Eth2Digest,
            data: openArray[ProposerTask]): ProposedData =
   ProposedData(epoch: epoch, dependentRoot: dependentRoot, duties: @data)
@@ -432,9 +417,9 @@ proc getAttesterDutiesForSlot*(vc: ValidatorClientRef,
   res

 proc getSyncCommitteeDutiesForSlot*(vc: ValidatorClientRef,
-                                    slot: Slot): seq[SyncDutyAndProof] =
-  ## Returns all `SyncDutyAndProof` for the given `slot`.
-  var res: seq[SyncDutyAndProof]
+                                    slot: Slot): seq[SyncCommitteeDuty] =
+  ## Returns all `SyncCommitteeDuty` for the given `slot`.
+  var res: seq[SyncCommitteeDuty]
   let epoch = slot.epoch()
   for key, item in mpairs(vc.syncCommitteeDuties):
     item.duties.withValue(epoch, duty):
@@ -494,10 +479,10 @@ proc syncMembersSubscriptionInfoForEpoch*(
     item.duties.withValue(epoch, epochDuties):
       if not initialized:
-        cur.validator_index = epochDuties.data.validator_index
+        cur.validator_index = epochDuties.validator_index
         initialized = true
       cur.validator_sync_committee_indices.add(
-        epochDuties.data.validator_sync_committee_index)
+        epochDuties.validator_sync_committee_index)
   if initialized:
     res.add cur


@@ -296,73 +296,31 @@ proc pollForSyncCommitteeDuties*(vc: ValidatorClientRef,
     fork = vc.forkAtEpoch(epoch)
     genesisRoot = vc.beaconGenesis.genesis_validators_root

   let addOrReplaceItems =
     block:
       var alreadyWarned = false
       var res: seq[tuple[epoch: Epoch, duty: SyncCommitteeDuty]]
       for duty in relevantDuties:
-        let map = vc.syncCommitteeDuties.getOrDefault(duty.pubkey)
-        let epochDuty = map.duties.getOrDefault(epoch, DefaultSyncDutyAndProof)
-        if epochDuty.isDefault():
-          info "Received new sync committee duty", duty, epoch
-          res.add((epoch, duty))
-        else:
-          if epochDuty.data != duty:
-            if not(alreadyWarned):
-              info "Sync committee duties re-organization", duty, epoch
-              alreadyWarned = true
-            res.add((epoch, duty))
+        var dutyFound = false
+        vc.syncCommitteeDuties.withValue(duty.pubkey, map):
+          map.duties.withValue(epoch, epochDuty):
+            if epochDuty[] != duty:
+              dutyFound = true
+        if dutyFound and not alreadyWarned:
+          info "Sync committee duties re-organization", duty, epoch
+          alreadyWarned = true
+        res.add((epoch, duty))
       res

   if len(addOrReplaceItems) > 0:
-    var pendingRequests: seq[Future[SignatureResult]]
-    var validators: seq[AttachedValidator]
-    let sres = vc.getCurrentSlot()
-    if sres.isSome():
-      for item in addOrReplaceItems:
-        let validator = vc.attachedValidators[].getValidator(item.duty.pubkey)
-        let future = validator.getSyncCommitteeSelectionProof(
-          fork,
-          genesisRoot,
-          sres.get(),
-          getSubcommitteeIndex(item.duty.validator_sync_committee_index))
-        pendingRequests.add(future)
-        validators.add(validator)
-
-      try:
-        await allFutures(pendingRequests)
-      except CancelledError as exc:
-        var pendingCancel: seq[Future[void]]
-        for future in pendingRequests:
-          if not(future.finished()):
-            pendingCancel.add(future.cancelAndWait())
-        await allFutures(pendingCancel)
-        raise exc
-
-      for index, fut in pendingRequests:
-        let item = addOrReplaceItems[index]
-        let dap =
-          if fut.done():
-            let sigRes = fut.read()
-            if sigRes.isErr():
-              error "Unable to create slot signature using remote signer",
-                    validator = shortLog(validators[index]),
-                    error_msg = sigRes.error()
-              SyncDutyAndProof.init(item.epoch, item.duty,
-                                    none[ValidatorSig]())
-            else:
-              SyncDutyAndProof.init(item.epoch, item.duty,
-                                    some(sigRes.get()))
-          else:
-            SyncDutyAndProof.init(item.epoch, item.duty,
-                                  none[ValidatorSig]())
-
-        var validatorDuties =
-          vc.syncCommitteeDuties.getOrDefault(item.duty.pubkey)
-        validatorDuties.duties[item.epoch] = dap
-        vc.syncCommitteeDuties[item.duty.pubkey] = validatorDuties
+    for epoch, duty in items(addOrReplaceItems):
+      var validatorDuties =
+        vc.syncCommitteeDuties.getOrDefault(duty.pubkey)
+      validatorDuties.duties[epoch] = duty
+      vc.syncCommitteeDuties[duty.pubkey] = validatorDuties

   return len(addOrReplaceItems)


@@ -27,16 +27,16 @@ type
 proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
                                 slot: Slot, beaconBlockRoot: Eth2Digest,
-                                duty: SyncDutyAndProof): Future[bool] {.
+                                duty: SyncCommitteeDuty): Future[bool] {.
      async.} =
   let
     vc = service.client
     fork = vc.forkAtEpoch(slot.epoch)
     genesisValidatorsRoot = vc.beaconGenesis.genesis_validators_root
-    vindex = duty.data.validator_index
+    vindex = duty.validator_index
     subcommitteeIdx = getSubcommitteeIndex(
-      duty.data.validator_sync_committee_index)
-    validator = vc.getValidator(duty.data.pubkey).valueOr: return false
+      duty.validator_sync_committee_index)
+    validator = vc.getValidator(duty.pubkey).valueOr: return false
     message =
       block:
         let res = await getSyncCommitteeMessage(validator, fork,
@@ -93,16 +93,15 @@ proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
 proc produceAndPublishSyncCommitteeMessages(service: SyncCommitteeServiceRef,
                                             slot: Slot,
                                             beaconBlockRoot: Eth2Digest,
-                                            duties: seq[SyncDutyAndProof]) {.
-     async.} =
+                                            duties: seq[SyncCommitteeDuty])
+                                           {.async.} =
   let vc = service.client

   let pendingSyncCommitteeMessages =
     block:
       var res: seq[Future[bool]]
       for duty in duties:
-        debug "Serving sync message duty", duty = duty.data,
-              epoch = slot.epoch()
+        debug "Serving sync message duty", duty, epoch = slot.epoch()
         res.add(service.serveSyncCommitteeMessage(slot,
                                                   beaconBlockRoot,
                                                   duty))
@@ -203,38 +202,81 @@ proc serveContributionAndProof*(service: SyncCommitteeServiceRef,
 proc produceAndPublishContributions(service: SyncCommitteeServiceRef,
                                     slot: Slot,
                                     beaconBlockRoot: Eth2Digest,
-                                    duties: seq[SyncDutyAndProof]) {.async.} =
+                                    duties: seq[SyncCommitteeDuty]) {.async.} =
   let
     vc = service.client
-    contributionItems =
-      block:
-        var res: seq[ContributionItem]
-        for duty in duties:
-          let validator = vc.attachedValidators[].getValidator(duty.data.pubkey)
-          if not isNil(validator):
-            if duty.slotSig.isSome:
-              template slotSignature: auto = duty.slotSig.get
-              if is_sync_committee_aggregator(slotSignature):
-                res.add(ContributionItem(
-                  aggregator_index: uint64(duty.data.validator_index),
-                  selection_proof: slotSignature,
-                  validator: validator,
-                  subcommitteeIdx: getSubcommitteeIndex(
-                    duty.data.validator_sync_committee_index)
-                ))
-        res
+    epoch = slot.epoch
+    fork = vc.forkAtEpoch(epoch)

-  if len(contributionItems) > 0:
+  var slotSignatureReqs: seq[Future[SignatureResult]]
+  var validators: seq[(AttachedValidator, SyncSubcommitteeIndex)]
+
+  for duty in duties:
+    let validator = vc.attachedValidators[].getValidator(duty.pubkey)
+    if isNil(validator): continue # This should never happen
+    let
+      subCommitteeIdx =
+        getSubcommitteeIndex(duty.validator_sync_committee_index)
+      future = validator.getSyncCommitteeSelectionProof(
+        fork,
+        vc.beaconGenesis.genesis_validators_root,
+        slot,
+        subCommitteeIdx)
+    slotSignatureReqs.add(future)
+    validators.add((validator, subCommitteeIdx))
+
+  try:
+    await allFutures(slotSignatureReqs)
+  except CancelledError as exc:
+    var pendingCancel: seq[Future[void]]
+    for future in slotSignatureReqs:
+      if not(future.finished()):
+        pendingCancel.add(future.cancelAndWait())
+    await allFutures(pendingCancel)
+    raise exc
+
+  var
+    contributionsFuts: array[SYNC_COMMITTEE_SUBNET_COUNT,
+                             Future[SyncCommitteeContribution]]
+
+  let validatorContributions = block:
+    var res: seq[ContributionItem]
+    for idx, fut in slotSignatureReqs:
+      if fut.done:
+        let
+          sigRes = fut.read
+          validator = validators[idx][0]
+          subCommitteeIdx = validators[idx][1]
+        if sigRes.isErr:
+          error "Unable to create slot signature using remote signer",
+                validator = shortLog(validator),
+                error_msg = sigRes.error()
+        elif validator.index.isSome and
+             is_sync_committee_aggregator(sigRes.get):
+          res.add ContributionItem(
+            aggregator_index: uint64(validator.index.get),
+            selection_proof: sigRes.get,
+            validator: validator,
+            subcommitteeIdx: subCommitteeIdx)
+
+          if isNil(contributionsFuts[subCommitteeIdx]):
+            contributionsFuts[int subCommitteeIdx] =
+              vc.produceSyncCommitteeContribution(
+                slot,
+                subCommitteeIdx,
+                beaconBlockRoot,
+                ApiStrategyKind.Best)
+    res
+
+  if len(validatorContributions) > 0:
     let pendingAggregates =
       block:
         var res: seq[Future[bool]]
-        for item in contributionItems:
+        for item in validatorContributions:
           let aggContribution =
             try:
-              await vc.produceSyncCommitteeContribution(slot,
-                                                        item.subcommitteeIdx,
-                                                        beaconBlockRoot,
-                                                        ApiStrategyKind.Best)
+              await contributionsFuts[item.subcommitteeIdx]
             except ValidatorApiError:
               error "Unable to get sync message contribution data", slot = slot,
                     beaconBlockRoot = shortLog(beaconBlockRoot)
@@ -290,8 +332,8 @@ proc produceAndPublishContributions(service: SyncCommitteeServiceRef,
 proc publishSyncMessagesAndContributions(service: SyncCommitteeServiceRef,
                                          slot: Slot,
-                                         duties: seq[SyncDutyAndProof]) {.
-     async.} =
+                                         duties: seq[SyncCommitteeDuty])
+                                        {.async.} =
   let
     vc = service.client
     startTime = Moment.now()


@@ -23,47 +23,52 @@ const
 type
   StartUpCommand {.pure.} = enum
-    pubsub, asl, asr, aggasr, scmsr, csr, lat, traceAll
+    pubsub, asl, asr, aggasr, scmsr, csr, lat, traceAll, localSimChecks

   LogTraceConf = object
     logFiles {.
       desc: "Specifies one or more log files",
       abbr: "f",
-      name: "log-file" }: seq[string]
+      name: "log-file" .}: seq[string]

     simDir {.
       desc: "Specifies path to eth2_network_simulation directory",
-      name: "sim-dir",
-      defaultValue: "" }: string
+      defaultValue: "",
+      name: "sim-dir" .}: string

     netDir {.
       desc: "Specifies path to network build directory",
-      name: "net-dir",
-      defaultValue: "" }: string
+      defaultValue: "",
+      name: "net-dir" .}: string

     logDir {.
       desc: "Specifies path with bunch of logs",
-      name: "log-dir",
-      defaultValue: "" }: string
+      defaultValue: "",
+      name: "log-dir" .}: string

     ignoreSerializationErrors {.
       desc: "Ignore serialization errors while parsing log files",
-      name: "ignore-errors",
-      defaultValue: true }: bool
+      defaultValue: true,
+      name: "ignore-errors" .}: bool

     dumpSerializationErrors {.
       desc: "Dump full serialization errors while parsing log files",
-      name: "dump-errors",
-      defaultValue: false }: bool
+      defaultValue: false ,
+      name: "dump-errors" .}: bool

     nodes {.
       desc: "Specifies node names which logs will be used",
-      name: "nodes" }: seq[string]
+      name: "nodes" .}: seq[string]

     allowedLag {.
       desc: "Allowed latency lag multiplier",
-      name: "lag",
-      defaultValue: 2.0 }: float
+      defaultValue: 2.0,
+      name: "lag" .}: float

+    constPreset {.
+      desc: "The const preset being used"
+      defaultValue: "mainnet"
+      name: "const-preset" .}: string
+
     case cmd {.command.}: StartUpCommand
     of pubsub:
@@ -82,6 +87,25 @@ type
       discard
     of traceAll:
       discard
+    of localSimChecks:
+      discard
+
+  IssuesGroup = ref object
+    name: string
+    fatalIssues: seq[string]
+    warnings: seq[string]
+
+  FileReport = object
+    categories: seq[IssuesGroup]
+
+  LogVisitor = object
+    visitLine: proc(msg, fullLine: string) {.gcsafe, raises: [Defect].}
+    produceReport: proc(): FileReport {.gcsafe, raises: [Defect].}
+
+  LogVisitorFactory = proc(): LogVisitor {.gcsafe, raises: [Defect].}
+
+  LogTracer = object
+    enabledVisitors: seq[LogVisitorFactory]

   GossipDirection = enum
     None, Incoming, Outgoing
@@ -142,6 +166,46 @@ type
     wallSlot: uint64
     signature: string

+  BlockSentMessage = object
+    # The message structure is as follows:
+    #[
+      {
+        "lvl": "NOT",
+        "ts": "2022-11-21 23:02:37.032+02:00",
+        "msg": "Block sent",
+        "topics": "beacval",
+        "blockRoot": "7a0836e4",
+        "blck": {
+          "slot": 15,
+          "proposer_index": 96,
+          "parent_root": "487372dc",
+          "state_root": "06699625",
+          "eth1data": {
+            "deposit_root": "6c3ff67871b79b7aecc7a125e7ec9ff857879a1c83e50513be113103acf8ca3f",
+            "deposit_count": 1024,
+            "block_hash": "4242424242424242424242424242424242424242424242424242424242424242"
+          },
+          "graffiti": "Nimbus/v22.10.1-eb6615-stateofus",
+          "proposer_slashings_len": 0,
+          "attester_slashings_len": 0,
+          "attestations_len": 4,
+          "deposits_len": 0,
+          "voluntary_exits_len": 0,
+          "sync_committee_participants": 32,
+          "block_number": 0,
+          "fee_recipient": ""
+        },
+        "signature": "b544f144",
+        "delay": "32ms3us"
+      }
+    ]#
+    # So far, logtrace needs only a single property of the block object.
+    # Feel free to add additional fields to be parsed as necessary.
+    blck: BlockShortLog
+
+  BlockShortLog = object
+    sync_committee_participants: int
+
   SyncCommitteeMessageObject = object
     slot: uint64
     beaconBlockRoot {.serializedFieldName: "beacon_block_root".}: string
@@ -216,6 +280,42 @@ type
     contributionSends: seq[ContributionSentMessage]
     contributionRecvs: TableRef[string, ContributionReceivedMessage]

+template noIssues: FileReport =
+  FileReport()
+
+template hasIssues(issuesCategories: varargs[IssuesGroup]): FileReport =
+  FileReport(categories: @issuesCategories)
+
+proc copyEntriesTo(src: FileReport, dst: var FileReport) =
+  for c in src.categories:
+    dst.categories.add c
+
+func isEmpty(r: FileReport): bool =
+  r.categories.len == 0
+
+proc printCategory(severityLevel: string, issues: openArray[string]) =
+  if issues.len > 0:
+    echo ""
+    echo severityLevel, ":"
+    for issue in issues:
+      echo "* ", issue
+
+proc print(r: FileReport) =
+  for category in r.categories:
+    echo "### ", category.name
+    printCategory "Fatal Issues", category.fatalIssues
+    printCategory "Warnings", category.warnings
+    echo ""
+
+template fatal(issuesGroup: IssuesGroup, msg: string) =
+  issuesGroup.fatalIssues.add msg
+
+template warning(issuesGroup: IssuesGroup, msg: string) =
+  issuesGroup.warnings.add msg
+
+proc new(T: type IssuesGroup, name: string): T =
+  T(name: name)
+
 proc readValue(reader: var JsonReader, value: var DateTime) =
   let s = reader.readValue(string)
   try:
@@ -455,6 +555,93 @@ proc readLogFileForSCMSRMessages(file: string, srnode: var SRSCNode,
   finally:
     stream.close()

+proc processFile(tracer: LogTracer, file: string): FileReport =
+  var stream = newFileStream(file)
+  let visitors = mapIt(tracer.enabledVisitors, it())
+
+  try:
+    while not (stream.atEnd()):
+      let line = stream.readLine()
+      var reader = JsonReader[DefaultFlavor].init(memoryInput(line))
+      for fieldName in reader.readObjectFields:
+        if fieldName == "msg":
+          let msg = reader.readValue(string)
+          for visitor in visitors:
+            visitor.visitLine(msg, line)
+          break
+        else:
+          reader.skipSingleJsValue()
+  except CatchableError as exc:
+    warn "Error reading data from file", file = file, errorMsg = exc.msg
+  finally:
+    stream.close()
+
+  for vistor in visitors:
+    let report = vistor.produceReport()
+    if not report.isEmpty:
+      report.copyEntriesTo result
+
+proc failedValidationsChecker: LogVisitorFactory =
+  return proc (): LogVisitor =
+    var failedValidations = initCountTable[string]()
+
+    LogVisitor(
+      visitLine: proc (msg, line: string) =
+        if msg.endsWith("failed validation"):
+          failedValidations.inc msg
+      ,
+      produceReport: proc (): FileReport =
+        if failedValidations.len > 0:
+          let issues = IssuesGroup.new "Failed Validations"
+          for msg, count in failedValidations:
+            issues.fatal(msg & " " & $count & " times")
+          return hasIssues(issues)
+        else:
+          return noIssues()
+    )
+
+proc syncAggregateChecker(constPreset: string): LogVisitorFactory =
+  return proc (): LogVisitor =
+    var totalBlocks = 0
+    var minSyncAggregate = 512
+    var syncAggregatesCombinedSize = 0
+
+    let minExpectedAggregateSize = if constPreset == "mainnet":
+      450
+    else:
+      20
+
+    LogVisitor(
+      visitLine: proc (msgLabel, line: string) =
+        if msgLabel == "Block sent":
+          let msg = try:
+            Json.decode(line, BlockSentMessage, allowUnknownFields = true)
+          except SerializationError as err:
+            echo "Failure to parse a 'Block sent' message:"
+            echo err.formatMsg("<msg>")
+            quit 1
+          let syncAggregateSize = msg.blck.sync_committee_participants
+          if syncAggregateSize != -1:
+            inc totalBlocks
+            syncAggregatesCombinedSize += syncAggregateSize
+            if minSyncAggregate > syncAggregateSize:
+              minSyncAggregate = syncAggregateSize
+      ,
+      produceReport: proc (): FileReport =
+        let avgSyncAggregateSize = syncAggregatesCombinedSize div totalBlocks
+        if avgSyncAggregateSize < minExpectedAggregateSize:
+          let issues = IssuesGroup.new "SyncAggregate Stats"
+          issues.fatal("Minimal sync aggregate size: " & $minSyncAggregate)
+          issues.fatal("Average sync aggregate size: " & $avgSyncAggregateSize)
+          return hasIssues(issues)
+        else:
+          return noIssues()
+    )
+
 proc readLogFileForSecondMessages(file: string, ignoreErrors = true,
                                   dumpErrors = false): seq[LogMessage] =
   var stream = newFileStream(file)
@@ -960,6 +1147,33 @@ proc run(conf: LogTraceConf) =
     runLatencyCheck(conf, logFiles, logNodes)
     runPubsub(conf, logFiles)
     runAttSend(conf, logFiles)
+  of StartUpCommand.localSimChecks:
+    runAggAttSendReceive(conf, logNodes)
+
+    # TODO All analysis types can be converted to the more efficient
+    #      LogVisitor style, so they can enabled together here.
+    #      See the discussion below for some potential caveats:
+    #      https://github.com/status-im/nimbus-eth2/pull/3583#pullrequestreview-941934055
+    var tracer = LogTracer()
+    tracer.enabledVisitors.add failedValidationsChecker()
+    tracer.enabledVisitors.add syncAggregateChecker(conf.constPreset)
+
+    var issuesDetected = false
+    for node in logNodes:
+      for logFile in node.logs:
+        let report = tracer.processFile(node.path / logFile)
+        if not report.isEmpty:
+          if not issuesDetected:
+            issuesDetected = true
+            echo "# Logtrace Report"
+          echo ""
+          echo "## ", logFile
+          echo ""
+          print report
+
+    quit ord(issuesDetected)

 when isMainModule:
   echo LogTraceHeader


@@ -811,7 +811,7 @@ dump_logs() {
 dump_logtrace() {
   if [[ "$ENABLE_LOGTRACE" == "1" ]]; then
-    find "${DATA_DIR}" -maxdepth 1 -type f -regex '.*/log[0-9]+.txt' | sed -e"s/${DATA_DIR}\//--nodes=/" | sort | xargs ./build/logtrace aggasr --log-dir="${DATA_DIR}" || true
+    find "${DATA_DIR}" -maxdepth 1 -type f -regex '.*/log[0-9]+.txt' | sed -e"s/${DATA_DIR}\//--nodes=/" | sort | xargs ./build/logtrace localSimChecks --log-dir="${DATA_DIR}" --const-preset=${CONST_PRESET} || true
   fi
 }

@@ -1 +1 @@
-Subproject commit e5b18fb710c3d0167ec79f3b892f5a7a1bc6d1a4
+Subproject commit b42cd3c0d9c433079f3c075365440392e4278b1b