Bugfix: The VC was producing invalid sync committee contributions (#4343)

The sync committee duties are no longer updated on every slot, but the
sync committee aggregator selection proofs were previously generated
during that duties update. As a result, the client ended up using stale
selection proofs (they must be generated at each slot).

The fix moves the selection proof generation into a separate function
that is properly executed on each slot.
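
For context, a selection proof is the validator's signature over the slot
and subcommittee index, and the spec's `is_sync_committee_aggregator`
check over that signature decides whether the validator aggregates in that
slot, which is why a proof computed once per duties update goes stale.
A minimal, self-contained sketch of that check (mirroring the Altair spec;
the constants assume the mainnet preset, and the helper name here is
illustrative, not the repo's actual spec module):

import nimcrypto/[hash, sha2], stew/endians2

const
  SYNC_COMMITTEE_SIZE = 512
  SYNC_COMMITTEE_SUBNET_COUNT = 4
  TARGET_AGGREGATORS_PER_SYNC_SUBCOMMITTEE = 16

proc isSyncCommitteeAggregator(selectionProof: openArray[byte]): bool =
  # A validator aggregates for a subcommittee iff the first 8 bytes of the
  # hash of its slot-specific selection proof are divisible by the modulus.
  let modulo = max(1, SYNC_COMMITTEE_SIZE div SYNC_COMMITTEE_SUBNET_COUNT div
                      TARGET_AGGREGATORS_PER_SYNC_SUBCOMMITTEE)
  let digest = sha256.digest(selectionProof)
  uint64.fromBytesLE(digest.data.toOpenArray(0, 7)) mod uint64(modulo) == 0

Because the signed message includes the slot, the result of this check
changes from slot to slot, so the proof cannot be cached across slots.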

Other changes:

* The logtrace tool has been enhanced with a framework for adding new,
  simpler log aggregation and analysis algorithms (see the sketch below).
* The default CI testnet simulation now ensures that the blocks in the
  network have reasonable sync committee participation.
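
To illustrate the new framework: an analysis algorithm is a
`LogVisitorFactory` returning a `LogVisitor` whose `visitLine` sees the
pre-parsed `msg` field of every log line and whose `produceReport` yields
a `FileReport` per file. A hypothetical extra checker, modeled on the
`failedValidationsChecker` added in this commit (`slashingChecker` and its
log message are made up for illustration):

proc slashingChecker(): LogVisitorFactory =
  return proc (): LogVisitor =
    var slashings = 0
    LogVisitor(
      visitLine: proc (msg, line: string) =
        # `msg` is the pre-parsed "msg" field; `line` is the full JSON line
        if msg == "Slashing detected":
          inc slashings
      ,
      produceReport: proc (): FileReport =
        if slashings > 0:
          let issues = IssuesGroup.new "Slashings"
          issues.fatal("Detected " & $slashings & " slashing events")
          return hasIssues(issues)
        else:
          return noIssues()
    )

Such a checker would then be enabled in the `localSimChecks` branch of
`run` with `tracer.enabledVisitors.add slashingChecker()`.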
zah 2022-11-24 09:46:35 +02:00 committed by GitHub
parent 44cab3a4cc
commit d07113767d
7 changed files with 335 additions and 136 deletions

View File

@@ -216,7 +216,7 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
           # state, it's not possible that it will sit on the sync committee.
           # Since this API must omit results for validators that don't have
           # duties, we can simply ingnore this requested index.
-          # (we won't bother to validate it agains a more recent state).
+          # (we won't bother to validate it against a more recent state).
           continue
         let requestedValidatorPubkey =
@@ -709,15 +709,15 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
         let res = slot.get()
         if res.isErr():
           return RestApiResponse.jsonError(Http400, InvalidSlotValueError,
                                            $res.error())
         let rslot = res.get()
         if epoch(rslot) < node.dag.cfg.ALTAIR_FORK_EPOCH:
           return RestApiResponse.jsonError(Http400,
                                            SlotFromTheIncorrectForkError)
         rslot
     if qslot <= node.dag.finalizedHead.slot:
       return RestApiResponse.jsonError(Http400, InvalidSlotValueError,
                                        "Slot already finalized")
     let qindex =
       if subcommittee_index.isNone():
         return RestApiResponse.jsonError(Http400,
@@ -726,8 +726,8 @@ proc installValidatorApiHandlers*(router: var RestRouter, node: BeaconNode) =
         let res = subcommittee_index.get()
         if res.isErr():
           return RestApiResponse.jsonError(Http400,
                                            InvalidSubCommitteeIndexValueError,
                                            $res.error())
         res.get()
     let qroot =
       if beacon_block_root.isNone():

View File

@@ -88,11 +88,6 @@ type
     validator_index*: ValidatorIndex
     validator_sync_committee_index*: IndexInSyncCommittee

-  SyncDutyAndProof* = object
-    epoch*: Epoch
-    data*: SyncCommitteeDuty
-    slotSig*: Option[ValidatorSig]
-
   SyncCommitteeSubscriptionInfo* = object
     validator_index*: ValidatorIndex
     validator_sync_committee_indices*: seq[IndexInSyncCommittee]
@@ -129,7 +124,7 @@ type
     duties*: Table[Epoch, DutyAndProof]

   EpochSyncDuties* = object
-    duties*: Table[Epoch, SyncDutyAndProof]
+    duties*: Table[Epoch, SyncCommitteeDuty]

   RestBeaconNodeStatus* {.pure.} = enum
     Uninitalized, Offline, Incompatible, NotSynced, Online
@ -197,8 +192,6 @@ type
const const
DefaultDutyAndProof* = DutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)) DefaultDutyAndProof* = DutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64))
DefaultSyncDutyAndProof* =
SyncDutyAndProof(epoch: Epoch(0xFFFF_FFFF_FFFF_FFFF'u64))
SlotDuration* = int64(SECONDS_PER_SLOT).seconds SlotDuration* = int64(SECONDS_PER_SLOT).seconds
OneThirdDuration* = int64(SECONDS_PER_SLOT).seconds div INTERVALS_PER_SLOT OneThirdDuration* = int64(SECONDS_PER_SLOT).seconds div INTERVALS_PER_SLOT
AllBeaconNodeRoles* = { AllBeaconNodeRoles* = {
@@ -308,9 +301,6 @@ proc stop*(csr: ClientServiceRef) {.async.} =
 proc isDefault*(dap: DutyAndProof): bool =
   dap.epoch == Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)

-proc isDefault*(sdap: SyncDutyAndProof): bool =
-  sdap.epoch == Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)
-
 proc isDefault*(prd: ProposedData): bool =
   prd.epoch == Epoch(0xFFFF_FFFF_FFFF_FFFF'u64)
@@ -397,11 +387,6 @@ proc init*(t: typedesc[DutyAndProof], epoch: Epoch, dependentRoot: Eth2Digest,
   DutyAndProof(epoch: epoch, dependentRoot: dependentRoot, data: duty,
                slotSig: slotSig)

-proc init*(t: typedesc[SyncDutyAndProof], epoch: Epoch,
-           duty: SyncCommitteeDuty,
-           slotSig: Option[ValidatorSig]): SyncDutyAndProof =
-  SyncDutyAndProof(epoch: epoch, data: duty, slotSig: slotSig)
-
 proc init*(t: typedesc[ProposedData], epoch: Epoch, dependentRoot: Eth2Digest,
            data: openArray[ProposerTask]): ProposedData =
   ProposedData(epoch: epoch, dependentRoot: dependentRoot, duties: @data)
@@ -432,9 +417,9 @@ proc getAttesterDutiesForSlot*(vc: ValidatorClientRef,
   res

 proc getSyncCommitteeDutiesForSlot*(vc: ValidatorClientRef,
-                                    slot: Slot): seq[SyncDutyAndProof] =
-  ## Returns all `SyncDutyAndProof` for the given `slot`.
-  var res: seq[SyncDutyAndProof]
+                                    slot: Slot): seq[SyncCommitteeDuty] =
+  ## Returns all `SyncCommitteeDuty` for the given `slot`.
+  var res: seq[SyncCommitteeDuty]
   let epoch = slot.epoch()
   for key, item in mpairs(vc.syncCommitteeDuties):
     item.duties.withValue(epoch, duty):
@@ -494,10 +479,10 @@ proc syncMembersSubscriptionInfoForEpoch*(
     item.duties.withValue(epoch, epochDuties):
       if not initialized:
-        cur.validator_index = epochDuties.data.validator_index
+        cur.validator_index = epochDuties.validator_index
         initialized = true
       cur.validator_sync_committee_indices.add(
-        epochDuties.data.validator_sync_committee_index)
+        epochDuties.validator_sync_committee_index)
   if initialized:
     res.add cur

View File

@@ -296,73 +296,31 @@ proc pollForSyncCommitteeDuties*(vc: ValidatorClientRef,
     fork = vc.forkAtEpoch(epoch)
-    genesisRoot = vc.beaconGenesis.genesis_validators_root

   let addOrReplaceItems =
     block:
       var alreadyWarned = false
       var res: seq[tuple[epoch: Epoch, duty: SyncCommitteeDuty]]
       for duty in relevantDuties:
-        let map = vc.syncCommitteeDuties.getOrDefault(duty.pubkey)
-        let epochDuty = map.duties.getOrDefault(epoch, DefaultSyncDutyAndProof)
-        if epochDuty.isDefault():
-          info "Received new sync committee duty", duty, epoch
-          res.add((epoch, duty))
-        else:
-          if epochDuty.data != duty:
-            if not(alreadyWarned):
-              info "Sync committee duties re-organization", duty, epoch
-              alreadyWarned = true
-            res.add((epoch, duty))
+        var dutyFound = false
+
+        vc.syncCommitteeDuties.withValue(duty.pubkey, map):
+          map.duties.withValue(epoch, epochDuty):
+            if epochDuty[] != duty:
+              dutyFound = true
+
+        if dutyFound and not alreadyWarned:
+          info "Sync committee duties re-organization", duty, epoch
+          alreadyWarned = true
+
+        res.add((epoch, duty))
       res

   if len(addOrReplaceItems) > 0:
-    var pendingRequests: seq[Future[SignatureResult]]
-    var validators: seq[AttachedValidator]
-    let sres = vc.getCurrentSlot()
-    if sres.isSome():
-      for item in addOrReplaceItems:
-        let validator = vc.attachedValidators[].getValidator(item.duty.pubkey)
-        let future = validator.getSyncCommitteeSelectionProof(
-          fork,
-          genesisRoot,
-          sres.get(),
-          getSubcommitteeIndex(item.duty.validator_sync_committee_index))
-        pendingRequests.add(future)
-        validators.add(validator)
-
-      try:
-        await allFutures(pendingRequests)
-      except CancelledError as exc:
-        var pendingCancel: seq[Future[void]]
-        for future in pendingRequests:
-          if not(future.finished()):
-            pendingCancel.add(future.cancelAndWait())
-        await allFutures(pendingCancel)
-        raise exc
-
-      for index, fut in pendingRequests:
-        let item = addOrReplaceItems[index]
-        let dap =
-          if fut.done():
-            let sigRes = fut.read()
-            if sigRes.isErr():
-              error "Unable to create slot signature using remote signer",
-                    validator = shortLog(validators[index]),
-                    error_msg = sigRes.error()
-              SyncDutyAndProof.init(item.epoch, item.duty,
-                                    none[ValidatorSig]())
-            else:
-              SyncDutyAndProof.init(item.epoch, item.duty,
-                                    some(sigRes.get()))
-          else:
-            SyncDutyAndProof.init(item.epoch, item.duty,
-                                  none[ValidatorSig]())
-
-        var validatorDuties =
-          vc.syncCommitteeDuties.getOrDefault(item.duty.pubkey)
-        validatorDuties.duties[item.epoch] = dap
-        vc.syncCommitteeDuties[item.duty.pubkey] = validatorDuties
+    for epoch, duty in items(addOrReplaceItems):
+      var validatorDuties =
+        vc.syncCommitteeDuties.getOrDefault(duty.pubkey)
+      validatorDuties.duties[epoch] = duty
+      vc.syncCommitteeDuties[duty.pubkey] = validatorDuties

   return len(addOrReplaceItems)

View File

@ -27,16 +27,16 @@ type
proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef, proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
slot: Slot, beaconBlockRoot: Eth2Digest, slot: Slot, beaconBlockRoot: Eth2Digest,
duty: SyncDutyAndProof): Future[bool] {. duty: SyncCommitteeDuty): Future[bool] {.
async.} = async.} =
let let
vc = service.client vc = service.client
fork = vc.forkAtEpoch(slot.epoch) fork = vc.forkAtEpoch(slot.epoch)
genesisValidatorsRoot = vc.beaconGenesis.genesis_validators_root genesisValidatorsRoot = vc.beaconGenesis.genesis_validators_root
vindex = duty.data.validator_index vindex = duty.validator_index
subcommitteeIdx = getSubcommitteeIndex( subcommitteeIdx = getSubcommitteeIndex(
duty.data.validator_sync_committee_index) duty.validator_sync_committee_index)
validator = vc.getValidator(duty.data.pubkey).valueOr: return false validator = vc.getValidator(duty.pubkey).valueOr: return false
message = message =
block: block:
let res = await getSyncCommitteeMessage(validator, fork, let res = await getSyncCommitteeMessage(validator, fork,
@ -93,16 +93,15 @@ proc serveSyncCommitteeMessage*(service: SyncCommitteeServiceRef,
proc produceAndPublishSyncCommitteeMessages(service: SyncCommitteeServiceRef, proc produceAndPublishSyncCommitteeMessages(service: SyncCommitteeServiceRef,
slot: Slot, slot: Slot,
beaconBlockRoot: Eth2Digest, beaconBlockRoot: Eth2Digest,
duties: seq[SyncDutyAndProof]) {. duties: seq[SyncCommitteeDuty])
async.} = {.async.} =
let vc = service.client let vc = service.client
let pendingSyncCommitteeMessages = let pendingSyncCommitteeMessages =
block: block:
var res: seq[Future[bool]] var res: seq[Future[bool]]
for duty in duties: for duty in duties:
debug "Serving sync message duty", duty = duty.data, debug "Serving sync message duty", duty, epoch = slot.epoch()
epoch = slot.epoch()
res.add(service.serveSyncCommitteeMessage(slot, res.add(service.serveSyncCommitteeMessage(slot,
beaconBlockRoot, beaconBlockRoot,
duty)) duty))
@@ -203,38 +202,81 @@ proc serveContributionAndProof*(service: SyncCommitteeServiceRef,
 proc produceAndPublishContributions(service: SyncCommitteeServiceRef,
                                     slot: Slot,
                                     beaconBlockRoot: Eth2Digest,
-                                    duties: seq[SyncDutyAndProof]) {.async.} =
+                                    duties: seq[SyncCommitteeDuty]) {.async.} =
   let
     vc = service.client
-    contributionItems =
-      block:
-        var res: seq[ContributionItem]
-        for duty in duties:
-          let validator = vc.attachedValidators[].getValidator(duty.data.pubkey)
-          if not isNil(validator):
-            if duty.slotSig.isSome:
-              template slotSignature: auto = duty.slotSig.get
-              if is_sync_committee_aggregator(slotSignature):
-                res.add(ContributionItem(
-                  aggregator_index: uint64(duty.data.validator_index),
-                  selection_proof: slotSignature,
-                  validator: validator,
-                  subcommitteeIdx: getSubcommitteeIndex(
-                    duty.data.validator_sync_committee_index)
-                ))
-        res
+    epoch = slot.epoch
+    fork = vc.forkAtEpoch(epoch)

-  if len(contributionItems) > 0:
+  var slotSignatureReqs: seq[Future[SignatureResult]]
+  var validators: seq[(AttachedValidator, SyncSubcommitteeIndex)]
+
+  for duty in duties:
+    let validator = vc.attachedValidators[].getValidator(duty.pubkey)
+    if isNil(validator): continue # This should never happen
+    let
+      subCommitteeIdx =
+        getSubcommitteeIndex(duty.validator_sync_committee_index)
+      future = validator.getSyncCommitteeSelectionProof(
+        fork,
+        vc.beaconGenesis.genesis_validators_root,
+        slot,
+        subCommitteeIdx)
+    slotSignatureReqs.add(future)
+    validators.add((validator, subCommitteeIdx))
+
+  try:
+    await allFutures(slotSignatureReqs)
+  except CancelledError as exc:
+    var pendingCancel: seq[Future[void]]
+    for future in slotSignatureReqs:
+      if not(future.finished()):
+        pendingCancel.add(future.cancelAndWait())
+    await allFutures(pendingCancel)
+    raise exc
+
+  var
+    contributionsFuts: array[SYNC_COMMITTEE_SUBNET_COUNT,
+                             Future[SyncCommitteeContribution]]
+
+  let validatorContributions = block:
+    var res: seq[ContributionItem]
+    for idx, fut in slotSignatureReqs:
+      if fut.done:
+        let
+          sigRes = fut.read
+          validator = validators[idx][0]
+          subCommitteeIdx = validators[idx][1]
+        if sigRes.isErr:
+          error "Unable to create slot signature using remote signer",
+                validator = shortLog(validator),
+                error_msg = sigRes.error()
+        elif validator.index.isSome and
+             is_sync_committee_aggregator(sigRes.get):
+          res.add ContributionItem(
+            aggregator_index: uint64(validator.index.get),
+            selection_proof: sigRes.get,
+            validator: validator,
+            subcommitteeIdx: subCommitteeIdx)
+
+          if isNil(contributionsFuts[subCommitteeIdx]):
+            contributionsFuts[int subCommitteeIdx] =
+              vc.produceSyncCommitteeContribution(
+                slot,
+                subCommitteeIdx,
+                beaconBlockRoot,
+                ApiStrategyKind.Best)
+    res
+
+  if len(validatorContributions) > 0:
     let pendingAggregates =
       block:
         var res: seq[Future[bool]]
-        for item in contributionItems:
+        for item in validatorContributions:
           let aggContribution =
             try:
-              await vc.produceSyncCommitteeContribution(slot,
-                                                        item.subcommitteeIdx,
-                                                        beaconBlockRoot,
-                                                        ApiStrategyKind.Best)
+              await contributionsFuts[item.subcommitteeIdx]
             except ValidatorApiError:
               error "Unable to get sync message contribution data", slot = slot,
                     beaconBlockRoot = shortLog(beaconBlockRoot)
@@ -290,8 +332,8 @@ proc produceAndPublishContributions(service: SyncCommitteeServiceRef,
 proc publishSyncMessagesAndContributions(service: SyncCommitteeServiceRef,
                                          slot: Slot,
-                                         duties: seq[SyncDutyAndProof]) {.
-     async.} =
+                                         duties: seq[SyncCommitteeDuty])
+                                        {.async.} =
   let
     vc = service.client
     startTime = Moment.now()

View File

@@ -23,47 +23,52 @@ const
 type
   StartUpCommand {.pure.} = enum
-    pubsub, asl, asr, aggasr, scmsr, csr, lat, traceAll
+    pubsub, asl, asr, aggasr, scmsr, csr, lat, traceAll, localSimChecks

   LogTraceConf = object
     logFiles {.
       desc: "Specifies one or more log files",
       abbr: "f",
-      name: "log-file" }: seq[string]
+      name: "log-file" .}: seq[string]

     simDir {.
       desc: "Specifies path to eth2_network_simulation directory",
-      name: "sim-dir",
-      defaultValue: "" }: string
+      defaultValue: "",
+      name: "sim-dir" .}: string

     netDir {.
       desc: "Specifies path to network build directory",
-      name: "net-dir",
-      defaultValue: "" }: string
+      defaultValue: "",
+      name: "net-dir" .}: string

     logDir {.
       desc: "Specifies path with bunch of logs",
-      name: "log-dir",
-      defaultValue: "" }: string
+      defaultValue: "",
+      name: "log-dir" .}: string

     ignoreSerializationErrors {.
       desc: "Ignore serialization errors while parsing log files",
-      name: "ignore-errors",
-      defaultValue: true }: bool
+      defaultValue: true,
+      name: "ignore-errors" .}: bool

     dumpSerializationErrors {.
       desc: "Dump full serialization errors while parsing log files",
-      name: "dump-errors",
-      defaultValue: false }: bool
+      defaultValue: false,
+      name: "dump-errors" .}: bool

     nodes {.
       desc: "Specifies node names which logs will be used",
-      name: "nodes" }: seq[string]
+      name: "nodes" .}: seq[string]

     allowedLag {.
       desc: "Allowed latency lag multiplier",
-      name: "lag",
-      defaultValue: 2.0 }: float
+      defaultValue: 2.0,
+      name: "lag" .}: float
+
+    constPreset {.
+      desc: "The const preset being used"
+      defaultValue: "mainnet"
+      name: "const-preset" .}: string

     case cmd {.command.}: StartUpCommand
     of pubsub:
@ -82,6 +87,25 @@ type
discard discard
of traceAll: of traceAll:
discard discard
of localSimChecks:
discard
IssuesGroup = ref object
name: string
fatalIssues: seq[string]
warnings: seq[string]
FileReport = object
categories: seq[IssuesGroup]
LogVisitor = object
visitLine: proc(msg, fullLine: string) {.gcsafe, raises: [Defect].}
produceReport: proc(): FileReport {.gcsafe, raises: [Defect].}
LogVisitorFactory = proc(): LogVisitor {.gcsafe, raises: [Defect].}
LogTracer = object
enabledVisitors: seq[LogVisitorFactory]
GossipDirection = enum GossipDirection = enum
None, Incoming, Outgoing None, Incoming, Outgoing
@ -142,6 +166,46 @@ type
wallSlot: uint64 wallSlot: uint64
signature: string signature: string
BlockSentMessage = object
# The message structure is as follows:
#[
{
"lvl": "NOT",
"ts": "2022-11-21 23:02:37.032+02:00",
"msg": "Block sent",
"topics": "beacval",
"blockRoot": "7a0836e4",
"blck": {
"slot": 15,
"proposer_index": 96,
"parent_root": "487372dc",
"state_root": "06699625",
"eth1data": {
"deposit_root": "6c3ff67871b79b7aecc7a125e7ec9ff857879a1c83e50513be113103acf8ca3f",
"deposit_count": 1024,
"block_hash": "4242424242424242424242424242424242424242424242424242424242424242"
},
"graffiti": "Nimbus/v22.10.1-eb6615-stateofus",
"proposer_slashings_len": 0,
"attester_slashings_len": 0,
"attestations_len": 4,
"deposits_len": 0,
"voluntary_exits_len": 0,
"sync_committee_participants": 32,
"block_number": 0,
"fee_recipient": ""
},
"signature": "b544f144",
"delay": "32ms3us"
}
]#
# So far, logtrace needs only a single property of the block object.
# Feel free to add additional fields to be parsed as necessary.
blck: BlockShortLog
BlockShortLog = object
sync_committee_participants: int
SyncCommitteeMessageObject = object SyncCommitteeMessageObject = object
slot: uint64 slot: uint64
beaconBlockRoot {.serializedFieldName: "beacon_block_root".}: string beaconBlockRoot {.serializedFieldName: "beacon_block_root".}: string
@ -216,6 +280,42 @@ type
contributionSends: seq[ContributionSentMessage] contributionSends: seq[ContributionSentMessage]
contributionRecvs: TableRef[string, ContributionReceivedMessage] contributionRecvs: TableRef[string, ContributionReceivedMessage]
template noIssues: FileReport =
FileReport()
template hasIssues(issuesCategories: varargs[IssuesGroup]): FileReport =
FileReport(categories: @issuesCategories)
proc copyEntriesTo(src: FileReport, dst: var FileReport) =
for c in src.categories:
dst.categories.add c
func isEmpty(r: FileReport): bool =
r.categories.len == 0
proc printCategory(severityLevel: string, issues: openArray[string]) =
if issues.len > 0:
echo ""
echo severityLevel, ":"
for issue in issues:
echo "* ", issue
proc print(r: FileReport) =
for category in r.categories:
echo "### ", category.name
printCategory "Fatal Issues", category.fatalIssues
printCategory "Warnings", category.warnings
echo ""
template fatal(issuesGroup: IssuesGroup, msg: string) =
issuesGroup.fatalIssues.add msg
template warning(issuesGroup: IssuesGroup, msg: string) =
issuesGroup.warnings.add msg
proc new(T: type IssuesGroup, name: string): T =
T(name: name)
proc readValue(reader: var JsonReader, value: var DateTime) = proc readValue(reader: var JsonReader, value: var DateTime) =
let s = reader.readValue(string) let s = reader.readValue(string)
try: try:
@@ -455,6 +555,93 @@ proc readLogFileForSCMSRMessages(file: string, srnode: var SRSCNode,
   finally:
     stream.close()

+proc processFile(tracer: LogTracer, file: string): FileReport =
+  var stream = newFileStream(file)
+  let visitors = mapIt(tracer.enabledVisitors, it())
+
+  try:
+    while not (stream.atEnd()):
+      let line = stream.readLine()
+      var reader = JsonReader[DefaultFlavor].init(memoryInput(line))
+      for fieldName in reader.readObjectFields:
+        if fieldName == "msg":
+          let msg = reader.readValue(string)
+          for visitor in visitors:
+            visitor.visitLine(msg, line)
+          break
+        else:
+          reader.skipSingleJsValue()
+  except CatchableError as exc:
+    warn "Error reading data from file", file = file, errorMsg = exc.msg
+  finally:
+    stream.close()
+
+  for vistor in visitors:
+    let report = vistor.produceReport()
+    if not report.isEmpty:
+      report.copyEntriesTo result
+
+proc failedValidationsChecker: LogVisitorFactory =
+  return proc (): LogVisitor =
+    var failedValidations = initCountTable[string]()
+
+    LogVisitor(
+      visitLine: proc (msg, line: string) =
+        if msg.endsWith("failed validation"):
+          failedValidations.inc msg
+      ,
+      produceReport: proc (): FileReport =
+        if failedValidations.len > 0:
+          let issues = IssuesGroup.new "Failed Validations"
+          for msg, count in failedValidations:
+            issues.fatal(msg & " " & $count & " times")
+          return hasIssues(issues)
+        else:
+          return noIssues()
+    )
+
+proc syncAggregateChecker(constPreset: string): LogVisitorFactory =
+  return proc (): LogVisitor =
+    var totalBlocks = 0
+    var minSyncAggregate = 512
+    var syncAggregatesCombinedSize = 0
+
+    let minExpectedAggregateSize = if constPreset == "mainnet":
+      450
+    else:
+      20
+
+    LogVisitor(
+      visitLine: proc (msgLabel, line: string) =
+        if msgLabel == "Block sent":
+          let msg = try:
+            Json.decode(line, BlockSentMessage, allowUnknownFields = true)
+          except SerializationError as err:
+            echo "Failure to parse a 'Block sent' message:"
+            echo err.formatMsg("<msg>")
+            quit 1
+          let syncAggregateSize = msg.blck.sync_committee_participants
+          if syncAggregateSize != -1:
+            inc totalBlocks
+            syncAggregatesCombinedSize += syncAggregateSize
+            if minSyncAggregate > syncAggregateSize:
+              minSyncAggregate = syncAggregateSize
+      ,
+      produceReport: proc (): FileReport =
+        let avgSyncAggregateSize = syncAggregatesCombinedSize div totalBlocks
+        if avgSyncAggregateSize < minExpectedAggregateSize:
+          let issues = IssuesGroup.new "SyncAggregate Stats"
+          issues.fatal("Minimal sync aggregate size: " & $minSyncAggregate)
+          issues.fatal("Average sync aggregate size: " & $avgSyncAggregateSize)
+          return hasIssues(issues)
+        else:
+          return noIssues()
+    )
+
 proc readLogFileForSecondMessages(file: string, ignoreErrors = true,
                                   dumpErrors = false): seq[LogMessage] =
   var stream = newFileStream(file)
@@ -960,6 +1147,33 @@ proc run(conf: LogTraceConf) =
     runLatencyCheck(conf, logFiles, logNodes)
     runPubsub(conf, logFiles)
     runAttSend(conf, logFiles)
+  of StartUpCommand.localSimChecks:
+    runAggAttSendReceive(conf, logNodes)
+
+    # TODO All analysis types can be converted to the more efficient
+    # LogVisitor style, so they can be enabled together here.
+    # See the discussion below for some potential caveats:
+    # https://github.com/status-im/nimbus-eth2/pull/3583#pullrequestreview-941934055
+    var tracer = LogTracer()
+    tracer.enabledVisitors.add failedValidationsChecker()
+    tracer.enabledVisitors.add syncAggregateChecker(conf.constPreset)
+
+    var issuesDetected = false
+    for node in logNodes:
+      for logFile in node.logs:
+        let report = tracer.processFile(node.path / logFile)
+        if not report.isEmpty:
+          if not issuesDetected:
+            issuesDetected = true
+            echo "# Logtrace Report"
+          echo ""
+          echo "## ", logFile
+          echo ""
+          print report
+
+    quit ord(issuesDetected)

 when isMainModule:
   echo LogTraceHeader

View File

@ -811,7 +811,7 @@ dump_logs() {
dump_logtrace() { dump_logtrace() {
if [[ "$ENABLE_LOGTRACE" == "1" ]]; then if [[ "$ENABLE_LOGTRACE" == "1" ]]; then
find "${DATA_DIR}" -maxdepth 1 -type f -regex '.*/log[0-9]+.txt' | sed -e"s/${DATA_DIR}\//--nodes=/" | sort | xargs ./build/logtrace aggasr --log-dir="${DATA_DIR}" || true find "${DATA_DIR}" -maxdepth 1 -type f -regex '.*/log[0-9]+.txt' | sed -e"s/${DATA_DIR}\//--nodes=/" | sort | xargs ./build/logtrace localSimChecks --log-dir="${DATA_DIR}" --const-preset=${CONST_PRESET} || true
fi fi
} }

@@ -1 +1 @@
-Subproject commit e5b18fb710c3d0167ec79f3b892f5a7a1bc6d1a4
+Subproject commit b42cd3c0d9c433079f3c075365440392e4278b1b