add aggregated attestation tracing to logtrace and enable it in Jenkins (#2766)

* add aggregated attestation tracing to logtrace and enable it in Jenkins CI

* use a slightly less cryptic acronym than aasr

* mostly, Nimbus and the eth2 spec use "aggregate attestation", not "aggregated attestation"
tersec 2021-08-06 21:25:30 +00:00 committed by GitHub
parent 780a9cfd42
commit 61825f4979
3 changed files with 167 additions and 98 deletions
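In effect, the new aggasr subcommand cross-checks the per-node logs: every "Aggregated attestation sent" entry from one node should have a matching "Aggregate received" entry, keyed by signature, in every other node's log, and any misses are reported per sender. A rough manual invocation against a finished local testnet might look like the sketch below (data directory and log file names are illustrative; the CI wiring in the diffs that follow assembles the same command automatically):

# Illustrative manual run of the new analysis; paths are examples only
./build/logtrace aggasr --log-dir="local_testnet0_data" \
    --nodes=log0.txt --nodes=log1.txt --nodes=log2.txt --nodes=log3.txt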

Jenkinsfile

@@ -57,12 +57,12 @@ def runStages() {
// EXECUTOR_NUMBER will be 0 or 1, since we have 2 executors per Jenkins node
sh """#!/bin/bash
set -e
./scripts/launch_local_testnet.sh --preset minimal --nodes 4 --stop-at-epoch 5 --log-level DEBUG --disable-htop \
./scripts/launch_local_testnet.sh --preset minimal --nodes 4 --stop-at-epoch 5 --log-level DEBUG --disable-htop --enable-logtrace \
--data-dir local_testnet0_data --base-port \$(( 9000 + EXECUTOR_NUMBER * 100 )) --base-rpc-port \
\$(( 7000 + EXECUTOR_NUMBER * 100 )) --base-metrics-port \$(( 8008 + EXECUTOR_NUMBER * 100 )) --timeout 600 \
--kill-old-processes \
-- --verify-finalization --discv5:no
./scripts/launch_local_testnet.sh --nodes 4 --stop-at-epoch 5 --log-level DEBUG --disable-htop \
./scripts/launch_local_testnet.sh --nodes 4 --stop-at-epoch 5 --log-level DEBUG --disable-htop --enable-logtrace \
--data-dir local_testnet1_data --base-port \$(( 9000 + EXECUTOR_NUMBER * 100 )) --base-rpc-port \
\$(( 7000 + EXECUTOR_NUMBER * 100 )) --base-metrics-port \$(( 8008 + EXECUTOR_NUMBER * 100 )) --timeout 2400 \
--kill-old-processes \

logtrace.nim

@@ -1,5 +1,5 @@
# beacon_chain
# Copyright (c) 2018-2020 Status Research & Development GmbH
# Copyright (c) 2018-2021 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
@@ -9,154 +9,159 @@ import confutils, json, times, streams, os, strutils, options, chronicles,
import json_serialization
const
LogTraceName* = "Beacon-Chain LogTrace Tool"
LogTraceMajor*: int = 0
LogTraceMinor*: int = 0
LogTracePatch*: int = 4
LogTraceVersion* = $LogTraceMajor & "." & $LogTraceMinor & "." &
LogTraceName = "Beacon-Chain LogTrace Tool"
LogTraceMajor: int = 0
LogTraceMinor: int = 0
LogTracePatch: int = 4
LogTraceVersion = $LogTraceMajor & "." & $LogTraceMinor & "." &
$LogTracePatch
LogTraceCopyright* = "Copyright(C) 2020" &
LogTraceCopyright = "Copyright(C) 2021" &
" Status Research & Development GmbH"
LogTraceHeader* = LogTraceName & ", Version " & LogTraceVersion &
LogTraceHeader = LogTraceName & ", Version " & LogTraceVersion &
" [" & hostOS & ": " & hostCPU & "]\r\n" &
LogTraceCopyright & "\r\n"
type
StartUpCommand* {.pure.} = enum
pubsub, asl, asr, lat
StartUpCommand {.pure.} = enum
pubsub, asl, asr, aggasr, lat
LogTraceConf* = object
logFiles* {.
LogTraceConf = object
logFiles {.
desc: "Specifies one or more log files",
abbr: "f",
name: "log-file" }: seq[string]
simDir* {.
simDir {.
desc: "Specifies path to eth2_network_simulation directory",
name: "sim-dir",
defaultValue: "" }: string
netDir* {.
netDir {.
desc: "Specifies path to network build directory",
name: "net-dir",
defaultValue: "" }: string
logDir* {.
logDir {.
desc: "Specifies path with bunch of logs",
name: "log-dir",
defaultValue: "" }: string
ignoreSerializationErrors* {.
ignoreSerializationErrors {.
desc: "Ignore serialization errors while parsing log files",
name: "ignore-errors",
defaultValue: true }: bool
dumpSerializationErrors* {.
dumpSerializationErrors {.
desc: "Dump full serialization errors while parsing log files",
name: "dump-errors",
defaultValue: false }: bool
nodes* {.
nodes {.
desc: "Specifies node names which logs will be used",
name: "nodes" }: seq[string]
allowedLag* {.
allowedLag {.
desc: "Allowed latency lag multiplier",
name: "lag",
defaultValue: 2.0 }: float
case cmd* {.command.}: StartUpCommand
case cmd {.command.}: StartUpCommand
of pubsub:
discard
of asl:
discard
of asr:
discard
of aggasr:
discard
of lat:
discard
GossipDirection* = enum
GossipDirection = enum
None, Incoming, Outgoing
NodeDirectory* = object
name*: string
path*: string
logs*: seq[string]
NodeDirectory = object
name: string
path: string
logs: seq[string]
LogMessage* = object of RootObj
level* {.serializedFieldName: "lvl" .}: string
timestamp* {.serializedFieldName: "ts" .}: DateTime
msg*: string
topics*: string
tid*: int
LogMessage = object of RootObj
level {.serializedFieldName: "lvl" .}: string
timestamp {.serializedFieldName: "ts" .}: DateTime
msg: string
topics: string
tid: int
SlotStartMessage* = object of LogMessage
beaconTime*: uint64
finalizedEpoch*: uint64
finalizedRoot*: string
finalizedSlot*: uint64
headEpoch*: uint64
headRoot*: string
headSlot*: uint64
lastSlot*: uint64
peers*: uint64
scheduledSlot*: uint64
SlotStartMessage = object of LogMessage
beaconTime: uint64
finalizedEpoch: uint64
finalizedRoot: string
finalizedSlot: uint64
headEpoch: uint64
headRoot: string
headSlot: uint64
lastSlot: uint64
peers: uint64
scheduledSlot: uint64
AttestationDataObject* = object
slot*: uint64
index*: uint64
beaconBlockRoot* {.serializedFieldName: "beacon_block_root".}: string
sourceEpoch* {.serializedFieldName: "source_epoch".}: uint64
sourceRoot* {.serializedFieldName: "source_root".}: string
targetEpoch* {.serializedFieldName: "target_epoch".}: uint64
targetRoot* {.serializedFieldName: "target_root".}: string
AttestationDataObject = object
slot: uint64
index: uint64
beaconBlockRoot {.serializedFieldName: "beacon_block_root".}: string
sourceEpoch {.serializedFieldName: "source_epoch".}: uint64
sourceRoot {.serializedFieldName: "source_root".}: string
targetEpoch {.serializedFieldName: "target_epoch".}: uint64
targetRoot {.serializedFieldName: "target_root".}: string
AttestationObject* = object
aggregationBits* {.serializedFieldName: "aggregation_bits".}: string
data*: AttestationDataObject
signature*: string
AttestationObject = object
aggregationBits {.serializedFieldName: "aggregation_bits".}: string
data: AttestationDataObject
signature: string
AttestationSentMessage* = object of LogMessage
attestation*: AttestationObject
indexInCommittee*: uint64
validator*: string
AttestationSentMessage = object of LogMessage
attestation: AttestationObject
indexInCommittee: uint64
validator: string
AttestationReceivedMessage* = object of LogMessage
attestation*: AttestationObject
head*: string
wallSlot*: uint64
pcs*: string
AttestationReceivedMessage = object of LogMessage
attestation: AttestationObject
head: string
wallSlot: uint64
pcs: string
GossipMessage* = object
kind*: GossipDirection
id*: string
datetime*: DateTime
processed*: bool
AggregatedAttestationSentMessage = object of LogMessage
attestation: AttestationObject
validator: string
signature: string
aggregationSlot: uint64
SaMessageType* {.pure.} = enum
AggregatedAttestationReceivedMessage = object of LogMessage
aggregate: AttestationObject
wallSlot: uint64
signature: string
GossipMessage = object
kind: GossipDirection
id: string
datetime: DateTime
processed: bool
SaMessageType {.pure.} = enum
AttestationSent, SlotStart
SRMessageType* {.pure.} = enum
AttestationSent, AttestationReceived
SlotAttMessage* = object
case kind*: SaMessageType
SlotAttMessage = object
case kind: SaMessageType
of SaMessageType.AttestationSent:
asmsg*: AttestationSentMessage
asmsg: AttestationSentMessage
of SaMessageType.SlotStart:
ssmsg*: SlotStartMessage
ssmsg: SlotStartMessage
SRAttMessage* = object
case kind*: SRMessageType
of SRMessageType.AttestationSent:
asmsg*: AttestationSentMessage
of SRMessageType.AttestationReceived:
armsg*: AttestationReceivedMessage
SRANode* = object
directory*: NodeDirectory
sends*: seq[AttestationSentMessage]
recvs*: TableRef[string, AttestationReceivedMessage]
SRANode = object
directory: NodeDirectory
sends: seq[AttestationSentMessage]
recvs: TableRef[string, AttestationReceivedMessage]
aggSends: seq[AggregatedAttestationSentMessage]
aggRecvs: TableRef[string, AggregatedAttestationReceivedMessage]
proc readValue*(reader: var JsonReader, value: var DateTime) =
let s = reader.readValue(string)
@@ -273,6 +278,14 @@ proc readLogFileForASRMessages(file: string, srnode: var SRANode,
let rm = Json.decode(line, AttestationReceivedMessage,
allowUnknownFields = true)
discard srnode.recvs.hasKeyOrPut(rm.attestation.signature, rm)
elif m.msg == "Aggregate received":
let rm = Json.decode(line, AggregatedAttestationReceivedMessage,
allowUnknownFields = true)
discard srnode.aggRecvs.hasKeyOrPut(rm.signature, rm)
elif m.msg == "Aggregated attestation sent":
let sm = Json.decode(line, AggregatedAttestationSentMessage,
allowUnknownFields = true)
srnode.aggSends.add(sm)
if counter mod 10_000 == 0:
info "Processing file", file = extractFilename(file),
@@ -410,7 +423,7 @@ proc runPubsub(logConf: LogTraceConf, logFiles: seq[string]) =
var logs = newSeq[tuple[name: string, data: seq[GossipMessage]]]()
if len(logFiles) < 2:
error "Number of log files are not enough to process pubsub messages",
error "Number of log files insufficient to process pubsub messages",
logs_count = len(logFiles)
quit(1)
@@ -450,7 +463,7 @@ proc runPubsub(logConf: LogTraceConf, logFiles: seq[string]) =
proc runAttSend(logConf: LogTraceConf, logFiles: seq[string]) =
info "Check for late `attestation sent` messages"
if len(logFiles) < 1:
error "Number of log files are not enough to process pubsub messages",
error "Number of log files insufficient to process pubsub messages",
logs_count = len(logFiles)
quit(1)
@@ -491,9 +504,9 @@ proc toSimple*(s: seq[string]): string =
result = "[" & s.mapIt("'" & it & "'").join(", ") & "]"
proc runAttSendReceive(logConf: LogTraceConf, nodes: seq[NodeDirectory]) =
info "Check for attestations send/receive messages"
info "Check for attestation sent/received messages"
if len(nodes) < 2:
error "Number of nodes' log files are not enough", nodes_count = len(nodes)
error "Number of nodes' log files insufficient", nodes_count = len(nodes)
quit(1)
var srnodes = newSeq[SRANode]()
@@ -501,7 +514,9 @@ proc runAttSendReceive(logConf: LogTraceConf, nodes: seq[NodeDirectory]) =
var srnode = SRANode(
directory: node,
sends: newSeq[AttestationSentMessage](),
recvs: newTable[string, AttestationReceivedMessage]()
recvs: newTable[string, AttestationReceivedMessage](),
aggSends: newSeq[AggregatedAttestationSentMessage](),
aggRecvs: newTable[string, AggregatedAttestationReceivedMessage]()
)
info "Processing node", node = node.name
for logfile in node.logs:
@@ -513,7 +528,7 @@ proc runAttSendReceive(logConf: LogTraceConf, nodes: seq[NodeDirectory]) =
srnodes.add(srnode)
if len(nodes) < 2:
error "Number of nodes' log files are not enough", nodes_count = len(nodes)
error "Number of nodes' log files insufficient", nodes_count = len(nodes)
quit(1)
for i in 0 ..< len(srnodes):
@@ -539,11 +554,63 @@ proc runAttSendReceive(logConf: LogTraceConf, nodes: seq[NodeDirectory]) =
successful_broadcasts = success, failed_broadcasts = failed,
total_broadcasts = len(srnodes[i].sends)
proc runAggAttSendReceive(logConf: LogTraceConf, nodes: seq[NodeDirectory]) =
info "Check for aggregate attestation sent/received messages"
if len(nodes) < 2:
error "Number of nodes' log files insufficient", nodes_count = len(nodes)
quit(1)
var srnodes = newSeq[SRANode]()
for node in nodes:
var srnode = SRANode(
directory: node,
sends: newSeq[AttestationSentMessage](),
recvs: newTable[string, AttestationReceivedMessage](),
aggSends: newSeq[AggregatedAttestationSentMessage](),
aggRecvs: newTable[string, AggregatedAttestationReceivedMessage]()
)
info "Processing node", node = node.name
for logfile in node.logs:
let path = node.path & DirSep & logfile
info "Processing node's logfile", node = node.name, logfile = path
readLogFileForASRMessages(path, srnode,
logConf.ignoreSerializationErrors,
logConf.dumpSerializationErrors)
srnodes.add(srnode)
if len(nodes) < 2:
error "Number of nodes' log files insufficient", nodes_count = len(nodes)
quit(1)
for i in 0 ..< len(srnodes):
var success = 0
var failed = 0
for item in srnodes[i].aggSends:
var k = (i + 1) mod len(srnodes)
var misses = newSeq[string]()
while k != i:
if item.signature notin srnodes[k].aggRecvs:
misses.add(srnodes[k].directory.name)
k = (k + 1) mod len(srnodes)
if len(misses) == 0:
inc(success)
else:
inc(failed)
info "Aggregate attestation was not received",
sender = srnodes[i].directory.name,
signature = item.signature,
receivers = misses.toSimple(), send_stamp = item.timestamp
info "Statistics for sender node", sender = srnodes[i].directory.name,
successful_broadcasts = success, failed_broadcasts = failed,
total_broadcasts = len(srnodes[i].aggSends)
proc runLatencyCheck(logConf: LogTraceConf, logFiles: seq[string],
nodes: seq[NodeDirectory]) =
info "Check for async responsiveness"
if len(nodes) == 0 and len(logFiles) == 0:
error "Number of log files are not enough", nodes_count = len(nodes)
error "Number of log files insufficient", nodes_count = len(nodes)
quit(1)
let allowedTime = int64(float(initDuration(seconds = 1).inMilliseconds()) *
@@ -617,6 +684,8 @@ proc run(conf: LogTraceConf) =
runAttSend(conf, logFiles)
of StartUpCommand.asr:
runAttSendReceive(conf, logNodes)
of StartUpCommand.aggasr:
runAggAttSendReceive(conf, logNodes)
of StartUpCommand.lat:
runLatencyCheck(conf, logFiles, logNodes)

scripts/launch_local_testnet.sh

@@ -69,7 +69,7 @@ CI run: $(basename "$0") --disable-htop -- --verify-finalization
--base-metrics-port bootstrap node's metrics server port (default: ${BASE_METRICS_PORT})
--disable-htop don't use "htop" to see the nimbus_beacon_node processes
--disable-vc don't use validator client binaries for validators (by default validators are split 50/50 between beacon nodes and validator clients)
--enable-logtrace display logtrace asr analysis
--enable-logtrace display logtrace aggasr analysis
--log-level set the log level (default: ${LOG_LEVEL})
--reuse-existing-data-dir instead of deleting and recreating the data dir, keep it and reuse everything we can from it
--timeout timeout in seconds (default: ${TIMEOUT_DURATION} - no timeout)
@@ -351,7 +351,7 @@ dump_logs() {
dump_logtrace() {
if [[ "$ENABLE_LOGTRACE" == "1" ]]; then
find "${DATA_DIR}" -maxdepth 1 -type f -regex '.*/log[0-9]+.txt' | sed -e"s/${DATA_DIR}\//--nodes=/" | sort | xargs ./build/logtrace asr --log-dir="${DATA_DIR}" || true
find "${DATA_DIR}" -maxdepth 1 -type f -regex '.*/log[0-9]+.txt' | sed -e"s/${DATA_DIR}\//--nodes=/" | sort | xargs ./build/logtrace aggasr --log-dir="${DATA_DIR}" || true
fi
}
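To reproduce the CI check locally, the same launcher flags apply; a minimal sketch using the Jenkinsfile's preset and epoch settings with default ports (values are illustrative):

# Run a 4-node minimal-preset testnet; with --enable-logtrace the script then
# runs './build/logtrace aggasr' on the per-node logs via dump_logtrace above
./scripts/launch_local_testnet.sh --preset minimal --nodes 4 --stop-at-epoch 5 \
    --log-level DEBUG --disable-htop --enable-logtrace \
    --data-dir local_testnet0_data -- --verify-finalization --discv5:no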