REST server metrics and connections API endpoints. (#5193)

* Add new REST endpoints to monitor REST server connections and new chronos metrics.

* Bump head versions of chronos and presto.

* Bump chronos with regression fix.

* Remove outdated tests which was supposed to test pipeline mode.

* Disable pipeline mode in resttest.

* Update copyright year.

* Upgrade test_signing_node to start use AsyncProcess instead of std library's osproc.
Bump chronos to check graceful shutdown.

* Update AllTests.

* Bump chronos.
This commit is contained in:
Eugene Kabanov 2023-07-21 17:54:09 +03:00 committed by GitHub
parent bf400d37ab
commit df80ae68fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 1027 additions and 1048 deletions

View File

@ -382,8 +382,6 @@ OK: 4/4 Fail: 0/4 Skip: 0/4
## Nimbus remote signer/signing test (web3signer)
```diff
+ Connection timeout test OK
+ Connections pool stress test OK
+ Idle connection test OK
+ Public keys enumeration (/api/v1/eth2/publicKeys) test OK
+ Public keys reload (/reload) test OK
+ Signing BeaconBlock (getBlockSignature(bellatrix)) OK
@ -401,7 +399,7 @@ OK: 4/4 Fail: 0/4 Skip: 0/4
+ Signing voluntary exit (getValidatorExitSignature()) OK
+ Waiting for signing node (/upcheck) test OK
```
OK: 19/19 Fail: 0/19 Skip: 0/19
OK: 17/17 Fail: 0/17 Skip: 0/17
## Old database versions [Preset: mainnet]
```diff
+ pre-1.1.0 OK
@ -700,4 +698,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
OK: 9/9 Fail: 0/9 Skip: 0/9
---TOTAL---
OK: 393/398 Fail: 0/398 Skip: 5/398
OK: 391/396 Fail: 0/396 Skip: 5/396

View File

@ -7,8 +7,9 @@
import
std/[sequtils],
stew/results,
stew/[results, base10],
chronicles,
chronos/apps/http/httpdebug,
libp2p/[multiaddress, multicodec, peerstore],
libp2p/protocols/pubsub/pubsubpeer,
./rest_utils,
@ -61,10 +62,22 @@ type
http_client_connections*: uint64
http_client_requests*: uint64
http_client_responses*: uint64
http_server_connections*: uint64
http_server_secure_connections*: uint64
http_server_unsecure_connections*: uint64
http_server_requests*: uint64
http_server_responses*: uint64
http_body_readers*: uint64
http_body_writers*: uint64
RestConnectionInfo* = object
handle*: string
connection_type*: string
connection_state*: string
remote_address*: string
local_address*: string
since_accept*: string
since_create*: string
RestPubSubPeer* = object
peerId*: PeerId
score*: float64
@ -279,27 +292,74 @@ proc installNimbusApiHandlers*(router: var RestRouter, node: BeaconNode) =
router.api(MethodGet, "/nimbus/v1/debug/chronos/metrics") do (
) -> RestApiResponse:
template getCount(ttype: untyped, name: string): uint64 =
let res = ttype(getTracker(name))
if res.isNil(): 0'u64 else: uint64(res.opened - res.closed)
template getCount(name: string): uint64 =
let res = getTrackerCounter(name)
uint64(res.opened - res.closed)
let res = RestChronosMetricsInfo(
tcp_transports: getCount(StreamTransportTracker, "stream.transport"),
udp_transports: getCount(DgramTransportTracker, "datagram.transport"),
tcp_servers: getCount(StreamServerTracker, "stream.server"),
stream_readers: getCount(AsyncStreamTracker,
AsyncStreamReaderTrackerName),
stream_writers: getCount(AsyncStreamTracker,
AsyncStreamWriterTrackerName),
http_client_connections: getCount(HttpClientTracker,
HttpClientConnectionTrackerName),
http_client_requests: getCount(HttpClientTracker,
HttpClientRequestTrackerName),
http_client_responses: getCount(HttpClientTracker,
HttpClientResponseTrackerName),
http_server_connections: lenu64(node.restServer.server.connections),
http_body_readers: getCount(HttpBodyTracker, HttpBodyReaderTrackerName),
http_body_writers: getCount(HttpBodyTracker, HttpBodyWriterTrackerName)
tcp_transports: getCount(StreamTransportTrackerName),
udp_transports: getCount(DgramTransportTrackerName),
tcp_servers: getCount(StreamServerTrackerName),
stream_readers: getCount(AsyncStreamReaderTrackerName),
stream_writers: getCount(AsyncStreamWriterTrackerName),
http_client_connections: getCount(HttpClientConnectionTrackerName),
http_client_requests: getCount(HttpClientRequestTrackerName),
http_client_responses: getCount(HttpClientResponseTrackerName),
http_server_secure_connections:
getCount(HttpServerSecureConnectionTrackerName),
http_server_unsecure_connections:
getCount(HttpServerUnsecureConnectionTrackerName),
http_server_requests: getCount(HttpServerRequestTrackerName),
http_server_responses: getCount(HttpServerResponseTrackerName),
http_body_readers: getCount(HttpBodyReaderTrackerName),
http_body_writers: getCount(HttpBodyWriterTrackerName)
)
return RestApiResponse.jsonResponse(res)
router.api(MethodGet, "/nimbus/v1/debug/chronos/restserver/connections") do (
) -> RestApiResponse:
var res: seq[RestConnectionInfo]
for connection in node.restServer.server.getConnections():
let
connectionState =
case connection.connectionState
of httpdebug.ConnectionState.Accepted: "accepted"
of httpdebug.ConnectionState.Alive: "alive"
of httpdebug.ConnectionState.Closing: "closing"
of httpdebug.ConnectionState.Closed: "closed"
connectionType =
case connection.connectionType
of ConnectionType.Secure: "secure"
of ConnectionType.NonSecure: "non-secure"
localAddress =
if connection.localAddress.isNone():
"not available"
else:
$connection.localAddress.get()
remoteAddress =
if connection.remoteAddress.isNone():
"not available"
else:
$connection.remoteAddress.get()
handle = Base10.toString(uint64(connection.handle))
moment = Moment.now()
sinceAccept = $(moment - connection.acceptMoment)
sinceCreate =
if connection.createMoment.isSome():
$(moment - connection.createMoment.get())
else:
"not available"
res.add(
RestConnectionInfo(
handle: handle,
connection_state: connectionState,
connection_type: connectionType,
local_address: localAddress,
remote_address: remoteAddress,
since_accept: sinceAccept,
since_create: sinceCreate
)
)
return RestApiResponse.jsonResponse(res)

View File

@ -1,5 +1,5 @@
# beacon_chain
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Copyright (c) 2021-2023 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
@ -993,7 +993,6 @@ proc workerLoop(address: TransportAddress, uri: Uri, worker: int,
let testRes = await runTest(conn, uri, test.rule, worker, test.index)
let caseRes = TestCaseResult(index: test.index, data: testRes)
await outputQueue.addLast(caseRes)
if ResetConnection in testRes.flags:
await conn.closeWait()
conn = nil
index = 0

View File

@ -6,8 +6,9 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/[osproc, algorithm],
std/algorithm,
presto, unittest2, chronicles, stew/[results, byteutils, io2],
chronos/asyncproc,
chronos/unittest2/asynctests,
../beacon_chain/spec/[signatures, crypto],
../beacon_chain/spec/eth2_apis/rest_remote_signer_calls,
@ -71,6 +72,11 @@ const
SigningRequestTimeoutSeconds = 1
type
SigningProcess = object
process: AsyncProcessRef
reader: Future[seq[byte]]
proc getNodePort(basePort: int, rt: RemoteSignerType): int =
# Individual port numbers derived by adding to configurable base port
case rt
@ -79,7 +85,6 @@ proc getNodePort(basePort: int, rt: RemoteSignerType): int =
of RemoteSignerType.VerifyingWeb3Signer:
basePort + 1
func init(T: type ForkedBeaconBlock, contents: ProduceBlockResponseV2): T =
case contents.kind
of ConsensusFork.Phase0:
@ -278,15 +283,14 @@ proc getRemoteKeystoreData(data: string, basePort: int,
remotes: @[info])
proc spawnSigningNodeProcess(
basePort: int, rt: RemoteSignerType): Result[Process, string] =
let process =
try:
basePort: int, rt: RemoteSignerType): Future[SigningProcess] {.async.} =
let arguments =
case rt
of RemoteSignerType.Web3Signer:
@[
"--non-interactive=true",
"--data-dir=" & getTestDir(rt) & DirSep & "signing-node",
"--log-level=DEBUG",
"--data-dir=" & getTestDir(rt) & "/signing-node",
"--bind-address=" & SigningNodeAddress,
"--bind-port=" & $getNodePort(basePort, rt),
"--request-timeout=" & $SigningRequestTimeoutSeconds
@ -295,24 +299,34 @@ proc spawnSigningNodeProcess(
of RemoteSignerType.VerifyingWeb3Signer:
@[
"--non-interactive=true",
"--data-dir=" & getTestDir(rt) & DirSep & "signing-node",
"--log-level=DEBUG",
"--data-dir=" & getTestDir(rt) & "/signing-node",
"--bind-address=" & SigningNodeAddress,
"--bind-port=" & $getNodePort(basePort, rt),
"--expected-fee-recipient=" & $SigningExpectedFeeRecipient,
"--request-timeout=" & $SigningRequestTimeoutSeconds
# we make so low `timeout` to test connection pool.
]
osproc.startProcess("build/nimbus_signing_node", "", arguments, options = {poParentStreams})
except CatchableError as exc:
echo "Error while spawning `nimbus_signing_node` process [", $exc.name,
"] " & $exc.msg
return err($exc.msg)
ok(process)
proc shutdownSigningNodeProcess(process: Process) =
if process.peekExitCode() == -1:
process.kill()
discard process.waitForExit()
let res =
await startProcess("build/nimbus_signing_node",
arguments = arguments,
options = {AsyncProcessOption.StdErrToStdOut},
stdoutHandle = AsyncProcess.Pipe)
SigningProcess(
process: res, reader: res.stdoutStream.read()
)
proc shutdownSigningNodeProcess(sp: SigningProcess) {.async.} =
if sp.process.running().get(true):
sp.process.kill()
discard await sp.process.waitForExit()
await allFutures(sp.reader)
let data = sp.reader.read()
echo ""
echo "===== nimbus_signing_node log ====="
echo bytesToString(data)
let
basePortStr =
@ -328,15 +342,13 @@ let
fatal "Invalid base port arg", basePort = basePortStr, exc = exc.msg
quit 1
suite "Nimbus remote signer/signing test (web3signer)":
block:
let res = createTestDir(RemoteSignerType.Web3Signer)
doAssert(res.isOk())
let process = waitFor(spawnSigningNodeProcess(
basePort, RemoteSignerType.Web3Signer))
let pres = spawnSigningNodeProcess(
basePort, RemoteSignerType.Web3Signer)
doAssert(pres.isOk())
let process = pres.get()
suite "Nimbus remote signer/signing test (web3signer)":
setup:
let pool1 = newClone(default(ValidatorPool))
let
@ -863,94 +875,6 @@ suite "Nimbus remote signer/signing test (web3signer)":
sres2.get() == rres2.get()
sres3.get() == rres3.get()
asyncTest "Connections pool stress test":
const TestsCount = 100
var
futures1: seq[Future[SignatureResult]]
futures2: seq[Future[SignatureResult]]
for i in 0 ..< TestsCount:
futures1.add(validator4.getEpochSignature(SigningFork,
GenesisValidatorsRoot, Epoch(i)))
await allFutures(futures1)
for fut in futures1:
check fut.completed()
await sleepAsync(seconds(SigningRequestTimeoutSeconds) + 100.milliseconds)
for i in 0 ..< TestsCount:
futures2.add(validator4.getEpochSignature(SigningFork,
GenesisValidatorsRoot, Epoch(i)))
await allFutures(futures2)
for fut in futures2:
check fut.completed()
for i in 0 ..< TestsCount:
let
sres1 = futures1[i].read()
sres2 = futures2[i].read()
check:
sres1.isOk()
sres2.isOk()
sres1.get() == sres2.get()
asyncTest "Idle connection test":
let
remoteUrl = "http://" & SigningNodeAddress & ":" &
$getNodePort(basePort, RemoteSignerType.Web3Signer)
prestoFlags = {RestClientFlag.CommaSeparatedArray}
rclient =
RestClientRef.new(remoteUrl, prestoFlags, {},
idleTimeout = 1.seconds,
idlePeriod = 250.milliseconds)
check rclient.isOk()
let client = rclient.get()
try:
block:
let response = await client.getKeys()
check:
response.status == 200
len(response.data) == 3
let
received = sorted([
"0x" & response.data[0].toHex(),
"0x" & response.data[1].toHex(),
"0x" & response.data[2].toHex()
])
expected = sorted([
ValidatorPubKey1,
ValidatorPubKey2,
ValidatorPubKey3
])
check received == expected
await sleepAsync(seconds(SigningRequestTimeoutSeconds) + 100.milliseconds)
block:
let response = await client.getKeys()
check:
response.status == 200
len(response.data) == 3
let
received = sorted([
"0x" & response.data[0].toHex(),
"0x" & response.data[1].toHex(),
"0x" & response.data[2].toHex()
])
expected = sorted([
ValidatorPubKey1,
ValidatorPubKey2,
ValidatorPubKey3
])
check received == expected
finally:
await client.closeWait()
asyncTest "Connection timeout test":
let
request = Web3SignerRequest.init(SigningFork, GenesisValidatorsRoot,
@ -1004,18 +928,16 @@ suite "Nimbus remote signer/signing test (web3signer)":
finally:
await client.closeWait()
shutdownSigningNodeProcess(process)
waitFor(shutdownSigningNodeProcess(process))
removeTestDir(RemoteSignerType.Web3Signer)
suite "Nimbus remote signer/signing test (verifying-web3signer)":
block:
let res = createTestDir(RemoteSignerType.VerifyingWeb3Signer)
doAssert(res.isOk())
let process = waitFor(spawnSigningNodeProcess(
basePort, RemoteSignerType.VerifyingWeb3Signer))
let pres = spawnSigningNodeProcess(
basePort, RemoteSignerType.VerifyingWeb3Signer)
doAssert(pres.isOk())
let process = pres.get()
suite "Nimbus remote signer/signing test (verifying-web3signer)":
setup:
let pool1 = newClone(default(ValidatorPool))
let
@ -1352,5 +1274,5 @@ suite "Nimbus remote signer/signing test (verifying-web3signer)":
finally:
await client.closeWait()
shutdownSigningNodeProcess(process)
waitFor(shutdownSigningNodeProcess(process))
removeTestDir(RemoteSignerType.VerifyingWeb3Signer)

2
vendor/nim-chronos vendored

@ -1 +1 @@
Subproject commit 0035f4fa6692e85756aa192b4df84c21d3cacacb
Subproject commit e04c042e8acfe0025c780de8a025aa4c4e042130

2
vendor/nim-presto vendored

@ -1 +1 @@
Subproject commit 35652ed19ccbbf042e95941bc2f8bab39e3f6030
Subproject commit 42552611cfe04ca7838243d0a0e88fb9a5e9701a