nimbus-eth1/tests/test_clique.nim
Jordan Hrycaj ca07c40a48
Fearture/poa clique tuning (#765)
* Provide API

details:
  API is bundled via clique.nim.

* Set extraValidation as default for PoA chains

why:
  This triggers consensus verification and an update of the list
  of authorised signers. These signers are integral part of the
  PoA block chain.

todo:
  Option argument to control validation for the nimbus binary.

* Fix snapshot state block number

why:
  Using sub-sequence here, so the len() function was wrong.

* Optional start where block verification begins

why:
  Can speed up time building loading initial parts of block chain. For
  PoA, this allows to prove & test that authorised signers can be
  (correctly) calculated starting at any point on the block chain.

todo:
  On Goerli around blocks #193537..#197568, processing time increases
  disproportionally -- needs to be understand

* For Clique test, get old grouping back (7 transactions per log entry)

why:
  Forgot to change back after troubleshooting

* Fix field/function/module-name misunderstanding

why:
  Make compilation work

* Use eth_types.blockHash() rather than utils.hash() in Clique modules

why:
  Prefer lib module

* Dissolve snapshot_misc.nim

details:
  .. into clique_verify.nim (the other source file clique_unused.nim
  is inactive)

* Hide unused AsyncLock in Clique descriptor

details:
  Unused here but was part of the Go reference implementation

* Remove fakeDiff flag from Clique descriptor

details:
  This flag was a kludge in the Go reference implementation used for the
  canonical tests. The tests have been adapted so there is no need for
  the fakeDiff flag and its implementation.

* Not observing minimum distance from epoch sync point

why:
  For compiling PoA state, the go implementation will walk back to the
  epoch header with at least 90000 blocks apart from the current header
  in the absence of other synchronisation points.

  Here just the nearest epoch header is used. The assumption is that all
  the checkpoints before have been vetted already regardless of the
  current branch.

details:
  The behaviour of using the nearest vs the minimum distance epoch is
  controlled by a flag and can be changed at run time.

* Analysing processing time (patch adds some debugging/visualisation support)

why:
  At the first half million blocks of the Goerli replay, blocks on the
  interval #194854..#196224 take exceptionally long to process, but not
  due to PoA processing.

details:
  It turns out that much time is spent in p2p/excecutor.processBlock()
  where the elapsed transaction execution time is significantly greater
  for many of these blocks.

  Between the 1371 blocks #194854..#196224 there are 223 blocks with more
  than 1/2 seconds execution time whereas there are only 4 such blocks
  before and 13 such after this range up to #504192.

* fix debugging symbol in clique_desc (causes CI failing)

* Fixing canonical reference tests

why:
  Two errors were introduced earlier but ovelooked:
   1. "Remove fakeDiff flag .." patch was incomplete
   2. "Not observing minimum distance .." introduced problem w/tests 23/24

details:
  Fixing 2. needed to revert the behaviour by setting the
  applySnapsMinBacklog flag for the Clique descriptor. Also a new
  test was added to lock the new behaviour.

* Remove cruft

why:
  Clique/PoA processing was intended to take place somewhere in
  executor/process_block.processBlock() but was decided later to run
  from chain/persist_block.persistBlock() instead.

* Update API comment

* ditto
2021-07-30 15:06:51 +01:00

271 lines
9.8 KiB
Nim

# Nimbus
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except
# according to those terms.
import
std/[algorithm, os, sequtils, strformat, strutils, times],
../nimbus/db/db_chain,
../nimbus/p2p/[chain, clique, clique/clique_snapshot],
./test_clique/[pool, undump],
eth/[common, keys],
stint,
unittest2
const
goerliCapture = "test_clique" / "goerli51840.txt.gz"
groupReplayTransactions = 7
# ------------------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------------------
proc getBlockHeader(ap: TesterPool; number: BlockNumber): BlockHeader =
## Shortcut => db/db_chain.getBlockHeader()
doAssert ap.db.getBlockHeader(number, result)
proc ppSecs(elapsed: Duration): string =
result = $elapsed.inSeconds
let ns = elapsed.inNanoseconds mod 1_000_000_000
if ns != 0:
# to rounded decimal seconds
let ds = (ns + 5_000_000i64) div 10_000_000i64
result &= &".{ds:02}"
result &= "s"
proc ppRow(elapsed: Duration): string =
let ms = elapsed.inMilliSeconds + 500
"x".repeat(ms div 1000)
# ------------------------------------------------------------------------------
# Test Runners
# ------------------------------------------------------------------------------
# clique/snapshot_test.go(99): func TestClique(t *testing.T) {
proc runCliqueSnapshot(noisy = true; postProcessOk = false;
testIds = {0 .. 999}; skipIds = {0}-{0}) =
## Clique PoA Snapshot
## ::
## Tests that Clique signer voting is evaluated correctly for various
## simple and complex scenarios, as well as that a few special corner
## cases fail correctly.
##
let postProcessInfo = if postProcessOk: ", Transaction Finaliser Applied"
else: ", Without Finaliser"
suite &"Clique PoA Snapshot{postProcessInfo}":
var
pool = newVoterPool()
pool.debug = noisy
# clique/snapshot_test.go(379): for i, tt := range tests {
for tt in voterSamples.filterIt(it.id in testIds):
test &"Snapshots {tt.id:2}: {tt.info.substr(0,50)}...":
pool.say "\n"
# Noisily skip this test
if tt.id in skipIds:
skip()
else:
# Assemble a chain of headers from the cast votes
# see clique/snapshot_test.go(407): config := *params.TestChainConfig
pool
.resetVoterChain(tt.signers, tt.epoch, tt.runBack)
# see clique/snapshot_test.go(425): for j, block := range blocks {
.appendVoter(tt.votes)
.commitVoterChain(postProcessOk)
# see clique/snapshot_test.go(477): if err != nil {
if tt.failure != cliqueNoError[0]:
# Note that clique/snapshot_test.go does not verify _here_ against
# the scheduled test error -- rather this voting error is supposed
# to happen earlier (processed at clique/snapshot_test.go(467)) when
# assembling the block chain (sounds counter intuitive to the author
# of this source file as the scheduled errors are _clique_ related).
check pool.failed[1][0] == tt.failure
else:
let
expected = tt.results.mapIt("@" & it).sorted
snapResult = pool.pp(pool.cliqueSigners).sorted
pool.say "*** snap state=", pool.snapshot.pp(16)
pool.say " result=[", snapResult.join(",") & "]"
pool.say " expected=[", expected.join(",") & "]"
# Verify the final list of signers against the expected ones
check snapResult == expected
proc runCliqueSnapshot(noisy = true; postProcessOk = false; testId: int) =
noisy.runCliqueSnapshot(postProcessOk, testIds = {testId})
proc runGoerliReplay(noisy = true; showElapsed = false,
dir = "tests"; captureFile = goerliCapture,
startAtBlock = 0u64; stopAfterBlock = 0u64) =
var
pool = newVoterPool()
cache: array[groupReplayTransactions,(seq[BlockHeader],seq[BlockBody])]
cInx = 0
stoppedOk = false
pool.debug = noisy
pool.verifyFrom = startAtBlock
let stopThreshold = if stopAfterBlock == 0u64: uint64.high.u256
else: stopAfterBlock.u256
suite "Replay Goerli Chain":
for w in (dir / captureFile).undumpNextGroup:
if w[0][0].blockNumber == 0.u256:
# Verify Genesis
doAssert w[0][0] == pool.getBlockHeader(0.u256)
else:
# Condense in cache
cache[cInx] = w
cInx.inc
# Handy for partial tests
if stopThreshold < cache[cInx-1][0][0].blockNumber:
stoppedOk = true
break
# Run from cache if complete set
if cache.len <= cInx:
cInx = 0
let
first = cache[0][0][0].blockNumber
last = cache[^1][0][^1].blockNumber
blkRange = &"#{first}..#{last}"
info = if first <= startAtBlock.u256 and startAtBlock.u256 <= last:
&", verification #{startAtBlock}.."
else:
""
test &"Goerli Blocks {blkRange} ({cache.len} transactions{info})":
let start = getTime()
for (headers,bodies) in cache:
let addedPersistBlocks = pool.chain.persistBlocks(headers,bodies)
check addedPersistBlocks == ValidationResult.Ok
if addedPersistBlocks != ValidationResult.Ok: return
if showElapsed and startAtBlock.u256 <= last:
let
elpd = getTime() - start
info = &"{elpd.ppSecs:>7} {pool.cliqueSignersLen} {elpd.ppRow}"
echo &"\n elapsed {blkRange:<17} {info}"
# Rest from cache
if 0 < cInx:
let
first = cache[0][0][0].blockNumber
last = cache[cInx-1][0][^1].blockNumber
blkRange = &"#{first}..#{last}"
info = if first <= startAtBlock.u256 and startAtBlock.u256 <= last:
&", Verification #{startAtBlock}.."
else:
""
test &"Goerli Blocks {blkRange} ({cache.len} transactions{info})":
let start = getTime()
for (headers,bodies) in cache:
let addedPersistBlocks = pool.chain.persistBlocks(headers,bodies)
check addedPersistBlocks == ValidationResult.Ok
if addedPersistBlocks != ValidationResult.Ok: return
if showElapsed and startAtBlock.u256 <= last:
let
elpsd = getTime() - start
info = &"{elpsd.ppSecs:>7} {pool.cliqueSignersLen} {elpsd.ppRow}"
echo &"\n elapsed {blkRange:<17} {info}"
if stoppedOk:
test &"Runner stopped after reaching #{stopThreshold}":
discard
proc runGoerliBaybySteps(noisy = true;
dir = "tests"; captureFile = goerliCapture,
stopAfterBlock = 0u64) =
var
pool = newVoterPool()
stoppedOk = false
pool.debug = noisy
let stopThreshold = if stopAfterBlock == 0u64: 20.u256
else: stopAfterBlock.u256
suite "Replay Goerli Chain Transactions Single Blockwise":
for w in (dir / captureFile).undumpNextGroup:
if stoppedOk:
break
if w[0][0].blockNumber == 0.u256:
# Verify Genesis
doAssert w[0][0] == pool.getBlockHeader(0.u256)
else:
for n in 0 ..< w[0].len:
let
header = w[0][n]
body = w[1][n]
var
parents = w[0][0 ..< n]
test &"Goerli Block #{header.blockNumber} + {parents.len} parents":
check pool.chain.clique.cliqueSnapshot(header,parents).isOk
let addedPersistBlocks = pool.chain.persistBlocks(@[header],@[body])
check addedPersistBlocks == ValidationResult.Ok
if addedPersistBlocks != ValidationResult.Ok: return
# Handy for partial tests
if stopThreshold <= header.blockNumber:
stoppedOk = true
break
if stoppedOk:
test &"Runner stopped after reaching #{stopThreshold}":
discard
# ------------------------------------------------------------------------------
# Main function(s)
# ------------------------------------------------------------------------------
proc cliqueMain*(noisy = defined(debug)) =
noisy.runCliqueSnapshot(true)
noisy.runCliqueSnapshot(false)
noisy.runGoerliBaybySteps
noisy.runGoerliReplay(startAtBlock = 31100u64)
when isMainModule:
let
skipIDs = {999}
# A new capture file can be generated using
# `test_clique/indiump.dumpGroupNl()`
# placed at the end of
# `p2p/chain/persist_blocks.persistBlocks()`.
captureFile = "test_clique" / "goerli504192.txt.gz"
#captureFile = "test_clique" / "dump-stream.out.gz"
proc goerliReplay(noisy = true; showElapsed = true;
dir = "."; captureFile = captureFile;
startAtBlock = 0u64; stopAfterBlock = 0u64) =
runGoerliReplay(
noisy = noisy, showElapsed = showElapsed,
dir = dir, captureFile = captureFile,
startAtBlock = startAtBlock, stopAfterBlock = stopAfterBlock)
let noisy = defined(debug)
noisy.runCliqueSnapshot(true)
noisy.runCliqueSnapshot(false)
noisy.runGoerliBaybySteps(dir = ".")
noisy.runGoerliReplay(dir = ".", startAtBlock = 31100u64)
#noisy.goerliReplay(startAtBlock = 31100u64)
#noisy.goerliReplay(startAtBlock = 194881u64, stopAfterBlock = 198912u64)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------