Reset cached indices when resetting cache on SSZ read (#2480)

* Reset cached indices when resetting cache on SSZ read

When deserializing into an existing structure, the cache should be
cleared - this goes for JSON as well. Also improve error messages.
Jacek Sieka 2021-04-08 12:11:04 +02:00 committed by GitHub
parent ba59dd85cd
commit 7165e0ac31
11 changed files with 197 additions and 92 deletions
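
The failure mode this commit guards against can be illustrated with a small, self-contained sketch. `CachedBox` and `readInto` below are made-up stand-ins for the real `HashList`/`readSszValue`, but they show why any reader that reuses an existing instance must also invalidate its hash cache:

```nim
import std/hashes

type CachedBox = object
  data: seq[int]
  hashValid: bool
  cachedHash: Hash

proc root(b: var CachedBox): Hash =
  ## Stand-in for hash_tree_root with a memoized result.
  if not b.hashValid:
    b.cachedHash = hash(b.data)
    b.hashValid = true
  b.cachedHash

proc readInto(b: var CachedBox, wire: seq[int]) =
  ## Stand-in for readSszValue/readValue deserializing into `b`.
  b.data = wire
  b.hashValid = false   # the bug was the moral equivalent of dropping this line

var box = CachedBox(data: @[1, 2, 3])
discard box.root()        # cache is now populated
readInto(box, @[4, 5])    # deserialize into the *existing* instance
doAssert box.root() == hash(@[4, 5])   # only holds because the cache was reset
```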

View File

@@ -280,8 +280,9 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
## hash
```diff
+ HashArray OK
+ HashList OK
```
OK: 1/1 Fail: 0/1 Skip: 0/1
OK: 2/2 Fail: 0/2 Skip: 0/2
## state diff tests [Preset: mainnet]
```diff
+ random slot differences [Preset: mainnet] OK
@@ -289,4 +290,4 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
OK: 1/1 Fail: 0/1 Skip: 0/1
---TOTAL---
OK: 154/163 Fail: 0/163 Skip: 9/163
OK: 155/164 Fail: 0/164 Skip: 9/164

View File

@@ -473,10 +473,6 @@ proc getStateOnlyMutableValidators(
let numValidators = output.validators.len
doAssert db.immutableValidatorsMem.len >= numValidators
output.validators.hashes.setLen(0)
for item in output.validators.indices.mitems():
item = 0
for i in 0 ..< numValidators:
let
# Bypass hash cache invalidation
@@ -487,7 +483,7 @@ proc getStateOnlyMutableValidators(
assign(dstValidator.withdrawal_credentials,
srcValidator.withdrawal_credentials)
output.validators.growHashes()
output.validators.resetCache()
true
of GetResult.notFound:

View File

@@ -122,8 +122,11 @@ proc process_deposit*(preset: RuntimePreset,
# by the deposit contract
if skipBLSValidation in flags or verify_deposit_signature(preset, deposit.data):
# New validator! Add validator and balance entries
state.validators.add(get_validator_from_deposit(deposit.data))
state.balances.add(amount)
if not state.validators.add(get_validator_from_deposit(deposit.data)):
return err("process_deposit: too many validators")
if not state.balances.add(amount):
static: doAssert state.balances.maxLen == state.validators.maxLen
raiseAssert "adding validator succeeded, so should balances"
doAssert state.validators.len == state.balances.len
else:
@@ -298,8 +301,11 @@ proc initialize_beacon_state_from_eth1*(
if skipBlsValidation in flags or
verify_deposit_signature(preset, deposit):
pubkeyToIndex[pubkey] = state.validators.len
state.validators.add(get_validator_from_deposit(deposit))
state.balances.add(amount)
if not state.validators.add(get_validator_from_deposit(deposit)):
raiseAssert "too many validators"
if not state.balances.add(amount):
raiseAssert "same as validators"
else:
# Invalid deposits are perfectly possible
trace "Skipping deposit with invalid signature",
@@ -507,7 +513,8 @@ func get_sorted_attesting_indices_list*(
state: BeaconState, data: AttestationData, bits: CommitteeValidatorsBits,
cache: var StateCache): List[uint64, Limit MAX_VALIDATORS_PER_COMMITTEE] =
for index in get_sorted_attesting_indices(state, data, bits, cache):
result.add index.uint64
if not result.add index.uint64:
raiseAssert "The `result` list has the same max size as the sorted `bits` input"
# https://github.com/ethereum/eth2.0-specs/blob/v1.0.1/specs/phase0/beacon-chain.md#get_indexed_attestation
func get_indexed_attestation(state: BeaconState, attestation: Attestation,
@@ -626,6 +633,8 @@ proc process_attestation*(
# data sadly is a processing hotspot - the business with the addDefault
# pointer is here simply to work around the poor codegen
var pa = attestations.addDefault()
if pa.isNil:
return err("process_attestation: too many pending attestations")
assign(pa[].aggregation_bits, attestation.aggregation_bits)
pa[].data = attestation.data
pa[].inclusion_delay = state.slot - attestation.data.slot
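
The pointer-returning `addDefault` idiom mentioned in the comment above can be sketched generically. `BoundedSeq` here is a made-up stand-in for the real `HashList`, but it shows both the copy-avoidance and why callers now have to handle `nil` once the capacity limit is enforced:

```nim
type BoundedSeq[T] = object
  data: seq[T]
  maxLen: int

proc addDefault[T](x: var BoundedSeq[T]): ptr T =
  ## Append a default-initialized element and hand back a pointer so the
  ## caller can fill it in place instead of constructing and copying a
  ## large value; returns nil when the list is already full.
  if x.data.len >= x.maxLen:
    return nil
  x.data.setLen(x.data.len + 1)
  addr x.data[^1]

var pending = BoundedSeq[array[128, byte]](maxLen: 2)
for i in 0 ..< 3:
  let pa = pending.addDefault()
  if pa.isNil:
    echo "too many pending attestations"   # the third add is rejected
  else:
    pa[][0] = byte(i)                      # assign fields through the pointer
```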

View File

@@ -726,6 +726,7 @@ proc writeValue*(writer: var JsonWriter, value: HashList)
proc readValue*(reader: var JsonReader, value: var HashList)
{.raises: [IOError, SerializationError, Defect].} =
value.resetCache()
readValue(reader, value.data)
template writeValue*(writer: var JsonWriter, value: Version | ForkDigest) =

View File

@@ -73,13 +73,12 @@ func `xor`[T: array](a, b: T): T =
# https://github.com/ethereum/eth2.0-specs/blob/v1.0.1/specs/phase0/beacon-chain.md#randao
proc process_randao(
state: var BeaconState, body: SomeBeaconBlockBody, flags: UpdateFlags,
stateCache: var StateCache): bool {.nbench.} =
stateCache: var StateCache): Result[void, cstring] {.nbench.} =
let
proposer_index = get_beacon_proposer_index(state, stateCache)
if proposer_index.isNone:
debug "Proposer index missing, probably along with any active validators"
return false
return err("process_randao: proposer index missing, probably along with any active validators")
# Verify RANDAO reveal
let
@@ -91,11 +90,8 @@ proc process_randao(
if not verify_epoch_signature(
state.fork, state.genesis_validators_root, epoch, proposer_pubkey,
body.randao_reveal):
debug "Randao mismatch", proposer_pubkey = shortLog(proposer_pubkey),
epoch,
signature = shortLog(body.randao_reveal),
slot = state.slot
return false
return err("process_randao: invalid epoch signature")
# Mix it in
let
@@ -105,15 +101,18 @@ proc process_randao(
state.randao_mixes[epoch mod EPOCHS_PER_HISTORICAL_VECTOR].data =
mix.data xor rr
true
ok()
# https://github.com/ethereum/eth2.0-specs/blob/v1.0.1/specs/phase0/beacon-chain.md#eth1-data
func process_eth1_data(state: var BeaconState, body: SomeBeaconBlockBody) {.nbench.}=
state.eth1_data_votes.add body.eth1_data
func process_eth1_data(state: var BeaconState, body: SomeBeaconBlockBody): Result[void, cstring] {.nbench.}=
if not state.eth1_data_votes.add body.eth1_data:
# Count is reset in process_final_updates, so this should never happen
return err("process_eth1_data: no more room for eth1 data")
if state.eth1_data_votes.asSeq.count(body.eth1_data).uint64 * 2 >
SLOTS_PER_ETH1_VOTING_PERIOD:
state.eth1_data = body.eth1_data
ok()
# https://github.com/ethereum/eth2.0-specs/blob/v1.0.1/specs/phase0/beacon-chain.md#is_slashable_validator
func is_slashable_validator(validator: Validator, epoch: Epoch): bool =
@@ -340,20 +339,14 @@ proc process_operations(preset: RuntimePreset,
proc process_block*(
preset: RuntimePreset,
state: var BeaconState, blck: SomeBeaconBlock, flags: UpdateFlags,
stateCache: var StateCache): Result[void, cstring] {.nbench.}=
cache: var StateCache): Result[void, cstring] {.nbench.}=
## When there's a new block, we need to verify that the block is sane and
## update the state accordingly - the state is left in an unknown state when
## block application fails (!)
? process_block_header(state, blck, flags, stateCache)
if not process_randao(state, blck.body, flags, stateCache):
return err("Randao failure".cstring)
process_eth1_data(state, blck.body)
let res_ops = process_operations(preset, state, blck.body, flags, stateCache)
if res_ops.isErr:
return res_ops
? process_block_header(state, blck, flags, cache)
? process_randao(state, blck.body, flags, cache)
? process_eth1_data(state, blck.body)
? process_operations(preset, state, blck.body, flags, cache)
ok()
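
The switch from `bool` to `Result[void, cstring]` is what lets `process_block` chain its steps with `?` and surface a specific message instead of a generic "Randao failure". The real code uses the `stew/results` library; the self-contained sketch below only imitates the control flow (`Res`, `stepA` and `stepB` are made up for illustration):

```nim
type Res = object
  okFlag: bool
  error: string

proc ok(): Res = Res(okFlag: true)
proc err(msg: string): Res = Res(okFlag: false, error: msg)

template `?`(r: Res) =
  ## Propagate a failure to the caller, like `?` from stew/results.
  let v = r
  if not v.okFlag:
    return v

proc stepA(x: int): Res =
  if x < 0: err("stepA: negative input") else: ok()

proc stepB(x: int): Res =
  if x > 100: err("stepB: too large") else: ok()

proc process(x: int): Res =
  ## Each failing step short-circuits with its own message - no separate
  ## debug logging and no information-free "step failed" error.
  ? stepA(x)
  ? stepB(x)
  ok()

doAssert process(5).okFlag
doAssert process(-1).error == "stepA: negative input"
```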

View File

@@ -598,8 +598,9 @@ func process_historical_roots_update(state: var BeaconState) {.nbench.} =
# significant additional stack or heap.
# https://github.com/ethereum/eth2.0-specs/blob/v1.0.1/specs/phase0/beacon-chain.md#historicalbatch
# In response to https://github.com/status-im/nimbus-eth2/issues/921
state.historical_roots.add hash_tree_root(
[hash_tree_root(state.block_roots), hash_tree_root(state.state_roots)])
if not state.historical_roots.add hash_tree_root(
[hash_tree_root(state.block_roots), hash_tree_root(state.state_roots)]):
raiseAssert "no more room for historical roots, so long and thanks for the fish!"
# https://github.com/ethereum/eth2.0-specs/blob/dev/specs/phase0/beacon-chain.md#participation-records-rotation
func process_participation_record_updates(state: var BeaconState) {.nbench.} =

View File

@@ -24,9 +24,8 @@ template setOutputSize[R, T](a: var array[R, T], length: int) =
raiseIncorrectSize a.type
proc setOutputSize(list: var List, length: int) {.raisesssz.} =
if int64(length) > list.maxLen:
if not list.setLen length:
raise newException(MalformedSszError, "SSZ list maximum size exceeded")
list.setLen length
# fromSszBytes copies the wire representation to a Nim variable,
# assuming there's enough data in the buffer
@@ -140,15 +139,9 @@ func readSszValue*[T](input: openArray[byte],
if resultBytesCount == maxExpectedSize:
checkForForbiddenBits(T, input, val.maxLen + 1)
elif val is HashList:
elif val is HashList | HashArray:
readSszValue(input, val.data)
val.hashes.setLen(0)
val.growHashes()
elif val is HashArray:
readSszValue(input, val.data)
for h in val.hashes.mitems():
clearCache(h)
val.resetCache()
elif val is List|array:
type E = type val[0]

View File

@@ -85,13 +85,38 @@ type
BitList*[maxLen: static Limit] = distinct BitSeq
HashArray*[maxLen: static Limit; T] = object
## Array implementation that caches the hash of each chunk of data - see
## also HashList for more details.
data*: array[maxLen, T]
hashes* {.dontSerialize.}: array[maxChunkIdx(T, maxLen), Eth2Digest]
HashList*[T; maxLen: static Limit] = object
## List implementation that caches the hash of each chunk of data as well
## as the combined hash of each level of the merkle tree using a flattened
## list of hashes.
##
## The merkle tree of a list is formed by imagining a virtual buffer of
## `maxLen` length which is zero-filled where there is no data. Then,
## a merkle tree of hashes is formed as usual - at each level of the tree,
## iff the hash is combined from two zero-filled chunks, the hash is not
## stored in the `hashes` list - instead, `indices` keeps track of where in
## the list each level starts. When the length of `data` changes, the
## `hashes` and `indices` structures must be updated accordingly using
## `growHashes`.
##
## All mutating operators (those that take `var HashList`) will
## automatically invalidate the cache for the relevant chunks - the leaf and
## all intermediate chunk hashes up to the root. When large changes are made
## to `data`, it might be more efficient to batch the updates then reset
## the cache using `resetCache` instead.
data*: List[T, maxLen]
hashes* {.dontSerialize.}: seq[Eth2Digest]
indices* {.dontSerialize.}: array[hashListIndicesLen(maxChunkIdx(T, maxLen)), int64]
hashes* {.dontSerialize.}: seq[Eth2Digest] ## \
## Flattened tree store that skips "empty" branches of the tree - the
## starting index in this sequence of each "level" in the tree is found
## in `indices`.
indices* {.dontSerialize.}: array[hashListIndicesLen(maxChunkIdx(T, maxLen)), int64] ##\
## Holds the starting index in the hashes list for each level of the tree
# Note for readers:
# We use `array` for `Vector` and
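
Concretely, for a toy list with room for 8 chunks of which 3 hold data, the flattened cache only needs to keep the nodes that cover real data - everything to the right is the root of an all-zero subtree with a well-known hash. A rough sketch of that bookkeeping (the real formulas live in `nodesAtLayer`/`cacheNodes`/`growHashes` below and differ in details such as level ordering):

```nim
func ceilDiv(a, b: int): int = (a + b - 1) div b

func storedPerLevel(leafSlots, usedChunks: int): seq[int] =
  ## Cached hashes needed at each level, leaves first and root last, when
  ## only `usedChunks` of the `leafSlots` chunk positions hold data.
  var
    width = leafSlots
    used = usedChunks
  while true:
    result.add used
    if width == 1: break
    width = width div 2
    used = ceilDiv(used, 2)

let counts = storedPerLevel(8, 3)
doAssert counts == @[3, 2, 1, 1]    # versus 8 + 4 + 2 + 1 for a dense cache

# The levels are stored back to back in one flat seq; the `indices`-style
# offsets are just the running sum of the per-level counts.
var
  offsets: seq[int]
  total = 0
for c in counts:
  offsets.add total
  total += c
doAssert offsets == @[0, 3, 5, 6] and total == 7
```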
@@ -115,9 +140,7 @@ template init*[T, N](L: type List[T, N], x: seq[T]): auto =
List[T, N](x)
template `$`*(x: List): auto = $(distinctBase x)
template add*(x: var List, val: auto) = add(distinctBase x, val)
template len*(x: List): auto = len(distinctBase x)
template setLen*(x: var List, val: auto) = setLen(distinctBase x, val)
template low*(x: List): auto = low(distinctBase x)
template high*(x: List): auto = high(distinctBase x)
template `[]`*(x: List, idx: auto): untyped = distinctBase(x)[idx]
@@ -131,6 +154,20 @@ template pairs* (x: List): untyped = pairs(distinctBase x)
template mitems*(x: var List): untyped = mitems(distinctBase x)
template mpairs*(x: var List): untyped = mpairs(distinctBase x)
proc add*(x: var List, val: auto): bool =
if x.len < x.maxLen:
add(distinctBase x, val)
true
else:
false
proc setLen*(x: var List, newLen: int): bool =
if newLen <= x.maxLen:
setLen(distinctBase x, newLen)
true
else:
false
template init*(L: type BitList, x: seq[byte], N: static Limit): auto =
BitList[N](data: x)
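
At call sites the commit uses two idioms with the new bool-returning `add`/`setLen`, depending on whether overflow is actually reachable: propagate an error when untrusted input can exceed the limit, or `raiseAssert` when an earlier check or a matching `maxLen` already rules it out. A compressed sketch of both (toy `tryAdd`, not the real `List.add`):

```nim
proc tryAdd(x: var seq[int], maxLen: int, v: int): bool =
  ## Toy stand-in for the bool-returning List.add above.
  if x.len >= maxLen:
    return false
  x.add v
  true

proc fromUntrustedInput(dst: var seq[int], v: int): bool =
  ## Overflow is reachable (e.g. arbitrarily many deposits), so it is
  ## reported to the caller, mirroring `return err("too many validators")`.
  if not tryAdd(dst, 4, v):
    return false
  true

proc fromCheckedInput(dst: var seq[int], v: int) =
  ## Capacity was already guaranteed (e.g. by a static doAssert that two
  ## lists share the same maxLen), so failure here is a logic bug.
  if not tryAdd(dst, 4, v):
    raiseAssert "capacity was checked above, add cannot fail"

var xs: seq[int]
doAssert fromUntrustedInput(xs, 1)
fromCheckedInput(xs, 2)
```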
@@ -194,13 +231,16 @@ func nodesAtLayer*(layer, depth, leaves: int): int =
func cacheNodes*(depth, leaves: int): int =
## Total number of nodes needed to cache a tree of a given depth with
## `leaves` items in it (the rest zero-filled)
## `leaves` items in it - chunks that are zero-filled have well-known hash
## trees and don't need to be stored in the tree.
var res = 0
for i in 0..<depth:
res += nodesAtLayer(i, depth, leaves)
res
proc clearCaches*(a: var HashList, dataIdx: int64) =
## Clear each level of the merkle tree up to the root affected by a data
## change at `dataIdx`.
if a.hashes.len == 0:
return
@@ -212,6 +252,8 @@ proc clearCaches*(a: var HashList, dataIdx: int64) =
idxInLayer = idx - (1'i64 shl layer)
layerIdx = idxInLayer + a.indices[layer]
if layerIdx < a.indices[layer + 1]:
# Only clear cache when we're actually storing it - ie it hasn't been
# skipped by the "combined zero hash" optimization
clearCache(a.hashes[layerIdx])
idx = idx shr 1
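
The walk above clears the O(log N) path from the modified chunk up to the root. A dense, self-contained sketch of the same idea follows (the real code additionally consults `indices` to skip nodes that were never stored because they only cover zero-filled chunks):

```nim
const depth = 4                      # a tree with 2^(depth-1) = 8 leaf chunks
var fresh: array[depth, seq[bool]]   # fresh[layer][i]: is cached hash i valid?

for layer in 0 ..< depth:
  fresh[layer] = newSeq[bool](1 shl layer)
  for i in 0 ..< fresh[layer].len:
    fresh[layer][i] = true

proc clearPath(chunk: int) =
  ## Invalidate the changed leaf chunk and every ancestor up to the root;
  ## siblings keep their cached hashes.
  var idx = chunk
  for layer in countdown(depth - 1, 0):
    fresh[layer][idx] = false
    idx = idx shr 1

clearPath(5)
doAssert not fresh[depth - 1][5]    # the leaf itself
doAssert not fresh[depth - 2][2]    # its parent
doAssert not fresh[0][0]            # the root
doAssert fresh[depth - 1][4]        # the sibling leaf is untouched
```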
@@ -225,7 +267,8 @@ proc clearCache*(a: var HashList) =
for c in a.hashes.mitems(): clearCache(c)
proc growHashes*(a: var HashList) =
# Ensure that the hash cache is big enough for the data in the list
## Ensure that the hash cache is big enough for the data in the list - must
## be called whenever `data` grows.
let
leaves = int(
chunkIdx(a, a.data.len() + dataPerChunk(a.T) - 1))
@@ -250,12 +293,26 @@ proc growHashes*(a: var HashList) =
swap(a.hashes, newHashes)
a.indices = newIndices
proc resetCache*(a: var HashList) =
## Perform a full reset of the hash cache, for example after data has been
## rewritten "manually" without going through the exported operators
a.hashes.setLen(0)
a.indices = default(type a.indices)
a.growHashes()
proc resetCache*(a: var HashArray) =
for h in a.hashes.mitems():
clearCache(h)
template len*(a: type HashArray): auto = int(a.maxLen)
template add*(x: var HashList, val: auto) =
add(x.data, val)
x.growHashes()
clearCaches(x, x.data.len() - 1)
proc add*(x: var HashList, val: auto): bool =
if add(x.data, val):
x.growHashes()
clearCaches(x, x.data.len() - 1)
true
else:
false
proc addDefault*(x: var HashList): ptr x.T =
distinctBase(x.data).setLen(x.data.len + 1)
@@ -299,7 +356,8 @@ template swap*(a, b: var HashList) =
swap(a.indices, b.indices)
template clear*(a: var HashList) =
a.data.setLen(0)
if not a.data.setLen(0):
raiseAssert "length 0 should always succeed"
a.hashes.setLen(0)
a.indices = default(type a.indices)

View File

@@ -31,9 +31,10 @@ func applyValidatorIdentities(
validators: var HashList[Validator, Limit VALIDATOR_REGISTRY_LIMIT],
hl: auto) =
for item in hl:
validators.add Validator(
pubkey: item.pubkey,
withdrawal_credentials: item.withdrawal_credentials)
if not validators.add Validator(
pubkey: item.pubkey,
withdrawal_credentials: item.withdrawal_credentials):
raiseAssert "cannot readd"
func setValidatorStatuses(
validators: var HashList[Validator, Limit VALIDATOR_REGISTRY_LIMIT],
@@ -67,7 +68,8 @@ func replaceOrAddEncodeEth1Votes[T, U](votes0, votes1: HashList[T, U]):
result[0] = lower_bound == 0
for i in lower_bound ..< votes1.len:
result[1].add votes1[i]
if not result[1].add votes1[i]:
raiseAssert "same limit"
func replaceOrAddDecodeEth1Votes[T, U](
votes0: var HashList[T, U], eth1_data_votes_replaced: bool,
@@ -76,11 +78,13 @@ func replaceOrAddDecodeEth1Votes[T, U](
votes0 = HashList[T, U]()
for item in votes1:
votes0.add item
if not votes0.add item:
raiseAssert "same limit"
func getMutableValidatorStatuses(state: BeaconState):
List[ValidatorStatus, Limit VALIDATOR_REGISTRY_LIMIT] =
result.setLen(state.validators.len)
if not result.setLen(state.validators.len):
raiseAssert "same limt as validators"
for i in 0 ..< state.validators.len:
let validator = unsafeAddr state.validators.data[i]
assign(result[i].effective_balance, validator.effective_balance)
@@ -149,9 +153,8 @@ func applyDiff*(
immutableValidators: openArray[ImmutableValidatorData],
stateDiff: BeaconStateDiff) =
template assign[T, U](tgt: var HashList[T, U], src: List[T, U]) =
tgt.clear()
assign(tgt.data, src)
tgt.growHashes()
tgt.resetCache()
# Carry over unchanged genesis_time, genesis_validators_root, and fork.
assign(state.latest_block_header, stateDiff.latest_block_header)
@@ -159,7 +162,8 @@
applyModIncrement(state.block_roots, stateDiff.block_roots, state.slot.uint64)
applyModIncrement(state.state_roots, stateDiff.state_roots, state.slot.uint64)
if stateDiff.historical_root_added:
state.historical_roots.add stateDiff.historical_root
if not state.historical_roots.add stateDiff.historical_root:
raiseAssert "cannot readd historical state root"
assign(state.eth1_data, stateDiff.eth1_data)
replaceOrAddDecodeEth1Votes(

View File

@@ -35,8 +35,10 @@ func add(v: var Deltas, idx: int, delta: Delta) =
v.penalties[idx] += delta.penalties
func init(T: type Deltas, len: int): T =
result.rewards.setLen(len)
result.penalties.setLen(len)
if not result.rewards.setLen(len):
raiseAssert "setLen"
if not result.penalties.setLen(len):
raiseAssert "setLen"
proc runTest(rewardsDir, identifier: string) =
# We wrap the tests in a proc to avoid running out of globals

View File

@@ -91,31 +91,40 @@ suiteReport "SSZ navigator":
let b = [byte 0x04, 0x05, 0x06].toDigest
let c = [byte 0x07, 0x08, 0x09].toDigest
var xx: List[uint64, 16]
check:
not xx.setLen(17)
xx.setLen(16)
var leaves = HashList[Eth2Digest, 1'i64 shl 3]()
leaves.add a
leaves.add b
leaves.add c
check:
leaves.add a
leaves.add b
leaves.add c
let root = hash_tree_root(leaves)
check $root == "5248085b588fab1dd1e03f3cd62201602b12e6560665935964f46e805977e8c5"
while leaves.len < 1 shl 3:
leaves.add c
check hash_tree_root(leaves) == hash_tree_root(leaves.data)
check:
leaves.add c
hash_tree_root(leaves) == hash_tree_root(leaves.data)
leaves = default(type leaves)
while leaves.len < (1 shl 3) - 1:
leaves.add c
leaves.add c
check hash_tree_root(leaves) == hash_tree_root(leaves.data)
check:
leaves.add c
leaves.add c
hash_tree_root(leaves) == hash_tree_root(leaves.data)
leaves = default(type leaves)
while leaves.len < (1 shl 3) - 2:
leaves.add c
leaves.add c
leaves.add c
check hash_tree_root(leaves) == hash_tree_root(leaves.data)
check:
leaves.add c
leaves.add c
leaves.add c
hash_tree_root(leaves) == hash_tree_root(leaves.data)
for i in 0 ..< leaves.data.len - 2:
leaves[i] = a
@@ -124,23 +133,26 @@ suiteReport "SSZ navigator":
check hash_tree_root(leaves) == hash_tree_root(leaves.data)
var leaves2 = HashList[Eth2Digest, 1'i64 shl 48]() # Large number!
leaves2.add a
leaves2.add b
leaves2.add c
check hash_tree_root(leaves2) == hash_tree_root(leaves2.data)
check:
leaves2.add a
leaves2.add b
leaves2.add c
hash_tree_root(leaves2) == hash_tree_root(leaves2.data)
var leaves3 = HashList[Eth2Digest, 7]() # Non-power-of-2
check hash_tree_root(leaves3) == hash_tree_root(leaves3.data)
leaves3.add a
leaves3.add b
leaves3.add c
check hash_tree_root(leaves3) == hash_tree_root(leaves3.data)
check:
hash_tree_root(leaves3) == hash_tree_root(leaves3.data)
leaves3.add a
leaves3.add b
leaves3.add c
hash_tree_root(leaves3) == hash_tree_root(leaves3.data)
timedTest "basictype":
var leaves = HashList[uint64, 1'i64 shl 3]()
while leaves.len < leaves.maxLen:
leaves.add leaves.lenu64
check hash_tree_root(leaves) == hash_tree_root(leaves.data)
check:
leaves.add leaves.lenu64
hash_tree_root(leaves) == hash_tree_root(leaves.data)
suiteReport "SSZ dynamic navigator":
timedTest "navigating fields":
@@ -197,10 +209,45 @@ suiteReport "hash":
both: it.arr[0].data[0] = byte 1
both: it.li.add Eth2Digest()
both: check: it.li.add Eth2Digest()
var y: HashArray[32, uint64]
doAssert hash_tree_root(y) == hash_tree_root(y.data)
for i in 0..<y.len:
y[i] = 42'u64
doAssert hash_tree_root(y) == hash_tree_root(y.data)
timedTest "HashList":
type MyList = HashList[uint64, 1024]
var
small, large: MyList
check: small.add(10'u64)
for i in 0..<100:
check: large.add(uint64(i))
let
sroot = hash_tree_root(small)
lroot = hash_tree_root(large)
doAssert sroot == hash_tree_root(small.data)
doAssert lroot == hash_tree_root(large.data)
var
sbytes = SSZ.encode(small)
lbytes = SSZ.encode(large)
sloaded = SSZ.decode(sbytes, MyList)
lloaded = SSZ.decode(lbytes, MyList)
doAssert sroot == hash_tree_root(sloaded)
doAssert lroot == hash_tree_root(lloaded)
# Here we smoke test that the cache is reset correctly even when reading
# into an existing instance - the instances are size-swapped so the reader
# both has to grow the cache (small instance, large encoding) and drop the
# stale hashes and level indices (large instance, small encoding)
readSszValue(sbytes, lloaded)
readSszValue(lbytes, sloaded)
doAssert lroot == hash_tree_root(sloaded)
doAssert sroot == hash_tree_root(lloaded)