avoid state copy in state transition (#930)

In BlockPool, we keep the head state around, so it's trivial to restore
the temporary state there and keep going as if nothing happened.

This solves 3 problems:
* stack space - the state copy on mainnet is huge
* GC scanning - using stack space for state slows down the GC
significantly
* reckless copying - the copy itself takes a long time

In state_sim, we'll do the same and allocate on heap - this helps a
little with GC - without it, the collection of the temporary strings
created with `toHex` while printing the json dominates the trace.
This commit is contained in:
Jacek Sieka 2020-04-26 21:13:33 +02:00 committed by GitHub
parent be475a82d7
commit 03a147ab8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 89 additions and 61 deletions

View File

@ -398,7 +398,17 @@ proc add*(
# TODO if the block is from the future, we should not be resolving it (yet),
# but maybe we should use it as a hint that our clock is wrong?
updateStateData(pool, pool.tmpState, BlockSlot(blck: parent, slot: blck.slot - 1))
if not state_transition(pool.tmpState.data, signedBlock, {}):
let
poolPtr = unsafeAddr pool # safe because restore is short-lived
proc restore(v: var HashedBeaconState) =
# TODO address this ugly workaround - there should probably be a
# `state_transition` that takes a `StateData` instead and updates
# the block as well
doAssert v.addr == addr poolPtr.tmpState.data
poolPtr.tmpState = poolPtr.headState
if not state_transition(pool.tmpState.data, signedBlock, {}, restore):
# TODO find a better way to log all this block data
notice "Invalid block",
blck = shortLog(blck),
@ -561,26 +571,6 @@ func checkMissing*(pool: var BlockPool): seq[FetchRecord] =
if v.tries.popcount() == 1:
result.add(FetchRecord(root: k, historySlots: v.slots))
proc skipAndUpdateState(
state: var HashedBeaconState, slot: Slot,
afterUpdate: proc (state: HashedBeaconState) {.gcsafe.}) =
while state.data.slot < slot:
# Process slots one at a time in case afterUpdate needs to see empty states
process_slots(state, state.data.slot + 1)
afterUpdate(state)
proc skipAndUpdateState(
state: var HashedBeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags,
afterUpdate: proc (state: HashedBeaconState) {.gcsafe.}): bool =
skipAndUpdateState(state, signedBlock.message.slot - 1, afterUpdate)
let ok = state_transition(state, signedBlock, flags)
afterUpdate(state)
ok
proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) =
# TODO we save state at every epoch start but never remove them - we also
# potentially save multiple states per slot if reorgs happen, meaning
@ -636,6 +626,31 @@ proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) =
# TODO this should be atomic with the above write..
currentCache.putStateRoot(blck.root, state.data.slot, state.root)
proc skipAndUpdateState(
pool: BlockPool,
state: var HashedBeaconState, blck: BlockRef, slot: Slot) =
while state.data.slot < slot:
# Process slots one at a time in case afterUpdate needs to see empty states
process_slots(state, state.data.slot + 1)
pool.putState(state, blck)
proc skipAndUpdateState(
pool: BlockPool,
state: var StateData, blck: BlockData, flags: UpdateFlags): bool =
pool.skipAndUpdateState(state.data, blck.refs, blck.data.message.slot - 1)
var statePtr = unsafeAddr state # safe because `restore` is locally scoped
proc restore(v: var HashedBeaconState) =
doAssert (addr(statePtr.data) == addr v)
statePtr[] = pool.headState
let ok = state_transition(state.data, blck.data, flags, restore)
if ok:
pool.putState(state.data, blck.refs)
ok
proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot):
seq[BlockData] =
logScope: pcs = "replay_state"
@ -765,8 +780,7 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) =
if state.blck.root == bs.blck.root and state.data.data.slot <= bs.slot:
if state.data.data.slot != bs.slot:
# Might be that we're moving to the same block but later slot
skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState):
pool.putState(state, bs.blck)
pool.skipAndUpdateState(state.data, bs.blck, bs.slot)
return # State already at the right spot
@ -785,14 +799,12 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) =
# applied. Pathologically quadratic in slot number, naïvely.
for i in countdown(ancestors.len - 1, 0):
let ok =
skipAndUpdateState(state.data,
ancestors[i].data,
{skipBlsValidation, skipMerkleValidation, skipStateRootValidation}) do (state: HashedBeaconState):
pool.putState(state, ancestors[i].refs)
pool.skipAndUpdateState(
state, ancestors[i],
{skipBlsValidation, skipMerkleValidation, skipStateRootValidation})
doAssert ok, "Blocks in database should never fail to apply.."
skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState):
pool.putState(state, bs.blck)
pool.skipAndUpdateState(state.data, bs.blck, bs.slot)
state.blck = bs.blck

View File

@ -138,8 +138,15 @@ proc verifyStateRoot(state: BeaconState, blck: BeaconBlock): bool =
else:
true
type
RollbackProc* = proc(v: var BeaconState) {.gcsafe.}
proc noRollback*(state: var BeaconState) =
trace "Skipping rollback of broken state"
proc state_transition*(
state: var BeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags): bool {.nbench.}=
state: var BeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags,
rollback: RollbackProc): bool {.nbench.} =
## Time in the beacon chain moves by slots. Every time (haha.) that happens,
## we will update the beacon state. Normally, the state updates will be driven
## by the contents of a new block, but it may happen that the block goes
@ -151,6 +158,12 @@ proc state_transition*(
## The flags are used to specify that certain validations should be skipped
## for the new block. This is done during block proposal, to create a state
## whose hash can be included in the new block.
##
## `rollback` is called if the transition fails and the given state has been
## partially changed. If a temporary state was given to `state_transition`,
## it is safe to use `noRollback` and leave it broken, else the state
## object should be rolled back to a consistent state. If the transition fails
## before the state has been updated, `rollback` will not be called.
#
# TODO this function can be written with a loop inside to handle all empty
# slots up to the slot of the new_block - but then again, why not eagerly
@ -167,11 +180,7 @@ proc state_transition*(
# many functions will mutate `state` partially without rolling back
# the changes in case of failure (look out for `var BeaconState` and
# bool return values...)
## TODO, of cacheState/processEpoch/processSlot/processBloc, only the last
## might fail, so should this bother capturing here, or?
var old_state = newClone(state)
doAssert not rollback.isNil, "use noRollback if it's ok to mess up state"
# These should never fail.
process_slots(state, signedBlock.message.slot)
@ -194,7 +203,7 @@ proc state_transition*(
return true
# Block processing failed, roll back changes
state = old_state[]
rollback(state)
false
# Hashed-state transition functions
@ -250,11 +259,16 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) =
trace "Couldn't update metrics", msg = e.msg
state.root = hash_tree_root(state.data)
proc state_transition*(
state: var HashedBeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags): bool =
# Save for rollback
var old_state = clone(state)
type
RollbackHashedProc* = proc(state: var HashedBeaconState) {.gcsafe.}
proc noRollback*(state: var HashedBeaconState) =
trace "Skipping rollback of broken state"
proc state_transition*(
state: var HashedBeaconState, signedBlock: SignedBeaconBlock,
flags: UpdateFlags, rollback: RollbackHashedProc): bool =
doAssert not rollback.isNil, "use noRollback if it's ok to mess up state"
process_slots(state, signedBlock.message.slot)
if skipBLSValidation in flags or
@ -275,6 +289,6 @@ proc state_transition*(
return true
# Block processing failed, roll back changes
state.data[] = old_state.data[]
state.root = old_state.root
rollback(state)
false

View File

@ -149,7 +149,8 @@ proc runFullTransition*(dir, preState, blocksPrefix: string, blocksQty: int, ski
let signedBlock = parseSSZ(blockPath, SignedBeaconBlock)
let flags = if skipBLS: {skipBlsValidation}
else: {}
let success = state_transition(state[], signedBlock.message, flags)
let success = state_transition(
state[], signedBlock, flags, noRollback)
echo "State transition status: ", if success: "SUCCESS ✓" else: "FAILURE ⚠️"
proc runProcessSlots*(dir, preState: string, numSlots: uint64) =

View File

@ -10,7 +10,7 @@ cli do(pre: string, blck: string, post: string, verifyStateRoot = false):
flags = if verifyStateRoot: {skipStateRootValidation} else: {}
var stateY = HashedBeaconState(data: stateX, root: hash_tree_root(stateX))
if not state_transition(stateY, blckX, flags):
if not state_transition(stateY, blckX, flags, noRollback):
error "State transition failed"
SSZ.saveFile(post, stateY.data)
else:
SSZ.saveFile(post, stateY.data)

View File

@ -106,7 +106,7 @@ proc nfuzz_attester_slashing(input: openArray[byte], output: ptr byte,
proc nfuzz_block(input: openArray[byte], output: ptr byte,
output_size: ptr uint, disable_bls: bool): bool {.exportc, raises: [FuzzCrashError, Defect].} =
decodeAndProcess(BlockInput):
state_transition(data.state[], data.beaconBlock, flags)
state_transition(data.state[], data.beaconBlock, flags, noRollback)
proc nfuzz_block_header(input: openArray[byte], output: ptr byte,
output_size: ptr uint, disable_bls: bool): bool {.exportc, raises: [FuzzCrashError, Defect].} =

View File

@ -78,15 +78,13 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
echo "Generating Genesis..."
let
genesisState =
initialize_beacon_state_from_eth1(Eth2Digest(), 0, deposits, flags)
genesisBlock = get_initial_beacon_block(genesisState[])
state = initialize_beacon_state_from_eth1(Eth2Digest(), 0, deposits, flags)
let genesisBlock = get_initial_beacon_block(state[])
echo "Starting simulation..."
var
attestations = initTable[Slot, seq[Attestation]]()
state = genesisState
latest_block_root = hash_tree_root(genesisBlock.message)
timers: array[Timers, RunningStat]
attesters: RunningStat
@ -102,10 +100,10 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
write(stdout, ".")
if last:
writeJson("state.json", state)
writeJson("state.json", state[])
else:
if state.slot mod json_interval.uint64 == 0:
writeJson(jsonName(prefix, state.slot), state)
if state[].slot mod json_interval.uint64 == 0:
writeJson(jsonName(prefix, state.slot), state[])
write(stdout, ":")
else:
write(stdout, ".")

View File

@ -131,4 +131,4 @@ proc add*(state: var BeaconState, attestation: Attestation, slot: Slot) =
signMockBlock(state, signedBlock)
doAssert state_transition(
state, signedBlock, flags = {skipStateRootValidation})
state, signedBlock, flags = {skipStateRootValidation}, noRollback)

View File

@ -65,4 +65,5 @@ proc applyEmptyBlock*(state: var BeaconState) =
## Do a state transition with an empty signed block
## on the current slot
let signedBlock = mockBlock(state, state.slot, flags = {})
doAssert state_transition(state, signedBlock, {skipStateRootValidation})
doAssert state_transition(
state, signedBlock, {skipStateRootValidation}, noRollback)

View File

@ -44,10 +44,12 @@ proc runTest(identifier: string) =
if hasPostState:
# TODO: The EF is using invalid BLS keys so we can't verify them
let success = state_transition(preState[], blck, flags = {skipBlsValidation})
let success = state_transition(
preState[], blck, flags = {skipBlsValidation}, noRollback)
doAssert success, "Failure when applying block " & $i
else:
let success = state_transition(preState[], blck, flags = {})
let success = state_transition(
preState[], blck, flags = {}, noRollback)
doAssert not success, "We didn't expect this invalid block to be processed"
# check: preState.hash_tree_root() == postState.hash_tree_root()

View File

@ -38,7 +38,7 @@ suiteReport "Block processing" & preset():
previous_block_root = hash_tree_root(genesisBlock.message)
new_block = makeTestBlock(state[], previous_block_root)
let block_ok = state_transition(state[], new_block, {})
let block_ok = state_transition(state[], new_block, {}, noRollback)
check:
block_ok
@ -58,7 +58,7 @@ suiteReport "Block processing" & preset():
for i in 1..SLOTS_PER_EPOCH.int:
let new_block = makeTestBlock(state[], previous_block_root)
let block_ok = state_transition(state[], new_block, {})
let block_ok = state_transition(state[], new_block, {}, noRollback)
check:
block_ok
@ -91,7 +91,7 @@ suiteReport "Block processing" & preset():
new_block = makeTestBlock(state[], previous_block_root,
attestations = @[attestation]
)
discard state_transition(state[], new_block, {})
check state_transition(state[], new_block, {}, noRollback)
check:
# TODO epoch attestations can get multiplied now; clean up paths to