mirror of
https://github.com/status-im/nimbus-eth2.git
synced 2025-02-17 08:56:45 +00:00
avoid genericAssign for beacon node types (#1166)
* avoid genericAssign for beacon node types ok, I got fed up of this function messing up cpu measurements - it's so ridiculously slow, it's sad. before, while syncing: ``` 40,65% beacon_node_shared_witti_0 [.] genericAssignAux__U5DxFPRpHCCZDKWQzM9adaw 9,02% libc-2.31.so [.] __memmove_avx_unaligned_erms 7,07% beacon_node_shared_witti_0 [.] BIG_384_58_monty 5,19% beacon_node_shared_witti_0 [.] BIG_384_58_mul 2,72% beacon_node_shared_witti_0 [.] memcpy@plt 1,18% [kernel] [k] rb_next 1,17% beacon_node_shared_witti_0 [.] genericReset 1,06% [kernel] [k] map_private_extent_buffer ``` after: ``` 24,88% beacon_node_shared_witti_0 [.] BIG_384_58_monty 20,29% beacon_node_shared_witti_0 [.] BIG_384_58_mul 3,15% beacon_node_shared_witti_0 [.] BIG_384_58_norm 2,93% beacon_node_shared_witti_0 [.] BIG_384_58_add 2,55% beacon_node_shared_witti_0 [.] BIG_384_58_sqr 1,64% beacon_node_shared_witti_0 [.] BIG_384_58_mod 1,63% beacon_node_shared_witti_0 [.] sha256Transform__BJNBQtWr9bJwzqbyfKXd38Q 1,48% beacon_node_shared_witti_0 [.] FP_BLS381_add 1,39% beacon_node_shared_witti_0 [.] BIG_384_58_sub 1,33% beacon_node_shared_witti_0 [.] BIG_384_58_dnorm 1,14% beacon_node_shared_witti_0 [.] FP2_BLS381_mul 1,05% beacon_node_shared_witti_0 [.] BIG_384_58_cmove 1,05% beacon_node_shared_witti_0 [.] get_shuffled_seq__4uncAHNsSG3Pndo5H11U9aQ ``` * better field iteration
This commit is contained in:
parent
42832cefa8
commit
78b767f645
@ -141,7 +141,7 @@ proc getState*(
|
||||
proc decode(data: openArray[byte]) =
|
||||
try:
|
||||
# TODO can't write to output directly..
|
||||
outputAddr[] = SSZ.decode(data, BeaconState)
|
||||
assign(outputAddr[], SSZ.decode(data, BeaconState))
|
||||
except SerializationError as e:
|
||||
# If the data can't be deserialized, it could be because it's from a
|
||||
# version of the software that uses a different SSZ encoding
|
||||
|
@ -301,7 +301,7 @@ proc getState(
|
||||
# Nonetheless, this is an ugly workaround that needs to go away
|
||||
doAssert false, "Cannot alias headState"
|
||||
|
||||
outputAddr[] = dag.headState
|
||||
assign(outputAddr[], dag.headState)
|
||||
|
||||
if not db.getState(stateRoot, output.data.data, restore):
|
||||
return false
|
||||
@ -351,7 +351,7 @@ func putStateCache(
|
||||
let entry =
|
||||
if dag.cachedStates.len == MAX_CACHE_SIZE: dag.cachedStates.pop().state
|
||||
else: (ref HashedBeaconState)()
|
||||
entry[] = state
|
||||
assign(entry[], state)
|
||||
|
||||
insert(dag.cachedStates, (blck.root, state.data.slot, entry))
|
||||
trace "CandidateChains.putState(): state cache updated",
|
||||
@ -529,7 +529,7 @@ proc rewindState(dag: CandidateChains, state: var StateData, bs: BlockSlot):
|
||||
# used in the front-end.
|
||||
let idx = dag.getStateCacheIndex(parBs.blck.root, parBs.slot)
|
||||
if idx >= 0:
|
||||
state.data = dag.cachedStates[idx].state[]
|
||||
assign(state.data, dag.cachedStates[idx].state[])
|
||||
let ancestor = ancestors.pop()
|
||||
state.blck = ancestor.refs
|
||||
|
||||
@ -605,7 +605,7 @@ proc getStateDataCached(dag: CandidateChains, state: var StateData, bs: BlockSlo
|
||||
|
||||
let idx = dag.getStateCacheIndex(bs.blck.root, bs.slot)
|
||||
if idx >= 0:
|
||||
state.data = dag.cachedStates[idx].state[]
|
||||
assign(state.data, dag.cachedStates[idx].state[])
|
||||
state.blck = bs.blck
|
||||
beacon_state_data_cache_hits.inc()
|
||||
return true
|
||||
|
@ -183,7 +183,7 @@ proc add*(
|
||||
# `state_transition` that takes a `StateData` instead and updates
|
||||
# the block as well
|
||||
doAssert v.addr == addr poolPtr.tmpState.data
|
||||
poolPtr.tmpState = poolPtr.headState
|
||||
assign(poolPtr.tmpState, poolPtr.headState)
|
||||
|
||||
var stateCache = getEpochCache(parent, dag.tmpState.data.data)
|
||||
if not state_transition(
|
||||
|
@ -22,7 +22,7 @@
|
||||
{.push raises: [Defect].}
|
||||
|
||||
import
|
||||
macros, hashes, json, strutils, tables,
|
||||
macros, hashes, json, strutils, tables, typetraits,
|
||||
stew/[byteutils], chronicles,
|
||||
json_serialization/types as jsonTypes,
|
||||
../ssz/types as sszTypes, ./crypto, ./digest
|
||||
@ -649,3 +649,33 @@ chronicles.formatIt Attestation: it.shortLog
|
||||
import json_serialization
|
||||
export json_serialization
|
||||
export writeValue, readValue
|
||||
|
||||
static:
  # Compile-time sanity checks: these two types must stay trivial enough
  # that a plain memcpy is a valid way to copy them.
  doAssert supportsCopyMem(Eth2Digest)
  doAssert supportsCopyMem(Validator)
|
||||
func assign*[T](tgt: var T, src: T) =
  ## Fast replacement for the compiler-generated `genericAssignAux`, which
  ## was ridiculously slow - during sync the application spent 50%+ CPU time
  ## in it, while `assign` doesn't even show up in the perf trace.
  ## Dispatches at compile time on the shape of `T` and uses `copyMem`
  ## wherever a flat byte copy is safe.
  when supportsCopyMem(T):
    # Trivial type: a single byte-wise copy is equivalent to assignment
    copyMem(addr tgt, unsafeAddr src, sizeof(tgt))
  elif T is object|tuple:
    # Copy field by field, recursing so nested fields also take the fast path
    for dstField, srcField in fields(tgt, src):
      assign(dstField, srcField)
  elif T is List|BitList:
    # Distinct wrappers: copy through the underlying base type
    assign(distinctBase tgt, distinctBase src)
  elif T is seq:
    tgt.setLen(src.len)
    when supportsCopyMem(type(tgt[0])):
      # Trivial elements: one bulk copy of the whole payload
      if tgt.len > 0:
        copyMem(addr tgt[0], unsafeAddr src[0], sizeof(tgt[0]) * tgt.len)
    else:
      # Non-trivial elements: deep-copy each one
      for idx in 0..<tgt.len:
        assign(tgt[idx], src[idx])
  elif T is ref:
    # Only the reference is copied - both sides alias the same instance
    tgt = src
  else:
    unsupported T
|
||||
|
@ -176,7 +176,7 @@ proc makeBeaconBlockForHeadAndSlot*(node: BeaconNode,
|
||||
# `state_transition` that takes a `StateData` instead and updates
|
||||
# the block as well
|
||||
doAssert v.addr == addr poolPtr.tmpState.data
|
||||
poolPtr.tmpState = poolPtr.headState
|
||||
assign(poolPtr.tmpState, poolPtr.headState)
|
||||
|
||||
var cache = get_empty_per_epoch_cache()
|
||||
let message = makeBeaconBlock(
|
||||
|
Loading…
x
Reference in New Issue
Block a user