mirror of
https://github.com/status-im/nimbus-eth2.git
synced 2025-02-21 10:48:17 +00:00
avoid genericAssign for beacon node types (#1166)
* avoid genericAssign for beacon node types

  OK, I got fed up with this function messing up CPU measurements - it's so
  ridiculously slow, it's sad.

  Before, while syncing:

  ```
  40,65% beacon_node_shared_witti_0 [.] genericAssignAux__U5DxFPRpHCCZDKWQzM9adaw
   9,02% libc-2.31.so               [.] __memmove_avx_unaligned_erms
   7,07% beacon_node_shared_witti_0 [.] BIG_384_58_monty
   5,19% beacon_node_shared_witti_0 [.] BIG_384_58_mul
   2,72% beacon_node_shared_witti_0 [.] memcpy@plt
   1,18% [kernel]                   [k] rb_next
   1,17% beacon_node_shared_witti_0 [.] genericReset
   1,06% [kernel]                   [k] map_private_extent_buffer
  ```

  After:

  ```
  24,88% beacon_node_shared_witti_0 [.] BIG_384_58_monty
  20,29% beacon_node_shared_witti_0 [.] BIG_384_58_mul
   3,15% beacon_node_shared_witti_0 [.] BIG_384_58_norm
   2,93% beacon_node_shared_witti_0 [.] BIG_384_58_add
   2,55% beacon_node_shared_witti_0 [.] BIG_384_58_sqr
   1,64% beacon_node_shared_witti_0 [.] BIG_384_58_mod
   1,63% beacon_node_shared_witti_0 [.] sha256Transform__BJNBQtWr9bJwzqbyfKXd38Q
   1,48% beacon_node_shared_witti_0 [.] FP_BLS381_add
   1,39% beacon_node_shared_witti_0 [.] BIG_384_58_sub
   1,33% beacon_node_shared_witti_0 [.] BIG_384_58_dnorm
   1,14% beacon_node_shared_witti_0 [.] FP2_BLS381_mul
   1,05% beacon_node_shared_witti_0 [.] BIG_384_58_cmove
   1,05% beacon_node_shared_witti_0 [.] get_shuffled_seq__4uncAHNsSG3Pndo5H11U9aQ
  ```

* better field iteration
This commit is contained in:
parent
42832cefa8
commit
78b767f645
@ -141,7 +141,7 @@ proc getState*(
|
|||||||
proc decode(data: openArray[byte]) =
|
proc decode(data: openArray[byte]) =
|
||||||
try:
|
try:
|
||||||
# TODO can't write to output directly..
|
# TODO can't write to output directly..
|
||||||
outputAddr[] = SSZ.decode(data, BeaconState)
|
assign(outputAddr[], SSZ.decode(data, BeaconState))
|
||||||
except SerializationError as e:
|
except SerializationError as e:
|
||||||
# If the data can't be deserialized, it could be because it's from a
|
# If the data can't be deserialized, it could be because it's from a
|
||||||
# version of the software that uses a different SSZ encoding
|
# version of the software that uses a different SSZ encoding
|
||||||
|
@ -301,7 +301,7 @@ proc getState(
|
|||||||
# Nonetheless, this is an ugly workaround that needs to go away
|
# Nonetheless, this is an ugly workaround that needs to go away
|
||||||
doAssert false, "Cannot alias headState"
|
doAssert false, "Cannot alias headState"
|
||||||
|
|
||||||
outputAddr[] = dag.headState
|
assign(outputAddr[], dag.headState)
|
||||||
|
|
||||||
if not db.getState(stateRoot, output.data.data, restore):
|
if not db.getState(stateRoot, output.data.data, restore):
|
||||||
return false
|
return false
|
||||||
@ -351,7 +351,7 @@ func putStateCache(
|
|||||||
let entry =
|
let entry =
|
||||||
if dag.cachedStates.len == MAX_CACHE_SIZE: dag.cachedStates.pop().state
|
if dag.cachedStates.len == MAX_CACHE_SIZE: dag.cachedStates.pop().state
|
||||||
else: (ref HashedBeaconState)()
|
else: (ref HashedBeaconState)()
|
||||||
entry[] = state
|
assign(entry[], state)
|
||||||
|
|
||||||
insert(dag.cachedStates, (blck.root, state.data.slot, entry))
|
insert(dag.cachedStates, (blck.root, state.data.slot, entry))
|
||||||
trace "CandidateChains.putState(): state cache updated",
|
trace "CandidateChains.putState(): state cache updated",
|
||||||
@ -529,7 +529,7 @@ proc rewindState(dag: CandidateChains, state: var StateData, bs: BlockSlot):
|
|||||||
# used in the front-end.
|
# used in the front-end.
|
||||||
let idx = dag.getStateCacheIndex(parBs.blck.root, parBs.slot)
|
let idx = dag.getStateCacheIndex(parBs.blck.root, parBs.slot)
|
||||||
if idx >= 0:
|
if idx >= 0:
|
||||||
state.data = dag.cachedStates[idx].state[]
|
assign(state.data, dag.cachedStates[idx].state[])
|
||||||
let ancestor = ancestors.pop()
|
let ancestor = ancestors.pop()
|
||||||
state.blck = ancestor.refs
|
state.blck = ancestor.refs
|
||||||
|
|
||||||
@ -605,7 +605,7 @@ proc getStateDataCached(dag: CandidateChains, state: var StateData, bs: BlockSlo
|
|||||||
|
|
||||||
let idx = dag.getStateCacheIndex(bs.blck.root, bs.slot)
|
let idx = dag.getStateCacheIndex(bs.blck.root, bs.slot)
|
||||||
if idx >= 0:
|
if idx >= 0:
|
||||||
state.data = dag.cachedStates[idx].state[]
|
assign(state.data, dag.cachedStates[idx].state[])
|
||||||
state.blck = bs.blck
|
state.blck = bs.blck
|
||||||
beacon_state_data_cache_hits.inc()
|
beacon_state_data_cache_hits.inc()
|
||||||
return true
|
return true
|
||||||
|
@ -183,7 +183,7 @@ proc add*(
|
|||||||
# `state_transition` that takes a `StateData` instead and updates
|
# `state_transition` that takes a `StateData` instead and updates
|
||||||
# the block as well
|
# the block as well
|
||||||
doAssert v.addr == addr poolPtr.tmpState.data
|
doAssert v.addr == addr poolPtr.tmpState.data
|
||||||
poolPtr.tmpState = poolPtr.headState
|
assign(poolPtr.tmpState, poolPtr.headState)
|
||||||
|
|
||||||
var stateCache = getEpochCache(parent, dag.tmpState.data.data)
|
var stateCache = getEpochCache(parent, dag.tmpState.data.data)
|
||||||
if not state_transition(
|
if not state_transition(
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
{.push raises: [Defect].}
|
{.push raises: [Defect].}
|
||||||
|
|
||||||
import
|
import
|
||||||
macros, hashes, json, strutils, tables,
|
macros, hashes, json, strutils, tables, typetraits,
|
||||||
stew/[byteutils], chronicles,
|
stew/[byteutils], chronicles,
|
||||||
json_serialization/types as jsonTypes,
|
json_serialization/types as jsonTypes,
|
||||||
../ssz/types as sszTypes, ./crypto, ./digest
|
../ssz/types as sszTypes, ./crypto, ./digest
|
||||||
@ -649,3 +649,33 @@ chronicles.formatIt Attestation: it.shortLog
|
|||||||
import json_serialization
|
import json_serialization
|
||||||
export json_serialization
|
export json_serialization
|
||||||
export writeValue, readValue
|
export writeValue, readValue
|
||||||
|
|
||||||
|
static:
  # Compile-time sanity checks: these types are expected to be trivial enough
  # to be copied with a plain memcpy
  doAssert Validator.supportsCopyMem
  doAssert Eth2Digest.supportsCopyMem
|
||||||
|
|
||||||
|
func assign*[T](tgt: var T, src: T) =
  ## Copies `src` into `tgt`, selecting the copy strategy at compile time
  ## based on `T`.
  ##
  ## This exists because the default `genericAssignAux` that Nim generates for
  ## assignments is ridiculously slow: when syncing, the application was
  ## spending 50%+ of its CPU time in it, whereas `assign`, in the same test,
  ## doesn't even show in the perf trace.
  ##
  ## `ref` values are assigned as references (the pointee is not duplicated).
  ## Any `T` not matched by a branch below ends up in `unsupported T`.

  when supportsCopyMem(T):
    # The whole value can be copied as one flat chunk of memory
    copyMem(tgt.addr, src.unsafeAddr, sizeof(T))
  elif T is object|tuple:
    # Walk both values in lockstep and copy field by field
    for dstField, srcField in fields(tgt, src):
      assign(dstField, srcField)
  elif T is List|BitList:
    # Strip the distinct wrapper and copy the underlying base type
    assign(distinctBase(tgt), distinctBase(src))
  elif T is seq:
    tgt.setLen(src.len)
    when supportsCopyMem(type(tgt[0])):
      # Only index element 0 when there is one - empty seqs need no copy
      if tgt.len > 0:
        copyMem(tgt[0].addr, src[0].unsafeAddr, tgt.len * sizeof(tgt[0]))
    else:
      # Elements need the recursive treatment, one at a time
      for idx in 0 ..< tgt.len:
        assign(tgt[idx], src[idx])
  elif T is ref:
    tgt = src
  else:
    unsupported T
|
||||||
|
@ -176,7 +176,7 @@ proc makeBeaconBlockForHeadAndSlot*(node: BeaconNode,
|
|||||||
# `state_transition` that takes a `StateData` instead and updates
|
# `state_transition` that takes a `StateData` instead and updates
|
||||||
# the block as well
|
# the block as well
|
||||||
doAssert v.addr == addr poolPtr.tmpState.data
|
doAssert v.addr == addr poolPtr.tmpState.data
|
||||||
poolPtr.tmpState = poolPtr.headState
|
assign(poolPtr.tmpState, poolPtr.headState)
|
||||||
|
|
||||||
var cache = get_empty_per_epoch_cache()
|
var cache = get_empty_per_epoch_cache()
|
||||||
let message = makeBeaconBlock(
|
let message = makeBeaconBlock(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user