avoid genericAssign for beacon node types (#1166)

* avoid genericAssign for beacon node types

ok, I got fed up with this function messing up cpu measurements - it's so
ridiculously slow, it's sad.

before, while syncing:

```
  40,65%  beacon_node_shared_witti_0  [.] genericAssignAux__U5DxFPRpHCCZDKWQzM9adaw
   9,02%  libc-2.31.so                [.] __memmove_avx_unaligned_erms
   7,07%  beacon_node_shared_witti_0  [.] BIG_384_58_monty
   5,19%  beacon_node_shared_witti_0  [.] BIG_384_58_mul
   2,72%  beacon_node_shared_witti_0  [.] memcpy@plt
   1,18%  [kernel]                    [k] rb_next
   1,17%  beacon_node_shared_witti_0  [.] genericReset
   1,06%  [kernel]                    [k] map_private_extent_buffer
```

after:

```
  24,88%  beacon_node_shared_witti_0  [.] BIG_384_58_monty
  20,29%  beacon_node_shared_witti_0  [.] BIG_384_58_mul
   3,15%  beacon_node_shared_witti_0  [.] BIG_384_58_norm
   2,93%  beacon_node_shared_witti_0  [.] BIG_384_58_add
   2,55%  beacon_node_shared_witti_0  [.] BIG_384_58_sqr
   1,64%  beacon_node_shared_witti_0  [.] BIG_384_58_mod
   1,63%  beacon_node_shared_witti_0  [.] sha256Transform__BJNBQtWr9bJwzqbyfKXd38Q
   1,48%  beacon_node_shared_witti_0  [.] FP_BLS381_add
   1,39%  beacon_node_shared_witti_0  [.] BIG_384_58_sub
   1,33%  beacon_node_shared_witti_0  [.] BIG_384_58_dnorm
   1,14%  beacon_node_shared_witti_0  [.] FP2_BLS381_mul
   1,05%  beacon_node_shared_witti_0  [.] BIG_384_58_cmove
   1,05%  beacon_node_shared_witti_0  [.] get_shuffled_seq__4uncAHNsSG3Pndo5H11U9aQ
```

* better field iteration
This commit is contained in:
Jacek Sieka 2020-06-12 21:10:22 +02:00 committed by GitHub
parent 42832cefa8
commit 78b767f645
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 38 additions and 8 deletions

View File

@@ -141,7 +141,7 @@ proc getState*(
proc decode(data: openArray[byte]) =
try:
# TODO can't write to output directly..
-outputAddr[] = SSZ.decode(data, BeaconState)
+assign(outputAddr[], SSZ.decode(data, BeaconState))
except SerializationError as e:
# If the data can't be deserialized, it could be because it's from a
# version of the software that uses a different SSZ encoding

View File

@@ -301,7 +301,7 @@ proc getState(
# Nonetheless, this is an ugly workaround that needs to go away
doAssert false, "Cannot alias headState"
-outputAddr[] = dag.headState
+assign(outputAddr[], dag.headState)
if not db.getState(stateRoot, output.data.data, restore):
return false
@@ -351,7 +351,7 @@ func putStateCache(
let entry =
if dag.cachedStates.len == MAX_CACHE_SIZE: dag.cachedStates.pop().state
else: (ref HashedBeaconState)()
-entry[] = state
+assign(entry[], state)
insert(dag.cachedStates, (blck.root, state.data.slot, entry))
trace "CandidateChains.putState(): state cache updated",
@@ -529,7 +529,7 @@ proc rewindState(dag: CandidateChains, state: var StateData, bs: BlockSlot):
# used in the front-end.
let idx = dag.getStateCacheIndex(parBs.blck.root, parBs.slot)
if idx >= 0:
-state.data = dag.cachedStates[idx].state[]
+assign(state.data, dag.cachedStates[idx].state[])
let ancestor = ancestors.pop()
state.blck = ancestor.refs
@@ -605,7 +605,7 @@ proc getStateDataCached(dag: CandidateChains, state: var StateData, bs: BlockSlo
let idx = dag.getStateCacheIndex(bs.blck.root, bs.slot)
if idx >= 0:
-state.data = dag.cachedStates[idx].state[]
+assign(state.data, dag.cachedStates[idx].state[])
state.blck = bs.blck
beacon_state_data_cache_hits.inc()
return true

View File

@@ -183,7 +183,7 @@ proc add*(
# `state_transition` that takes a `StateData` instead and updates
# the block as well
doAssert v.addr == addr poolPtr.tmpState.data
-poolPtr.tmpState = poolPtr.headState
+assign(poolPtr.tmpState, poolPtr.headState)
var stateCache = getEpochCache(parent, dag.tmpState.data.data)
if not state_transition(

View File

@@ -22,7 +22,7 @@
{.push raises: [Defect].}
import
-  macros, hashes, json, strutils, tables,
+  macros, hashes, json, strutils, tables, typetraits,
stew/[byteutils], chronicles,
json_serialization/types as jsonTypes,
../ssz/types as sszTypes, ./crypto, ./digest
@@ -649,3 +649,33 @@ chronicles.formatIt Attestation: it.shortLog
import json_serialization
export json_serialization
export writeValue, readValue
static:
  # Sanity checks - these types should be trivial enough to copy with memcpy
  # (i.e. they contain no seq/string/ref fields that need deep handling).
  doAssert supportsCopyMem(Validator)
  doAssert supportsCopyMem(Eth2Digest)

func assign*[T](tgt: var T, src: T) =
  ## Fast deep-assignment of `src` into `tgt`, replacing plain `tgt = src`
  ## on hot paths.
  # The default `genericAssignAux` that gets generated for assignments in nim
  # is ridiculously slow. When syncing, the application was spending 50%+ CPU
  # time in it - `assign`, in the same test, doesn't even show in the perf trace
  when supportsCopyMem(T):
    # Trivially copyable: a flat byte copy is enough.
    copyMem(addr tgt, unsafeAddr src, sizeof(tgt))
  elif T is object|tuple:
    # Recurse field by field; `fields` iterates both values in lockstep.
    for t, s in fields(tgt, src):
      assign(t, s)
  elif T is List|BitList:
    # List/BitList are project types; presumably distinct wrappers over a
    # copyable base, so delegate via distinctBase - TODO confirm.
    assign(distinctBase tgt, distinctBase src)
  elif T is seq:
    tgt.setLen(src.len)
    when supportsCopyMem(type(tgt[0])):
      # Elements are flat: one bulk memcpy (guard against empty seq, where
      # `tgt[0]` would be invalid).
      if tgt.len > 0:
        copyMem(addr tgt[0], unsafeAddr src[0], sizeof(tgt[0]) * tgt.len)
    else:
      # Elements need deep handling: assign them one by one.
      for i in 0..<tgt.len:
        assign(tgt[i], src[i])
  elif T is ref:
    # NOTE(review): refs are shared, not deep-copied - both sides alias the
    # same heap object afterwards; looks intentional, confirm at call sites.
    tgt = src
  else:
    # `unsupported` is presumably a project template raising a compile-time
    # error for types this routine cannot handle.
    unsupported T

View File

@@ -176,7 +176,7 @@ proc makeBeaconBlockForHeadAndSlot*(node: BeaconNode,
# `state_transition` that takes a `StateData` instead and updates
# the block as well
doAssert v.addr == addr poolPtr.tmpState.data
-poolPtr.tmpState = poolPtr.headState
+assign(poolPtr.tmpState, poolPtr.headState)
var cache = get_empty_per_epoch_cache()
let message = makeBeaconBlock(