From e8379389e728ffcd63d2b1333298c359372b7e95 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Wed, 26 Jul 2023 09:47:46 +0200 Subject: [PATCH] speed up state/block loading (#5207) * speed up state/block loading When loading blocks and states from db/era, we currently redundantly check their CRC32 - for a state, this costs 50ms of loading time presently (110mb uncompressed size) on a decent laptop. * remove `maxDecompressedDbRecordSize` - not actually used on recent data since we store the framed format - also, we're in luck: we blew past the limit quite some time ago * fix obsolete exception-based error checking * avoid `zeroMem` when reading from era store see https://github.com/status-im/nim-snappy/pull/22 for benchmarks * bump snappy --- beacon_chain/beacon_chain_db.nim | 42 ++++++++++++-------------------- beacon_chain/era_db.nim | 23 ++++++++++++----- ncli/e2store.nim | 3 ++- ncli/ncli_db.nim | 2 +- vendor/nim-snappy | 2 +- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/beacon_chain/beacon_chain_db.nim b/beacon_chain/beacon_chain_db.nim index 97b913f48..6672bc624 100644 --- a/beacon_chain/beacon_chain_db.nim +++ b/beacon_chain/beacon_chain_db.nim @@ -203,13 +203,6 @@ type slot*: Slot parent_root*: Eth2Digest -const - # The largest object we're saving is the BeaconState, and by far, the largest - # part of it is the validator - each validator takes up at least 129 bytes - # in phase0, which means 100k validators is >12mb - in addition to this, - # there are several MB of hashes. - maxDecompressedDbRecordSize = 64*1024*1024 - # Subkeys essentially create "tables" within the key-value store by prefixing # each entry with a table id @@ -628,7 +621,7 @@ proc decodeSSZ*[T](data: openArray[byte], output: var T): bool = proc decodeSnappySSZ[T](data: openArray[byte], output: var T): bool = try: - let decompressed = snappy.decode(data, maxDecompressedDbRecordSize) + let decompressed = snappy.decode(data) readSszBytes(decompressed, output, updateRoot = false) true except SerializationError as e: @@ -640,7 +633,7 @@ proc decodeSnappySSZ[T](data: openArray[byte], output: var T): bool = proc decodeSZSSZ[T](data: openArray[byte], output: var T): bool = try: - let decompressed = decodeFramed(data) + let decompressed = decodeFramed(data, checkIntegrity = false) readSszBytes(decompressed, output, updateRoot = false) true except CatchableError as e: @@ -989,8 +982,8 @@ proc getPhase0BlockSSZ( let dataPtr = addr data # Short-lived var success = true func decode(data: openArray[byte]) = - try: dataPtr[] = snappy.decode(data, maxDecompressedDbRecordSize) - except CatchableError: success = false + dataPtr[] = snappy.decode(data) + success = dataPtr[].len > 0 db.backend.get(subkey(phase0.SignedBeaconBlock, key), decode).expectDb() and success @@ -999,9 +992,8 @@ proc getPhase0BlockSZ( let dataPtr = addr data # Short-lived var success = true func decode(data: openArray[byte]) = - try: dataPtr[] = snappy.encodeFramed( - snappy.decode(data, maxDecompressedDbRecordSize)) - except CatchableError: success = false + dataPtr[] = snappy.encodeFramed(snappy.decode(data)) + success = dataPtr[].len > 0 db.backend.get(subkey(phase0.SignedBeaconBlock, key), decode).expectDb() and success @@ -1012,8 +1004,8 @@ proc getBlockSSZ*( let dataPtr = addr data # Short-lived var success = true func decode(data: openArray[byte]) = - try: dataPtr[] = snappy.decode(data, maxDecompressedDbRecordSize) - except CatchableError: success = false + dataPtr[] = snappy.decode(data) + success = dataPtr[].len > 0 db.blocks[ConsensusFork.Phase0].get(key.data, decode).expectDb() and success or db.v0.getPhase0BlockSSZ(key, data) @@ -1023,8 +1015,8 @@ proc getBlockSSZ*( let dataPtr = addr data # Short-lived var success = true func decode(data: openArray[byte]) = - try: dataPtr[] = snappy.decode(data, maxDecompressedDbRecordSize) - except CatchableError: success = false + dataPtr[] = snappy.decode(data) + success = dataPtr[].len > 0 db.blocks[T.toFork].get(key.data, decode).expectDb() and success proc getBlockSSZ*[ @@ -1034,8 +1026,8 @@ proc getBlockSSZ*[ let dataPtr = addr data # Short-lived var success = true func decode(data: openArray[byte]) = - try: dataPtr[] = decodeFramed(data) - except CatchableError: success = false + dataPtr[] = decodeFramed(data, checkIntegrity = false) + success = dataPtr[].len > 0 db.blocks[T.toFork].get(key.data, decode).expectDb() and success proc getBlockSSZ*( @@ -1067,9 +1059,8 @@ proc getBlockSZ*( let dataPtr = addr data # Short-lived var success = true func decode(data: openArray[byte]) = - try: dataPtr[] = snappy.encodeFramed( - snappy.decode(data, maxDecompressedDbRecordSize)) - except CatchableError: success = false + dataPtr[] = snappy.encodeFramed(snappy.decode(data)) + success = dataPtr[].len > 0 db.blocks[ConsensusFork.Phase0].get(key.data, decode).expectDb() and success or db.v0.getPhase0BlockSZ(key, data) @@ -1079,9 +1070,8 @@ proc getBlockSZ*( let dataPtr = addr data # Short-lived var success = true func decode(data: openArray[byte]) = - try: dataPtr[] = snappy.encodeFramed( - snappy.decode(data, maxDecompressedDbRecordSize)) - except CatchableError: success = false + dataPtr[] = snappy.encodeFramed(snappy.decode(data)) + success = dataPtr[].len > 0 db.blocks[T.toFork].get(key.data, decode).expectDb() and success proc getBlockSZ*[ diff --git a/beacon_chain/era_db.nim b/beacon_chain/era_db.nim index dcea515a3..b84ce182c 100644 --- a/beacon_chain/era_db.nim +++ b/beacon_chain/era_db.nim @@ -106,12 +106,20 @@ proc getBlockSSZ*( f: EraFile, slot: Slot, bytes: var seq[byte]): Result[void, string] = var tmp: seq[byte] ? f.getBlockSZ(slot, tmp) + let + len = uncompressedLenFramed(tmp).valueOr: + return err("Cannot read uncompressed length, era file corrupt?") - try: - bytes = decodeFramed(tmp) - ok() - except CatchableError as exc: - err(exc.msg) + if len > int.high.uint64: + return err("Invalid uncompressed size") + + bytes = newSeqUninitialized[byte](len) + + # Where it matters, we will integrity-check the data with SSZ - no + # need to waste cycles on crc32 + discard uncompressFramed(tmp, bytes, checkIntegrity = false).valueOr: + return err("Block failed to decompress, era file corrupt?") + ok() proc getStateSZ*( f: EraFile, slot: Slot, bytes: var seq[byte]): Result[void, string] = @@ -151,7 +159,10 @@ proc getStateSSZ*( else: len bytes = newSeqUninitialized[byte](wanted) - discard uncompressFramed(tmp, bytes).valueOr: + + # Where it matters, we will integrity-check the data with SSZ - no + # need to waste cycles on crc32 + discard uncompressFramed(tmp, bytes, checkIntegrity = false).valueOr: return err("State failed to decompress, era file corrupt?") ok() diff --git a/ncli/e2store.nim b/ncli/e2store.nim index 8763c5669..ded342350 100644 --- a/ncli/e2store.nim +++ b/ncli/e2store.nim @@ -166,7 +166,8 @@ proc readRecord*(f: IoHandle, data: var seq[byte]): Result[Header, string] = if header.len > 0: ? f.checkBytesLeft(header.len) - data.setLen(header.len) + if data.len != header.len: + data = newSeqUninitialized[byte](header.len) ? readFileExact(f, data) diff --git a/ncli/ncli_db.nim b/ncli/ncli_db.nim index 702939dbf..612acf183 100644 --- a/ncli/ncli_db.nim +++ b/ncli/ncli_db.nim @@ -620,7 +620,7 @@ proc cmdImportEra(conf: DbConf, cfg: RuntimeConfig) = if header.typ == SnappyBeaconBlock: withTimer(timers[tBlock]): - let uncompressed = decodeFramed(data) + let uncompressed = decodeFramed(data, checkIntegrity = false) let blck = try: readSszForkedSignedBeaconBlock(cfg, uncompressed) except CatchableError as exc: error "Invalid snappy block", msg = exc.msg, file diff --git a/vendor/nim-snappy b/vendor/nim-snappy index 6da3e98f5..ecbcee1d1 160000 --- a/vendor/nim-snappy +++ b/vendor/nim-snappy @@ -1 +1 @@ -Subproject commit 6da3e98f5420a0e86a1844b928cf38c8b656e2bc +Subproject commit ecbcee1d100140db6cb9c13d753d739fb5102fa3