e2store: avoid uint48 (#4625)

In SSZ, `uint32` is used for offsets, effectively limiting the size of
an SSZ entry to 2**32 bytes.

Also, `uint48` isn't a valid SSZ type, so the header was not correctly
defined according to the SSZ spec - the extra 2 bytes are left for
future expansion instead.
This commit is contained in:
Jacek Sieka 2023-02-15 14:51:17 +01:00 committed by GitHub
parent 1ac7f1a47a
commit 822c339607
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 10 deletions

View File

@ -17,10 +17,15 @@ The header corresponds to an SSZ object defined as such:
```python
class Header(Container):
type: Vector[byte, 2]
length: uint48
length: uint32
reserved: uint16
```
The `length` is the length of the data that follows the header, not including the length of the header itself. For example, the entry with header type `[0x22, 0x32]`, the length `4` and the bytes `[0x01, 0x02, 0x03, 0x04]` will be stored as the byte sequence `[0x22, 0x32, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04]`.
The `length` is the length of the data that follows the header, not including the length of the header itself.
The `reserved` field must be set to `0`.
For example, an entry with header type `[0x22, 0x32]`, length `4` and the content `[0x01, 0x02, 0x03, 0x04]` will be stored as the byte sequence `[0x22, 0x32, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04]`.
`.e2s` files may freely be concatenated, and may contain out-of-order records.
@ -40,7 +45,7 @@ def read_entry(f):
if not header: return None
typ = header[0:2] # 2 bytes of type
dlen = struct.unpack("<q", header[2:8] + b"\0\0")[0] # 6 bytes of little-endian length
dlen = struct.unpack("<I", header[2:6])[0] # 4 bytes of unsigned little-endian length
data = f.read(dlen)
@ -67,7 +72,6 @@ def print_stats(name):
print("type", k.hex(), "bytes", v[0], "count", v[1], "average", v[0] / v[1])
```
## Writing
`e2s` files are written record-by-record starting with a version record. Files may be concatenated freely, meaning that the version record may appear multiple times in the file and a single file may have multiple versions.
@ -340,3 +344,9 @@ In the end though, the applied block state is used throughout in the protocol -
## How can block roots be accessed without computing them?
Each era file contains a full `BeaconState` object whose `block_roots` field corresponds to the block contents of the file. The easiest way to access the roots is to read the "header" of the `BeaconState` without reading all fields.
## Why is length `uint32`?
Offsets in SSZ are encoded as `uint32`, so from a practical point of view a single SSZ object generally cannot exceed 2**32 bytes.
A future entry type can introduce chunking should larger entries be needed, or spill the high-order bytes of the length into `reserved`, effectively turning the encoding of the length into a notional `uint48` type.

View File

@ -20,8 +20,9 @@ const
SnappyBeaconState* = [byte 0x02, 0x00]
TypeFieldLen = 2
LengthFieldLen = 6
HeaderFieldLen = TypeFieldLen + LengthFieldLen
LengthFieldLen = 4
ReservedFieldLen = 2
HeaderFieldLen = TypeFieldLen + LengthFieldLen + ReservedFieldLen
FAR_FUTURE_ERA* = Era(not 0'u64)
@ -71,10 +72,14 @@ proc append(f: IoHandle, data: openArray[byte]): Result[void, string] =
ok()
proc appendHeader(f: IoHandle, typ: Type, dataLen: int): Result[int64, string] =
if dataLen.uint64 > uint32.high:
return err("entry does not fit 32-bit length")
let start = ? getFilePos(f).mapErr(toString)
? append(f, typ)
? append(f, toBytesLE(dataLen.uint64).toOpenArray(0, 5))
? append(f, toBytesLE(dataLen.uint32))
? append(f, [0'u8, 0'u8])
ok(start)
@ -137,9 +142,9 @@ proc readHeader(f: IoHandle): Result[Header, string] =
typ: Type
discard typ.copyFrom(buf)
# Cast safe because we had only 6 bytes of length data
# Cast safe because we had only 4 bytes of length data
let
len = cast[int64](uint64.fromBytesLE(buf.toOpenArray(2, 9)))
len = cast[int64](uint32.fromBytesLE(buf.toOpenArray(2, 5)))
# No point reading these..
if len > int.high(): return err("header length exceeds int.high")

View File

@ -5,7 +5,7 @@ def read_entry(f):
if not header: return (None, None)
typ = header[0:2] # 2 bytes of type
dlen = struct.unpack("<q", header[2:8] + b"\0\0")[0] # 6 bytes of little-endian length
dlen = struct.unpack("<I", header[2:6])[0] # 4 bytes of unsigned little-endian length
data = f.read(dlen)