e2store: avoid uint48 (#4625)
In SSZ, `uint32` is used for offsets, effectively limiting the size of an SSZ entry to 2**32 bytes. Also, `uint48` isn't a valid SSZ type, so the header was not correctly defined according to the SSZ spec - the extra 2 bytes are left for future expansion instead.
This commit is contained in:
parent
1ac7f1a47a
commit
822c339607
|
@ -17,10 +17,15 @@ The header corresponds to an SSZ object defined as such:
|
||||||
```python
|
```python
|
||||||
class Header(Container):
|
class Header(Container):
|
||||||
type: Vector[byte, 2]
|
type: Vector[byte, 2]
|
||||||
length: uint48
|
length: uint32
|
||||||
|
reserved: uint16
|
||||||
```
|
```
|
||||||
|
|
||||||
The `length` is the length of the data that follows the header, not including the length of the header itself. For example, the entry with header type `[0x22, 0x32]`, the length `4` and the bytes `[0x01, 0x02, 0x03, 0x04]` will be stored as the byte sequence `[0x22, 0x32, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04]`.
|
The `length` is the length of the data that follows the header, not including the length of the header itself.
|
||||||
|
|
||||||
|
The `reserved` field must be set to `0`.
|
||||||
|
|
||||||
|
For example, an entry with header type `[0x22, 0x32]`, length `4` and the content `[0x01, 0x02, 0x03, 0x04]` will be stored as the byte sequence `[0x22, 0x32, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04]`: 2 bytes of type, 4 bytes of little-endian length, 2 reserved zero bytes, then the content.
|
||||||
|
|
||||||
`.e2s` files may freely be concatenated, and may contain out-of-order records.
|
`.e2s` files may freely be concatenated, and may contain out-of-order records.
|
||||||
|
|
||||||
|
@ -40,7 +45,7 @@ def read_entry(f):
|
||||||
if not header: return None
|
if not header: return None
|
||||||
|
|
||||||
typ = header[0:2] # 2 bytes of type
|
typ = header[0:2] # 2 bytes of type
|
||||||
dlen = struct.unpack("<q", header[2:8] + b"\0\0")[0] # 6 bytes of little-endian length
|
dlen = struct.unpack("<I", header[2:6])[0] # 4 bytes of unsigned little-endian length
|
||||||
|
|
||||||
data = f.read(dlen)
|
data = f.read(dlen)
|
||||||
|
|
||||||
|
@ -67,7 +72,6 @@ def print_stats(name):
|
||||||
print("type", k.hex(), "bytes", v[0], "count", v[1], "average", v[0] / v[1])
|
print("type", k.hex(), "bytes", v[0], "count", v[1], "average", v[0] / v[1])
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Writing
|
## Writing
|
||||||
|
|
||||||
`e2s` files are written record-by-record starting with a version record. Files may be concatenated freely, meaning that the version record may appear multiple times in the file and a single file may have multiple versions.
|
`e2s` files are written record-by-record starting with a version record. Files may be concatenated freely, meaning that the version record may appear multiple times in the file and a single file may have multiple versions.
|
||||||
|
@ -340,3 +344,9 @@ In the end though, the applied block state is used throughout in the protocol -
|
||||||
## How can block roots be accessed without computing them?
|
## How can block roots be accessed without computing them?
|
||||||
|
|
||||||
Each era file contains a full `BeaconState` object whose `block_roots` field corresponds to the block contents of the file. The easiest way to access the roots is to read the "header" of the `BeaconState` without reading all fields.
|
Each era file contains a full `BeaconState` object whose `block_roots` field corresponds to the block contents of the file. The easiest way to access the roots is to read the "header" of the `BeaconState` without reading all fields.
|
||||||
|
|
||||||
|
## Why is length `uint32`?
|
||||||
|
|
||||||
|
Offsets in `SSZ` are `uint32`, so from a practical point of view any single SSZ object generally cannot exceed 2**32 bytes.
|
||||||
|
|
||||||
|
A future entry type can introduce chunking should larger entries be needed, or spill the upper bytes of the length into `reserved`, effectively turning the encoding of the length into a notional `uint48` type.
|
||||||
|
|
|
@ -20,8 +20,9 @@ const
|
||||||
SnappyBeaconState* = [byte 0x02, 0x00]
|
SnappyBeaconState* = [byte 0x02, 0x00]
|
||||||
|
|
||||||
TypeFieldLen = 2
|
TypeFieldLen = 2
|
||||||
LengthFieldLen = 6
|
LengthFieldLen = 4
|
||||||
HeaderFieldLen = TypeFieldLen + LengthFieldLen
|
ReservedFieldLen = 2
|
||||||
|
HeaderFieldLen = TypeFieldLen + LengthFieldLen + ReservedFieldLen
|
||||||
|
|
||||||
FAR_FUTURE_ERA* = Era(not 0'u64)
|
FAR_FUTURE_ERA* = Era(not 0'u64)
|
||||||
|
|
||||||
|
@ -71,10 +72,14 @@ proc append(f: IoHandle, data: openArray[byte]): Result[void, string] =
|
||||||
ok()
|
ok()
|
||||||
|
|
||||||
proc appendHeader(f: IoHandle, typ: Type, dataLen: int): Result[int64, string] =
|
proc appendHeader(f: IoHandle, typ: Type, dataLen: int): Result[int64, string] =
|
||||||
|
if dataLen.uint64 > uint32.high:
|
||||||
|
return err("entry does not fit 32-bit length")
|
||||||
|
|
||||||
let start = ? getFilePos(f).mapErr(toString)
|
let start = ? getFilePos(f).mapErr(toString)
|
||||||
|
|
||||||
? append(f, typ)
|
? append(f, typ)
|
||||||
? append(f, toBytesLE(dataLen.uint64).toOpenArray(0, 5))
|
? append(f, toBytesLE(dataLen.uint32))
|
||||||
|
? append(f, [0'u8, 0'u8])
|
||||||
|
|
||||||
ok(start)
|
ok(start)
|
||||||
|
|
||||||
|
@ -137,9 +142,9 @@ proc readHeader(f: IoHandle): Result[Header, string] =
|
||||||
typ: Type
|
typ: Type
|
||||||
discard typ.copyFrom(buf)
|
discard typ.copyFrom(buf)
|
||||||
|
|
||||||
# Cast safe because we had only 6 bytes of length data
|
# The cast is safe: only 4 bytes of length data are read, so the value always fits in an int64
|
||||||
let
|
let
|
||||||
len = cast[int64](uint64.fromBytesLE(buf.toOpenArray(2, 9)))
|
len = cast[int64](uint32.fromBytesLE(buf.toOpenArray(2, 5)))
|
||||||
|
|
||||||
# No point reading these..
|
# No point reading these..
|
||||||
if len > int.high(): return err("header length exceeds int.high")
|
if len > int.high(): return err("header length exceeds int.high")
|
||||||
|
|
|
@ -5,7 +5,7 @@ def read_entry(f):
|
||||||
if not header: return (None, None)
|
if not header: return (None, None)
|
||||||
|
|
||||||
typ = header[0:2] # 2 bytes of type
|
typ = header[0:2] # 2 bytes of type
|
||||||
dlen = struct.unpack("<q", header[2:8] + b"\0\0")[0] # 6 bytes of little-endian length
|
dlen = struct.unpack("<I", header[2:6])[0] # 4 bytes of unsigned little-endian length
|
||||||
|
|
||||||
data = f.read(dlen)
|
data = f.read(dlen)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue