Jacek Sieka 2024-06-03 09:04:05 +02:00
parent 3fd8bbaaab
commit 02b6400bca
1 changed file with 282 additions and 131 deletions


@ -11,7 +11,9 @@
{.push raises: [].}
import
std/[bitops, sequtils, sets, tables],
std/[sequtils, sets, tables],
stew/bitops2,
stint,
eth/[common, trie/nibbles],
results,
stew/endians2,
@ -21,14 +23,99 @@ import
# Private helper
# ------------------------------------------------------------------------------
proc load64(data: openArray[byte]; start: var int): Result[uint64,AristoError] =
type
Vube* = object
# A big-endian variation of https://john-millikin.com/vu128-efficient-variable-length-integers
# to preserve sort order which broadly helps maintain locality in databases
# See also https://sqlite.org/src4/doc/trunk/www/varint.wiki for more on this
# topic.
#
# The format uses at most 9 bytes for a uint64. The encoding proceeds as follows:
#
# If the number is <= 247, a single byte encodes the value - otherwise, we
# store the length offset by 247 in the first byte and the shortest possible
# big-endian encoding in the rest. uint256 follows the same pattern naturally.
#
# The encoding uses at most 1 byte more than the original size of the integer
# and could therefore plausibly encode integers up to 255 bytes long.
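#
# For illustration, a few example encodings under this scheme for uint64,
# where the single-byte limit is 247:
#
#   42    -> 2A                  value <= 247, stored directly
#   1000  -> F9 03 E8            2 value bytes, so the first byte is 247 + 2
#   2^32  -> FC 01 00 00 00 00   5 value bytes, so the first byte is 247 + 5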
VubeInt* = uint64|UInt256
func singleByte(_: type Vube, I: type uint64): byte =
byte(255 - sizeof(I))
func singleByte(_: type Vube, I: type UInt256): byte =
byte(255 - sizeof(I))
func singleHigh(_: type Vube, I: type uint64): uint64 =
uint64(Vube.singleByte(I))
func singleHigh(_: type Vube, I: type UInt256): UInt256 =
u256(Vube.singleByte(I))
func len*(T: type Vube, x: auto): int8 =
## Returns number of bytes required to encode integer ``x`` as vube.
if x <= Vube.singleHigh(typeof(x)):
1
else:
sizeof(x) + 1
func maxLen*(_: type Vube, I: type): int8 =
## The maximum number of bytes needed to encode any value of type I
Vube.len(I.high())
type VubeBuf*[I: VubeInt] = object
data*: array[maxLen(Vube, I), byte]
len*: int8
template toOpenArray[T: VubeBuf](b: T): openArray[byte] =
b.data.toOpenArray(0, b.len - 1)
proc toBytes*[I: VubeInt](data: I, _: type Vube): VubeBuf[I] =
const singleHigh = Vube.singleHigh(I)
# Maximum number that we can encode with a single byte - the other values
# are used to encode the length
if data <= singleHigh:
result.data[0] = when data is StUint:
data.truncate(uint8)
else:
uint8(data)
result.len = 1
else:
let
be = data.toBytesBE()
len = sizeof(data) - (data.leadingZeros() shr 3)
result.data[0] = Vube.singleByte(I) + byte(len)
result.data[1 .. len] = be.toOpenArray(be.high - len + 1, be.high)
result.len = int8(len + 1)
proc fromBytes*(
I: type VubeInt, data: openArray[byte], _: type Vube
): tuple[val: I, len: int8] =
const singleHigh = Vube.singleByte(I)
let code = data[0]
if code <= singleHigh:
when I is StUint:
(u256(code), int8 1)
else:
(uint64(code), int8 1)
else:
let len = int8(code - singleHigh)
var tmp: array[sizeof(I), byte]
tmp[tmp.high - len + 1 .. tmp.high] = data.toOpenArray(1, len)
# total number of bytes consumed, including the length byte
(I.fromBytesBE(tmp), int8(len + 1))
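# A minimal round-trip sketch for `toBytes`/`fromBytes` above, assuming the
# returned `len` is the total number of bytes consumed (length byte included):
when isMainModule:
  let small = 42'u64.toBytes(Vube)
  doAssert small.len == 1 and small.data[0] == 42'u8
  let big = 1000'u64.toBytes(Vube)
  # 1000 needs 2 value bytes, so the first byte is 247 + 2 = 0xF9
  doAssert big.len == 3 and big.data[0 .. 2] == @[byte 0xF9, 0x03, 0xE8]
  let (val, used) = uint64.fromBytes(big.toOpenArray(), Vube)
  doAssert val == 1000'u64 and used == big.len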
proc load64(data: openArray[byte], start: var int): Result[uint64, AristoError] =
if data.len < start + 9:
return err(DeblobPayloadTooShortInt64)
let val = uint64.fromBytesBE(data.toOpenArray(start, start + 7))
start += 8
ok val
proc load256(data: openArray[byte]; start: var int): Result[UInt256,AristoError] =
proc load256(data: openArray[byte], start: var int): Result[UInt256, AristoError] =
if data.len < start + 33:
return err(DeblobPayloadTooShortInt256)
let val = UInt256.fromBytesBE(data.toOpenArray(start, start + 31))
@ -39,6 +126,18 @@ proc load256(data: openArray[byte]; start: var int): Result[UInt256,AristoError]
# Public functions
# ------------------------------------------------------------------------------
template blobify*(v: uint64): untyped =
v.toBytes(Vube)
template blobify*(v: StUint): untyped =
v.toBytes(Vube)
proc blobifyTo2*(v: uint64, data: var Blob) =
data.add v.toBytes(Vube).toOpenArray
proc blobifyTo2*(v: UInt256, data: var Blob) =
data.add v.toBytes(Vube).toOpenArray
proc blobifyTo*(pyl: PayloadRef, data: var Blob) =
if pyl.isNil:
return
@ -49,7 +148,6 @@ proc blobifyTo*(pyl: PayloadRef, data: var Blob) =
of RlpData:
data &= pyl.rlpBlob
data &= @[0x6a.byte]
of AccountData:
var mask: byte
if 0 < pyl.account.nonce:
@ -73,34 +171,91 @@ proc blobifyTo*(pyl: PayloadRef, data: var Blob) =
data &= [mask]
proc blobifyTo*(vtx: VertexRef; data: var Blob): Result[void,AristoError] =
## This function serialises the vertex argument to a database record.
## Contrary to RLP based serialisation, these records aim to align on
## fixed byte boundaries.
## ::
## Branch:
## uint64, ... -- list of up to 16 child vertex lookup keys
## uint16 -- index bitmap
## 0x08 -- marker(8)
##
## Extension:
## uint64 -- child vertex lookup key
## Blob -- hex encoded partial path (at least one byte)
## 0x80 + xx -- marker(2) + pathSegmentLen(6)
##
## Leaf:
## Blob -- opaque leaf data payload (might be zero length)
## Blob -- hex encoded partial path (at least one byte)
## 0xc0 + yy -- marker(2) + partialPathLen(6)
##
## For a branch record, the bits of the 16-bit `access` bitmap indicate which
## of the 16 child slots are populated. The child vertex lookup keys are
## stored consecutively, one per set bit, in ascending slot order.
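##
## For example (with each child key written as an 8-byte big-endian integer,
## as `deblobify()` reads it back), a branch holding `VertexID(1)` at nibble 0
## and `VertexID(2)` at nibble 3 serialises to
## ::
##   00 00 00 00 00 00 00 01  -- child key at nibble 0
##   00 00 00 00 00 00 00 02  -- child key at nibble 3
##   00 09                    -- index bitmap, bits 0 and 3 set
##   08                       -- marker(8)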
##
proc blobifyTo2*(pyl: PayloadRef, data: var Blob) =
if pyl.isNil:
return
case pyl.pType
of RawData:
data &= pyl.rawBlob
data &= [0x6b.byte]
of RlpData:
data &= pyl.rlpBlob
data &= @[0x6a.byte]
of AccountData:
var mask: byte
if 0 < pyl.account.nonce:
mask = mask or 0x01
data &= pyl.account.nonce.uint64.blobify().toOpenArray()
if high(uint64).u256 < pyl.account.balance:
mask = mask or 0x08
data &= pyl.account.balance.blobify().toOpenArray()
elif 0 < pyl.account.balance:
mask = mask or 0x04
data &= pyl.account.balance.truncate(uint64).uint64.blobify().toOpenArray()
if VertexID(0) < pyl.account.storageID:
mask = mask or 0x10
data &= pyl.account.storageID.uint64.blobify().toOpenArray()
if pyl.account.codeHash != VOID_CODE_HASH:
mask = mask or 0x80
data &= pyl.account.codeHash.data
data &= [mask]
proc blobifyTo2*(vtx: VertexRef, data: var Blob): Result[void, AristoError] =
if not vtx.isValid:
return err(BlobifyNilVertex)
case vtx.vType:
case vtx.vType
of Branch:
var
access = 0u16
pos = data.len
for n in 0 .. 15:
if vtx.bVid[n].isValid:
access = access or (1u16 shl n)
data &= vtx.bVid[n].uint64.blobify().toOpenArray()
if data.len - pos < 16:
return err(BlobifyBranchMissingRefs)
data &= access.toBytesBE()
data &= [0x08u8]
of Extension:
let
pSegm = vtx.ePfx.hexPrefixEncode(isleaf = false)
psLen = pSegm.len.byte
if psLen == 0 or 33 < psLen:
return err(BlobifyExtPathOverflow)
if not vtx.eVid.isValid:
return err(BlobifyExtMissingRefs)
data &= vtx.eVid.uint64.blobify.toOpenArray()
data &= pSegm
data &= [0x80u8 or psLen]
of Leaf:
let
pSegm = vtx.lPfx.hexPrefixEncode(isleaf = true)
psLen = pSegm.len.byte
if psLen == 0 or 33 < psLen:
return err(BlobifyLeafPathOverflow)
vtx.lData.blobifyTo(data)
data &= pSegm
data &= [0xC0u8 or psLen]
ok()
proc blobifyTo2*(vGen: openArray[VertexID], data: var Blob) =
## This function serialises a list of vertex IDs.
## ::
## uint64, ... -- list of IDs
## 0x7c -- marker(8)
##
for w in vGen:
data &= w.uint64.blobify().toOpenArray()
data.add 0x7Cu8
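# A minimal usage sketch of the list layout above, assuming small IDs encode
# to a single vube byte each, so the record is just the IDs followed by 0x7c:
when isMainModule:
  var idData: Blob
  blobifyTo2(@[VertexID(1), VertexID(2)], idData)
  doAssert idData == @[byte 0x01, 0x02, 0x7C]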
import snappy
proc blobifyTo*(vtx: VertexRef, data: var Blob): Result[void, AristoError] =
if not vtx.isValid:
return err(BlobifyNilVertex)
case vtx.vType
of Branch:
var
access = 0u16
@ -133,8 +288,23 @@ proc blobifyTo*(vtx: VertexRef; data: var Blob): Result[void,AristoError] =
vtx.lData.blobifyTo(data)
data &= pSegm
data &= [0xC0u8 or psLen]
ok()
{.noSideEffect.}:
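# WIP instrumentation: compare the size of the existing encoding (`data`) with
# the vube-based `blobifyTo2` encoding (`data2`), both raw and
# snappy-compressed, printing running totals and ratios every 100 vertices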
var bytes {.global.}: int
var bytes2 {.global.}: int
var bytes3 {.global.}: int
var bytes4 {.global.}: int
var count {.global.}: int
inc count
bytes += data.len()
var data2: seq[byte]
discard blobifyTo2(vtx, data2)
bytes2 += data2.len
bytes3 += snappy.encode(data).len
bytes4 += snappy.encode(data2).len
if count mod 100 == 0:
debugEcho [count, bytes, bytes2, bytes3, bytes4], " ", [bytes.float / count.float, bytes2.float / bytes.float, bytes4.float / bytes3.float]
ok()
proc blobify*(vtx: VertexRef): Result[Blob, AristoError] =
## Variant of `blobify()`
@ -142,7 +312,7 @@ proc blobify*(vtx: VertexRef): Result[Blob, AristoError] =
?vtx.blobifyTo data
ok(move(data))
proc blobifyTo*(vGen: openArray[VertexID]; data: var Blob) =
proc blobifyTo*(vGen: openArray[VertexID], data: var Blob) =
## This function serialises a list of vertex IDs.
## ::
## uint64, ... -- list of IDs
@ -156,7 +326,7 @@ proc blobify*(vGen: openArray[VertexID]): Blob =
## Variant of `blobify()`
vGen.blobifyTo result
proc blobifyTo*(lSst: SavedState; data: var Blob) =
proc blobifyTo*(lSst: SavedState, data: var Blob) =
## Serialise a last saved state record
data.setLen(73)
(addr data[0]).copyMem(unsafeAddr lSst.src.data[0], 32)
@ -169,8 +339,7 @@ proc blobify*(lSst: SavedState): Blob =
## Variant of `blobify()`
lSst.blobifyTo result
proc blobifyTo*(filter: FilterRef; data: var Blob): Result[void,AristoError] =
proc blobifyTo*(filter: FilterRef, data: var Blob): Result[void, AristoError] =
## This function serialises an Aristo DB filter object
## ::
## uint64 -- filter ID
@ -191,7 +360,10 @@ proc blobifyTo*(filter: FilterRef; data: var Blob): Result[void,AristoError] =
##
func blobify(lid: HashKey): Blob =
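## Keys shorter than 32 bytes are stored as a length byte followed by the key
## data, zero-padded to 32 bytes in total; full 32-byte keys are stored as-is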
let n = lid.len
if n < 32: @[n.byte] & @(lid.data) & 0u8.repeat(31 - n) else: @(lid.data)
if n < 32:
@[n.byte] & @(lid.data) & 0u8.repeat(31 - n)
else:
@(lid.data)
if not filter.isValid:
return err(BlobifyNilFilter)
@ -270,7 +442,7 @@ proc blobify*(filter: FilterRef): Result[Blob, AristoError] =
?filter.blobifyTo data
ok move(data)
proc blobifyTo*(vFqs: openArray[(QueueID,QueueID)]; data: var Blob) =
proc blobifyTo*(vFqs: openArray[(QueueID, QueueID)], data: var Blob) =
## This function serialises a list of filter queue IDs.
## ::
## uint64, ... -- list of IDs
@ -287,10 +459,7 @@ proc blobify*(vFqs: openArray[(QueueID,QueueID)]): Blob =
# -------------
proc deblobify(
data: openArray[byte];
pyl: var PayloadRef;
): Result[void,AristoError] =
proc deblobify(data: openArray[byte], pyl: var PayloadRef): Result[void, AristoError] =
if data.len == 0:
pyl = PayloadRef(pType: RawData)
return ok()
@ -307,7 +476,7 @@ proc deblobify(
pAcc = PayloadRef(pType: AccountData)
start = 0
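# The mask byte encodes which account fields are present and how wide they
# are: bits 0-1 nonce, bits 2-3 balance, bits 4-5 storage ID, bits 6-7 code hash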
case mask and 0x03:
case mask and 0x03
of 0x00:
discard
of 0x01:
@ -315,7 +484,7 @@ proc deblobify(
else:
return err(DeblobNonceLenUnsupported)
case mask and 0x0c:
case mask and 0x0c
of 0x00:
discard
of 0x04:
@ -325,7 +494,7 @@ proc deblobify(
else:
return err(DeblobBalanceLenUnsupported)
case mask and 0x30:
case mask and 0x30
of 0x00:
discard
of 0x10:
@ -333,7 +502,7 @@ proc deblobify(
else:
return err(DeblobStorageLenUnsupported)
case mask and 0xc0:
case mask and 0xc0
of 0x00:
pAcc.account.codeHash = VOID_CODE_HASH
of 0x80:
@ -346,13 +515,15 @@ proc deblobify(
pyl = pAcc
ok()
proc deblobify*(record: openArray[byte]; vtx: var VertexRef): Result[void,AristoError] =
proc deblobify*(
record: openArray[byte], vtx: var VertexRef
): Result[void, AristoError] =
## De-serialise a data record encoded with `blobify()`. The second
## argument `vtx` can be `nil`.
if record.len < 3: # minimum `Leaf` record
return err(DeblobVtxTooShort)
case record[^1] shr 6:
case record[^1] shr 6
of 0: # `Branch` vertex
if record[^1] != 0x08u8:
return err(DeblobUnknown)
@ -371,15 +542,12 @@ proc deblobify*(record: openArray[byte]; vtx: var VertexRef): Result[void,Aristo
while access != 0:
if maxOffset < offs:
return err(DeblobBranchInxOutOfRange)
let n = access.firstSetBit - 1
let n = access.firstOne - 1
access.clearBit n
vtxList[n] = (uint64.fromBytesBE record.toOpenArray(offs, offs + 7)).VertexID
offs += 8
# End `while`
vtx = VertexRef(
vType: Branch,
bVid: vtxList)
vtx = VertexRef(vType: Branch, bVid: vtxList)
of 2: # `Extension` vertex
let
sLen = record[^1].int and 0x3f # length of path segment
@ -394,8 +562,8 @@ proc deblobify*(record: openArray[byte]; vtx: var VertexRef): Result[void,Aristo
vtx = VertexRef(
vType: Extension,
eVid: (uint64.fromBytesBE record.toOpenArray(0, 7)).VertexID,
ePfx: pathSegment)
ePfx: pathSegment,
)
of 3: # `Leaf` vertex
let
sLen = record[^1].int and 0x3f # length of path segment
@ -408,25 +576,19 @@ proc deblobify*(record: openArray[byte]; vtx: var VertexRef): Result[void,Aristo
return err(DeblobLeafGotExtPrefix)
var pyl: PayloadRef
?record.toOpenArray(0, pLen - 1).deblobify(pyl)
vtx = VertexRef(
vType: Leaf,
lPfx: pathSegment,
lData: pyl)
vtx = VertexRef(vType: Leaf, lPfx: pathSegment, lData: pyl)
else:
return err(DeblobUnknown)
ok()
proc deblobify*(data: openArray[byte]; T: type VertexRef): Result[T,AristoError] =
proc deblobify*(data: openArray[byte], T: type VertexRef): Result[T, AristoError] =
## Variant of `deblobify()` for vertex deserialisation.
var vtx = T(nil) # will be auto-initialised
?data.deblobify vtx
ok vtx
proc deblobify*(
data: openArray[byte];
vGen: var seq[VertexID];
data: openArray[byte], vGen: var seq[VertexID]
): Result[void, AristoError] =
## De-serialise the data record encoded with `blobify()` into the vertex ID
## generator argument `vGen`.
@ -442,18 +604,14 @@ proc deblobify*(
vGen.add (uint64.fromBytesBE data.toOpenArray(w, w + 7)).VertexID
ok()
proc deblobify*(
data: openArray[byte];
T: type seq[VertexID];
): Result[T,AristoError] =
proc deblobify*(data: openArray[byte], T: type seq[VertexID]): Result[T, AristoError] =
## Variant of `deblobify()` for deserialising the vertex ID generator state
var vGen: T
?data.deblobify vGen
ok move(vGen)
proc deblobify*(
data: openArray[byte];
lSst: var SavedState;
data: openArray[byte], lSst: var SavedState
): Result[void, AristoError] =
## De-serialise the last saved state data record previously encoded with
## `blobify()`.
@ -466,24 +624,20 @@ proc deblobify*(
lSst.serial = uint64.fromBytesBE data[64 .. 72]
ok()
proc deblobify*(
data: openArray[byte];
T: type SavedState;
): Result[T,AristoError] =
proc deblobify*(data: openArray[byte], T: type SavedState): Result[T, AristoError] =
## Variant of `deblobify()` for deserialising a last saved state data record
var lSst: T
?data.deblobify lSst
ok move(lSst)
proc deblobify*(data: Blob; filter: var FilterRef): Result[void,AristoError] =
proc deblobify*(data: Blob, filter: var FilterRef): Result[void, AristoError] =
## De-serialise an Aristo DB filter object
if data.len < 80: # minimum length 80 for an empty filter
return err(DeblobFilterTooShort)
if data[^1] != 0x7d:
return err(DeblobWrongType)
func deblob(data: openArray[byte]; shortKey: bool): Result[HashKey,void] =
func deblob(data: openArray[byte], shortKey: bool): Result[HashKey, void] =
if shortKey:
HashKey.fromBytes data.toOpenArray(1, min(int data[0], 31))
else:
@ -512,7 +666,8 @@ proc deblobify*(data: Blob; filter: var FilterRef): Result[void,AristoError] =
let
keyFlag = data[offs] shr 6
vtxFlag = ((uint32.fromBytesBE data.toOpenArray(offs, offs+3)) and 0x3fff_ffff).int
vtxFlag =
((uint32.fromBytesBE data.toOpenArray(offs, offs + 3)) and 0x3fff_ffff).int
vLen = if vtxFlag == 0x3fff_ffff: 0 else: vtxFlag
if keyFlag == 0 and vtxFlag == 0:
return err(DeblobFilterTrpVtxSizeGarbled) # no blind records
@ -545,15 +700,14 @@ proc deblobify*(data: Blob; filter: var FilterRef): Result[void,AristoError] =
filter = f
ok()
proc deblobify*(data: Blob; T: type FilterRef): Result[T,AristoError] =
proc deblobify*(data: Blob, T: type FilterRef): Result[T, AristoError] =
## Variant of `deblobify()` for deserialising an Aristo DB filter object
var filter: T
?data.deblobify filter
ok filter
proc deblobify*(
data: Blob;
vFqs: var seq[(QueueID,QueueID)];
data: Blob, vFqs: var seq[(QueueID, QueueID)]
): Result[void, AristoError] =
## De-serialise the data record encoded with `blobify()` into a filter queue
## ID argument list `vFqs`.
@ -572,10 +726,7 @@ proc deblobify*(
vFqs.add (a, b)
ok()
proc deblobify*(
data: Blob;
T: type seq[(QueueID,QueueID)];
): Result[T,AristoError] =
proc deblobify*(data: Blob, T: type seq[(QueueID, QueueID)]): Result[T, AristoError] =
## Variant of `deblobify()` for deserialising a filter queue ID list
var vFqs: seq[(QueueID, QueueID)]
?data.deblobify vFqs