leb128 + bitops fixes (#66)

Leb128 is a variable-length encoding for unsigned integers that is used
in a number of contexts - in particular, wasm, dwarf and protobuf.

This is an optimized low-level implementation that unrolls the loop
reading/writing the buffer - it is suitable to use as base for a more
specific API - no memory allocations, no exceptions.

This PR also fixes bitops2 to not raise on certain uint->int
conversions, adapting bitops to the nim 1.0 conversion rules by using a
cast where a checked conversion would otherwise raise.
This commit is contained in:
Jacek Sieka 2020-12-15 16:07:20 +01:00 committed by GitHub
parent 46068b49ae
commit 5cf4feabea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 338 additions and 40 deletions

View File

@ -58,21 +58,15 @@ func firstOneNim(x: uint32): int =
0
else:
let k = not x + 1 # get two's complement
1 + lookup[((x and k) * 0x077CB531'u32) shr 27].int
cast[int](1 + lookup[((x and k) * 0x077CB531'u32) shr 27])
func firstOneNim(x: uint8|uint16): int = firstOneNim(x.uint32)
func firstOneNim(x: uint64): int =
## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
# https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
template convert[T](x: uint64): T =
when nimvm:
T(x and high(T))
else:
cast[T](x)
if convert[uint32](x) == 0:
32 + firstOneNim(uint32(x shr 32'u32))
if (x and uint32.high) == 0:
cast[int](32 + uint(firstOneNim(uint32(x shr 32'u32))))
else:
firstOneNim(uint32(x))
@ -88,7 +82,7 @@ func log2truncNim(x: uint8|uint16|uint32): int =
v = v or v shr 4
v = v or v shr 8
v = v or v shr 16
lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int
cast[int](lookup[uint32(v * 0x07C4ACDD'u32) shr 27])
func log2truncNim(x: uint64): int =
## Quickly find the log base 2 of a 64-bit integer.
@ -105,7 +99,7 @@ func log2truncNim(x: uint64): int =
v = v or v shr 8
v = v or v shr 16
v = v or v shr 32
lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
cast[int](lookup[(v * 0x03F6EAF2CD271461'u64) shr 58])
func countOnesNim(x: uint8|uint16|uint32): int =
## Counts the set bits in integer. (also called Hamming weight.)
@ -114,7 +108,7 @@ func countOnesNim(x: uint8|uint16|uint32): int =
var v = x.uint32
v = v - ((v shr 1) and 0x55555555)
v = (v and 0x33333333) + ((v shr 2) and 0x33333333)
(((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24).int
cast[int](((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24)
func countOnesNim(x: uint64): int =
## Counts the set bits in integer. (also called Hamming weight.)
@ -123,7 +117,7 @@ func countOnesNim(x: uint64): int =
v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
((v * 0x0101010101010101'u64) shr 56'u64).int
cast[int]((v * 0x0101010101010101'u64) shr 56'u64)
func parityNim(x: SomeUnsignedInt): int =
# formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
@ -136,7 +130,7 @@ func parityNim(x: SomeUnsignedInt): int =
v = v xor (v shr 8)
v = v xor (v shr 4)
v = v and 0xf
((0x6996'u shr v) and 1).int
cast[int]((0x6996'u shr v) and 1)
when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins:
@ -158,24 +152,26 @@ when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins:
func countOnesBuiltin(x: SomeUnsignedInt): int =
when bitsof(x) == bitsof(culonglong):
builtin_popcountll(x.culonglong).int
cast[int](builtin_popcountll(x.culonglong))
else:
builtin_popcount(x.cuint).int
cast[int](builtin_popcount(x.cuint))
func parityBuiltin(x: SomeUnsignedInt): int =
when bitsof(x) == bitsof(culonglong):
builtin_parityll(x.culonglong).int
cast[int](builtin_parityll(x.culonglong))
else:
builtin_parity(x.cuint).int
cast[int](builtin_parity(x.cuint))
func firstOneBuiltin(x: SomeUnsignedInt): int =
when bitsof(x) == bitsof(clonglong):
builtin_ffsll(clonglong(x))
cast[int](builtin_ffsll(cast[clonglong](x)))
else:
builtin_ffs(x.cuint.cint)
cast[int](builtin_ffs(cast[cint](x.cuint)))
func log2truncBuiltin(v: uint8|uint16|uint32): int = 31 - builtin_clz(v.uint32)
func log2truncBuiltin(v: uint64): int = 63 - builtin_clzll(v)
func log2truncBuiltin(v: uint8|uint16|uint32): int =
cast[int](31 - cast[cuint](builtin_clz(v.uint32)))
func log2truncBuiltin(v: uint64): int =
cast[int](63 - cast[cuint](builtin_clzll(v)))
elif defined(vcc) and useBuiltins:
const arch64 = sizeof(int) == 8
@ -195,19 +191,22 @@ elif defined(vcc) and useBuiltins:
func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>".}
func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>".}
func countOnesBuiltin(v: uint8|uint16): int = builtin_popcnt16(v.uint16).int
func countOnesBuiltin(v: uint32): int = builtin_popcnt32(v).int
func countOnesBuiltin(v: uint8|uint16): int =
cast[int](builtin_popcnt16(v.uint16))
func countOnesBuiltin(v: uint32): int =
cast[int](builtin_popcnt32(v))
func countOnesBuiltin(v: uint64): int =
when arch64:
builtin_popcnt64(v).int
cast[int](builtin_popcnt64(v))
else:
builtin_popcnt32((v and 0xFFFFFFFF'u64).uint32).int +
builtin_popcnt32((v shr 32'u64).uint32).int
cast[int](
builtin_popcnt32((v and uint32.high).uint32) +
builtin_popcnt32((v shr 32'u64).uint32))
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v) == cuchar(0): def
else: index.int
else: cast[int](index)
func firstOneBuiltin(v: uint8|uint16|uint32): int =
1 + checkedScan(bitScanForward, v.culong, -1)
@ -220,8 +219,8 @@ elif defined(vcc) and useBuiltins:
template bitScan(fnc: untyped, x: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v).int == 0: 0
else: index.int
if fnc(index.addr, v) == cuchar(0): 0
else: cast[int](index)
func log2truncBuiltin(v: uint8|uint16|uint32): int =
bitScan(bitScanReverse, v.culong)
@ -253,21 +252,23 @@ elif defined(icc) and useBuiltins:
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v).int == 0: def
else: index.int
if fnc(index.addr, v) == cuchar(0): def
else: cast[int](index)
template bitScan(fnc: untyped, x: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v).int == 0: 0
else: index.int
if fnc(index.addr, v) == cuchar(0): 0
else: cast[int](index)
func countOnesBuiltin(v: uint8|uint16|uint32): int = builtin_popcnt32(v.cint).int
func countOnesBuiltin(v: uint8|uint16|uint32): int =
cast[int](builtin_popcnt32(cast[cint](v)))
func countOnesBuiltin(v: uint64): int =
when arch64:
builtin_popcnt64(v).int
cast[int](builtin_popcnt64(v))
else:
builtin_popcnt32((v and 0xFFFFFFFF'u64).cint).int +
builtin_popcnt32((v shr 32'u64).cint).int
cast[int](
builtin_popcnt32(cast[cint](v and 0xFFFFFFFF'u64)) +
builtin_popcnt32(cast[cint](v shr 32'u64)))
func firstOneBuiltin(v: uint8|uint16|uint32): int =
1 + checkedScan(bitScanForward, v.culong, -1)
@ -292,8 +293,6 @@ func countOnes*(x: SomeUnsignedInt): int {.inline.} =
##
## Example:
## doAssert countOnes(0b01000100'u8) == 2
# TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
# like GCC and MSVC
when nimvm:
countOnesNim(x)
else:

175
stew/leb128.nim Normal file
View File

@ -0,0 +1,175 @@
## Low-level little-endian base 128 variable length integer/byte converters, as
## described in https://en.wikipedia.org/wiki/LEB128 - up to 64 bits supported.
##
## The leb128 encoding is used in DWARF and WASM.
##
## It is also fully compatible with the unsigned varint encoding found in
## `protobuf` and `go`, and can thus be used directly. It's easy to build
## support for the two kinds (zig-zag and cast) of signed encodings on top.
##
## This is not the only way to encode variable length integers - variations
## exist like sqlite and utf-8 - in particular, the `std/varints` module
## implements the sqlite flavour.
##
## This implementation contains low-level primitives suitable for building
## more easy-to-use API.
##
## Exception/Defect free as of nim 1.2.
##
## Security notes:
##
## leb128 allows overlong byte sequences that decode into the same integer -
## the library decodes these sequences to a certain extent, but will stop
## decoding at the maximum length that a minimal encoder will produce. For
## example, the byte sequence `[byte 0x80, 0x80, 0x00]`, when decoded as a
## `uint64` is a valid encoding for `0` because the maximum length of a minimal
## `uint64` encoding is 10 bytes - however, because all minimal encodings
## for `uint8` fit in 2 bytes, decoding the same byte sequence as `uint8` will
## yield an error return.
##
## To be strict about overlong encodings, compare the decoded number of bytes
## with `Leb128.len(decoded_value)`.
{.push raises: [].}
import
stew/bitops2
const
  # Lookup table indexed by the position (0..63) of the highest set bit of a
  # value: entry `p` is the number of 7-bit groups - and therefore leb128
  # bytes - needed to hold `p + 1` significant bits.
  lengths = block:
    var table: array[64, int8]
    for bitPos in low(table)..high(table):
      table[bitPos] = int8(bitPos div 7 + 1)
    table
type
  Leb128* = object
    ## Empty marker type - passed as a `type` argument to select the leb128
    ## flavour of the encoding helpers (`toBytes`, `fromBytes`, `len`, ...)
# For inputs of at most 64 bits, log2trunc is expected to stay within 0..63,
# which is always a valid index into `lengths` - checks can thus be removed
{.push checks: off.}
func len*(T: type Leb128, x: SomeUnsignedInt): int8 =
  ## Number of bytes that the minimal leb128 encoding of ``x`` occupies.
  if x != 0:
    lengths[log2trunc(x)]
  else:
    1'i8 # Zero still takes up one byte
{.pop.}
func maxLen*(T: type Leb128, I: type): int8 =
  ## Upper bound on the encoded size of a value of type `I`: the length of the
  ## encoding of the largest value `I` can hold.
  len(Leb128, high(I))
type
  Leb128Buf*[T: SomeUnsignedInt] = object
    ## Fixed-size buffer able to hold the leb128 encoding of any value of `T`
    data*: array[Leb128.maxLen(T), byte]
      ## Encoded bytes - capacity is `maxLen` of `T`, i.e. at most 10
    len*: int8
      ## Number of bytes of `data` in use - >= 1 when holding valid leb128
template write7(next: untyped) =
  # Emit one 7-bit group: as long as more than 7 bits remain in `v`, store the
  # low 7 bits with the continuation bit (0x80) set, drop them from `v` and
  # carry on with `next`. Expects `v` and `result` to exist at the call site.
  if v > type(v)(127):
    result.data[result.len] = cast[byte](v and type(v)(0x7f)) or 0x80'u8
    inc result.len
    v = v shr 7
    next
# The buffer in Leb128Buf is sized for the longest encoding of its integer
# type, so neither the indexed writes nor the length arithmetic below can go
# out of range - runtime checks are therefore switched off for this section
{.push checks: off.}
func toBytes*[I: SomeUnsignedInt](v: I, T: type Leb128): Leb128Buf[I] {.noinit.} =
  ## Encode `v` as the shortest possible leb128 byte sequence.
  ##
  ## Example:
  ##   15'u16.toBytes(Leb128)
  var v = v # mutable copy, consumed 7 bits at a time by `write7`
  result.len = 0 # `result` is noinit - the length must be set explicitly

  # Hand-unrolled on purpose: every level handles one 7-bit group and only
  # descends when more bits remain. The compiler is relied upon to drop the
  # levels that cannot be reached for the narrower integer types.
  write7(): # 7
    write7(): # 14
      write7(): # 21
        write7(): # 28
          write7(): # 35
            write7(): # 42
              write7(): # 49
                write7(): # 56
                  write7(): # 63
                    discard

  # What is left of v is <= 127, so the last byte carries no continuation bit
  result.data[result.len] = cast[byte](v and type(v)(0xff))
  inc result.len
template read7(shift: untyped) =
  # Decode the byte that carries bits [shift, shift + 7) of the value and
  # return from the enclosing routine iff decoding ends here. Expects `I`, `x`,
  # `xlen` and `res` to exist at the call site.
  if xlen <= (shift div 7):
    return (I(0), 0'i8) # Ran out of input - report 0 bytes read

  let
    raw = x[shift div 7]
    payload = raw and 0x7f'u8 # the 7 data bits, continuation bit removed
    widened = I(payload)
    shifted = widened shl shift

  when shift > (sizeof(widened) * 8 - 7):
    # Part of this group lies beyond the width of I - any bit lost by the
    # shift means that the encoded value does not fit
    if (shifted shr shift) != widened:
      return (I(0), -cast[int8]((shift div 7) + 1))

  res = res or shifted
  if (raw and 0x80'u8) == 0'u8: # No continuation bit - this was the last byte
    return (res, cast[int8]((shift div 7) + 1))
func fromBytes*(
    I: type SomeUnsignedInt,
    x: openArray[byte],
    T: type Leb128): tuple[val: I, len: int8] {.noinit.} =
  ## Parse a LEB128 byte sequence and return the value together with the number
  ## of bytes that were parsed.
  ##
  ## If parsing fails, `len <= 0` is returned and `val` is 0:
  ## * `0` when `x` ends before the value is complete
  ## * `-len` on overflow, `len` being the number of bytes examined when the
  ##   problem was detected
  ##
  ## At most as many bytes as a minimal encoder can produce for `I` are
  ## decoded (2 for `uint8`, 3 for `uint16`, 5 for `uint32`, 10 for `uint64`).
  ## Overlong encodings within that limit are accepted; a sequence that is
  ## still not terminated when the limit is reached and has further bytes
  ## available is reported as an overflow.

  # Longest minimal encoding of I: one byte per started group of 7 bits
  const maxBytes = (sizeof(I) * 8 + 6) div 7

  let xlen = x.len()
  var
    res: I

  # Unrolled reads - groups that lie entirely beyond the width of I are never
  # instantiated, so no shift by the full bit width (or more) is performed
  read7(0)
  read7(7)
  when sizeof(I) >= 2:
    read7(14)
  when sizeof(I) >= 4:
    read7(21)
    read7(28)
  when sizeof(I) >= 8:
    read7(35)
    read7(42)
    read7(49)
    read7(56)
    read7(63)

  # Every byte read so far had its continuation bit set
  when sizeof(I) < 8:
    if xlen <= maxBytes:
      # The input ends exactly at the limit: keep reporting "not enough
      # bytes", as was done for such input before the limit was enforced
      return (I(0), 0'i8)

  # More bytes follow (or, for 64-bit types, 10 bytes were already consumed):
  # the encoding is longer than anything a minimal encoder would produce
  (I(0), int8(-(maxBytes + 1)))
{.pop.}
template toOpenArray*(v: Leb128Buf): openArray[byte] =
  ## View of the bytes of `v` that are in use, i.e. the first `v.len` bytes
  v.data.toOpenArray(0, v.len - 1)

template len*(v: Leb128Buf): int8 =
  ## Number of encoded bytes held by `v`
  v.len

template `@`*(v: Leb128Buf): seq[byte] =
  ## Copy of the encoded bytes of `v` as a `seq`
  @(toOpenArray(v))
iterator items*(v: Leb128Buf): byte =
  ## Yields the encoded bytes of `v` in order, from `data[0]` up to but not
  ## including `data[v.len]`
  var pos = 0'i8
  while pos < v.len:
    yield v.data[pos]
    inc pos
template fromBytes*(
    I: type SomeUnsignedInt,
    x: Leb128Buf): tuple[val: I, len: int8] =
  ## Decode the contents of a `Leb128Buf` as an `I`.
  # The buffer may have been filled in by anyone, so nothing is assumed about
  # its validity - it goes through the same parser as any other byte sequence
  fromBytes(I, x.toOpenArray(), Leb128)
func scan*(
    I: type SomeUnsignedInt,
    x: openArray[byte],
    T: type Leb128): int8 {.noinit.} =
  ## Scan `x` for a leb128-encoded value that fits in `I` and report how many
  ## bytes it uses - the result is the `len` that `fromBytes` returns for the
  ## same arguments, including its `<= 0` failure values.
  # TODO this can be done efficiently with SSE
  let parsed = fromBytes(I, x, Leb128)
  parsed.len

View File

@ -1,5 +1,25 @@
## This module implements Variable Integer `VARINT`.
{.deprecated: "use leb128 or a higher level decoder".}
# There are better variations on this module around:
# * stew/leb128 implements the core varint encoding
# * nim-protobuf-serialization and nim-libp2p contain higher-level protobuf
# varint encoding/decoding
#
# This module has a couple of problems as written:
# * Name conflict with std/varints which implements a _different_ varint
# encoding (sqlite-style)
# * the `Stream` interface in this file is underdefined (i.e. there's a hidden
# implicit dependency on nim-serialization) - the stateful byte-by-byte
# decoder should likely be moved there instead
# * The signed integer support is biased towards casting, whereas the most
# "common" way of encoding signed integers in protobuf is "zig-zag", whose
# support is missing - above all, biasing towards one of the two signed
# integer formats is error-prone
# * there is no detection of overlong sequences
# * overflows in high bits of nibble are not detected
import
bitops2

104
tests/test_leb128.nim Normal file
View File

@ -0,0 +1,104 @@
import
unittest, random,
../stew/[byteutils, leb128, results]
# Values on either side of every 7-bit group boundary, paired with the hex
# form of their expected minimal leb128 encoding
const edgeValues = [
  (0'u64, "00"),
  (1'u64, "01"),
  ((1'u64 shl 7) - 1'u64, "7f"),
  ((1'u64 shl 7), "8001"),
  ((1'u64 shl 7) + 1'u64, "8101"),
  ((1'u64 shl 14) - 1'u64, "ff7f"),
  ((1'u64 shl 14), "808001"),
  ((1'u64 shl 21) - 1'u64, "ffff7f"),
  ((1'u64 shl 21), "80808001"),
  ((1'u64 shl 28) - 1'u64, "ffffff7f"),
  ((1'u64 shl 28), "8080808001"),
  ((1'u64 shl 35) - 1'u64, "ffffffff7f"),
  ((1'u64 shl 35), "808080808001"),
  ((1'u64 shl 42) - 1'u64, "ffffffffff7f"),
  ((1'u64 shl 42), "80808080808001"),
  ((1'u64 shl 49) - 1'u64, "ffffffffffff7f"),
  ((1'u64 shl 49), "8080808080808001"),
  ((1'u64 shl 56) - 1'u64, "ffffffffffffff7f"),
  ((1'u64 shl 56), "808080808080808001"),
  ((1'u64 shl 63) - 1'u64, "ffffffffffffffff7f"),
  ((1'u64 shl 63), "80808080808080808001"),
  (0xFFFF_FFFF_FFFF_FFFF'u64, "ffffffffffffffffff01")
]
suite "leb128":
  template roundtripTest(value: typed) =
    # Encode `value`, decode it again as the same type and verify that the
    # value survives. The encoded buffer is exposed to the caller as `leb`.
    let leb {.inject.} = value.toBytes(Leb128)
    let decoded = type(value).fromBytes(leb.toOpenArray(), Leb128)

    check value == decoded.val

  test "Success edge cases test":
    for entry in edgeValues:
      let (value, hex) = entry
      roundtripTest value
      # Besides round-tripping, the exact byte sequence must match
      check toHex(leb.toOpenArray()) == hex

  test "roundtrip random values":
    template testSome(T: type) =
      for round in 0..10000:
        # TODO nim 1.0 random casts limits to int, so anything bigger will crash
        #      * sigh *
        #      https://github.com/nim-lang/Nim/issues/16360
        let v1 = rand(T(0) .. cast[T](int.high))
        roundtripTest v1

    testSome(uint8)
    testSome(uint16)
    testSome(uint32)
    testSome(uint64)

  test "lengths":
    # Expected encoded size on either side of every 7-bit group boundary
    const expectedLens = {
      0'u64                     : 1,
      1'u64                     : 1,
      (1'u64 shl 7) - 1'u64     : 1,
      (1'u64 shl 7)             : 2,
      (1'u64 shl 7) + 1'u64     : 2,
      (1'u64 shl 14) - 1'u64    : 2,
      (1'u64 shl 14)            : 3,
      (1'u64 shl 21) - 1'u64    : 3,
      (1'u64 shl 21)            : 4,
      (1'u64 shl 28) - 1'u64    : 4,
      (1'u64 shl 28)            : 5,
      (1'u64 shl 35) - 1'u64    : 5,
      (1'u64 shl 35)            : 6,
      (1'u64 shl 42) - 1'u64    : 6,
      (1'u64 shl 42)            : 7,
      (1'u64 shl 49) - 1'u64    : 7,
      (1'u64 shl 49)            : 8,
      (1'u64 shl 56) - 1'u64    : 8,
      (1'u64 shl 56)            : 9,
      (1'u64 shl 63) - 1'u64    : 9,
      (1'u64 shl 63)            : 10,
      0xFFFF_FFFF_FFFF_FFFF'u64 : 10
    }

    for entry in expectedLens:
      let (value, expected) = entry
      check Leb128.len(value) == expected

  test "errors":
    # fromBytes: len == 0 means "not enough bytes", len < 0 means overflow
    check:
      uint8.fromBytes([0x80'u8], Leb128) == (0'u8, 0'i8)
      uint8.fromBytes([0x80'u8, 0x80], Leb128) == (0'u8, 0'i8)
      uint8.fromBytes(toBytes(256'u16, Leb128).toOpenArray(), Leb128).len < 0
      uint8.fromBytes([0x80'u8, 0x02], Leb128) == (0'u8, -2'i8) # 2 bytes consumed and overflow
      uint8.fromBytes([0x80'u8, 0x02, 0x05], Leb128) == (0'u8, -2'i8) # 2 bytes consumed and overflow
      uint64.fromBytes([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x02], Leb128).len < 0
      uint64.fromBytes([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], Leb128) == (0'u64, 0'i8)

    # scan reports the same length information without the value
    check:
      uint8.scan([0x80'u8], Leb128) == 0
      uint8.scan([0x80'u8, 0x80], Leb128) == 0
      uint8.scan(toBytes(256'u16, Leb128).toOpenArray(), Leb128) < 0
      uint64.scan([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x02], Leb128) < 0
      uint64.scan([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], Leb128) == 0