From 5cf4feabea0820d7f03b146b0973a57973bcc4c1 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Tue, 15 Dec 2020 16:07:20 +0100 Subject: [PATCH] leb128 + bitops fixes (#66) Leb128 is a variable-length encoding for unsigned integers that is used in a number of contexts - in particular, wasm, dwarf and protobuf. This is an optimized low-level implementation that unrolls the loop reading/writing the buffer - it is suitable to use as base for a more specific API - no memory allocations, no exceptions. This PR also fixes bitops2 to not raise on certain uint->int conversions, adapting bitops to nim 1.0 conversion rules by using a cast instead of raising on uint->int conversion --- stew/bitops2.nim | 79 ++++++++++--------- stew/leb128.nim | 175 ++++++++++++++++++++++++++++++++++++++++++ stew/varints.nim | 20 +++++ tests/test_leb128.nim | 104 +++++++++++++++++++++++++ 4 files changed, 338 insertions(+), 40 deletions(-) create mode 100644 stew/leb128.nim create mode 100644 tests/test_leb128.nim diff --git a/stew/bitops2.nim b/stew/bitops2.nim index 4c101bf..290f37e 100644 --- a/stew/bitops2.nim +++ b/stew/bitops2.nim @@ -58,21 +58,15 @@ func firstOneNim(x: uint32): int = 0 else: let k = not x + 1 # get two's complement - 1 + lookup[((x and k) * 0x077CB531'u32) shr 27].int + cast[int](1 + lookup[((x and k) * 0x077CB531'u32) shr 27]) func firstOneNim(x: uint8|uint16): int = firstOneNim(x.uint32) func firstOneNim(x: uint64): int = ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero. 
# https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup - template convert[T](x: uint64): T = - when nimvm: - T(x and high(T)) - else: - cast[T](x) - - if convert[uint32](x) == 0: - 32 + firstOneNim(uint32(x shr 32'u32)) + if (x and uint32.high) == 0: + cast[int](32 + uint(firstOneNim(uint32(x shr 32'u32)))) else: firstOneNim(uint32(x)) @@ -88,7 +82,7 @@ func log2truncNim(x: uint8|uint16|uint32): int = v = v or v shr 4 v = v or v shr 8 v = v or v shr 16 - lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int + cast[int](lookup[uint32(v * 0x07C4ACDD'u32) shr 27]) func log2truncNim(x: uint64): int = ## Quickly find the log base 2 of a 64-bit integer. @@ -105,7 +99,7 @@ func log2truncNim(x: uint64): int = v = v or v shr 8 v = v or v shr 16 v = v or v shr 32 - lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int + cast[int](lookup[(v * 0x03F6EAF2CD271461'u64) shr 58]) func countOnesNim(x: uint8|uint16|uint32): int = ## Counts the set bits in integer. (also called Hamming weight.) @@ -114,7 +108,7 @@ func countOnesNim(x: uint8|uint16|uint32): int = var v = x.uint32 v = v - ((v shr 1) and 0x55555555) v = (v and 0x33333333) + ((v shr 2) and 0x33333333) - (((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24).int + cast[int](((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24) func countOnesNim(x: uint64): int = ## Counts the set bits in integer. (also called Hamming weight.) 
@@ -123,7 +117,7 @@ func countOnesNim(x: uint64): int = v = v - ((v shr 1'u64) and 0x5555555555555555'u64) v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64) v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64) - ((v * 0x0101010101010101'u64) shr 56'u64).int + cast[int]((v * 0x0101010101010101'u64) shr 56'u64) func parityNim(x: SomeUnsignedInt): int = # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel @@ -136,7 +130,7 @@ func parityNim(x: SomeUnsignedInt): int = v = v xor (v shr 8) v = v xor (v shr 4) v = v and 0xf - ((0x6996'u shr v) and 1).int + cast[int]((0x6996'u shr v) and 1) when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins: @@ -158,24 +152,26 @@ when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins: func countOnesBuiltin(x: SomeUnsignedInt): int = when bitsof(x) == bitsof(culonglong): - builtin_popcountll(x.culonglong).int + cast[int](builtin_popcountll(x.culonglong)) else: - builtin_popcount(x.cuint).int + cast[int](builtin_popcount(x.cuint)) func parityBuiltin(x: SomeUnsignedInt): int = when bitsof(x) == bitsof(culonglong): - builtin_parityll(x.culonglong).int + cast[int](builtin_parityll(x.culonglong)) else: - builtin_parity(x.cuint).int + cast[int](builtin_parity(x.cuint)) func firstOneBuiltin(x: SomeUnsignedInt): int = when bitsof(x) == bitsof(clonglong): - builtin_ffsll(clonglong(x)) + cast[int](builtin_ffsll(cast[clonglong](x))) else: - builtin_ffs(x.cuint.cint) + cast[int](builtin_ffs(cast[cint](x.cuint))) - func log2truncBuiltin(v: uint8|uint16|uint32): int = 31 - builtin_clz(v.uint32) - func log2truncBuiltin(v: uint64): int = 63 - builtin_clzll(v) + func log2truncBuiltin(v: uint8|uint16|uint32): int = + cast[int](31 - cast[cuint](builtin_clz(v.uint32))) + func log2truncBuiltin(v: uint64): int = + cast[int](63 - cast[cuint](builtin_clzll(v))) elif defined(vcc) and useBuiltins: const arch64 = sizeof(int) == 8 @@ -195,19 +191,22 @@ elif 
defined(vcc) and useBuiltins: func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "".} func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "".} - func countOnesBuiltin(v: uint8|uint16): int = builtin_popcnt16(v.uint16).int - func countOnesBuiltin(v: uint32): int = builtin_popcnt32(v).int + func countOnesBuiltin(v: uint8|uint16): int = + cast[int](builtin_popcnt16(v.uint16)) + func countOnesBuiltin(v: uint32): int = + cast[int](builtin_popcnt32(v)) func countOnesBuiltin(v: uint64): int = when arch64: - builtin_popcnt64(v).int + cast[int](builtin_popcnt64(v)) else: - builtin_popcnt32((v and 0xFFFFFFFF'u64).uint32).int + - builtin_popcnt32((v shr 32'u64).uint32).int + cast[int]( + builtin_popcnt32((v and uint32.high).uint32) + + builtin_popcnt32((v shr 32'u64).uint32)) template checkedScan(fnc: untyped, x: typed, def: typed): int = var index{.noinit.}: culong if fnc(index.addr, v) == cuchar(0): def - else: index.int + else: cast[int](index) func firstOneBuiltin(v: uint8|uint16|uint32): int = 1 + checkedScan(bitScanForward, v.culong, -1) @@ -220,8 +219,8 @@ elif defined(vcc) and useBuiltins: template bitScan(fnc: untyped, x: typed): int = var index{.noinit.}: culong - if fnc(index.addr, v).int == 0: 0 - else: index.int + if fnc(index.addr, v) == cuchar(0): 0 + else: cast[int](index) func log2truncBuiltin(v: uint8|uint16|uint32): int = bitScan(bitScanReverse, v.culong) @@ -253,21 +252,23 @@ elif defined(icc) and useBuiltins: template checkedScan(fnc: untyped, x: typed, def: typed): int = var index{.noinit.}: culong - if fnc(index.addr, v).int == 0: def - else: index.int + if fnc(index.addr, v) == cuchar(0): def + else: cast[int](index) template bitScan(fnc: untyped, x: typed): int = var index{.noinit.}: culong - if fnc(index.addr, v).int == 0: 0 - else: index.int + if fnc(index.addr, v) == cuchar(0): 0 + else: cast[int](index) - func countOnesBuiltin(v: 
uint8|uint16|uint32): int = builtin_popcnt32(v.cint).int + func countOnesBuiltin(v: uint8|uint16|uint32): int = + cast[int](builtin_popcnt32(cast[cint](v))) func countOnesBuiltin(v: uint64): int = when arch64: - builtin_popcnt64(v).int + cast[int](builtin_popcnt64(v)) else: - builtin_popcnt32((v and 0xFFFFFFFF'u64).cint).int + - builtin_popcnt32((v shr 32'u64).cint).int + cast[int]( + builtin_popcnt32(cast[cint](v and 0xFFFFFFFF'u64)) + + builtin_popcnt32(cast[cint](v shr 32'u64))) func firstOneBuiltin(v: uint8|uint16|uint32): int = 1 + checkedScan(bitScanForward, v.culong, -1) @@ -292,8 +293,6 @@ func countOnes*(x: SomeUnsignedInt): int {.inline.} = ## ## Example: ## doAssert countOnes(0b01000100'u8) == 2 - # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT. - # like GCC and MSVC when nimvm: countOnesNim(x) else: diff --git a/stew/leb128.nim b/stew/leb128.nim new file mode 100644 index 0000000..fbb11e3 --- /dev/null +++ b/stew/leb128.nim @@ -0,0 +1,175 @@ +## Low-level little-endian base 128 variable length integer/byte converters, as +## described in https://en.wikipedia.org/wiki/LEB128 - up to 64 bits supported. +## +## The leb128 encoding is used in DWARF and WASM. +## +## It is also fully compatible with the unsigned varint encoding found in +## `protobuf` and `go`, and can thus be used directly. It's easy to build +## support for the two kinds (zig-zag and cast) of signed encodings on top. +## +## This is not the only way to encode variable length integers - variations +## exist like sqlite and utf-8 - in particular, the `std/varints` module +## implements the sqlite flavour. +## +## This implementation contains low-level primitives suitable for building +## more easy-to-use API. +## +## Exception/Defect free as of nim 1.2. 
+## +## Security notes: +## +## leb128 allows overlong byte sequences that decode into the same integer - +## the library decodes these sequences to a certain extent, but will stop +## decoding at the maximum length that a minimal encoder will produce. For +## example, the byte sequence `[byte 0x80, 0x80, 0x00]`, when decoded as a +## `uint64` is a valid encoding for `0` because the maximum length of a minimal +## `uint64` encoding is 10 bytes - however, because all minimal encodings +## for `uint8` fit in 2 bytes, decoding the same byte sequence as `uint8` will +## yield an error return. +## +## To be strict about overlong encodings, compare the decoded number of bytes +## with `Leb128.len(decoded_value)`. + +{.push raises: [].} + +import + stew/bitops2 + +const + # Given the truncated logarithm of a 64-bit number, how many bytes do we need + # to encode it? + lengths = block: + var v: array[64, int8] + for i in 0..<64: + v[i] = int8((i + 7) div 7) + v + +type + Leb128* = object + ## Type used to mark leb128 encoding helpers + +# log2trunc by definition never returns values >63, thus we can remove checks +{.push checks: off.} +func len*(T: type Leb128, x: SomeUnsignedInt): int8 = + ## Returns number of bytes required to encode integer ``x`` as leb128. + if x == 0: 1 # Always at least one byte! 
+ else: lengths[log2trunc(x)] +{.pop.} + +func maxLen*(T: type Leb128, I: type): int8 = + ## The maximum number of bytes needed to encode any value of type I + Leb128.len(I.high) + +type + Leb128Buf*[T: SomeUnsignedInt] = object + data*: array[maxLen(Leb128, T), byte] # len(data) <= 10 + len*: int8 # >= 1 when holding valid leb128 + +template write7(next: untyped) = + # write 7 bits of data + if v > type(v)(127): + result.data[result.len] = cast[byte](v and type(v)(0xff)) or 0x80'u8 + result.len += 1 + v = v shr 7 + next + +# LebBuf size corresponds to maximum size that the type will be encoded to, thus +# there can be no out-of-bounds accesses here - likewise with the length +# arithmetic +{.push checks: off.} +func toBytes*[I: SomeUnsignedInt](v: I, T: type Leb128): Leb128Buf[I] {.noinit.} = + ## Convert an unsigned integer to the smallest leb128 representation possible + ## + ## Example: + ## 15'u16.toBytes(Leb128) + var + v = v + result.len = 0 + + # A clever developer would write something clever for the unrolling - + # fortunately, we have clever compilers that remove the excess unrolls based + # on size! + write7(): # 7 + write7(): # 14 + write7(): # 21 + write7(): # 28 + write7(): # 35 + write7(): # 42 + write7(): # 49 + write7(): # 56 + write7(): # 63 + discard + + # high bit not set since v <= 127 at this point! 
+ result.data[result.len] = cast[byte](v and type(v)(0xff)) + result.len += 1 + +template read7(shift: untyped) = + # Read 7 bits of data and return iff these are the last 7 bits + if (shift div 7) >= xlen: + return (I(0), 0'i8) # Not enough data - return 0 bytes read + + let + b = x[shift div 7] + valb = b and 0x7f'u8 # byte without high bit + val = I(valb) + vals = val shl shift + + when shift > (sizeof(val) * 8 - 7): + # Check for overflow in the "unused" bits of the byte we just read + if vals shr shift != val: + return (I(0), -cast[int8]((shift div 7) + 1)) + + res = res or vals + if b == valb: # High bit not set, we're done + return (res, cast[int8]((shift div 7) + 1)) + +func fromBytes*( + I: type SomeUnsignedInt, + x: openArray[byte], + T: type Leb128): tuple[val: I, len: int8] {.noinit.} = + ## Parse a LEB128 byte sequence and return value and how many bytes were + ## parsed - if parsing fails, len <= 0 will be returned - 0 when there are not + ## enough bytes and -len on overflow, signalling how many bytes were parsed + let xlen = x.len() + var + res: I + + read7(0) + read7(7) + read7(14) + read7(21) + read7(28) + read7(35) + read7(42) + read7(49) + read7(56) + read7(63) + + (I(0), -11'i8) + +{.pop.} + +template toOpenArray*(v: Leb128Buf): openArray[byte] = + toOpenArray(v.data, 0, v.len - 1) + +template len*(v: Leb128Buf): int8 = v.len +template `@`*(v: Leb128Buf): seq[byte] = @(v.toOpenArray()) +iterator items*(v: Leb128Buf): byte = + for i in 0..