From 5cf4feabea0820d7f03b146b0973a57973bcc4c1 Mon Sep 17 00:00:00 2001
From: Jacek Sieka <jacek@status.im>
Date: Tue, 15 Dec 2020 16:07:20 +0100
Subject: [PATCH] leb128 + bitops fixes (#66)

Leb128 is a variable-length encoding for unsigned integers that is used
in a number of contexts - in particular, wasm, dwarf and protobuf.

This is an optimized low-level implementation that unrolls the loop
reading/writing the buffer - it is suitable to use as base for a more
specific API - no memory allocations, no exceptions.

This PR also fixes bitops2 to not raise on certain uint->int
conversions, adapting bitops to nim 1.0 conversion rules by using a cast
instead of raising on uint->int conversion.
---
 stew/bitops2.nim      |  79 ++++++++++---------
 stew/leb128.nim       | 175 ++++++++++++++++++++++++++++++++++++++++++
 stew/varints.nim      |  20 +++++
 tests/test_leb128.nim | 104 +++++++++++++++++++++++++
 4 files changed, 338 insertions(+), 40 deletions(-)
 create mode 100644 stew/leb128.nim
 create mode 100644 tests/test_leb128.nim

diff --git a/stew/bitops2.nim b/stew/bitops2.nim
index 4c101bf..290f37e 100644
--- a/stew/bitops2.nim
+++ b/stew/bitops2.nim
@@ -58,21 +58,15 @@ func firstOneNim(x: uint32): int =
     0
   else:
     let k = not x + 1 # get two's complement
-    1 + lookup[((x and k) * 0x077CB531'u32) shr 27].int
+    cast[int](1 + lookup[((x and k) * 0x077CB531'u32) shr 27])
 
 func firstOneNim(x: uint8|uint16): int = firstOneNim(x.uint32)
 func firstOneNim(x: uint64): int =
   ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
   # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
 
-  template convert[T](x: uint64): T =
-    when nimvm:
-      T(x and high(T))
-    else:
-      cast[T](x)
-
-  if convert[uint32](x) == 0:
-    32 + firstOneNim(uint32(x shr 32'u32))
+  if (x and uint32.high) == 0:
+    cast[int](32 + uint(firstOneNim(uint32(x shr 32'u32))))
   else:
     firstOneNim(uint32(x))
 
@@ -88,7 +82,7 @@ func log2truncNim(x: uint8|uint16|uint32): int =
   v = v or v shr 4
   v = v or v shr 8
   v = v or v shr 16
-  lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int
+  cast[int](lookup[uint32(v * 0x07C4ACDD'u32) shr 27])
 
 func log2truncNim(x: uint64): int =
   ## Quickly find the log base 2 of a 64-bit integer.
@@ -105,7 +99,7 @@ func log2truncNim(x: uint64): int =
   v = v or v shr 8
   v = v or v shr 16
   v = v or v shr 32
-  lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
+  cast[int](lookup[(v * 0x03F6EAF2CD271461'u64) shr 58])
 
 func countOnesNim(x: uint8|uint16|uint32): int =
   ## Counts the set bits in integer. (also called Hamming weight.)
@@ -114,7 +108,7 @@ func countOnesNim(x: uint8|uint16|uint32): int =
   var v = x.uint32
   v = v - ((v shr 1) and 0x55555555)
   v = (v and 0x33333333) + ((v shr 2) and 0x33333333)
-  (((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24).int
+  cast[int](((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24)
 
 func countOnesNim(x: uint64): int =
   ## Counts the set bits in integer. (also called Hamming weight.)
@@ -123,7 +117,7 @@ func countOnesNim(x: uint64): int =
   v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
   v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
   v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
-  ((v * 0x0101010101010101'u64) shr 56'u64).int
+  cast[int]((v * 0x0101010101010101'u64) shr 56'u64)
 
 func parityNim(x: SomeUnsignedInt): int =
   # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
@@ -136,7 +130,7 @@ func parityNim(x: SomeUnsignedInt): int =
     v = v xor (v shr 8)
   v = v xor (v shr 4)
   v = v and 0xf
-  ((0x6996'u shr v) and 1).int
+  cast[int]((0x6996'u shr v) and 1)
 
 when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins:
 
@@ -158,24 +152,26 @@ when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins:
 
   func countOnesBuiltin(x: SomeUnsignedInt): int =
     when bitsof(x) == bitsof(culonglong):
-      builtin_popcountll(x.culonglong).int
+      cast[int](builtin_popcountll(x.culonglong))
     else:
-      builtin_popcount(x.cuint).int
+      cast[int](builtin_popcount(x.cuint))
 
   func parityBuiltin(x: SomeUnsignedInt): int =
     when bitsof(x) == bitsof(culonglong):
-      builtin_parityll(x.culonglong).int
+      cast[int](builtin_parityll(x.culonglong))
     else:
-      builtin_parity(x.cuint).int
+      cast[int](builtin_parity(x.cuint))
 
   func firstOneBuiltin(x: SomeUnsignedInt): int =
     when bitsof(x) == bitsof(clonglong):
-      builtin_ffsll(clonglong(x))
+      cast[int](builtin_ffsll(cast[clonglong](x)))
     else:
-      builtin_ffs(x.cuint.cint)
+      cast[int](builtin_ffs(cast[cint](x.cuint)))
 
-  func log2truncBuiltin(v: uint8|uint16|uint32): int = 31 - builtin_clz(v.uint32)
-  func log2truncBuiltin(v: uint64): int = 63 - builtin_clzll(v)
+  func log2truncBuiltin(v: uint8|uint16|uint32): int =
+    cast[int](31 - cast[cuint](builtin_clz(v.uint32)))
+  func log2truncBuiltin(v: uint64): int =
+    cast[int](63 - cast[cuint](builtin_clzll(v)))
 
 elif defined(vcc) and useBuiltins:
   const arch64 = sizeof(int) == 8
@@ -195,19 +191,22 @@ elif defined(vcc) and useBuiltins:
     func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>".}
     func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>".}
 
-  func countOnesBuiltin(v: uint8|uint16): int = builtin_popcnt16(v.uint16).int
-  func countOnesBuiltin(v: uint32): int = builtin_popcnt32(v).int
+  func countOnesBuiltin(v: uint8|uint16): int =
+    cast[int](builtin_popcnt16(v.uint16))
+  func countOnesBuiltin(v: uint32): int =
+    cast[int](builtin_popcnt32(v))
   func countOnesBuiltin(v: uint64): int =
     when arch64:
-      builtin_popcnt64(v).int
+      cast[int](builtin_popcnt64(v))
     else:
-      builtin_popcnt32((v and 0xFFFFFFFF'u64).uint32).int +
-        builtin_popcnt32((v shr 32'u64).uint32).int
+      cast[int](
+        builtin_popcnt32((v and uint32.high).uint32) +
+        builtin_popcnt32((v shr 32'u64).uint32))
 
   template checkedScan(fnc: untyped, x: typed, def: typed): int =
     var index{.noinit.}: culong
     if fnc(index.addr, v) == cuchar(0): def
-    else: index.int
+    else: cast[int](index)
 
   func firstOneBuiltin(v: uint8|uint16|uint32): int =
     1 + checkedScan(bitScanForward, v.culong, -1)
@@ -220,8 +219,8 @@ elif defined(vcc) and useBuiltins:
 
   template bitScan(fnc: untyped, x: typed): int =
     var index{.noinit.}: culong
-    if fnc(index.addr, v).int == 0: 0
-    else: index.int
+    if fnc(index.addr, v) == cuchar(0): 0
+    else: cast[int](index)
 
   func log2truncBuiltin(v: uint8|uint16|uint32): int =
     bitScan(bitScanReverse, v.culong)
@@ -253,21 +252,23 @@ elif defined(icc) and useBuiltins:
 
   template checkedScan(fnc: untyped, x: typed, def: typed): int =
     var index{.noinit.}: culong
-    if fnc(index.addr, v).int == 0: def
-    else: index.int
+    if fnc(index.addr, v) == cuchar(0): def
+    else: cast[int](index)
 
   template bitScan(fnc: untyped, x: typed): int =
     var index{.noinit.}: culong
-    if fnc(index.addr, v).int == 0: 0
-    else: index.int
+    if fnc(index.addr, v) == cuchar(0): 0
+    else: cast[int](index)
 
-  func countOnesBuiltin(v: uint8|uint16|uint32): int = builtin_popcnt32(v.cint).int
+  func countOnesBuiltin(v: uint8|uint16|uint32): int =
+    cast[int](builtin_popcnt32(cast[cint](v)))
   func countOnesBuiltin(v: uint64): int =
     when arch64:
-      builtin_popcnt64(v).int
+      cast[int](builtin_popcnt64(v))
     else:
-      builtin_popcnt32((v and 0xFFFFFFFF'u64).cint).int +
-        builtin_popcnt32((v shr 32'u64).cint).int
+      cast[int](
+        builtin_popcnt32(cast[cint](v and 0xFFFFFFFF'u64)) +
+        builtin_popcnt32(cast[cint](v shr 32'u64)))
 
   func firstOneBuiltin(v: uint8|uint16|uint32): int =
     1 + checkedScan(bitScanForward, v.culong, -1)
@@ -292,8 +293,6 @@ func countOnes*(x: SomeUnsignedInt): int {.inline.} =
   ##
   ## Example:
   ## doAssert countOnes(0b01000100'u8) == 2
-  # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
-  # like GCC and MSVC
   when nimvm:
     countOnesNim(x)
   else:
diff --git a/stew/leb128.nim b/stew/leb128.nim
new file mode 100644
index 0000000..fbb11e3
--- /dev/null
+++ b/stew/leb128.nim
@@ -0,0 +1,175 @@
+## Low-level little-endian base 128 variable length integer/byte converters, as
+## described in https://en.wikipedia.org/wiki/LEB128 - up to 64 bits supported.
+##
+## The leb128 encoding is used in DWARF and WASM.
+##
+## It is also fully compatible with the unsigned varint encoding found in
+## `protobuf` and `go`, and can thus be used directly. It's easy to build
+## support for the two kinds (zig-zag and cast) of signed encodings on top.
+##
+## This is not the only way to encode variable length integers - variations
+## exist like sqlite and utf-8 - in particular, the `std/varints` module
+## implements the sqlite flavour.
+##
+## This implementation contains low-level primitives suitable for building
+## a more easy-to-use API.
+##
+## Exception/Defect free as of nim 1.2.
+##
+## Security notes:
+##
+## leb128 allows overlong byte sequences that decode into the same integer -
+## the library decodes these sequences to a certain extent, but will stop
+## decoding at the maximum length that a minimal encoder will produce. For
+## example, the byte sequence `[byte 0x80, 0x80, 0x00]`, when decoded as a
+## `uint64` is a valid encoding for `0` because the maximum length of a minimal
+## `uint64` encoding is 10 bytes - however, because all minimal encodings
+## for `uint8` fit in 2 bytes, decoding the same byte sequence as `uint8` will
+## yield an error return.
+##
+## To be strict about overlong encodings, compare the decoded number of bytes
+## with `Leb128.len(decoded_value)`.
+
+{.push raises: [].}
+
+import
+  stew/bitops2
+
+const
+  # Given the truncated logarithm of a 64-bit number, how many bytes do we need
+  # to encode it?
+  lengths = block:
+    var v: array[64, int8]
+    for i in 0..<64:
+      v[i] = int8((i + 7) div 7)
+    v
+
+type
+  Leb128* = object
+    ## Type used to mark leb128 encoding helpers
+
+# log2trunc by definition never returns values >= 64, thus we can remove checks
+{.push checks: off.}
+func len*(T: type Leb128, x: SomeUnsignedInt): int8 =
+  ## Returns number of bytes required to encode integer ``x`` as leb128.
+  if x == 0: 1 # Always at least one byte!
+  else: lengths[log2trunc(x)]
+{.pop.}
+
+func maxLen*(T: type Leb128, I: type): int8 =
+  ## The maximum number of bytes needed to encode any value of type I
+  Leb128.len(I.high)
+
+type
+  Leb128Buf*[T: SomeUnsignedInt] = object
+    data*: array[maxLen(Leb128, T), byte] # len(data) <= 10
+    len*: int8 # >= 1 when holding valid leb128
+
+template write7(next: untyped) =
+  # write 7 bits of data
+  if v > type(v)(127):
+    result.data[result.len] = cast[byte](v and type(v)(0xff)) or 0x80'u8
+    result.len += 1
+    v = v shr 7
+    next
+
+# Leb128Buf size corresponds to the maximum size that the type will be encoded
+# to, thus there can be no out-of-bounds accesses here - likewise with the
+# length arithmetic
+{.push checks: off.}
+func toBytes*[I: SomeUnsignedInt](v: I, T: type Leb128): Leb128Buf[I] {.noinit.} =
+  ## Convert an unsigned integer to the smallest leb128 representation possible
+  ##
+  ## Example:
+  ## 15'u16.toBytes(Leb128)
+  var
+    v = v
+  result.len = 0
+
+  # A clever developer would write something clever for the unrolling -
+  # fortunately, we have clever compilers that remove the excess unrolls based
+  # on size!
+  write7(): # 7
+    write7(): # 14
+      write7(): # 21
+        write7(): # 28
+          write7(): # 35
+            write7(): # 42
+              write7(): # 49
+                write7(): # 56
+                  write7(): # 63
+                    discard
+
+  # high bit not set since v <= 127 at this point!
+  result.data[result.len] = cast[byte](v and type(v)(0xff))
+  result.len += 1
+
+template read7(shift: untyped) =
+  # Read 7 bits of data and return iff these are the last 7 bits
+  if (shift div 7) >= xlen:
+    return (I(0), 0'i8) # Not enough data - return 0 bytes read
+
+  let
+    b = x[shift div 7]
+    valb = b and 0x7f'u8 # byte without high bit
+    val = I(valb)
+    vals = val shl shift
+
+  when shift > (sizeof(val) * 8 - 7):
+    # Check for overflow in the "unused" bits of the byte we just read
+    if vals shr shift != val:
+      return (I(0), -cast[int8]((shift div 7) + 1))
+
+  res = res or vals
+  if b == valb: # High bit not set, we're done
+    return (res, cast[int8]((shift div 7) + 1))
+
+func fromBytes*(
+    I: type SomeUnsignedInt,
+    x: openArray[byte],
+    T: type Leb128): tuple[val: I, len: int8] {.noinit.} =
+  ## Parse a LEB128 byte sequence and return value and how many bytes were
+  ## parsed - if parsing fails, len <= 0 will be returned - 0 when there are not
+  ## enough bytes and -len on overflow, signalling how many bytes were parsed
+  let xlen = x.len()
+  var
+    res: I
+
+  read7(0)
+  read7(7)
+  read7(14)
+  read7(21)
+  read7(28)
+  read7(35)
+  read7(42)
+  read7(49)
+  read7(56)
+  read7(63)
+
+  (I(0), -11'i8)
+
+{.pop.}
+
+template toOpenArray*(v: Leb128Buf): openArray[byte] =
+  toOpenArray(v.data, 0, v.len - 1)
+
+template len*(v: Leb128Buf): int8 = v.len
+template `@`*(v: Leb128Buf): seq[byte] = @(v.toOpenArray())
+iterator items*(v: Leb128Buf): byte =
+  for i in 0..<v.len: yield v.data[i]
+
+template fromBytes*(
+    I: type SomeUnsignedInt,
+    x: Leb128Buf): tuple[val: I, len: int8] =
+  # x is not guaranteed to be valid, so we treat it like any other buffer!
+  I.fromBytes(x.toOpenArray(), Leb128)
+
+func scan*(
+    I: type SomeUnsignedInt,
+    x: openArray[byte],
+    T: type Leb128): int8 {.noinit.} =
+  ## Scan a buffer for a valid leb128-encoded value that fits in `I` and
+  ## report how many bytes it uses - `<= 0` on failure, like `fromBytes`
+  # TODO this can be done efficiently with SSE
+  I.fromBytes(x, Leb128).len
+
diff --git a/stew/varints.nim b/stew/varints.nim
index eccf0d8..6bd68c7 100644
--- a/stew/varints.nim
+++ b/stew/varints.nim
@@ -1,5 +1,25 @@
 ## This module implements Variable Integer `VARINT`.
 
+{.deprecated: "use leb128 or a higher level decoder".}
+
+# There are better variations on this module around:
+# * stew/leb128 implements the core varint encoding
+# * nim-protobuf-serialization and nim-libp2p contain higher-level protobuf
+#   varint encoding/decoding
+#
+# This module has a couple of problems as written:
+# * Name conflict with std/varints which implements a _different_ varint
+#   encoding (sqlite-style)
+# * the `Stream` interface in this file is underdefined (i.e. there's a hidden
+#   implicit dependency on nim-serialization) - the stateful byte-by-byte
+#   decoder should likely be moved there instead
+# * The signed integer support is biased towards casting, whereas the most
+#   "common" way of encoding signed integers in protobuf is "zig-zag",
+#   whose support is missing - above all, biasing towards one of the two signed
+#   integer formats is error-prone
+# * there is no detection of overlong sequences
+# * overflows in high bits of nibble are not detected
+
 import
   bitops2
 
diff --git a/tests/test_leb128.nim b/tests/test_leb128.nim
new file mode 100644
index 0000000..0f7d11a
--- /dev/null
+++ b/tests/test_leb128.nim
@@ -0,0 +1,104 @@
+import
+  unittest, random,
+  ../stew/[byteutils, leb128, results]
+
+const edgeValues = {
+  0'u64                     : "00",
+  1'u64                     : "01",
+  (1'u64 shl 7) - 1'u64     : "7f",
+  (1'u64 shl 7)             : "8001",
+  (1'u64 shl 7) + 1'u64     : "8101",
+  (1'u64 shl 14) - 1'u64    : "ff7f",
+  (1'u64 shl 14)            : "808001",
+  (1'u64 shl 21) - 1'u64    : "ffff7f",
+  (1'u64 shl 21)            : "80808001",
+  (1'u64 shl 28) - 1'u64    : "ffffff7f",
+  (1'u64 shl 28)            : "8080808001",
+  (1'u64 shl 35) - 1'u64    : "ffffffff7f",
+  (1'u64 shl 35)            : "808080808001",
+  (1'u64 shl 42) - 1'u64    : "ffffffffff7f",
+  (1'u64 shl 42)            : "80808080808001",
+  (1'u64 shl 49) - 1'u64    : "ffffffffffff7f",
+  (1'u64 shl 49)            : "8080808080808001",
+  (1'u64 shl 56) - 1'u64    : "ffffffffffffff7f",
+  (1'u64 shl 56)            : "808080808080808001",
+  (1'u64 shl 63) - 1'u64    : "ffffffffffffffff7f",
+  (1'u64 shl 63)            : "80808080808080808001",
+  0xFFFF_FFFF_FFFF_FFFF'u64 : "ffffffffffffffffff01"
+}
+
+suite "leb128":
+  template roundtripTest(value: typed) =
+    let
+      leb {.inject.} = value.toBytes(Leb128)
+      roundtripVal = type(value).fromBytes(leb.toOpenArray(), Leb128)
+
+    check:
+      value == roundtripVal.val
+
+  test "Success edge cases test":
+    for pair in edgeValues:
+      let (value, hex) = pair
+      roundtripTest value
+      check:
+        toHex(leb.toOpenArray()) == hex
+
+  test "roundtrip random values":
+    template testSome(T: type) =
+      for i in 0..10000:
+        # TODO nim 1.0 random casts limits to int, so anything bigger will crash
+        #      * sigh *
+        #      https://github.com/nim-lang/Nim/issues/16360
+        let
+          v1 = rand(T(0) .. cast[T](int.high))
+        roundtripTest v1
+    testSome(uint8)
+    testSome(uint16)
+    testSome(uint32)
+    testSome(uint64)
+
+  test "lengths":
+    const lengths = {
+      0'u64                     : 1,
+      1'u64                     : 1,
+      (1'u64 shl 7) - 1'u64     : 1,
+      (1'u64 shl 7)             : 2,
+      (1'u64 shl 7) + 1'u64     : 2,
+      (1'u64 shl 14) - 1'u64    : 2,
+      (1'u64 shl 14)            : 3,
+      (1'u64 shl 21) - 1'u64    : 3,
+      (1'u64 shl 21)            : 4,
+      (1'u64 shl 28) - 1'u64    : 4,
+      (1'u64 shl 28)            : 5,
+      (1'u64 shl 35) - 1'u64    : 5,
+      (1'u64 shl 35)            : 6,
+      (1'u64 shl 42) - 1'u64    : 6,
+      (1'u64 shl 42)            : 7,
+      (1'u64 shl 49) - 1'u64    : 7,
+      (1'u64 shl 49)            : 8,
+      (1'u64 shl 56) - 1'u64    : 8,
+      (1'u64 shl 56)            : 9,
+      (1'u64 shl 63) - 1'u64    : 9,
+      (1'u64 shl 63)            : 10,
+      0xFFFF_FFFF_FFFF_FFFF'u64 : 10
+    }
+
+    for pair in lengths:
+      check: Leb128.len(pair[0]) == pair[1]
+
+  test "errors":
+    check:
+      uint8.fromBytes([0x80'u8], Leb128) == (0'u8, 0'i8)
+      uint8.fromBytes([0x80'u8, 0x80], Leb128) == (0'u8, 0'i8)
+      uint8.fromBytes(toBytes(256'u16, Leb128).toOpenArray(), Leb128).len < 0
+      uint8.fromBytes([0x80'u8, 0x02], Leb128) == (0'u8, -2'i8) # 2 bytes consumed and overflow
+      uint8.fromBytes([0x80'u8, 0x02, 0x05], Leb128) == (0'u8, -2'i8) # 2 bytes consumed and overflow
+      uint64.fromBytes([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x02], Leb128).len < 0
+      uint64.fromBytes([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], Leb128) == (0'u64, 0'i8)
+
+    check:
+      uint8.scan([0x80'u8], Leb128) == 0
+      uint8.scan([0x80'u8, 0x80], Leb128) == 0
+      uint8.scan(toBytes(256'u16, Leb128).toOpenArray(), Leb128) < 0
+      uint64.scan([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x02], Leb128) < 0
+      uint64.scan([0xff'u8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], Leb128) == 0