From 391c2e24b1c85a3e4b7ff1dac5350638ab073aa6 Mon Sep 17 00:00:00 2001
From: Jacek Sieka <jacek@status.im>
Date: Tue, 9 May 2023 21:28:35 +0200
Subject: [PATCH] intops: core integer primitives

This, together with bitops2 and endians2, forms the core primitive
offering for working with integers as the computer sees them.

The focus of intops is to expose a number of common integer operations
typically used to build more complex abstractions such as bigints,
mp-ints etc while having access to the best performance the compiler and
cpu can offer.

There is more to do here, but this provides an outline of what this
module could look like.

Obviously, there are no exceptions or defects around - the point of
these utilities is to stay as close as possible to bare metal. They
could be used to implement such features however (similar to how
`system/integerops` works).
---
 stew/intops.nim       | 122 ++++++++++++++++++++++++++++++++++++++++++
 tests/all_tests.nim   |   1 +
 tests/test_intops.nim |  68 +++++++++++++++++++++++
 3 files changed, 191 insertions(+)
 create mode 100644 stew/intops.nim
 create mode 100644 tests/test_intops.nim

diff --git a/stew/intops.nim b/stew/intops.nim
new file mode 100644
index 0000000..7fe53a5
--- /dev/null
+++ b/stew/intops.nim
@@ -0,0 +1,122 @@
+## Core integer primitives suitable as building blocks for higher-level
+## functionality such as bigints, saturating integer types etc - where
+## applicable, these use compiler builtins - otherwise, they fall back on native
+## Nim code that may be less efficient.
+##
+## In using these functions, it is recommended that you always call the function
+## that returns the least information needed - for example, `mulOverflow` may
+## be implemented more efficiently than `mulWiden`, meaning that if overflow
+## detection is all that is needed, use the former.
+
+# Implementation notes:
+#
+# * `uintN` is assumed to be wrapping
+# * "*Overflow" perform wrapping arithmetic while returning a bool for overflow
+# * "*Widen" return full result in multiple words
+# * overloads with carry/borrow exposed for chaining limbs
+#
+# TODO
+# * use compiler intrinsics
+# * signed ops
+# * saturating ops
+# * more primitives commonly available on CPUs / intrinsics (pow / divmod / etc)
+
+
+func addOverflow*(x, y: SomeUnsignedInt):
+    tuple[result: SomeUnsignedInt, overflow: bool] =
+  ## Compute `x + y` using wrapping arithmetic, returning the wrapped sum and
+  ## a boolean that is true when the exact sum did not fit in the type.
+  ##
+  ## In multi-word (bigint) addition, feed the returned `overflow` flag as the
+  ## `carry` of the addition of the next more significant word.
+
+  let sum = x + y
+  (sum, x > sum)
+
+func addOverflow*(x, y: SomeUnsignedInt, carry: bool):
+    tuple[result: SomeUnsignedInt, overflow: bool] =
+  ## Compute `x + y + carry` using wrapping arithmetic, returning the wrapped
+  ## sum and a boolean indicating that overflow happened in either addition.
+  ##
+  ## In multi-word (bigint) addition, feed the returned `overflow` flag as the
+  ## `carry` of the addition of the next more significant word.
+
+  let
+    (partial, carried) = addOverflow(x, y)
+    (total, carriedAgain) = addOverflow(partial, typeof(partial)(carry))
+  (total, carried or carriedAgain)
+
+func subOverflow*(x, y: SomeUnsignedInt):
+    tuple[result: SomeUnsignedInt, overflow: bool] =
+  ## Subtract y from x using wrapping arithmetic, returning the result and a
+  ## boolean indicating whether the subtraction underflowed (`y > x`).
+
+  let diff = x - y
+  (diff, x < y)
+
+func subOverflow*(x, y: SomeUnsignedInt, borrow: bool):
+    tuple[result: SomeUnsignedInt, overflow: bool] =
+  ## Compute `x - y - borrow` using wrapping arithmetic, returning the result
+  ## and a boolean indicating whether either subtraction wrapped around.
+  ##
+  ## When used to construct bigint arithmetic, the overflow flag can be passed
+  ## as borrow to the next more significant word.
+
+  let
+    (partial, borrowed) = subOverflow(x, y)
+    (total, borrowedAgain) = subOverflow(partial, typeof(partial)(borrow))
+  (total, borrowed or borrowedAgain)
+
+func mulWiden*(x, y: uint64): tuple[lo, hi: uint64] =
+  ## Full 128-bit product of `x` and `y` built from four 32x32-bit products.
+  let
+    xLo = x and uint32.high
+    xHi = x shr 32
+    yLo = y and uint32.high
+    yHi = y shr 32
+    hiHi = xHi * yHi
+    loHi = xLo * yHi
+    hiLo = xHi * yLo
+    loLo = xLo * yLo
+    mid = hiLo + (loLo shr 32) + (loHi and uint32.high) # cannot wrap
+    upper = hiHi + (mid shr 32) + (loHi shr 32)
+    lower = (mid shl 32) or (loLo and uint32.high)
+  (lower, upper)
+
+func mulWiden*(x, y: uint32): tuple[lo, hi: uint32] =
+  let wide = uint64(x) * uint64(y) # exact: the product fits in 64 bits
+  (cast[uint32](wide and uint32.high), cast[uint32](wide shr 32))
+func mulWiden*(x, y: uint16): tuple[lo, hi: uint16] =
+  let wide = uint32(x) * uint32(y) # exact: the product fits in 32 bits
+  (cast[uint16](wide and uint16.high), cast[uint16](wide shr 16))
+func mulWiden*(x, y: uint8): tuple[lo, hi: uint8] =
+  let wide = uint16(x) * uint16(y) # exact: the product fits in 16 bits
+  (cast[uint8](wide and uint8.high), cast[uint8](wide shr 8))
+func mulWiden*(x, y: uint): tuple[lo, hi: uint] =
+  ## Perform `(x * y)` as if the computation had been carried out in twice as
+  ## wide a type returning the low and high words.
+  when sizeof(uint) == sizeof(uint64):
+    let (a, b) = mulWiden(uint64(x), uint64(y))
+  else:
+    let (a, b) = mulWiden(uint32(x), uint32(y))
+  (uint(a), uint(b))
+
+func mulWiden*(x, y, carry: SomeUnsignedInt): tuple[lo, hi: SomeUnsignedInt] =
+  ## Perform `((x * y) + carry)` as if the computation had been carried out in
+  ## twice as wide a type, returning the low and high words.
+  let
+    (prodLo, prodHi) = mulWiden(x, y)
+    (sumLo, carried) = addOverflow(prodLo, carry)
+    # Propagating the carry cannot overflow the high word: the largest
+    # product, `high * high`, has a high word of only `high - 1`
+    (sumHi, _) = addOverflow(prodHi, typeof(prodHi)(0), carried)
+
+  (sumLo, sumHi)
+
+func mulOverflow*(x, y: SomeUnsignedInt):
+    tuple[result: SomeUnsignedInt, overflow: bool] =
+  ## Multiply `x` by `y` using wrapping arithmetic, returning the wrapped
+  ## product and a boolean that is true when the full product did not fit.
+  let
+    (lower, upper) = mulWiden(x, y)
+  (lower, upper > 0)
diff --git a/tests/all_tests.nim b/tests/all_tests.nim
index 4366d90..a15f7e0 100644
--- a/tests/all_tests.nim
+++ b/tests/all_tests.nim
@@ -25,6 +25,7 @@ import
   test_keyed_queue,
   test_sorted_set,
   test_interval_set,
+  test_intops,
   test_macros,
   test_objects,
   test_ptrops,
diff --git a/tests/test_intops.nim b/tests/test_intops.nim
new file mode 100644
index 0000000..077a61b
--- /dev/null
+++ b/tests/test_intops.nim
@@ -0,0 +1,68 @@
+import unittest2
+
+import ../stew/intops
+
+template testAddOverflow[T: SomeUnsignedInt]() =
+  doAssert addOverflow(T.low, T.low) == (T.low, false)
+  doAssert addOverflow(T.high, T.low) == (T.high, false)
+  doAssert addOverflow(T.low, T.high) == (T.high, false)
+  # high + high wraps around to high - 1 and reports the overflow
+  doAssert addOverflow(T.high, T.high) == (T.high - 1, true)
+  # Overload taking an incoming carry
+  doAssert addOverflow(T.high, T(0), false) == (T.high, false)
+  doAssert addOverflow(T.high, T(0), true) == (T(0), true)
+  doAssert addOverflow(T.high, T.high, true) == (T.high, true)
+
+template testSubOverflow[T: SomeUnsignedInt]() =
+  doAssert subOverflow(T.low, T.low) == (T.low, false)
+  doAssert subOverflow(T.high, T.low) == (T.high, false)
+  doAssert subOverflow(T.high, T.high) == (T.low, false)
+  # low - high wraps around to 1 and reports the overflow
+  doAssert subOverflow(T.low, T.high) == (T(1), true)
+  # Overload taking an incoming borrow
+  doAssert subOverflow(T.high, T.high, false) == (T(0), false)
+  doAssert subOverflow(T.high, T.high, true) == (T.high, true)
+
+template testAddOverflow() =
+  testAddOverflow[uint8]()
+  testAddOverflow[uint16]()
+  testAddOverflow[uint32]()
+  testAddOverflow[uint64]()
+  testAddOverflow[uint]() # platform-sized word, tested as its own type
+
+template testSubOverflow() =
+  testSubOverflow[uint8]()
+  testSubOverflow[uint16]()
+  testSubOverflow[uint32]()
+  testSubOverflow[uint64]()
+  testSubOverflow[uint]() # platform-sized word, tested as its own type
+
+template testMulWiden[T: SomeUnsignedInt]() =
+  doAssert mulWiden(T.low, T.low) == (T.low, T.low)
+  doAssert mulWiden(T(2), T(2)) == (T(4), T(0))
+  doAssert mulWiden(T.high, T(1)) == (T.high, T(0))
+  doAssert mulWiden(T(1), T.high) == (T.high, T(0))
+  doAssert mulWiden(T.high, T.high) == (T(1), T.high - 1)
+  # Overload that adds a carry word to the double-width product
+  doAssert mulWiden(T.high, T.high, T(0)) == (T(1), T.high - 1)
+  doAssert mulWiden(T.high, T.high, T.high) == (T(0), T.high)
+
+# TODO testMulOverflow
+
+template testMulWiden() =
+  testMulWiden[uint8]()
+  testMulWiden[uint16]()
+  testMulWiden[uint32]()
+  testMulWiden[uint64]()
+  testMulWiden[uint]() # exercises the dedicated `uint` overload
+
+template test() =
+  testAddOverflow()
+  testSubOverflow()
+  testMulWiden()
+# Run every check at compile time first...
+static: test()
+# ...then again at run time as a regular unit test
+suite "intops":
+  test "test":
+    test()