From 5980477e0bf36677318da8f008283641d8ca8e27 Mon Sep 17 00:00:00 2001
From: mratsim <mamy_github@numforge.co>
Date: Mon, 8 Oct 2018 14:49:36 +0200
Subject: [PATCH] Add a description and why choosing a recursive impl for Stint

---
 stint/private/datatypes.nim | 143 +++++++++++++++++++++++++-----------
 1 file changed, 102 insertions(+), 41 deletions(-)

diff --git a/stint/private/datatypes.nim b/stint/private/datatypes.nim
index e9302e3..b579d0a 100644
--- a/stint/private/datatypes.nim
+++ b/stint/private/datatypes.nim
@@ -12,46 +12,71 @@
 import macros
 # The macro uintImpl must be exported
 
-when defined(stint_test):
+# #### Overview
+#
+# Stint extends the default uint8, uint16, uint32, uint64 with power-of-2-sized integers.
+# The only limitation is your stack size, so you can have uint128, uint256, uint512 ...
+# Signed ints are also possible.
+#
+# As a high-level API, Stint adheres to Nim and C conventions and uses the same operators like:
+# `+`, `xor`, `not` ...
+#
+# #### Implementation
+#
+# Stint types are stored on the stack and have a structure
+# similar to a binary tree of power of two unsigned integers
+# with "high" and "low" words:
+#
+#                              Stuint[256]
+#            hi: Stuint[128]                  lo: Stuint[128]
+#     hihi: uint64    hilo: uint64    lohi: uint64    lolo: uint64
+#
+# This follows paper https://hal.archives-ouvertes.fr/hal-00582593v2
+# "Recursive double-size fixed precision arithmetic" from Jul. 2016
+# to implement an efficient fixed precision bigint for embedded devices, especially FPGAs.
+#
+# For testing purposes, the flag `-d:stint_test` can be passed at compile-time
+# to switch the backend to uint32.
+# In the future the default backend will become uint128 on supporting compilers.
+#
+# This has the following benefits:
+#   - BigEndian/LittleEndian support is trivial.
+#   - Not having for loops helps the compiler produce the most efficient instructions
+#     like ADC (Add with Carry)
+#   - Proving that the recursive structure works at depth 64 for uint32 backend means that
+#     it would work at depth 128 for uint64 backend.
+#     We can easily choose a uint16 or uint8 backend as well.
+#   - Due to the recursive structure, testing operations when there is:
+#       - no leaves (uint64)
+#       - a root and leaves with no nodes (uint128)
+#       - a root + intermediate nodes + leaves (uint256)
+#     should be enough to ensure they work at all sizes, edge cases included.
+#   - Adding a new backend like uint128 (GCC/Clang) or uint256 (LLVM intrinsics only) is just adding
+#     a new case in the `uintImpl` macro.
+#   - All math implementations of the operations have a straightforward translation
+#     to a high-low structure, including the fastest Karatsuba multiplication
+#     and co-recursive division algorithm by Burnikel and Ziegler.
+#     This makes translating those algorithms into Nim easier compared to an array backend.
+#     It would also probably require less code and would be much easier to audit versus
+#     the math reference papers.
+#   - For implementation of algorithms, there is no issue to take subslices of the memory representation
+#     with a recursive tree structure.
+#     On the other side, returning a `var array[N div 2, uint64]` is problematic at the moment.
+#   - Compile-time computation is possible, whereas due to the previous issue
+#     an array backend would be required to use var openarray[uint64],
+#     i.e. pointers.
+#   - Note that while shift-right and left can easily be done on an array of bytes,
+#     this would have reduced performance compared to moving 64-bit words.
+#     An efficient implementation on array of words would require checking the shift
+#     versus a half-word to deal with carry-in/out from and to the adjacent words
+#     similar to a recursive implementation.
+#
+# Iteration over the whole integer, for example for `==`, is always unrolled.
+# Due to being on the stack, any optimizing compiler should compile that to an efficient memcmp.
+
+when not defined(stint_test):
   macro uintImpl*(bits: static[int]): untyped =
-    # Test version, StUint[64] = 2 uint32. Test the logic of the library
-    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
-    assert bits >= 16, "The number of bits in a should be greater or equal to 16"
-
-    if bits >= 128:
-      let inner = getAST(uintImpl(bits div 2))
-      result = newTree(nnkBracketExpr, ident("UintImpl"), inner)
-    elif bits == 64:
-      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint32"))
-    elif bits == 32:
-      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint16"))
-    elif bits == 16:
-      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint8"))
-    else:
-      error "Fatal: unreachable"
-
-  macro intImpl*(bits: static[int]): untyped =
-    # Test version, StInt[64] = 2 uint32. Test the logic of the library
-    # Note that ints are implemented in terms of unsigned ints
-    # Signed operations will be built on top of that.
-    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
-    assert bits >= 16, "The number of bits in a should be greater or equal to 16"
-
-    if bits >= 128:
-      let inner = getAST(uintImpl(bits div 2)) # IntImpl is built on top of UintImpl
-      result = newTree(nnkBracketExpr, ident("IntImpl"), inner)
-    elif bits == 64:
-      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint32"))
-    elif bits == 32:
-      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint16"))
-    elif bits == 16:
-      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint8"))
-    else:
-      error "Fatal: unreachable"
-
-else:
-  macro uintImpl*(bits: static[int]): untyped =
-    # Release version, StUint[64] = uint64.
+    # Release version, word size is uint64 (even on 32-bit arch).
     assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
     assert bits >= 8, "The number of bits in a should be greater or equal to 8"
 
@@ -70,9 +95,9 @@ else:
       error "Fatal: unreachable"
 
   macro intImpl*(bits: static[int]): untyped =
-    # Release version, StInt[64] = int64.
+    # Release version, word size is uint64 (even on 32-bit arch).
     # Note that int of size 128+ are implemented in terms of unsigned ints
-    # Signed operations will be built on top of that.
+    # Signed operations are built on top of that.
 
     if bits >= 128:
       let inner = getAST(uintImpl(bits div 2))
@@ -87,6 +112,42 @@ else:
       result = ident("int8")
     else:
       error "Fatal: unreachable"
+else:
+  macro uintImpl*(bits: static[int]): untyped =
+    # Test version, word size is uint32. Test the logic of the library.
+    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
+    assert bits >= 16, "The number of bits in a should be greater or equal to 16"
+
+    if bits >= 128:
+      let inner = getAST(uintImpl(bits div 2))
+      result = newTree(nnkBracketExpr, ident("UintImpl"), inner)
+    elif bits == 64:
+      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint32"))
+    elif bits == 32:
+      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint16"))
+    elif bits == 16:
+      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint8"))
+    else:
+      error "Fatal: unreachable"
+
+  macro intImpl*(bits: static[int]): untyped =
+    # Test version, word size is uint32. Test the logic of the library.
+    # Note that ints are implemented in terms of unsigned ints
+    # Signed operations are built on top of that.
+    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
+    assert bits >= 16, "The number of bits in a should be greater or equal to 16"
+
+    if bits >= 128:
+      let inner = getAST(uintImpl(bits div 2)) # IntImpl is built on top of UintImpl
+      result = newTree(nnkBracketExpr, ident("IntImpl"), inner)
+    elif bits == 64:
+      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint32"))
+    elif bits == 32:
+      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint16"))
+    elif bits == 16:
+      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint8"))
+    else:
+      error "Fatal: unreachable"
 
 proc getSize*(x: NimNode): static[int] =
   # Default Nim's `sizeof` doesn't always work at compile-time, pending PR https://github.com/nim-lang/Nim/pull/5664