From 5980477e0bf36677318da8f008283641d8ca8e27 Mon Sep 17 00:00:00 2001 From: mratsim Date: Mon, 8 Oct 2018 14:49:36 +0200 Subject: [PATCH] Add a description and why choosing a recursive impl for Stint --- stint/private/datatypes.nim | 143 +++++++++++++++++++++++++----------- 1 file changed, 102 insertions(+), 41 deletions(-) diff --git a/stint/private/datatypes.nim b/stint/private/datatypes.nim index e9302e3..b579d0a 100644 --- a/stint/private/datatypes.nim +++ b/stint/private/datatypes.nim @@ -12,46 +12,71 @@ import macros # The macro uintImpl must be exported -when defined(stint_test): +# #### Overview +# +# Stint extends the default uint8, uint16, uint32, uint64 with power of 2 integers. +# The only limitation is your stack size, so you can have uint128, uint256, uint512 ... +# Signed ints are also possible. +# +# As a high-level API, Stint adheres to Nim and C conventions and uses the same operators like: +# `+`, `xor`, `not` ... +# +# #### Implementation +# +# Stint types are stored on the stack and have a structure +# similar to a binary tree of power of two unsigned integers +# with "high" and "low" words: +# +# Stuint[256] +# hi: Stuint[128] lo: Stuint[128] +# hihi: uint64 hilo: uint64 lohi: uint64 lolo: uint64 +# +# This follows the paper https://hal.archives-ouvertes.fr/hal-00582593v2 +# "Recursive double-size fixed precision arithmetic" from Jul. 2016 +# to implement an efficient fixed precision bigint for embedded devices, especially FPGAs. +# +# For testing purposes, the flag `-d:stint_test` can be passed at compile-time +# to switch the backend to uint32. +# In the future the default backend will become uint128 on supporting compilers. +# +# This has the following benefits: +# - BigEndian/LittleEndian support is trivial. 
+# - Not having for loops helps the compiler produce the most efficient instructions +# like ADC (Add with Carry) +# - Proving that the recursive structure works at depth 64 for a uint32 backend means that +# it would work at depth 128 for a uint64 backend. +# We can easily choose a uint16 or uint8 backend as well. +# - Due to the recursive structure, testing operations when there are: +# - no leaves (uint64) +# - a root and leaves with no nodes (uint128) +# - a root + intermediate nodes + leaves (uint256) +# should be enough to ensure they work at all sizes, edge cases included. +# - Adding a new backend like uint128 (GCC/Clang) or uint256 (LLVM intrinsics only) is just adding +# a new case in the `uintImpl` macro. +# - All math implementations of the operations have a straightforward translation +# to a high-low structure, including the fastest Karatsuba multiplication +# and the co-recursive division algorithm by Burnikel and Ziegler. +# This makes translating those algorithms into Nim easier compared to an array backend. +# It would also probably require less code and would be much easier to audit versus +# the math reference papers. +# - For the implementation of algorithms, there is no issue taking subslices of the memory representation +# with a recursive tree structure. +# On the other hand, returning a `var array[N div 2, uint64]` is problematic at the moment. +# - Compile-time computation is possible, whereas due to the previous issue +# an array backend would be required to use var openarray[uint64], +# i.e. pointers. +# - Note that while shift-right and shift-left can easily be done on an array of bytes, +# this would have reduced performance compared to moving 64-bit words. +# An efficient implementation on an array of words would require checking the shift +# versus a half-word to deal with carry-in/out from and to the adjacent words, +# similar to a recursive implementation. +# +# Iteration over the whole integer, for example for `==`, is always unrolled. 
+# Due to being on the stack, any optimizing compiler should compile that to efficient memcmp + +when not defined(stint_test): macro uintImpl*(bits: static[int]): untyped = - # Test version, StUint[64] = 2 uint32. Test the logic of the library - assert (bits and (bits-1)) == 0, $bits & " is not a power of 2" - assert bits >= 16, "The number of bits in a should be greater or equal to 16" - - if bits >= 128: - let inner = getAST(uintImpl(bits div 2)) - result = newTree(nnkBracketExpr, ident("UintImpl"), inner) - elif bits == 64: - result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint32")) - elif bits == 32: - result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint16")) - elif bits == 16: - result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint8")) - else: - error "Fatal: unreachable" - - macro intImpl*(bits: static[int]): untyped = - # Test version, StInt[64] = 2 uint32. Test the logic of the library - # Note that ints are implemented in terms of unsigned ints - # Signed operations will be built on top of that. - assert (bits and (bits-1)) == 0, $bits & " is not a power of 2" - assert bits >= 16, "The number of bits in a should be greater or equal to 16" - - if bits >= 128: - let inner = getAST(uintImpl(bits div 2)) # IntImpl is built on top of UintImpl - result = newTree(nnkBracketExpr, ident("IntImpl"), inner) - elif bits == 64: - result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint32")) - elif bits == 32: - result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint16")) - elif bits == 16: - result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint8")) - else: - error "Fatal: unreachable" - -else: - macro uintImpl*(bits: static[int]): untyped = - # Release version, StUint[64] = uint64. + # Release version, word size is uint64 (even on 32-bit arch). 
assert (bits and (bits-1)) == 0, $bits & " is not a power of 2" assert bits >= 8, "The number of bits in a should be greater or equal to 8" @@ -70,9 +95,9 @@ else: error "Fatal: unreachable" macro intImpl*(bits: static[int]): untyped = - # Release version, StInt[64] = int64. + # Release version, word size is uint64 (even on 32-bit arch). # Note that int of size 128+ are implemented in terms of unsigned ints - # Signed operations will be built on top of that. + # Signed operations are built on top of that. if bits >= 128: let inner = getAST(uintImpl(bits div 2)) @@ -87,6 +112,42 @@ else: result = ident("int8") else: error "Fatal: unreachable" +else: + macro uintImpl*(bits: static[int]): untyped = + # Test version, word size is uint32. Test the logic of the library. + assert (bits and (bits-1)) == 0, $bits & " is not a power of 2" + assert bits >= 16, "The number of bits in a should be greater or equal to 16" + + if bits >= 128: + let inner = getAST(uintImpl(bits div 2)) + result = newTree(nnkBracketExpr, ident("UintImpl"), inner) + elif bits == 64: + result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint32")) + elif bits == 32: + result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint16")) + elif bits == 16: + result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint8")) + else: + error "Fatal: unreachable" + + macro intImpl*(bits: static[int]): untyped = + # Test version, word size is uint32. Test the logic of the library. + # Note that ints are implemented in terms of unsigned ints + # Signed operations will be built on top of that. 
+ assert (bits and (bits-1)) == 0, $bits & " is not a power of 2" + assert bits >= 16, "The number of bits in a should be greater or equal to 16" + + if bits >= 128: + let inner = getAST(uintImpl(bits div 2)) # IntImpl is built on top of UintImpl + result = newTree(nnkBracketExpr, ident("IntImpl"), inner) + elif bits == 64: + result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint32")) + elif bits == 32: + result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint16")) + elif bits == 16: + result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint8")) + else: + error "Fatal: unreachable" proc getSize*(x: NimNode): static[int] = # Default Nim's `sizeof` doesn't always work at compile-time, pending PR https://github.com/nim-lang/Nim/pull/5664