# Stint
# Copyright 2018 Status Research & Development GmbH
# Licensed under either of
#
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
#
# at your option. This file may not be copied, modified, or distributed except according to those terms.

# TODO: test if GCC/Clang support uint128 natively

import macros

# The macro uintImpl must be exported

# #### Overview
#
# Stint extends the default uint8, uint16, uint32, uint64 with power of 2
# integers. Only limitation is your stack size so you can have
# uint128, uint256, uint512 ...
# Signed int are also possible.
#
# As a high-level API, Stint adheres to Nim and C conventions and uses the
# same operators like: `+`, `xor`, `not` ...
#
# #### Implementation
#
# Stint types are stored on the stack and have a structure
# similar to a binary tree of power of two unsigned integers
# with "high" and "low" words:
#
#                              Stuint[256]
#            hi: Stuint[128]                  lo: Stuint[128]
#   hihi: uint64    hilo: uint64     lohi: uint64    lolo: uint64
#
# This follows paper https://hal.archives-ouvertes.fr/hal-00582593v2
# "Recursive double-size fixed precision arithmetic" from Jul. 2016
# to implement an efficient fixed precision bigint for embedded devices,
# especially FPGAs.
#
# For testing purpose, the flag `-d:stint_test` can be passed at compile-time
# to switch the backend to uint32.
# In the future the default backend will become uint128 on supporting
# compilers.
#
# This has following benefits:
#   - BigEndian/LittleEndian support is trivial.
#   - Not having for loops helps the compiler producing the most efficient
#     instructions like ADC (Add with Carry)
#   - Proving that the recursive structure works at depth 64 for uint32
#     backend means that it would work at depth 128 for uint64 backend.
#     We can easily choose a uint16 or uint8 backend as well.
#   - Due to the recursive structure, testing operations when there is:
#       - no leaves (uint64)
#       - a root and leaves with no nodes (uint128)
#       - a root + intermediate nodes + leaves (uint256)
#     should be enough to ensure they work at all sizes, edge cases included.
#   - Adding a new backend like uint128 (GCC/Clang) or uint256
#     (LLVM intrinsics only) is just adding a new case in the `uintImpl`
#     macro.
#   - All math implementations of the operations have a straightforward
#     translation to a high-low structure, including the fastest Karatsuba
#     multiplication and co-recursive division algorithm by Burnikel and
#     Ziegler. This makes translating those algorithms into Nim easier
#     compared to an array backend. It would also probably require less code
#     and would be much easier to audit versus the math reference papers.
#   - For implementation of algorithms, there is no issue to take subslices
#     of the memory representation with a recursive tree structure.
#     On the other side, returning a `var array[N div 2, uint64]` is
#     problematic at the moment.
#   - Compile-time computation is possible while due to the previous issue
#     an array backend would be required to use var openarray[uint64]
#     i.e. pointers.
#   - Note that while shift-right and left can easily be done on an array of
#     bytes this would have reduced performance compared to moving 64-bit
#     words. An efficient implementation on array of words would require
#     checking the shift versus a half-word to deal with carry-in/out from
#     and to the adjacent words similar to a recursive implementation.
#
# Iterations over the whole integers, for example for `==` is always unrolled.
# Due to being on the stack, any optimizing compiler should compile that to
# efficient memcmp
#
# When not to use Stint:
#
# 1. Constant-time arithmetics
#    - Do not use Stint if you need side-channels resistance,
#      This requires to avoid all branches (i.e. no booleans)
# 2. Arbitrary-precision with lots of small-values
#    - If you need arbitrary precision but most of the time you store small
#      values you will waste a lot of memory unless you use an object variant
#      of various Stint sizes.
#
#      type MyUint = object
#        case kind: int
#        of 0..64: uint64
#        of 65..128: ref Stuint[128]
#        of 129..256: ref Stuint[256]
#        ...
#
# Note: if you work with huge size, you can allocate stints on the heap with
#       for example `type HeapInt8192 = ref Stint[8192]`.
#       If you have a lot of computations and intermediate variables it's
#       probably worthwhile to explore creating an object pool to reuse the
#       memory buffers.

when not defined(stint_test):
  macro uintImpl*(bits: static[int]): untyped =
    ## Release version, word size is uint64 (even on 32-bit arch).
    ##
    ## Maps a bit width to its backing type: a native unsigned integer for
    ## 8..64 bits, and nested `UintImpl[...]` (each level doubling the width
    ## of the type it wraps) for 128 bits and above.
    ## `bits` must be a power of 2 and at least 8.
    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
    assert bits >= 8, "The number of bits should be greater or equal to 8"

    if bits >= 128:
      # Recurse on the half-width type and wrap it in one more level.
      let inner = getAST(uintImpl(bits div 2))
      result = newTree(nnkBracketExpr, ident("UintImpl"), inner)
    elif bits == 64:
      result = ident("uint64")
    elif bits == 32:
      result = ident("uint32")
    elif bits == 16:
      result = ident("uint16")
    elif bits == 8:
      result = ident("uint8")
    else:
      error "Fatal: unreachable"

  macro intImpl*(bits: static[int]): untyped =
    ## Release version, word size is uint64 (even on 32-bit arch).
    ##
    ## Note that int of size 128+ are implemented in terms of unsigned ints.
    ## Signed operations are built on top of that.
    ## `bits` must be a power of 2 and at least 8.
    # Same preconditions as `uintImpl`, so that an invalid width is reported
    # with a meaningful message instead of "Fatal: unreachable".
    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
    assert bits >= 8, "The number of bits should be greater or equal to 8"

    if bits >= 128:
      # IntImpl wraps the unsigned half-width representation.
      let inner = getAST(uintImpl(bits div 2))
      result = newTree(nnkBracketExpr, ident("IntImpl"), inner)
    elif bits == 64:
      result = ident("int64")
    elif bits == 32:
      result = ident("int32")
    elif bits == 16:
      result = ident("int16")
    elif bits == 8:
      result = ident("int8")
    else:
      error "Fatal: unreachable"

else:
  macro uintImpl*(bits: static[int]): untyped =
    ## Test version, word size is uint32. Test the logic of the library.
    ##
    ## Every supported width (16 bits and above) is represented as a
    ## `UintImpl` of the half-width type, down to `UintImpl[uint8]`.
    ## `bits` must be a power of 2 and at least 16.
    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
    assert bits >= 16, "The number of bits should be greater or equal to 16"

    if bits >= 128:
      let inner = getAST(uintImpl(bits div 2))
      result = newTree(nnkBracketExpr, ident("UintImpl"), inner)
    elif bits == 64:
      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint32"))
    elif bits == 32:
      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint16"))
    elif bits == 16:
      result = newTree(nnkBracketExpr, ident("UintImpl"), ident("uint8"))
    else:
      error "Fatal: unreachable"

  macro intImpl*(bits: static[int]): untyped =
    ## Test version, word size is uint32. Test the logic of the library.
    ##
    ## Note that ints are implemented in terms of unsigned ints.
    ## Signed operations will be built on top of that.
    ## `bits` must be a power of 2 and at least 16.
    assert (bits and (bits-1)) == 0, $bits & " is not a power of 2"
    assert bits >= 16, "The number of bits should be greater or equal to 16"

    if bits >= 128:
      # IntImpl is built on top of UintImpl
      let inner = getAST(uintImpl(bits div 2))
      result = newTree(nnkBracketExpr, ident("IntImpl"), inner)
    elif bits == 64:
      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint32"))
    elif bits == 32:
      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint16"))
    elif bits == 16:
      result = newTree(nnkBracketExpr, ident("IntImpl"), ident("uint8"))
    else:
      error "Fatal: unreachable"

proc getSize*(x: NimNode): static[int] =
  ## Computes, at compile-time, the size **in bits** of the integer type
  ## of `x`.
  ##
  ## Supports primitive integers and (possibly nested) `UintImpl[...]` /
  ## `IntImpl[...]` instantiations. Any other type is a compile-time error.
  # Default Nim's `sizeof` doesn't always work at compile-time,
  # pending PR https://github.com/nim-lang/Nim/pull/5664

  var multiplier = 1
  var node = x.getTypeInst

  # Each UintImpl/IntImpl level holds two words of its parameter type,
  # so unwrapping one level doubles the total width.
  while node.kind == nnkBracketExpr:
    assert eqIdent(node[0], "UintImpl") or eqIdent(node[0], "IntImpl"), (
      "getSize only supports primitive integers, Stint and Stuint")
    multiplier *= 2
    node = node[1] # node[1] has the type parameter

  # `node` is now the leaf type:
  # bit width of the leaf * multiplier is the total size in bits.
  if eqIdent(node, "uint64") or eqIdent(node, "int64"):
    result = multiplier * 64
  elif eqIdent(node, "uint32") or eqIdent(node, "int32"):
    result = multiplier * 32
  elif eqIdent(node, "uint16") or eqIdent(node, "int16"):
    result = multiplier * 16
  elif eqIdent(node, "uint8") or eqIdent(node, "int8"):
    result = multiplier * 8
  elif eqIdent(node, "int") or eqIdent(node, "uint"):
    # `sizeof` is in bytes, convert to bits.
    result = multiplier * 8 * sizeof(int)
  else:
    # Use `error` rather than `assert false` so that an unsupported type is
    # always rejected, even when assertions are disabled.
    error "Error when computing the size. Found: " & repr(node)

macro getSize*(x: typed): untyped =
  ## Expands to an integer literal holding the size in bits of the type
  ## of `x`, as computed by the `getSize` proc on its typed AST.
  let size = getSize(x)
  result = quote do:
    `size`

type
  # ### Private ### #
  BaseUint* = UintImpl or SomeUnsignedInt

  UintImpl*[Baseuint] = object
    # Field order follows the CPU endianness, so that the in-memory layout
    # matches the one of a native integer of the same width.
    when system.cpuEndian == littleEndian:
      lo*, hi*: BaseUint
    else:
      hi*, lo*: BaseUint

  IntImpl*[Baseuint] = object
    # Ints are implemented in terms of uints
    when system.cpuEndian == littleEndian:
      lo*, hi*: BaseUint
    else:
      hi*, lo*: BaseUint

  # ### Private ### #

  StUint*[bits: static[int]] = object
    data*: uintImpl(bits)

  StInt*[bits: static[int]] = object
    data*: intImpl(bits)