# Constantine
# Copyright (c) 2018-2019    Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
  ../config/common,
  ../primitives

when UseASM_X86_32:
  import ./assembly/limbs_asm_x86
when UseASM_X86_64:
  import ./assembly/limbs_asm_mul_x86
  import ./assembly/limbs_asm_mul_x86_adx_bmi2

# ############################################################
#
#         Limbs raw representation and operations
#
# ############################################################
#
# This file holds the raw operations done on big ints
# The representation is optimized for:
# - constant-time (not leaking secret data via side-channel)
# - performance
# - generated code size, datatype size and stack usage
# in this order
#
# The "limbs" API limits code duplication
# due to generic/static monomorphization for bit-width
# that are represented with the same number of words.
#
# It also exposes the number of words to the compiler
# to allow aggressive unrolling and inlining for example
# of multi-precision addition which is so small (2 instructions per word)
# that inlining it improves both performance and code-size
# even for 2 curves (secp256k1 and BN254) that could share the code.
#
# The limb-endianness is little-endian, least significant limb is at index 0.
# The word-endianness is native-endian.

# No exceptions allowed
{.push raises: [].}

# ############################################################
#
#                   Limbs Primitives
#
# ############################################################

{.push inline.}
# The following primitives are small enough on regular limb sizes
# (BN254 and secp256k1 -> 4 limbs, BLS12-381 -> 6 limbs)
# that inline both decreases the code size and increases speed
# as we avoid the parameter packing/unpacking ceremony at function entry/exit
# and unrolling overhead is minimal.

# Initialization
# ------------------------------------------------------------

func setZero*(a: var Limbs) =
  ## Set ``a`` to 0
  ## All limbs are cleared with a single ``zeroMem`` over ``sizeof(a)`` bytes.
  zeroMem(a[0].addr, sizeof(a))

func setOne*(a: var Limbs) =
  ## Set ``a`` to 1
  ## The least significant limb receives 1, every other limb is cleared.
  a[0] = SecretWord(1)
  when a.len > 1:
    # Only emitted when there are limbs beyond index 0
    zeroMem(a[1].addr, (a.len - 1) * sizeof(SecretWord))

# Copy
# ------------------------------------------------------------

func ccopy*(a: var Limbs, b: Limbs, ctl: SecretBool) =
  ## Constant-time conditional copy
  ## If ctl is true: b is copied into a
  ## if ctl is false: b is not copied and a is untouched
  ## Time and memory accesses are the same whether a copy occurs or not
  when UseASM_X86_32:
    ccopy_asm(a, b, ctl)
  else:
    for i in 0 ..< a.len:
      ctl.ccopy(a[i], b[i])

func cswap*(a, b: var Limbs, ctl: CTBool) =
  ## Swap ``a`` and ``b`` if ``ctl`` is true
  ##
  ## Constant-time:
  ## Whether ``ctl`` is true or not, the same
  ## memory accesses are done (unless the compiler tries to be clever)
  # ``mask`` is the negation of ctl converted to a word; it is never
  # reassigned, hence ``let``.
  let mask = -(SecretWord ctl)
  for i in 0 ..< a.len:
    # t is (a[i] xor b[i]) filtered by the mask; xoring it into both
    # operands either exchanges them or leaves them unchanged.
    let t = mask and (a[i] xor b[i])
    a[i] = a[i] xor t
    b[i] = b[i] xor t

# Comparison
# ------------------------------------------------------------

func `==`*(a, b: Limbs): SecretBool =
  ## Returns true if 2 limbs are equal
  ## Comparison is constant-time
  # OR together the limb-wise differences: the accumulator stays
  # zero only if every pair of limbs matches.
  var accum = Zero
  for i in 0 ..< a.len:
    accum = accum or (a[i] xor b[i])
  result = accum.isZero()

func `<`*(a, b: Limbs): SecretBool =
  ## Returns true if a < b
  ## Comparison is constant-time
  # Run the full borrow chain of a - b; the limb differences are
  # discarded, only the final borrow is returned.
  var diff: SecretWord
  var borrow: Borrow
  for i in 0 ..< a.len:
    subB(borrow, diff, a[i], b[i], borrow)

  result = (SecretBool)(borrow)

func `<=`*(a, b: Limbs): SecretBool =
  ## Returns true if a <= b
  ## Comparison is constant-time
  not(b < a)

func isZero*(a: Limbs): SecretBool =
  ## Returns true if ``a`` is equal to zero
  var accum = Zero
  for i in 0 ..< a.len:
    accum = accum or a[i]
  result = accum.isZero()

func eq*(a: Limbs, n: SecretWord): SecretBool =
  ## Returns true if ``a`` is equal
  ## to the specified small word
  # The lowest limb must match ``n`` and all higher limbs must be zero.
  result = a[0] == n
  for i in 1 ..< a.len:
    result = result and a[i].isZero()

func isOne*(a: Limbs): SecretBool =
  ## Returns true if ``a`` is equal to one
  a.eq(SecretWord(1))

func isOdd*(a: Limbs): SecretBool =
  ## Returns true if a is odd
  ## Only the lowest bit of the least significant limb is inspected.
  SecretBool(a[0] and SecretWord(1))

func isEven*(a: Limbs): SecretBool =
  ## Returns true if a is even
  ## Defined as the negation of ``isOdd``.
  not a.isOdd()

# Bit manipulation
# ------------------------------------------------------------

func shiftRight*(a: var Limbs, k: int) =
  ## Shift right by k.
  ##
  ## k MUST be less than the word bit width (``WordBitWidth``).
  ##
  ## NOTE(review): for k == 0 the complementary shift amount below is
  ## ``WordBitWidth`` itself; presumably callers must pass k >= 1.
  ## To be confirmed (the ``checkWordShift`` guard is commented out).
  # We don't reuse shr as this is an in-place operation
  # Do we need to return the shifted out part?
  #
  # Note: for speed, loading a[i] and a[i+1]
  #       instead of a[i-1] and a[i]
  #       is probably easier to parallelize for the compiler
  #       (antidependence WAR vs loop-carried dependence RAW)

  # checkWordShift(k)

  for i in 0 ..< a.len-1:
    # Low part comes from a[i], the vacated high bits from a[i+1]
    a[i] = (a[i] shr k) or (a[i+1] shl (WordBitWidth - k))
  a[a.len-1] = a[a.len-1] shr k

# Basic Arithmetic
# ------------------------------------------------------------

func add*(a: var Limbs, b: Limbs): Carry =
  ## Limbs addition
  ## a <- a + b
  ## Returns the carry
  when UseASM_X86_32:
    result = add_asm(a, a, b)
  else:
    result = Carry(0)
    for i in 0 ..< a.len:
      addC(result, a[i], a[i], b[i], result)

func add*(a: var Limbs, w: SecretWord): Carry =
  ## Limbs addition, add a number that fits in a word
  ## Returns the carry
  result = Carry(0)
  addC(result, a[0], a[0], w, result)
  # Propagate the carry through the remaining limbs
  for i in 1 ..< a.len:
    addC(result, a[i], a[i], Zero, result)

func sum*(r: var Limbs, a, b: Limbs): Carry =
  ## Sum `a` and `b` into `r`
  ## `r` is initialized/overwritten
  ##
  ## Returns the carry
  when UseASM_X86_32:
    result = add_asm(r, a, b)
  else:
    result = Carry(0)
    for i in 0 ..< a.len:
      addC(result, r[i], a[i], b[i], result)

func sub*(a: var Limbs, b: Limbs): Borrow =
  ## Limbs subtraction
  ## a <- a - b
  ## Returns the borrow
  when UseASM_X86_32:
    result = sub_asm(a, a, b)
  else:
    result = Borrow(0)
    for i in 0 ..< a.len:
      subB(result, a[i], a[i], b[i], result)

func sub*(a: var Limbs, w: SecretWord): Borrow =
  ## Limbs subtraction, sub a number that fits in a word
  ## Returns the borrow
  result = Borrow(0)
  subB(result, a[0], a[0], w, result)
  # Propagate the borrow through the remaining limbs
  for i in 1 ..< a.len:
    subB(result, a[i], a[i], Zero, result)

func diff*(r: var Limbs, a, b: Limbs): Borrow =
  ## Diff `a` and `b` into `r`
  ## `r` is initialized/overwritten
  ##
  ## Returns the borrow
  when UseASM_X86_32:
    result = sub_asm(r, a, b)
  else:
    result = Borrow(0)
    for i in 0 ..< a.len:
      subB(result, r[i], a[i], b[i], result)

# Conditional arithmetic
# ------------------------------------------------------------

func cadd*(a: var Limbs, b: Limbs, ctl: SecretBool): Carry =
  ## Limbs conditional addition
  ## Returns the carry
  ##
  ## if ctl is true: a <- a + b
  ## if ctl is false: a <- a
  ## The carry is always computed whether ctl is true or false
  ##
  ## Time and memory accesses are the same whether a copy occurs or not
  result = Carry(0)
  var sum: SecretWord
  for i in 0 ..< a.len:
    addC(result, sum, a[i], b[i], result)
    ctl.ccopy(a[i], sum)

func cadd*(a: var Limbs, w: SecretWord, ctl: SecretBool): Borrow =
  ## Limbs conditional addition, add a number that fits in a word
  ## Returns the carry
  ##
  ## if ctl is true: a <- a + w
  ## if ctl is false: a <- a
  ## The carry is always computed whether ctl is true or false
  ##
  ## NOTE(review): the declared return type is ``Borrow`` although the
  ## value is built from ``Carry(0)``; it is kept unchanged here for
  ## interface compatibility. Presumably ``Carry`` was intended.
  result = Carry(0)
  var sum: SecretWord
  addC(result, sum, a[0], w, result)
  ctl.ccopy(a[0], sum)
  # Propagate the carry through the remaining limbs
  for i in 1 ..< a.len:
    addC(result, sum, a[i], Zero, result)
    ctl.ccopy(a[i], sum)

func csub*(a: var Limbs, b: Limbs, ctl: SecretBool): Borrow =
  ## Limbs conditional subtraction
  ## Returns the borrow
  ##
  ## if ctl is true: a <- a - b
  ## if ctl is false: a <- a
  ## The borrow is always computed whether ctl is true or false
  ##
  ## Time and memory accesses are the same whether a copy occurs or not
  result = Borrow(0)
  var diff: SecretWord
  for i in 0 ..< a.len:
    subB(result, diff, a[i], b[i], result)
    ctl.ccopy(a[i], diff)

func csub*(a: var Limbs, w: SecretWord, ctl: SecretBool): Borrow =
  ## Limbs conditional subtraction, sub a number that fits in a word
  ## Returns the borrow
  ##
  ## if ctl is true: a <- a - w
  ## if ctl is false: a <- a
  ## The borrow is always computed whether ctl is true or false
  result = Borrow(0)
  var diff: SecretWord
  subB(result, diff, a[0], w, result)
  ctl.ccopy(a[0], diff)
  # Propagate the borrow through the remaining limbs
  for i in 1 ..< a.len:
    subB(result, diff, a[i], Zero, result)
    ctl.ccopy(a[i], diff)

func cneg*(a: var Limbs, ctl: CTBool) =
  ## Conditional negation.
  ## Negate if ``ctl`` is true

  # Algorithm:
  # In two-complement representation
  #  -x <=> not(x) + 1 <=> x xor 0xFF... + 1
  # and
  #   x <=> x xor 0x00...<=> x xor 0x00... + 0
  #
  # So we need to xor all words and then add 1
  # The "+1" might carry
  # So we fuse the 2 steps
  let mask = -SecretWord(ctl)  # Obtain a 0xFF... or 0x00... mask
  var carry = SecretWord(ctl)
  for i in 0 ..< a.len:
    let t = (a[i] xor mask) + carry # XOR with mask and add 0x01 or 0x00 respectively
    carry = SecretWord(t < carry)   # Carry on
    a[i] = t

{.pop.} # inline

# Multiplication
# ------------------------------------------------------------

func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) =
  ## Multi-precision multiplication
  ## r <- a*b
  ##
  ## `a`, `b`, `r` can have a different number of limbs
  ## if `r`.limbs.len < a.limbs.len + b.limbs.len
  ## The result will be truncated, i.e. it will be
  ## a * b (mod (2^WordBitWidth)^r.limbs.len)
  ##
  ## `r` must not alias ``a`` or ``b``

  when UseASM_X86_64 and aLen <= 6:
    # Runtime CPU feature detection selects the assembly routine
    if ({.noSideEffect.}: hasBmi2()) and ({.noSideEffect.}: hasAdx()):
      mul_asm_adx_bmi2(r, a, b)
    else:
      mul_asm(r, a, b)
  elif UseASM_X86_64:
    mul_asm(r, a, b)
  else:
    # We use Product Scanning / Comba multiplication
    # (t, u, v) form a 3-word accumulator for the current column.
    var t, u, v = SecretWord(0)

    staticFor i, 0, min(a.len+b.len, r.len):
      const ib = min(b.len-1, i)
      const ia = i - ib
      staticFor j, 0, min(a.len - ia, ib+1):
        mulAcc(t, u, v, a[ia+j], b[ib-j])

      # Store the finished column and shift the accumulator down one word
      r[i] = v
      v = u
      u = t
      t = SecretWord(0)

    # The lengths are static, so the zero-fill of the unused high
    # limbs of `r` is resolved at compile time.
    when aLen+bLen < rLen:
      for i in aLen+bLen ..< rLen:
        r[i] = SecretWord 0

func prod_high_words*[rLen, aLen, bLen](
       r: var Limbs[rLen],
       a: Limbs[aLen], b: Limbs[bLen],
       lowestWordIndex: static int) =
  ## Multi-precision multiplication keeping only high words
  ## r <- a*b >> (2^WordBitWidth)^lowestWordIndex
  ##
  ## `a`, `b`, `r` can have a different number of limbs
  ## if `r`.limbs.len < a.limbs.len + b.limbs.len - lowestWordIndex
  ## The result will be truncated, i.e. it will be
  ## a * b >> (2^WordBitWidth)^lowestWordIndex (mod (2^WordBitWidth)^r.limbs.len)
  ##
  ## The product is accumulated in a local buffer and copied to `r`
  ## at the end.
  #
  # This is useful for
  # - Barrett reduction
  # - Approximating multiplication by a fractional constant in the form f(a) = K/C * a
  #   with K and C known at compile-time.
  #   We can instead find a well chosen M = (2^WordBitWidth)^w, with M > C (i.e. M is a power of 2 bigger than C)
  #   Precompute P = K*M/C at compile-time
  #   and at runtime do P*a/M <=> P*a >> (WordBitWidth*w)
  #   i.e. prod_high_words(result, P, a, w)

  # We use Product Scanning / Comba multiplication
  var t, u, v = SecretWord(0) # Will raise warning on empty iterations
  var z: Limbs[rLen]          # zero-init, ensure on stack and removes in-place problems

  # The previous 2 columns can affect the lowest word due to carries
  # but not the ones before (we accumulate in 3 words (t, u, v))
  const w = lowestWordIndex - 2

  staticFor i, max(0, w), min(a.len+b.len, r.len+lowestWordIndex):
    const ib = min(b.len-1, i)
    const ia = i - ib
    staticFor j, 0, min(a.len - ia, ib+1):
      mulAcc(t, u, v, a[ia+j], b[ib-j])

    # Columns below lowestWordIndex only contribute their carries
    when i >= lowestWordIndex:
      z[i-lowestWordIndex] = v
    v = u
    u = t
    t = SecretWord(0)

  r = z

{.pop.} # raises no exceptions