From 2a438653a8712e99dea8670f38f65852820f9f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mamy=20Andr=C3=A9-Ratsimbazafy?= Date: Tue, 11 Feb 2020 12:42:24 +0100 Subject: [PATCH] Don't emit useless inline C functions --- constantine/primitives.nim | 171 +++++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 74 deletions(-) diff --git a/constantine/primitives.nim b/constantine/primitives.nim index a1adfed..53b61be 100644 --- a/constantine/primitives.nim +++ b/constantine/primitives.nim @@ -17,35 +17,80 @@ type Ct*[T: BaseUint] = distinct T - CTBool*[T: Ct] = distinct range[T(0)..T(1)] + CTBool*[T: Ct] = distinct T # range[T(0)..T(1)] ## To avoid the compiler replacing bitwise boolean operations ## by conditional branches, we don't use booleans. ## We use an int to prevent compiler "optimization" and introduction of branches + # Note, we could use "range" but then the codegen + # uses machine-sized signed integer types. + # signed types and machine-dependent words are undesired + # - we don't want compiler optimizing signed "undefined behavior" + # - Basic functions like BIgInt add/sub + # return and/or accept CTBool, we don't want them + # to require unnecessarily 8 bytes instead of 4 bytes + +# ############################################################ +# +# Bit hacks +# +# ############################################################ + +template isMsbSet*[T: Ct](x: T): CTBool[T] = + ## Returns the most significant bit of an integer + const msb_pos = T.sizeof * 8 - 1 + (CTBool[T])(x shr msb_pos) + +func log2*(x: uint32): uint32 = + ## Find the log base 2 of a 32-bit or less integer. + ## using De Bruijn multiplication + ## Works at compile-time, guaranteed constant-time. + # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn + const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, + 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31] + var v = x + v = v or v shr 1 # first round down to one less than a power of 2 + v = v or v shr 2 + v = v or v shr 4 + v = v or v shr 8 + v = v or v shr 16 + lookup[(v * 0x07C4ACDD'u32) shr 27] + +# ############################################################ +# +# Pragmas +# +# ############################################################ # No exceptions allowed {.push raises: [].} # Word primitives are inlined {.push inline.} -func ctrue*(T: typedesc[Ct or BaseUint]): auto = +# ############################################################ +# +# Constructors +# +# ############################################################ + +template ctrue*(T: typedesc[Ct or BaseUint]): auto = when T is Ct: (CTBool[T])(true) else: (CTBool[Ct[T]])(true) -func cfalse*(T: typedesc[Ct or BaseUint]): auto = +template cfalse*(T: typedesc[Ct or BaseUint]): auto = when T is Ct: (CTBool[T])(false) else: (CTBool[Ct[T]])(false) -func ct*[T: BaseUint](x: T): Ct[T] = +template ct*[T: BaseUint](x: T): Ct[T] = (Ct[T])(x) -func `$`*[T](x: Ct[T]): string = +template `$`*[T](x: Ct[T]): string = $T(x) -func `$`*(x: CTBool): string = +template `$`*(x: CTBool): string = $bool(x) # ############################################################ @@ -69,67 +114,47 @@ func `$`*(x: CTBool): string = # ################################################################# # Hard base borrows -# We should use {.borrow.} instead of {.magic.} but pending: +# We should use {.borrow.} instead: # - https://github.com/nim-lang/Nim/pull/8531 # - https://github.com/nim-lang/Nim/issues/4121 (can be workaround with #8531) -func high*(T: typedesc[Ct]): T = - not T(0) +template fmap[T: Ct](x: T, op: untyped, y: T): T = + ## Unwrap x and y from their distinct type + ## Apply op, and rewrap them + T(op(T.T(x), T.T(y))) -func `and`*[T: Ct](x, y: T): T {.magic: "BitandI".} -func `or`*[T: Ct](x, y: T): T {.magic: "BitorI".} -func `xor`*[T: Ct](x, y: T): T {.magic: "BitxorI".} -# func `not`*[T: Ct](x: T): T {.magic: "BitnotI".} # int128 changes broke the magic -template `not`*[T: Ct](x: T): T = - # Note: T.T is Ct.T is the conversion to the base type - T(not T.T(x)) +template fmapAsgn[T: Ct](x: T, op: untyped, y: T) = + ## Unwrap x and y from their distinct type + ## Apply assignment op, and rewrap them + op(T.T(x), T.T(y)) -func `+`*[T: Ct](x, y: T): T {.magic: "AddU".} -func `+=`*[T: Ct](x: var T, y: T) = - T.T(x) += (T.T)(y) -func `-`*[T: Ct](x, y: T): T {.magic: "SubU".} -func `-=`*[T: Ct](x: var T, y: T) = - T.T(x) -= (T.T)(y) -func `shr`*[T: Ct](x: T, y: SomeInteger): T {.magic: "ShrI".} -func `shl`*[T: Ct](x: T, y: SomeInteger): T {.magic: "ShlI".} +template `and`*[T: Ct](x, y: T): T = fmap(x, `and`, y) +template `or`*[T: Ct](x, y: T): T = fmap(x, `or`, y) +template `xor`*[T: Ct](x, y: T): T = fmap(x, `xor`, y) +template `not`*[T: Ct](x: T): T = T(not T.T(x)) +template `+`*[T: Ct](x, y: T): T = fmap(x, `+`, y) +template `+=`*[T: Ct](x: var T, y: T) = fmapAsgn(x, `+=`, y) +template `-`*[T: Ct](x, y: T): T = fmap(x, `-`, y) +template `-=`*[T: Ct](x: var T, y: T) = fmapAsgn(x, `-=`, y) +template `shr`*[T: Ct](x: T, y: SomeInteger): T = T(T.T(x) shr y) +template `shl`*[T: Ct](x: T, y: SomeInteger): T = T(T.T(x) shl y) -func `*`*[T: Ct](x, y: T): T {.magic: "MulU".} -# Warning ⚠️ : We assume that mul hardware multiplication is constant time -# but this is not always true, especially on ARMv7 and ARMv9 +template `*`*[T: Ct](x, y: T): T = + # Warning ⚠️ : We assume that mul hardware multiplication is constant time + # but this is not always true, especially on ARMv7 and ARMv9 + fmap(x, `*`, y) # We don't implement div/mod as we can't assume the hardware implementation # is constant-time -func `-`*(x: Ct): Ct = +template `-`*[T: Ct](x: T): T = ## Unary minus returns the two-complement representation ## of an unsigned integer - {.emit:"`result` = -`x`;".} - -# ############################################################ -# -# Bit hacks -# -# ############################################################ - -func isMsbSet*[T: Ct](x: T): CTBool[T] = - ## Returns the most significant bit of an integer - const msb_pos = T.sizeof * 8 - 1 - result = (CTBool[T])(x shr msb_pos) - -func log2*(x: uint32): uint32 = - ## Find the log base 2 of a 32-bit or less integer. - ## using De Bruijn multiplication - ## Works at compile-time, guaranteed constant-time. - # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn - const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, - 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31] - var v = x - v = v or v shr 1 # first round down to one less than a power of 2 - v = v or v shr 2 - v = v or v shr 4 - v = v or v shr 8 - v = v or v shr 16 - lookup[(v * 0x07C4ACDD'u32) shr 27] + # We could use "not(x) + 1" but the codegen is not optimal + block: + var neg: T + {.emit:[neg, " = -", x, ";"].} + neg # ############################################################ # @@ -137,39 +162,36 @@ func log2*(x: uint32): uint32 = # # ############################################################ -template undistinct[T: Ct](x: CTBool[T]): T = - T(x) +template fmap[T: Ct](x: CTBool[T], op: untyped, y: CTBool[T]): CTBool[T] = + CTBool[T](op(T(x), T(y))) -func `not`*(ctl: CTBool): CTBool = +template `not`*[T: Ct](ctl: CTBool[T]): CTBool[T] = ## Negate a constant-time boolean - (type result)(ctl.undistinct xor (type ctl.undistinct)(1)) + CTBool[T](T(ctl) xor T(1)) -func `and`*(x, y: CTBool): CTBool {.magic: "BitandI".} -func `or`*(x, y: CTBool): CTBool {.magic: "BitorI".} +template `and`*(x, y: CTBool): CTBool = fmap(x, `and`, y) +template `or`*(x, y: CTBool): CTBool = fmap(x, `or`, y) -func noteq[T: Ct](x, y: T): CTBool[T] = +template noteq[T: Ct](x, y: T): CTBool[T] = const msb = T.sizeof * 8 - 1 let z = x xor y - result = (type result)((z or -z) shr msb) + CTBool[T]((z or -z) shr msb) -func `==`*[T: Ct](x, y: T): CTBool[T] = +template `==`*[T: Ct](x, y: T): CTBool[T] = not(noteq(x, y)) -func `<`*[T: Ct](x, y: T): CTBool[T] = - result = isMsbSet( +template `<`*[T: Ct](x, y: T): CTBool[T] = + isMsbSet( x xor ( (x xor y) or ((x - y) xor y) ) ) -func `<=`*[T: Ct](x, y: T): CTBool[T] = +template `<=`*[T: Ct](x, y: T): CTBool[T] = not(y < x) -func `==`*(x, y: CTBool): CTBool = - (type result)(x.undistinct == y.undistinct) - -func `xor`*(x, y: CTBool): CTBool = - (type result)(x.undistinct.noteq(y.undistinct)) +template `xor`*[T: Ct](x, y: CTBool[T]): CTBool[T] = + CTBool[T](noteq(T(x), T(y))) template mux*[T: Ct](ctl: CTBool[T], x, y: T): T = ## Multiplexer / selector @@ -182,6 +204,7 @@ template mux*[T: Ct](ctl: CTBool[T], x, y: T): T = # as mentioned in https://cryptocoding.net/index.php/Coding_rules # the alternative `(x and ctl) or (y and -ctl)` # is optimized into a branch by Clang :/ + # See also: https://www.cl.cam.ac.uk/~rja14/Papers/whatyouc.pdf # TODO: assembly fastpath for conditional mov @@ -211,10 +234,10 @@ template trmFixSystemNotEq*{x != y}[T: Ct](x, y: T): CTBool[T] = # # ############################################################ -func isNonZero*[T: Ct](x: T): CTBool[T] = +template isNonZero*[T: Ct](x: T): CTBool[T] = isMsbSet(x or -x) -func isZero*[T: Ct](x: T): CTBool[T] = +template isZero*[T: Ct](x: T): CTBool[T] = not x.isNonZero # ############################################################