diff --git a/constantine/private/word_types_internal.nim b/constantine/private/word_types_internal.nim index a1c4219..bd294a1 100644 --- a/constantine/private/word_types_internal.nim +++ b/constantine/private/word_types_internal.nim @@ -30,11 +30,12 @@ func asm_x86_64_extMul(hi, lo: var uint64, a, b: uint64) {.inline.}= # - High word in RDX # - Low word in RAX + # Don't forget to dereference the var hidden pointer in hi/lo asm """ mulq %[operand] - : "=d" (`*hi`), "=a" (`*lo`) // Don't forget to dereference the var hidden pointer + : "=d" (`*hi`), "=a" (`*lo`) : "a" (`a`), [operand] "rm" (`b`) - : // no clobbered registers + : """ func unsafeExtendedPrecMul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.}= @@ -75,11 +76,16 @@ func asm_x86_64_div2n1n(q, r: var uint64, n_hi, n_lo, d: uint64) {.inline.}= # Result # - Quotient in RAX # - Remainder in RDX + + # 1. name the register/memory "divisor" + # 2. don't forget to dereference the var hidden pointer + # 3. - + # 4. no clobbered registers besides explicitly used RAX and RDX asm """ - divq %[divisor] // We name the register/memory divisor - : "=a" (`*q`), "=d" (`*r`) // Don't forget to dereference the var hidden pointer + divq %[divisor] + : "=a" (`*q`), "=d" (`*r`) : "d" (`n_hi`), "a" (`n_lo`), [divisor] "rm" (`d`) - : // no register clobbered besides explicitly used RAX and RDX + : """ func unsafeDiv2n1n*(q, r: var Ct[uint64], n_hi, n_lo, d: Ct[uint64]) {.inline.}= diff --git a/constantine/word_types.nim b/constantine/word_types.nim index 6166155..ea85c6f 100644 --- a/constantine/word_types.nim +++ b/constantine/word_types.nim @@ -126,16 +126,18 @@ func `or`*(x, y: CTBool): CTBool {.magic: "BitorI".} template mux*[T: Ct](ctl: CTBool[T], x, y: T): T = ## Multiplexer / selector - ## Returns x if ctl == 1 + ## Returns x if ctl is true ## else returns y ## So equivalent to ctl? 
x: y y xor (-T(ctl) and (x xor y)) # TODO verify assembly generated - # as mentionned in https://cryptocoding.net/index.php/Coding_rules + # as mentioned in https://cryptocoding.net/index.php/Coding_rules # the alternative `(x and ctl) or (y and -ctl)` # is optimized into a branch by Clang :/ + # TODO: assembly fastpath for conditional mov + func noteq[T: Ct](x, y: T): CTBool[T] = const msb = T.sizeof * 8 - 1 let z = x xor y