constantine/constantine/elliptic/ec_scalar_mul.nim

# Constantine
# Copyright (c) 2018-2019    Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
  ../primitives,
  ../config/[common, curves],
  ../arithmetic,
  ../towers,
  ./ec_weierstrass_projective,
  ./ec_endomorphism_accel

# ############################################################
#                                                            #
#                   Scalar Multiplication                    #
#                                                            #
# ############################################################
#
# Scalar multiplication is a key algorithm for cryptographic protocols:
# - it is slow,
# - it is performance critical as it is used to generate signatures and authenticate messages
# - it is a high-value target as the "scalar" is very often the user secret key
#
# A safe scalar multiplication MUST:
# - Use no branching (to prevent timing and simple power analysis attacks)
# - Always do the same memory accesses (in particular for table lookups) (to prevent cache-timing attacks)
# - Not expose the bitlength of the exponent (use the curve order bitlength instead)
#
# Constantine does not make an extra effort to defend against the smart-cards
# and embedded device attacks:
# - Differential Power-Analysis which may allow for example retrieving bit content depending on the cost of writing 0 or 1
#   (Address-bit DPA by Itoh, Izu and Takenaka)
# - Electro-Magnetic which can be used in a similar way to power analysis but based on EM waves
# - Fault Attacks which can be used by actively introducing faults (via a laser for example) in an algorithm
#
# The current security efforts are focused on preventing attacks
# that are effective remotely including through the network,
# a colocated VM or a malicious process on your phone.
#
# - Survey for Performance & Security Problems of Passive Side-channel Attacks Countermeasures in ECC\
#   Rodrigo Abarúa, Claudio Valencia, and Julio López, 2019\
#   https://eprint.iacr.org/2019/010
#
# - State-of-the-art of secure ECC implementations: a survey on known side-channel attacks and countermeasures\
#   Junfeng Fan, Xu Guo, Elke De Mulder, Patrick Schaumont, Bart Preneel and Ingrid Verbauwhede, 2010\
#   https://www.esat.kuleuven.be/cosic/publications/article-1461.pdf

template checkScalarMulScratchspaceLen(len: int) =
  ## Check that there is a minimum of scratchspace to hold the temporaries.
  ##
  ## Asserts that `len` (the number of scratchspace slots) is at least 2.
  ## The assertion is wrapped in a `debug:` block
  ## (presumably only active in debug builds - confirm against
  ## the definition of the `debug` template).
  debug:
    assert len >= 2, "Internal Error: the scratchspace for scalar multiplication should be equal or greater than 2"

func getWindowLen(bufLen: int): uint =
  ## Compute the maximum window size (in bits) that fits
  ## in a scratchspace buffer of `bufLen` slots.
  ##
  ## A window of `w > 1` bits needs `2^w + 1` slots.
  ## The result is capped at 4 bits and never goes below 1 bit:
  ## a 1-bit window is the fallback used when the buffer
  ## only has room for 2 temporaries.
  checkScalarMulScratchspaceLen(bufLen)
  result = 4
  # Stop at a 1-bit window. Without the lower bound, a 2-slot buffer
  # would yield a 0-bit window: the caller would then write past
  # the end of the buffer and never consume any scalar bit.
  while result > 1 and (1 shl result) + 1 > bufLen:
    dec result

func scalarMulPrologue(
       P: var ECP_SWei_Proj,
       scratchspace: var openarray[ECP_SWei_Proj]
     ): uint =
  ## Prepare the scratchspace for a fixed-window scalar multiplication
  ## and reset `P` to infinity.
  ##
  ## Returns the window size (in bits) that fits in `scratchspace`.
  result = getWindowLen(scratchspace.len)

  # Table layout: scratchspace[1+k] receives [k]P,
  # scratchspace[0] is left untouched.
  if result != 1:
    # General case: scratchspace[2] = [1]P, then each following slot
    # is the previous one plus P, up to slot 2^window.
    scratchspace[2] = P
    let lastSlot = 1 shl result
    for slot in 3 .. lastSlot:
      scratchspace[slot].sum(scratchspace[slot-1], P)
  else:
    # 1-bit window (i.e. the scratchspace only has room
    # for 2 temporaries): only [1]P is stored, in slot 1.
    scratchspace[1] = P

  # Set P to infinity
  P.setInf()

func scalarMulDoubling(
       P: var ECP_SWei_Proj,
       exponent: openArray[byte],
       tmp: var ECP_SWei_Proj,
       window: uint,
       acc, acc_len: var uint,
       e: var int
     ): tuple[k, bits: uint] {.inline.} =
  ## Doubling steps of double-and-add for scalar multiplication.
  ##
  ## Extracts the next k bits of `exponent` and doubles `P` k times.
  ## k is equal to `window`, except when all the exponent bytes
  ## have been consumed and fewer than `window` bits are left in `acc`:
  ## in that case k is the number of leftover bits (`acc_len`).
  ##
  ## Parameters:
  ## - `P`: point doubled in-place
  ## - `exponent`: scalar bytes, consumed from index 0 upward
  ## - `tmp`: temporary that receives each doubling result
  ##   before it is copied back to `P`
  ## - `window`: window size in bits
  ## - `acc`, `acc_len`: bit accumulator and number of bits pending in it
  ## - `e`: index of the next byte of `exponent` to load
  ##
  ## Returns the number of doublings done and the corresponding bits.
  ##
  ## Updates iteration variables and accumulators
  #
  # ⚠️: Extreme care should be used to not leak
  #    the exponent bits nor its real bitlength
  #    i.e. if the exponent is zero but encoded in a
  #    256-bit integer, only "256" should leak
  #    as for most applications like ECDSA or BLS signature schemes
  #    the scalar is the user secret key.

  # Get the next bits
  # acc/acc_len must be uint to avoid Nim runtime checks leaking bits
  # e is public
  var k = window
  if acc_len < window:
    if e < exponent.len:
      # Refill: append the next exponent byte below the pending bits
      acc = (acc shl 8) or exponent[e].uint
      inc e
      acc_len += 8
    else: # Drained all exponent bits
      k = acc_len

  # Take the k most significant pending bits of the accumulator
  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
  acc_len -= k

  # We have k bits and can do k doublings
  for i in 0 ..< k:
    tmp.double(P)
    P = tmp

  return (k, bits)


func scalarMulGeneric*(
       P: var ECP_SWei_Proj,
       scalar: openArray[byte],
       scratchspace: var openArray[ECP_SWei_Proj]
     ) =
  ## Elliptic Curve Scalar Multiplication
  ##
  ##   P <- [k] P
  ##
  ## `scalar` is consumed byte by byte from index 0,
  ## most significant bits first.
  ##
  ## This uses fixed-window optimization if possible
  ## `scratchspace` MUST be of size 2 .. 2^4
  ## Slot 0 is used as a temporary, slot 1 receives the looked-up
  ## table entry and the following slots hold the precomputed multiples of P.
  ##
  ## This is suitable to use with secret `scalar`, in particular
  ## to derive a public key from a private key or
  ## to sign a message.
  ##
  ## Particular care has been given to defend against the following side-channel attacks:
  ## - timing attacks: all exponents of the same length
  ##   will take the same time including
  ##   a "zero" exponent of length 256-bit
  ## - cache-timing attacks: Constantine does use a precomputed table
  ##   but when extracting a value from the table
  ##   the whole table is always accessed with the same pattern
  ##   preventing malicious attacks through CPU cache delay analysis.
  ## - simple power-analysis and electromagnetic attacks: Constantine always does the same
  ##   double and add sequences and those cannot be analyzed to distinguish
  ##   the exponent 0 and 1.
  ##
  ## I.e. As far as the author knows, Constantine implements all countermeasures to the known
  ##      **remote** attacks on ECC implementations.
  ##
  ## Disclaimer:
  ##   Constantine is provided as-is without any guarantees.
  ##   Use at your own risks.
  ##   Thorough evaluation of your threat model, the security of any cryptographic library you are considering,
  ##   and the secrets you put in jeopardy is strongly advised before putting data at risk.
  ##   The author would like to remind users that the best code can only mitigate
  ##   but not protect against human failures which are the weakest links and largest
  ##   backdoors to secrets exploited today.
  ##
  ## Constantine is resistant to
  ## - Fault Injection attacks: Constantine does not have branches that could
  ##   be used to skip some additions and reveal which were dummy and which were real.
  ##   Dummy operations are like the double-and-add-always timing attack countermeasure.
  ##
  ##
  ## Constantine DOES NOT defend against Address-Bit Differential Power Analysis attacks by default,
  ## which allow differentiating between writing a 0 or a 1 to a memory cell.
  ## This is a threat for smart-cards and embedded devices (for example to handle authentication to a cable or satellite service)
  ## Constantine can be extended to use randomized projective coordinates to foil this attack.

  # Fills the table of multiples, sets P to infinity
  # and returns the window size in bits.
  let window = scalarMulPrologue(P, scratchspace)

  # We process bits from most to least significant.
  # At each loop iteration we have acc_len bits in acc.
  # To maintain constant-time the number of iterations
  # or the number of operations or memory accesses should be the same
  # regardless of acc & acc_len
  var
    acc, acc_len: uint
    e = 0
  while acc_len > 0 or e < scalar.len:
    # Doubles P k times, using scratchspace[0] as temporary,
    # and returns the k scalar bits to process in this iteration.
    let (k, bits) = scalarMulDoubling(
      P, scalar, scratchspace[0],
      window, acc, acc_len, e
    )

    # Window lookup: we set scratchspace[1] to the lookup value
    # If the window length is 1 it's already set.
    if window > 1:
      # otherwise we need a constant-time lookup
      # in particular we need the same memory accesses, we can't
      # just index the openarray with the bits to avoid cache attacks.
      for i in 1 ..< 1 shl k:
        # Every candidate entry is visited; only the one
        # whose index matches `bits` is actually copied.
        let ctl = SecretWord(i) == SecretWord(bits)
        scratchspace[1].ccopy(scratchspace[1+i], ctl)

    # Add the looked-up value
    # we need to keep the sum only if the exponent bits are not all zeroes
    scratchspace[0].sum(P, scratchspace[1])
    P.ccopy(scratchspace[0], SecretWord(bits).isNonZero())

func scalarMul*(
       P: var ECP_SWei_Proj,
       scalar: BigInt
     ) {.inline.} =
  ## Elliptic Curve Scalar Multiplication
  ##
  ##   P <- [k] P
  ##
  ## `P` is updated in-place with the result.
  ## `scalar` is the multiplier k.
  # This calls endomorphism accelerated scalar mul if available
  # or the generic scalar mul otherwise
  when ECP_SWei_Proj.F.C in {BN254_Snarks, BLS12_381}:
    # ⚠️ This requires the cofactor to be cleared
    scalarMulGLV(P, scalar)
  else:
    var
      scratchSpace: array[1 shl 4, ECP_SWei_Proj]
      scalarCanonicalBE: array[(scalar.bits+7)div 8, byte] # canonical big endian representation
    scalarCanonicalBE.exportRawUint(scalar, bigEndian)   # Export is constant-time
    # The generic implementation takes the scalar as big-endian bytes
    # followed by the scratchspace used for the window table.
    P.scalarMulGeneric(scalarCanonicalBE, scratchSpace)