constantine/constantine/math/extension_fields/towers.nim

# Constantine
# Copyright (c) 2018-2019    Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
  ../../platforms/abstractions,
  ../config/curves,
  ../arithmetic,
  ../io/io_fields

export Fp

when UseASM_X86_64:
  import
    ./assembly/fp2_asm_x86_adx_bmi2

# Note: to avoid burdening the Nim compiler, we rely on generic extension
# to complain if the base field procedures don't exist

# No exceptions allowed
{.push raises: [].}

# ############################################################
#                                                            #
#                    Extension Fields                        #
#                                                            #
# ############################################################
{.push inline.}

type
  NonResidue* = object
    ## Non-Residue
    ##
    ## Placeholder for the appropriate quadratic, cubic or sectic non-residue

  QuadraticExt*[F] = object
    ## Quadratic Extension field
    coords*: array[2, F]

  CubicExt*[F] = object
    ## Cubic Extension field
    coords*: array[3, F]

  ExtensionField*[F] = QuadraticExt[F] or CubicExt[F]

  Fp2*[C: static Curve] =
    QuadraticExt[Fp[C]]

  Fp4*[C: static Curve] =
    QuadraticExt[Fp2[C]]

  Fp6*[C: static Curve] =
    CubicExt[Fp2[C]]

  Fp12*[C: static Curve] =
    CubicExt[Fp4[C]]
    # QuadraticExt[Fp6[C]]

template c0*(a: ExtensionField): auto =
  a.coords[0]
template c1*(a: ExtensionField): auto =
  a.coords[1]
template c2*(a: CubicExt): auto =
  a.coords[2]

template `c0=`*(a: var ExtensionField, v: auto) =
  a.coords[0] = v
template `c1=`*(a: var ExtensionField, v: auto) =
  a.coords[1] = v
template `c2=`*(a: var CubicExt, v: auto) =
  a.coords[2] = v

template C*(E: type ExtensionField): Curve =
  E.F.C

template fieldMod*(E: type ExtensionField): auto =
  Mod(E.F.C)

# Initialization
# -------------------------------------------------------------------

func setZero*(a: var ExtensionField) =
  ## Set ``a`` to 0 in the extension field
  staticFor i, 0, a.coords.len:
    a.coords[i].setZero()

func setOne*(a: var ExtensionField) =
  ## Set ``a`` to 1 in the extension field
  a.coords[0].setOne()
  staticFor i, 1, a.coords.len:
    a.coords[i].setZero()

func fromBig*(a: var ExtensionField, src: BigInt) =
  ## Set ``a`` to the bigint value in the extension field
  a.coords[0].fromBig(src)
  staticFor i, 1, a.coords.len:
    a.coords[i].setZero()

# Comparison
# -------------------------------------------------------------------

func `==`*(a, b: ExtensionField): SecretBool =
  ## Constant-time equality check
  result = CtTrue
  staticFor i, 0, a.coords.len:
    result = result and (a.coords[i] == b.coords[i])

func isZero*(a: ExtensionField): SecretBool =
  ## Constant-time check if zero
  result = CtTrue
  staticFor i, 0, a.coords.len:
    result = result and a.coords[i].isZero()

func isOne*(a: ExtensionField): SecretBool =
  ## Constant-time check if one
  result = CtTrue
  result = result and a.coords[0].isOne()
  staticFor i, 1, a.coords.len:
    result = result and a.coords[i].isZero()

func isMinusOne*(a: ExtensionField): SecretBool =
  ## Constant-time check if -1
  result = CtTrue
  result = result and a.coords[0].isMinusOne()
  staticFor i, 1, a.coords.len:
    result = result and a.coords[i].isZero()

# Copies
# -------------------------------------------------------------------

func ccopy*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time conditional copy
  ## If ctl is true: b is copied into a
  ## if ctl is false: b is not copied and a is unmodified
  ## Time and memory accesses are the same whether a copy occurs or not
  staticFor i, 0, a.coords.len:
    a.coords[i].ccopy(b.coords[i], ctl)

# Abelian group
# -------------------------------------------------------------------

func neg*(r: var ExtensionField, a: ExtensionField) =
  ## Field out-of-place negation
  staticFor i, 0, a.coords.len:
    r.coords[i].neg(a.coords[i])

func neg*(a: var ExtensionField) =
  ## Field in-place negation
  staticFor i, 0, a.coords.len:
    a.coords[i].neg()

func `+=`*(a: var ExtensionField, b: ExtensionField) =
  ## Addition in the extension field
  staticFor i, 0, a.coords.len:
    a.coords[i] += b.coords[i]

func `-=`*(a: var ExtensionField, b: ExtensionField) =
  ## Substraction in the extension field
  staticFor i, 0, a.coords.len:
    a.coords[i] -= b.coords[i]

func double*(r: var ExtensionField, a: ExtensionField) =
  ## Field out-of-place doubling
  staticFor i, 0, a.coords.len:
    r.coords[i].double(a.coords[i])

func double*(a: var ExtensionField) =
  ## Field in-place doubling
  staticFor i, 0, a.coords.len:
    a.coords[i].double()

func div2*(a: var ExtensionField) =
  ## Field in-place division by 2
  staticFor i, 0, a.coords.len:
    a.coords[i].div2()

func sum*(r: var ExtensionField, a, b: ExtensionField) =
  ## Sum ``a`` and ``b`` into ``r``
  staticFor i, 0, a.coords.len:
    r.coords[i].sum(a.coords[i], b.coords[i])

func diff*(r: var ExtensionField, a, b: ExtensionField) =
  ## Diff ``a`` and ``b`` into ``r``
  staticFor i, 0, a.coords.len:
    r.coords[i].diff(a.coords[i], b.coords[i])

func conj*(a: var QuadraticExt) =
  ## Computes the conjugate in-place
  a.c1.neg()

func conj*(r: var QuadraticExt, a: QuadraticExt) =
  ## Computes the conjugate out-of-place
  r.c0 = a.c0
  r.c1.neg(a.c1)

func conjneg*(a: var QuadraticExt) =
  ## Computes the negated conjugate in-place
  a.c0.neg()

func conjneg*(r: var QuadraticExt, a: QuadraticExt) =
  ## Computes the negated conjugate out-of-place
  r.c0.neg(a.c0)
  r.c1 = a.c1

func conj*(a: var CubicExt) =
  ## Computes the conjugate in-place
  a.c0.conj()
  a.c1.conjneg()
  a.c2.conj()

func conj*(r: var CubicExt, a: CubicExt) =
  ## Computes the conjugate out-of-place
  r.c0.conj(a.c0)
  r.c1.conjneg(a.c1)
  r.c2.conj(a.c2)

# Conditional arithmetic
# -------------------------------------------------------------------

func csetZero*(a: var ExtensionField, ctl: SecretBool) =
  ## Set ``a`` to 0 if ``ctl`` is true
  staticFor i, 0, a.coords.len:
    a.coords[i].csetZero(ctl)

func csetOne*(a: var ExtensionField, ctl: SecretBool) =
  ## Set ``a`` to 1 if ``ctl`` is true
  a.coords[0].csetOne(ctl)
  staticFor i, 1, a.coords.len:
    a.coords[i].csetZero(ctl)

func cneg*(a: var ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional negation
  ## Only negate if ctl is true
  staticFor i, 0, a.coords.len:
    a.coords[i].cneg(ctl)

func cadd*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional addition
  staticFor i, 0, a.coords.len:
    a.coords[i].cadd(b.coords[i], ctl)

func csub*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional substraction
  staticFor i, 0, a.coords.len:
    a.coords[i].csub(b.coords[i], ctl)

# Multiplication by a small integer known at compile-time
# -------------------------------------------------------------------

func `*=`*(a: var ExtensionField, b: static int) =
  ## Multiplication by a small integer known at compile-time
  for i in 0 ..< a.coords.len:
    a.coords[i] *= b

func prod*(r: var ExtensionField, a: ExtensionField, b: static int) =
  ## Multiplication by a small integer known at compile-time
  r = a
  r *= b

# Multiplication by an Fp element
# -------------------------------------------------------------------

func `*=`*(a: var ExtensionField, b: Fp) =
  ## Multiply an element of Fp2 by an element of Fp
  for i in 0 ..< a.coords.len:
    a.coords[i] *= b

{.pop.} # inline

# ############################################################
#                                                            #
#              Lazy reduced extension fields                 #
#                                                            #
# ############################################################
{.push inline.}

type
  QuadraticExt2x*[F] = object
    ## Quadratic Extension field for lazy reduced fields
    coords*: array[2, F]

  CubicExt2x*[F] = object
    ## Cubic Extension field for lazy reduced fields
    coords*: array[3, F]

  ExtensionField2x*[F] = QuadraticExt2x[F] or CubicExt2x[F]

template doublePrec*(T: type ExtensionField): type =
  # For now naive unrolling, recursive template don't match
  # and I don't want to deal with types in macros
  when T is QuadraticExt:
    when T.F is QuadraticExt: # Fp4Dbl
      QuadraticExt2x[QuadraticExt2x[doublePrec(T.F.F)]]
    elif T.F is CubicExt:
      when T.F.F is QuadraticExt: # Fp12 over Fp6 over Fp2
        QuadraticExt2x[CubicExt2x[QuadraticExt2x[doublePrec(T.F.F.F)]]]
    elif T.F is Fp:           # Fp2Dbl
      QuadraticExt2x[doublePrec(T.F)]
  elif T is CubicExt:
    when T.F is QuadraticExt: #
      when T.F.F is QuadraticExt: # Fp12 over Fp4 over Fp2
        CubicExt2x[QuadraticExt2x[QuadraticExt2x[doublePrec(T.F.F.F)]]]
      elif T.F.F is Fp: # Fp6
        CubicExt2x[QuadraticExt2x[doublePrec(T.F.F)]]

func has1extraBit*(F: type Fp): bool =
  ## We construct extensions only on Fp (and not Fr)
  getSpareBits(F) >= 1

func has2extraBits*(F: type Fp): bool =
  ## We construct extensions only on Fp (and not Fr)
  getSpareBits(F) >= 2

func has1extraBit*(E: type ExtensionField): bool =
  ## We construct extensions only on Fp (and not Fr)
  getSpareBits(Fp[E.F.C]) >= 1

func has2extraBits*(E: type ExtensionField): bool =
  ## We construct extensions only on Fp (and not Fr)
  getSpareBits(Fp[E.F.C]) >= 2

template C(E: type ExtensionField2x): Curve =
  E.F.C

template c0*(a: ExtensionField2x): auto =
  a.coords[0]
template c1*(a: ExtensionField2x): auto =
  a.coords[1]
template c2*(a: CubicExt2x): auto =
  a.coords[2]

template `c0=`*(a: var ExtensionField2x, v: auto) =
  a.coords[0] = v
template `c1=`*(a: var ExtensionField2x, v: auto) =
  a.coords[1] = v
template `c2=`*(a: var CubicExt2x, v: auto) =
  a.coords[2] = v

# Initialization
# -------------------------------------------------------------------

func setZero*(a: var ExtensionField2x) =
  ## Set ``a`` to 0 in the extension field
  staticFor i, 0, a.coords.len:
    a.coords[i].setZero()

# Abelian group
# -------------------------------------------------------------------

func sumUnr*(r: var ExtensionField, a, b: ExtensionField) =
  ## Sum ``a`` and ``b`` into ``r``
  staticFor i, 0, a.coords.len:
    r.coords[i].sumUnr(a.coords[i], b.coords[i])

func diff2xUnr*(r: var ExtensionField2x, a, b: ExtensionField2x) =
  ## Double-precision substraction without reduction
  staticFor i, 0, a.coords.len:
    r.coords[i].diff2xUnr(a.coords[i], b.coords[i])

func diff2xMod*(r: var ExtensionField2x, a, b: ExtensionField2x) =
  ## Double-precision modular substraction
  staticFor i, 0, a.coords.len:
    r.coords[i].diff2xMod(a.coords[i], b.coords[i])

func sum2xUnr*(r: var ExtensionField2x, a, b: ExtensionField2x) =
  ## Double-precision addition without reduction
  staticFor i, 0, a.coords.len:
    r.coords[i].sum2xUnr(a.coords[i], b.coords[i])

func sum2xMod*(r: var ExtensionField2x, a, b: ExtensionField2x) =
  ## Double-precision modular addition
  staticFor i, 0, a.coords.len:
    r.coords[i].sum2xMod(a.coords[i], b.coords[i])

func neg2xMod*(r: var ExtensionField2x, a: ExtensionField2x) =
  ## Double-precision modular negation
  staticFor i, 0, a.coords.len:
    r.coords[i].neg2xMod(a.coords[i], b.coords[i])

# Reductions
# -------------------------------------------------------------------

func redc2x*(r: var ExtensionField, a: ExtensionField2x) =
  ## Reduction
  staticFor i, 0, a.coords.len:
    r.coords[i].redc2x(a.coords[i])

# Multiplication by a small integer known at compile-time
# -------------------------------------------------------------------

func prod2x*(r: var ExtensionField2x, a: ExtensionField2x, b: static int) =
  ## Multiplication by a small integer known at compile-time
  staticFor i, 0, a.coords.len:
    r.coords[i].prod2x(a.coords[i], b)

{.pop.} # inline

# ############################################################
#                                                            #
#                        Non-Residues                        #
#                                                            #
# ############################################################
{.push inline.}

# 𝔽p
# ----------------------------------------------------------------

func `*=`*(a: var Fp, _: type NonResidue) =
  ## Multiply an element of 𝔽p by the quadratic non-residue
  ## chosen to construct 𝔽p2
  static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension"
  a *= Fp.C.getNonResidueFp()

func prod*(r: var Fp, a: Fp, _: type NonResidue) =
  ## Multiply an element of 𝔽p by the quadratic non-residue
  ## chosen to construct 𝔽p2
  static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension"
  r.prod(a, Fp.C.getNonResidueFp())

func prod2x(r: var FpDbl, a: FpDbl, _: type NonResidue) =
  ## Multiply an element of 𝔽p by the quadratic non-residue
  ## chosen to construct 𝔽p2
  static: doAssert FpDbl.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension"
  r.prod2x(a, FpDbl.C.getNonResidueFp())

# 𝔽p2
# ----------------------------------------------------------------

template fromComplexExtension*[F](elem: QuadraticExt[F]): static bool =
  ## Returns true if the input is a complex extension
  ## i.e. the irreducible polynomial chosen is
  ##   x² - µ with µ = -1
  ## and so 𝔽p2 = 𝔽p[x] / x² - µ = 𝔽p[𝑖]
  when F is Fp and F.C.getNonResidueFp() == -1:
    true
  else:
    false

func prod*[C: static Curve](
       r: var Fp2[C],
       a: Fp2[C],
       _: type NonResidue) =
  ## Multiply an element of 𝔽p2 by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p4
  ## - if cubic non-residue: 𝔽p6
  ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
  # Yet another const tuple unpacking bug
  const u = C.getNonResidueFp2()[0]
  const v = C.getNonResidueFp2()[1]
  const Beta {.used.} = C.getNonResidueFp()
  # ξ = u + v x
  # and x² = β
  #
  # (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x²
  #                       => u c0 + β v c1 + (v c0 + u c1) x
  when a.fromComplexExtension() and u == 1 and v == 1:
    let t {.noInit.} = a.c0
    r.c0.diff(t, a.c1)
    r.c1.sum(t, a.c1)
  else:
    # Case:
    # - BN254_Snarks, QNR_Fp: -1, SNR_Fp2: 9+1𝑖  (𝑖 = √-1)
    # - BLS12_377, QNR_Fp: -5, SNR_Fp2: 0+1j    (j = √-5)
    # - BW6_761, SNR_Fp: -4, CNR_Fp2: 0+1j      (j = √-4)
    when u == 0:
      # BLS12_377 and BW6_761, use small addition chain
      r.mul_sparse_by_0y(a, v)
    else:
      # BN254_Snarks, u = 9, v = 1, β = -1
      # Even with u = 9, the 2x9 addition chains (8 additions total)
      # are cheaper than full Fp2 multiplication
      var t {.noInit.}: typeof(a.c0)

      t.prod(a.c0, u)
      when v == 1 and Beta == -1: # Case BN254_Snarks
        t -= a.c1                 # r0 = u c0 + β v c1
      else:
        {.error: "Unimplemented".}

      r.c1.prod(a.c1, u)
      when v == 1:                # r1 = v c0 + u c1
        r.c1 += a.c0
        # aliasing: a.c0 is unused
        r.c0 = t
      else:
        {.error: "Unimplemented".}

func `*=`*[C: static Curve](a: var Fp2[C], _: type NonResidue) =
  ## Multiply an element of 𝔽p2 by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p4
  ## - if cubic non-residue: 𝔽p6
  ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
  a.prod(a, NonResidue)

func prod2x*[C: static Curve](
       r: var QuadraticExt2x[FpDbl[C]],
       a: QuadraticExt2x[FpDbl[C]],
       _: type NonResidue) =
  ## Multiplication by non-residue
  const complex = C.getNonResidueFp() == -1
  const U = C.getNonResidueFp2()[0]
  const V = C.getNonResidueFp2()[1]
  const Beta {.used.} = C.getNonResidueFp()

  when complex and U == 1 and V == 1:
    let a1 {.noInit.} = a.c1
    r.c1.sum2xMod(a.c0, a1)
    r.c0.diff2xMod(a.c0, a1)
  else:
    # Case:
    # - BN254_Snarks, QNR_Fp: -1, SNR_Fp2: 9+1𝑖  (𝑖 = √-1)
    # - BLS12_377, QNR_Fp: -5, SNR_Fp2: 0+1j    (j = √-5)
    # - BW6_761, SNR_Fp: -4, CNR_Fp2: 0+1j      (j = √-4)
    when U == 0:
      # mul_sparse_by_0v
      # r0 = β a1 v
      # r1 = a0 v
      var t {.noInit.}: FpDbl[C]
      t.prod2x(a.c1, V)
      r.c1.prod2x(a.c0, V)
      r.c0.prod2x(t, NonResidue)
    else:
      # ξ = u + v x
      # and x² = β
      #
      # (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x²
      #                       => u c0 + β v c1 + (v c0 + u c1) x
      var t {.noInit.}: FpDbl[C]

      t.prod2x(a.c0, U)
      when V == 1 and Beta == -1:  # Case BN254_Snarks
        t.diff2xMod(t, a.c1)       # r0 = u c0 + β v c1
      else:
        {.error: "Unimplemented".}


      r.c1.prod2x(a.c1, U)
      when V == 1:                 # r1 = v c0 + u c1
        r.c1.sum2xMod(r.c1, a.c0)
        # aliasing: a.c0 is unused
        `=`(r.c0, t) # "r.c0 = t", is refused by the compiler.
      else:
        {.error: "Unimplemented".}

func `/=`*[C: static Curve](a: var Fp2[C], _: type NonResidue) =
  ## Divide an element of 𝔽p by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p4
  ## - if cubic non-residue: 𝔽p6
  ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12

  # Yet another const tuple unpacking bug
  const u = C.getNonresidueFp2()[0] # Sextic non-residue to construct 𝔽p12
  const v = C.getNonresidueFp2()[1]
  const Beta = C.getNonResidueFp()  # Quadratic non-residue to construct 𝔽p2
  # ξ = u + v x
  # and x² = β
  #
  # (c0 + c1 x) / (u + v x) => (c0 + c1 x)(u - v x) / ((u + vx)(u-vx))
  #                         => u c0 - v c1 x² + (u c1 - v c0) x / (u² - x²v²)
  #                         => 1/(u² - βv²) * (uc0 - β v c1, u c1 - v c0)
  # With β = 𝑖 = √-1
  #   1/(u² + v²) * (u c0 + v c1, u c1 - v c0)
  #
  # With β = 𝑖 = √-1 and ξ = 1 + 𝑖
  #   1/2 * (c0 + c1, c1 - c0)

  when a.fromComplexExtension() and u == 1 and v == 1:
    let t {.noInit.} = a.c0
    a.c0 += a.c1
    a.c1 -= t
    a.div2()
  else:
    var a0 {.noInit.} = a.c0
    let a1 {.noInit.} = a.c1
    const u2v2 = u*u - Beta*v*v # (u² - βv²)
    # TODO can be precomputed to avoid costly inversion.
    var u2v2inv {.noInit.}: a.c0.typeof
    u2v2inv.fromUint(u2v2)
    u2v2inv.inv()

    a.c0 *= u
    a.c1 *= Beta * v
    a.c0 -= a.c1

    a.c1 = a1
    a.c1 *= u
    a0 *= v
    a.c1 -= a0
    a.c0 *= u2v2inv
    a.c1 *= u2v2inv

# Quadratic extensions
# ----------------------------------------------------------------

func prod*(r: var QuadraticExt, a: QuadraticExt, _: type NonResidue) =
  ## Multiplication by non-residue.
  ##
  ## For example is a is an element of 𝔽p4
  ## it is multiplied by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p8
  ## - if cubic non-residue: 𝔽p12
  ## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24
  ##
  ## Assumes that the non-residue is sqrt(lower extension non-residue)
  let t {.noInit.} = a.c0
  r.c0.prod(a.c1, NonResidue)
  r.c1 = t

func `*=`*(a: var QuadraticExt, _: type NonResidue) =
  ## Multiplication by non-residue
  ##
  ## For example is `a` is an element of 𝔽p4
  ## it is multiplied by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p8
  ## - if cubic non-residue: 𝔽p12
  ## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24
  ##
  ## Assumes that the non-residue is sqrt(lower extension non-residue)
  a.prod(a, NonResidue)

func prod2x*(
       r: var QuadraticExt2x,
       a: QuadraticExt2x,
       _: type NonResidue) =
  ## Multiplication by non-residue
  static: doAssert not(r.c0 is FpDbl), "Wrong dispatch, there is a specific non-residue multiplication for the base extension."
  let t {.noInit.} = a.c0
  r.c0.prod2x(a.c1, NonResidue)
  `=`(r.c1, t) # "r.c1 = t", is refused by the compiler.

# Cubic extensions
# ----------------------------------------------------------------

func prod*(r: var CubicExt, a: CubicExt, _: type NonResidue) =
  ## Multiplication by non-residue.
  ##
  ## For example is `a` is an element of 𝔽p6
  ## it is multiplied by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p12
  ## - if cubic non-residue: 𝔽p18
  ## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36
  ##
  ## Assumes that the non-residue is cube_root(lower extension non-residue)
  ##
  ## For all curves γ = v with v the factor for the cubic extension coordinate
  ## and v³ = ξ
  ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
  let t {.noInit.} = a.c2
  r.c1 = a.c0
  r.c2 = a.c1
  r.c0.prod(t, NonResidue)

func `*=`*(a: var CubicExt, _: type NonResidue) {.inline.} =
  ## Multiply an element of 𝔽p6 by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p12
  ## - if cubic non-residue: 𝔽p18
  ## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36
  ##
  ## Assumes that it is cube_root(NonResidue_Fp2)
  ##
  ## For all curves γ = v with v the factor for the cubic extension coordinate
  ## and v³ = ξ
  ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
  a.prod(a, NonResidue)

func prod2x*(
       r: var CubicExt2x,
       a: CubicExt2x,
       _: type NonResidue) {.inline.} =
  ## Multiplication by non-residue
  ## For all curves γ = v with v the factor for cubic extension coordinate
  ## and v³ = ξ
  ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
  let t {.noInit.} = a.c2
  r.c1 = a.c0
  r.c2 = a.c1
  r.c0.prod2x(t, NonResidue)

{.pop.} # inline

# ############################################################
#                                                            #
#                     Cost functions                         #
#                                                            #
# ############################################################

func prefer_3sqr_over_2mul(F: type ExtensionField): bool {.compileTime.} =
  ## Returns true
  ## if time(3sqr) < time(2mul) in the extension fields

  let a = default(F)
  # No shortcut in the VM
  when a.c0 is ExtensionField:
    when a.c0.c0 is ExtensionField:
      return true
    else: return false
  else: return false

func has_large_NR_norm(C: static Curve): bool =
  ## Returns true if the non-residue of the extension fields
  ## has a large norm

  const j = C.getNonResidueFp()
  const u = C.getNonResidueFp2()[0]
  const v = C.getNonResidueFp2()[1]

  const norm2 = u*u + (j*v)*(j*v)

  # Compute integer square root
  var norm = 0
  while (norm+1) * (norm+1) <= norm2:
    norm += 1

  return norm > 5

func has_large_field_elem*(C: static Curve): bool =
  ## Returns true if field element are large
  ## and necessitate custom routine for assembly in particular
  let a = default(Fp[C])
  return a.mres.limbs.len > 6

# ############################################################
#                                                            #
#                  Quadratic extensions                      #
#                                                            #
# ############################################################

# Forward declarations
# ----------------------------------------------------------------------

func prod2x*(r: var QuadraticExt2x, a, b: QuadraticExt)
func square2x*(r: var QuadraticExt2x, a: QuadraticExt)

# Complex squarings
# ----------------------------------------------------------------------

func square_complex(r: var Fp2, a: Fp2) =
  ## Return a² in 𝔽p2 = 𝔽p[𝑖] in ``r``
  ## ``r`` is initialized/overwritten
  ##
  ## Requires a complex extension field
  # (c0, c1)² => (c0 + c1𝑖)²
  #           => c0² + 2 c0 c1𝑖 + (c1𝑖)²
  #           => c0²-c1² + 2 c0 c1𝑖
  #           => (c0²-c1², 2 c0 c1)
  #           or
  #           => ((c0-c1)(c0+c1), 2 c0 c1)
  #           => ((c0-c1)(c0-c1 + 2 c1), c0 * 2 c1)
  #
  # Costs (naive implementation)
  # - 1 Multiplication 𝔽p
  # - 2 Squarings 𝔽p
  # - 1 Doubling 𝔽p
  # - 1 Substraction 𝔽p
  # Stack: 4 * ModulusBitSize (4x 𝔽p element)
  #
  # Or (with 1 less Mul/Squaring at the cost of 1 addition and extra 2 𝔽p stack space)
  #
  # - 2 Multiplications 𝔽p
  # - 1 Addition 𝔽p
  # - 1 Doubling 𝔽p
  # - 1 Substraction 𝔽p
  #
  # To handle aliasing between r and a, we need
  # r to be used only when a is unneeded
  # so we can't use r fields as a temporary
  static: doAssert r.fromComplexExtension()

  var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
  v0.diff(a.c0, a.c1)    # v0 = c0 - c1               [1 Sub]
  v1.sum(a.c0, a.c1)     # v1 = c0 + c1               [1 Dbl, 1 Sub]
  r.c1.prod(a.c0, a.c1)  # r.c1 = c0 c1               [1 Mul, 1 Dbl, 1 Sub]
  # aliasing: a unneeded now
  r.c1.double()          # r.c1 = 2 c0 c1             [1 Mul, 2 Dbl, 1 Sub]
  r.c0.prod(v0, v1)      # r.c0 = (c0 + c1)(c0 - c1)  [2 Mul, 2 Dbl, 1 Sub]

func square2x_complex(r: var QuadraticExt2x, a: Fp2) =
  ## Double-precision unreduced complex squaring
  static: doAssert a.fromComplexExtension()

  var t0 {.noInit.}, t1 {.noInit.}: typeof(a.c0)

  when Fp2.has1extraBit():
    t0.sumUnr(a.c1, a.c1)
    t1.sumUnr(a.c0, a.c1)
  else:
    t0.double(a.c1)
    t1.sum(a.c0, a.c1)

  r.c1.prod2x(t0, a.c0)     # r1 = 2a0a1
  t0.diff(a.c0, a.c1)
  r.c0.prod2x(t0, t1)       # r0 = (a0 + a1)(a0 - a1)

# Complex multiplications
# ----------------------------------------------------------------------

func prod_complex(r: var Fp2, a, b: Fp2) =
  ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
  ## ``r`` is initialized/overwritten
  ##
  ## Requires a complex extension field
  # (a0, a1) (b0, b1) => (a0 + a1𝑖) (b0 + b1𝑖)
  #                   => (a0 b0 - a1 b1) + (a0 b1 + a1 b0) 𝑖
  #
  # In Fp, multiplication has cost O(n²) with n the number of limbs
  # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
  # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
  #
  # Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words)
  # where addition/substraction are significantly cheaper than multiplication
  #
  # So we always reframe the imaginary part using Karatsuba approach to save a multiplication
  # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
  #
  # Costs (naive implementation)
  # - 4 Multiplications 𝔽p
  # - 1 Addition 𝔽p
  # - 1 Substraction 𝔽p
  # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries)
  #
  # Costs (Karatsuba)
  # - 3 Multiplications 𝔽p
  # - 3 Substraction 𝔽p (2 are fused)
  # - 2 Addition 𝔽p
  # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
  static: doAssert r.fromComplexExtension()

  var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0)
  a0b0.prod(a.c0, b.c0)                                         # [1 Mul]
  a1b1.prod(a.c1, b.c1)                                         # [2 Mul]

  r.c0.sum(a.c0, a.c1)  # r0 = (a0 + a1)                        # [2 Mul, 1 Add]
  r.c1.sum(b.c0, b.c1)  # r1 = (b0 + b1)                        # [2 Mul, 2 Add]
  # aliasing: a and b unneeded now
  r.c1 *= r.c0          # r1 = (b0 + b1)(a0 + a1)               # [3 Mul, 2 Add] - 𝔽p temporary

  r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1                    # [3 Mul, 2 Add, 1 Sub]
  r.c1 -= a0b0          # r1 = (b0 + b1)(a0 + a1) - a0b0        # [3 Mul, 2 Add, 2 Sub]
  r.c1 -= a1b1          # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]

func prod2x_complex(r: var QuadraticExt2x, a, b: Fp2) =
  ## Double-precision unreduced complex multiplication
  # r and a or b cannot alias
  static: doAssert a.fromComplexExtension()

  var D {.noInit.}: typeof(r.c0)
  var t0 {.noInit.}, t1 {.noInit.}: typeof(a.c0)

  r.c0.prod2x(a.c0, b.c0)        # r0 = a0 b0
  D.prod2x(a.c1, b.c1)           # d =  a1 b1
  when Fp2.has1extraBit():
    t0.sumUnr(a.c0, a.c1)
    t1.sumUnr(b.c0, b.c1)
  else:
    t0.sum(a.c0, a.c1)
    t1.sum(b.c0, b.c1)
  r.c1.prod2x(t0, t1)            # r1 = (b0 + b1)(a0 + a1)
  when Fp2.has1extraBit():
    r.c1.diff2xUnr(r.c1, r.c0)   # r1 = (b0 + b1)(a0 + a1) - a0 b0
    r.c1.diff2xUnr(r.c1, D)      # r1 = (b0 + b1)(a0 + a1) - a0 b0 - a1b1
  else:
    r.c1.diff2xMod(r.c1, r.c0)
    r.c1.diff2xMod(r.c1, D)
  r.c0.diff2xMod(r.c0, D)        # r0 = a0 b0 - a1 b1

# Generic squarings
# ----------------------------------------------------------------------

func square_generic(r: var QuadraticExt, a: QuadraticExt) =
  ## Return a² in ``r``
  ## ``r`` is initialized/overwritten
  # Algorithm (with β the non-residue in the base field)
  #
  # (c0, c1)² => (c0 + c1 w)²
  #           => c0² + 2 c0 c1 w + c1²w²
  #           => c0² + β c1² + 2 c0 c1 w
  #           => (c0² + β c1², 2 c0 c1)
  # We have 2 squarings and 1 multiplication in the base field
  # which are significantly more costly than additions.
  # For example when construction 𝔽p12 from 𝔽p6:
  # - 4 limbs like BN254:     multiplication is 20x slower than addition/substraction
  # - 6 limbs like BLS12-381: multiplication is 28x slower than addition/substraction
  #
  # We can save operations with one of the following expressions
  # of c0² + β c1² and noticing that c0c1 is already computed for the "y" coordinate
  #
  # Alternative 1:
  #   c0² + β c1² <=> (c0 - c1)(c0 - β c1) + β c0c1 + c0c1
  #
  # Alternative 2:
  #   c0² + β c1² <=> (c0 + c1)(c0 + β c1) - β c0c1 - c0c1
  #
  # This gives us 2 Mul and 2 mul-nonresidue (which is costly for BN254_Snarks)
  #
  # We can also reframe the 2nd term with only squarings
  # which might be significantly faster on higher tower degrees
  #
  #   2 c0 c1 <=> (a0 + a1)² - a0² - a1²
  #
  # This gives us 3 Sqr and 1 Mul-non-residue

  when QuadraticExt.prefer_3sqr_over_2mul() or
       # Other path multiplies twice by non-residue
       QuadraticExt.C.has_large_NR_norm():
    var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
    v0.square(a.c0)
    v1.square(a.c1)

    # Aliasing: a unneeded now
    r.c1.sum(a.c0, a.c1)

    # r0 = c0² + β c1²
    r.c0.prod(v1, NonResidue)
    r.c0 += v0

    # r1 = (a0 + a1)² - a0² - a1²
    r.c1.square()
    r.c1 -= v0
    r.c1 -= v1

  else:
    var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)

    # v1 <- (c0 + β c1)
    v1.prod(a.c1, NonResidue)
    v1 += a.c0

    # v0 <- (c0 + c1)(c0 + β c1)
    v0.sum(a.c0, a.c1)
    v0 *= v1

    # v1 <- c0 c1
    v1.prod(a.c0, a.c1)

    # aliasing: a unneeded now

    # r0 = (c0 + c1)(c0 + β c1) - c0c1
    v0 -= v1

    # r1 = 2 c0c1
    r.c1.double(v1)

    # r0 = (c0 + c1)(c0 + β c1) - c0c1 - β c0c1
    v1 *= NonResidue
    r.c0.diff(v0, v1)

func square2x_disjoint*[Fdbl, F](
       r: var QuadraticExt2x[FDbl],
       a0, a1: F) =
  ## Return (a0, a1)² in r
  var V0 {.noInit.}, V1 {.noInit.}: typeof(r.c0) # Double-precision
  var t {.noInit.}: F # Single-width

  # square2x is faster than prod2x even on Fp
  # hence we always use 3 squarings over 2 products
  V0.square2x(a0)
  V1.square2x(a1)
  t.sum(a0, a1)

  # r0 = a0² + β a1² (option 1) <=> (a0 + a1)(a0 + β a1) - β a0a1 - a0a1 (option 2)
  r.c0.prod2x(V1, NonResidue)
  r.c0.sum2xMod(r.c0, V0)

  # r1 = 2 a0 a1 (option 1) = (a0 + a1)² - a0² - a1² (option 2)
  r.c1.square2x(t)
  r.c1.diff2xMod(r.c1, V0)
  r.c1.diff2xMod(r.c1, V1)

# Generic multiplications
# ----------------------------------------------------------------------

func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Returns r = a * b
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: typeof(r.c0)

  # v2 <- (a0 + a1)(b0 + b1)
  v0.sum(a.c0, a.c1)
  v1.sum(b.c0, b.c1)
  v2.prod(v0, v1)

  # v0 <- a0 b0
  # v1 <- a1 b1
  v0.prod(a.c0, b.c0)
  v1.prod(a.c1, b.c1)

  # aliasing: a and b unneeded now

  # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
  r.c1.diff(v2, v1)
  r.c1 -= v0

  # r0 <- a0 b0 + β a1 b1
  v1 *= NonResidue
  r.c0.sum(v0, v1)

func prod2x_disjoint*[Fdbl, F](
       r: var QuadraticExt2x[FDbl],
       a0, a1: F,
       b0, b1: F) =
  ## Return a * (b0, b1) in r
  static: doAssert Fdbl is doublePrec(F)

  var V0 {.noInit.}, V1 {.noInit.}: typeof(r.c0) # Double-precision
  var t0 {.noInit.}, t1 {.noInit.}: typeof(a0)   # Single-width

  # Require 2 extra bits
  V0.prod2x(a0, b0)             # v0 = a0b0
  V1.prod2x(a1, b1)             # v1 = a1b1
  when F.has1extraBit():
    t0.sumUnr(a0, a1)
    t1.sumUnr(b0, b1)
  else:
    t0.sum(a0, a1)
    t1.sum(b0, b1)

  r.c1.prod2x(t0, t1)           # r1 = (a0 + a1)(b0 + b1)
  r.c1.diff2xMod(r.c1, V0)      # r1 = (a0 + a1)(b0 + b1) - a0b0
  r.c1.diff2xMod(r.c1, V1)      # r1 = (a0 + a1)(b0 + b1) - a0b0 - a1b1

  r.c0.prod2x(V1, NonResidue)   # r0 = β a1 b1
  r.c0.sum2xMod(r.c0, V0)       # r0 = a0 b0 + β a1 b1

# Sparse complex multiplication
# -------------------------------------------------------------------

func mul_sparse_complex_by_0y(
       r: var QuadraticExt, a: QuadraticExt,
       sparseB: auto) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a complex quadratic extension field 𝔽p2 = 𝔽p[𝑖]
  #
  # r0 = a0 b0 - a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = - a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  static: doAssert r.fromComplexExtension()

  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c1
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}

  var t{.noInit.}: typeof(a.c0)
  t.prod(a.c1, b)
  r.c1.prod(a.c0, b)
  r.c0.neg(t)

# Sparse generic multiplication
# -------------------------------------------------------------------

func mul_sparse_generic_by_0y(
       r: var QuadraticExt, a: QuadraticExt,
       sparseB: auto) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = β a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c1
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}

  var t{.noInit.}: typeof(a.c0)

  t.prod(a.c1, b)
  r.c1.prod(a.c0, b)
  # aliasing: a unneeded now
  r.c0.prod(t, NonResidue)

func mul_sparse_generic_by_0y(
       r: var QuadraticExt, a: QuadraticExt,
       sparseB: static int) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = β a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  template b(): untyped = sparseB

  var t{.noInit.}: typeof(a.c0)

  t.prod(a.c1, b)
  r.c1.prod(a.c0, b)
  # aliasing: a unneeded now
  r.c0.prod(t, NonResidue)

func mul_sparse_generic_by_x0(r: var QuadraticExt, a: QuadraticExt, sparseB: auto) =
  ## Multiply `a` by `b` with sparse coordinates (x, 0)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b1 = 0, hence
  #
  # r0 = a0 b0
  # r1 = (a0 + a1) b0 - a0 b0 = a1 b0
  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c0
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}

  r.c0.prod(a.c0, b)
  r.c1.prod(a.c1, b)

func mul2x_sparse_by_x0*[Fdbl, F](
       r: var QuadraticExt2x[Fdbl], a: QuadraticExt[F],
       sparseB: auto) =
  ## Multiply `a` by `b` with sparse coordinates (x, 0)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b1 = 0, hence
  #
  # r0 = a0 b0
  # r1 = (a0 + a1) b0 - a0 b0 = a1 b0
  static: doAssert Fdbl is doublePrec(F)

  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c0
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}

  r.c0.prod2x(a.c0, b)
  r.c1.prod2x(a.c1, b)

func mul2x_sparse_by_0y*[Fdbl, F](
       r: var QuadraticExt2x[Fdbl], a: QuadraticExt[F],
       sparseB: auto) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = β a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  static: doAssert Fdbl is doublePrec(F)

  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c1
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}

  r.c0.prod2x(a.c1, b)
  r.c0.prod2x(r.c0, NonResidue)
  r.c1.prod2x(a.c0, b)

# Inversion
# -------------------------------------------------------------------

func invImpl(r: var QuadraticExt, a: QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  #
  # Algorithm:
  #
  # 1 / (a0 + a1 w) <=> (a0 - a1 w) / (a0 + a1 w)(a0 - a1 w)
  #                 <=> (a0 - a1 w) / (a0² - a1² w²)
  # with w being our coordinate system and β the quadratic non-residue
  # we have w² = β
  # So the inverse is (a0 - a1 w) / (a0² - β a1²)

  # [2 Sqr, 1 Add]
  var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
  v0.square(a.c0)
  v1.square(a.c1)
  when r.fromComplexExtension():
    v0 += v1
  else:
    v1 *= NonResidue
    v0 -= v1              # v0 = a0² - β a1² (the norm / squared magnitude of a)

  # [1 Inv, 2 Sqr, 1 Add]
  v1.inv(v0)              # v1 = 1 / (a0² - β a1²)

  # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
  r.c0.prod(a.c0, v1)     # r0 = a0 / (a0² - β a1²)
  v0.neg(v1)              # v0 = -1 / (a0² - β a1²)
  r.c1.prod(a.c1, v0)     # r1 = -a1 / (a0² - β a1²)

func inv2xImpl(r: var QuadraticExt, a: QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ##
  ## Inversion routine is using lazy reduction

  # [2 Sqr, 1 Add]
  var V0 {.noInit.}, V1 {.noInit.}: doublePrec(typeof(r.c0))
  var t {.noInit.}: typeof(r.c0)
  V0.square2x(a.c0)
  V1.square2x(a.c1)
  when r.fromComplexExtension():
    V0.sum2xUnr(V0, V1)
  else:
    V1.prod2x(V1, NonResidue)
    V0.diff2xMod(V0, V1)  # v0 = a0² - β a1² (the norm / squared magnitude of a)

  # [1 Inv, 2 Sqr, 1 Add]
  t.redc2x(V0)
  t.inv()                  # v1 = 1 / (a0² - β a1²)

  # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
  r.c0.prod(a.c0, t)      # r0 = a0 / (a0² - β a1²)
  t.neg()                 # v0 = -1 / (a0² - β a1²)
  r.c1.prod(a.c1, t)      # r1 = -a1 / (a0² - β a1²)

# Dispatch
# ----------------------------------------------------------------------

{.push inline.}

func square2x*(r: var QuadraticExt2x, a: QuadraticExt) =
  when a.fromComplexExtension():
    when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
      if ({.noSideEffect.}: hasAdx()):
        r.coords.sqrx2x_complex_asm_adx(a.coords)
      else:
        r.square2x_complex(a)
    else:
      r.square2x_complex(a)
  else:
    r.square2x_disjoint(a.c0, a.c1)

func square*(r: var QuadraticExt, a: QuadraticExt) =
  when r.fromComplexExtension():
    when true:
      when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit():
        if ({.noSideEffect.}: hasAdx()):
          r.coords.sqrx_complex_sparebit_asm_adx(a.coords)
        else:
          r.square_complex(a)
      else:
        r.square_complex(a)
    else: # slower
      var d {.noInit.}: doublePrec(typeof(r))
      d.square2x(a)
      r.c0.redc2x(d.c0)
      r.c1.redc2x(d.c1)
  else:
    when true: # r.typeof.F.C in {BLS12_377, BW6_761}:
      # BW6-761 requires too many registers for Dbl width path
      r.square_generic(a)
    else:
      # TODO understand why Fp4[BLS12_377]
      # is so slow in the branch
      # TODO:
      # - On Fp4, we can have a.c0.c0 off by p
      #   a reduction is missing
      var d {.noInit.}: doublePrec(typeof(r))
      d.square2x_disjoint(a.c0, a.c1)
      r.c0.redc2x(d.c0)
      r.c1.redc2x(d.c1)

func square*(a: var QuadraticExt) =
  ## In-place squaring
  a.square(a)

func prod*(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Multiplication r <- a*b

  when r.fromComplexExtension():
    when false:
      r.prod_complex(a, b)
    else: # faster
      when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
        if ({.noSideEffect.}: hasAdx()):
          r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords)
        else:
          var d {.noInit.}: doublePrec(typeof(r))
          d.prod2x_complex(a, b)
          r.c0.redc2x(d.c0)
          r.c1.redc2x(d.c1)
      else:
        var d {.noInit.}: doublePrec(typeof(r))
        d.prod2x_complex(a, b)
        r.c0.redc2x(d.c0)
        r.c1.redc2x(d.c1)
  else:
    when r.typeof.F.C.has_large_field_elem():
      # BW6-761 requires too many registers for Dbl width path
      r.prod_generic(a, b)
    else:
      var d {.noInit.}: doublePrec(typeof(r))
      d.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1)
      r.c0.redc2x(d.c0)
      r.c1.redc2x(d.c1)

func `*=`*(a: var QuadraticExt, b: QuadraticExt) =
  ## In-place multiplication
  a.prod(a, b)

func prod2x_disjoint*[Fdbl, F](
       r: var QuadraticExt2x[FDbl],
       a: QuadraticExt[F],
       b0, b1: F) =
  ## Return (a0, a1) * (b0, b1) in r
  r.prod2x_disjoint(a.c0, a.c1, b0, b1)

func prod2x*(r: var QuadraticExt2x, a, b: QuadraticExt) =
  ## Double-precision multiplication r <- a*b
  when a.fromComplexExtension():
    when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
      if ({.noSideEffect.}: hasAdx()):
        r.coords.mul2x_fp2_complex_asm_adx(a.coords, b.coords)
      else:
        r.prod2x_complex(a, b)
    else:
      r.prod2x_complex(a, b)
  else:
    r.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1)

func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: auto) =
  ## Sparse multiplication
  when a.fromComplexExtension():
    r.mul_sparse_complex_by_0y(a, sparseB)
  else:
    r.mul_sparse_generic_by_0y(a, sparseB)

func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: static int) =
  ## Sparse multiplication
  when a.fromComplexExtension():
    {.error: "Not implemented.".}
  else:
    r.mul_sparse_generic_by_0y(a, sparseB)

func mul_sparse_by_0y*(a: var QuadraticExt, sparseB: auto) =
  ## Sparse in-place multiplication
  a.mul_sparse_by_0y(a, sparseB)

func mul_sparse_by_x0*(r: var QuadraticExt, a: QuadraticExt, sparseB: auto) =
  ## Sparse in-place multiplication
  r.mul_sparse_generic_by_x0(a, sparseB)

func mul_sparse_by_x0*(a: var QuadraticExt, sparseB: QuadraticExt) =
  ## Sparse in-place multiplication
  a.mul_sparse_by_x0(a, sparseB)

template mulCheckSparse*(a: var QuadraticExt, b: QuadraticExt) =
  when b.isOne().bool:
    discard
  elif b.isMinusOne().bool:
    a.neg()
  elif b.c0.isZero().bool and b.c1.isOne().bool:
    var t {.noInit.}: type(a.c0)
    when fromComplexExtension(b):
      t.neg(a.c1)
      a.c1 = a.c0
      a.c0 = t
    else:
      t.prod(a.c1, NonResidue)
      a.c1 = a.c0
      a.c0 = t
  elif b.c0.isZero().bool and b.c1.isMinusOne().bool:
    var t {.noInit.}: type(a.c0)
    when fromComplexExtension(b):
      t = a.c1
      a.c1.neg(a.c0)
      a.c0 = t
    else:
      t.prod(a.c1, NonResidue)
      a.c1.neg(a.c0)
      a.c0.neg(t)
  elif b.c0.isZero().bool:
    a.mul_sparse_by_0y(b)
  elif b.c1.isZero().bool:
    a.mul_sparse_by_x0(b)
  else:
    a *= b

func inv*(r: var QuadraticExt, a: QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  when true:
    r.invImpl(a)
  else: # Lazy reduction, doesn't seem to gain speed.
    r.inv2xImpl(a)

func inv*(a: var QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  a.inv(a)

{.pop.} # inline

# ############################################################
#                                                            #
#                    Cubic extensions                        #
#                                                            #
# ############################################################

# Squarings
# -------------------------------------------------------------------
# Cubic extensions can use specific squaring procedures
# beyond Schoolbook and Karatsuba:
# - Chung-Hasan (3 different algorithms)
# - Toom-Cook-3x (only in Miller Loops)
#
# Chung-Hasan papers
# http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf
# https://www.lirmm.fr/arith18/papers/Chung-Squaring.pdf
#
# The papers focus on polynomial squaring, they have been adapted
# to towered extension fields with the relevant costs in
#
# - Multiplication and Squaring on Pairing-Friendly Fields
#   Augusto Jun Devegili and Colm Ó hÉigeartaigh and Michael Scott and Ricardo Dahab, 2006
#   https://eprint.iacr.org/2006/471
#
# Costs in the underlying field
# M: Mul, S: Square, A: Add/Sub, B: Mul by non-residue
#
# | Method      | > Linear | Linear            |
# |-------------|----------|-------------------|
# | Schoolbook  | 3M + 3S  | 6A + 2B           |
# | Karatsuba   | 6S       | 13A + 2B          |
# | Tom-Cook-3x | 5S       | 33A + 2B          | (Miller loop only, factor 6)
# | CH-SQR1     | 3M + 2S  | 11A + 2B          |
# | CH-SQR2     | 2M + 3S  | 10A + 2B          |
# | CH-SQR3     | 1M + 4S  | 11A + 2B + 1 Div2 |
# | CH-SQR3x    | 1M + 4S  | 14A + 2B          | (Miller loop only, factor 2)

func square_Chung_Hasan_SQR2(r: var CubicExt, a: CubicExt) {.used.}=
  ## Returns r = a²
  var s0{.noInit.}, m01{.noInit.}, m12{.noInit.}: typeof(r.c0)

  # precomputations that use a
  m01.prod(a.c0, a.c1)
  m01.double()
  m12.prod(a.c1, a.c2)
  m12.double()
  s0.square(a.c2)
  # aliasing: a₂ unused

  # r₂ = (a₀ - a₁ + a₂)²
  r.c2.sum(a.c2, a.c0)
  r.c2 -= a.c1
  r.c2.square()
  # aliasing, a almost unneeded now

  # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₂²
  r.c2 += m01
  r.c2 += m12
  r.c2 -= s0

  # r₁ = 2a₀a₁ + β a₂²
  r.c1.prod(s0, NonResidue)
  r.c1 += m01

  # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₀² - a₂²
  s0.square(a.c0)
  r.c2 -= s0

  # r₀ = a₀² + β 2a₁a₂
  r.c0.prod(m12, NonResidue)
  r.c0 += s0

func square2x_Chung_Hasan_SQR2(r: var CubicExt2x, a: CubicExt) =
  ## Returns r = a²
  var m01{.noInit.}, m12{.noInit.}: typeof(r.c0) # double-width
  var t{.noInit.}: typeof(a.c0)                  # single width

  m01.prod2x(a.c0, a.c1)
  m01.sum2xMod(m01, m01)  # 2a₀a₁
  m12.prod2x(a.c1, a.c2)
  m12.sum2xMod(m12, m12)  # 2a₁a₂
  r.c0.square2x(a.c2)     # borrow r₀ = a₂² for a moment

  # r₂ = (a₀ - a₁ + a₂)²
  t.sum(a.c2, a.c0)
  t -= a.c1
  r.c2.square2x(t)

  # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₂²
  r.c2.sum2xMod(r.c2, m01)
  r.c2.sum2xMod(r.c2, m12)
  r.c2.diff2xMod(r.c2, r.c0)

  # r₁ = 2a₀a₁ + β a₂²
  r.c1.prod2x(r.c0, NonResidue)
  r.c1.sum2xMod(r.c1, m01)

  # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₀² - a₂²
  r.c0.square2x(a.c0)
  r.c2.diff2xMod(r.c2, r.c0)

  # r₀ = a₀² + β 2a₁a₂
  m12.prod2x(m12, NonResidue)
  r.c0.sum2xMod(r.c0, m12)

func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
  ## Returns r = a²
  var s0{.noInit.}, t{.noInit.}, m12{.noInit.}: typeof(r.c0)

  # s₀ = (a₀ + a₁ + a₂)²
  # t = ((a₀ + a₁ + a₂)² + (a₀ - a₁ + a₂)²) / 2
  s0.sum(a.c0, a.c2)
  t.diff(s0, a.c1)
  s0 += a.c1
  s0.square()
  t.square()
  t += s0
  t.div2()

  # m12 = 2a₁a₂ and r₁ = a₂²
  # then a₁ and a₂ are unused for aliasing
  m12.prod(a.c1, a.c2)
  m12.double()
  r.c1.square(a.c2)       # r₁ = a₂²

  r.c2.diff(t, r.c1)      # r₂ = t - a₂²
  r.c1 *= NonResidue      # r₁ = β a₂²
  r.c1 += s0              # r₁ = (a₀ + a₁ + a₂)² + β a₂²
  r.c1 -= m12             # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ + β a₂²
  r.c1 -= t               # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ - t + β a₂²

  s0.square(a.c0)
  # aliasing: a₀ unused

  r.c2 -= s0
  r.c0.prod(m12, NonResidue)
  r.c0 += s0

# Multiplications
# -------------------------------------------------------------------

func prodImpl(r: var CubicExt, a, b: CubicExt) =
  ## Returns r = a * b
  ## Algorithm is Karatsuba
  var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(r.c0)
  var t0{.noInit.}, t1{.noInit.}, t2{.noInit.}: typeof(r.c0)

  v0.prod(a.c0, b.c0)
  v1.prod(a.c1, b.c1)
  v2.prod(a.c2, b.c2)

  # r₀ = β ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
  t0.sum(a.c1, a.c2)
  t1.sum(b.c1, b.c2)
  t0 *= t1
  t0 -= v1
  t0 -= v2
  t0 *= NonResidue
  # r₀ = t₀ + v₀ at the end to handle aliasing

  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
  t1.sum(a.c0, a.c1)
  t2.sum(b.c0, b.c1)
  r.c1.prod(t1, t2)
  r.c1 -= v0
  r.c1 -= v1
  t1.prod(v2, NonResidue)
  r.c1 += t1

  # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
  t1.sum(a.c0, a.c2)
  t2.sum(b.c0, b.c2)
  r.c2.prod(t1, t2)
  r.c2 -= v0
  r.c2 -= v2
  r.c2 += v1

  # Finish r₀
  r.c0.sum(t0, v0)

func prod2xImpl(r: var CubicExt2x, a, b: CubicExt) =
  var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: typeof(r.c0)
  var t0 {.noInit.}, t1 {.noInit.}: typeof(a.c0)

  V0.prod2x(a.c0, b.c0)
  V1.prod2x(a.c1, b.c1)
  V2.prod2x(a.c2, b.c2)

  # r₀ = β ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
  when a.c0 is Fp and CubicExt.has1extraBit():
    t0.sumUnr(a.c1, a.c2)
    t1.sumUnr(b.c1, b.c2)
  else:
    t0.sum(a.c1, a.c2)
    t1.sum(b.c1, b.c2)
  r.c0.prod2x(t0, t1) # r cannot alias a or b since it's double precision
  r.c0.diff2xMod(r.c0, V1)
  r.c0.diff2xMod(r.c0, V2)
  r.c0.prod2x(r.c0, NonResidue)
  r.c0.sum2xMod(r.c0, V0)

  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
  when a.c0 is Fp and CubicExt.has1extraBit():
    t0.sumUnr(a.c0, a.c1)
    t1.sumUnr(b.c0, b.c1)
  else:
    t0.sum(a.c0, a.c1)
    t1.sum(b.c0, b.c1)
  r.c1.prod2x(t0, t1)
  r.c1.diff2xMod(r.c1, V0)
  r.c1.diff2xMod(r.c1, V1)
  r.c2.prod2x(V2, NonResidue) # r₂ is unused and cannot alias
  r.c1.sum2xMod(r.c1, r.c2)

  # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
  when a.c0 is Fp and CubicExt.has1extraBit():
    t0.sumUnr(a.c0, a.c2)
    t1.sumUnr(b.c0, b.c2)
  else:
    t0.sum(a.c0, a.c2)
    t1.sum(b.c0, b.c2)
  r.c2.prod2x(t0, t1)
  r.c2.diff2xMod(r.c2, V0)
  r.c2.diff2xMod(r.c2, V2)
  r.c2.sum2xMod(r.c2, V1)

# Sparse multiplication
# ----------------------------------------------------------------------

func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) =
  ## Sparse multiplication of a cubic extenion element
  ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)

  # v0 = a0 b0 = 0
  # v1 = a1 b1
  # v2 = a2 b2 = 0
  #
  # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
  #    = ξ (a1 b1 + a2 b1 - v1)
  #    = ξ a2 b1
  # r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2
  #    = a0 b1 + a1 b1 - v1
  #    = a0 b1
  # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
  #    = v1
  #    = a1 b1

  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c1
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}

  r.c0.prod(a.c2, b)
  r.c0 *= NonResidue
  r.c1.prod(a.c0, b)
  r.c2.prod(a.c1, b)

# Inversion
# ----------------------------------------------------------------------

func invImpl(r: var CubicExt, a: CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  #
  # Algorithm 5.23
  #
  # Arithmetic of Finite Fields
  # Chapter 5 of Guide to Pairing-Based Cryptography
  # Jean Luc Beuchat, Luis J. Dominguez Perez, Sylvain Duquesne, Nadia El Mrabet, Laura Fuentes-Castañeda, Francisco Rodríguez-Henríquez, 2017\
  # https://www.researchgate.net/publication/319538235_Arithmetic_of_Finite_Fields
  #
  # We optimize for stack usage and use 4 temporaries
  # instead of 9, because 4 * 2 (𝔽p2) * Bitsize would be:
  # - ~2032 bits for BN254
  # - ~3048 bits for BLS12-381
  var A {.noInit.}, B {.noInit.}, C {.noInit.}: typeof(r.c0)
  var t {.noInit.}: typeof(r.c0)

  # A <- a₀² - β a₁ a₂
  A.square(a.c0)
  t.prod(a.c1, a.c2)
  t *= NonResidue
  A -= t

  # B <- β a₂² - a₀ a₁
  B.square(a.c2)
  B *= NonResidue
  t.prod(a.c0, a.c1)
  B -= t

  # C <- a₁² - a₀ a₂
  C.square(a.c1)
  t.prod(a.c0, a.c2)
  C -= t

  # F in t
  # F <- β a₁ C + a₀ A + β a₂ B
  t.prod(a.c1, C)
  r.c2.prod(a.c2, B) # aliasing: last use of a₂, destroy r₂
  t += r.c2
  t *= NonResidue
  r.c0.prod(a.c0, A) # aliasing: last use of a₀, destroy r₀
  t += r.c0

  t.inv()

  # (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
  r.c0.prod(A, t)
  r.c1.prod(B, t)
  r.c2.prod(C, t)

func inv2xImpl(r: var CubicExt, a: CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ## via lazy reduction
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  var aa{.noInit.}, bb{.noInit.}, cc{.noInit.}: doublePrec(typeof(r.c0))
  var ab{.noInit.}, bc{.noInit.}, ac{.noInit.}: doublePrec(typeof(r.c0))
  var A {.noInit.}, B {.noInit.}, C {.noInit.}: typeof(r.c0)
  var t {.noInit.}, t2{.noInit.}: doublePrec(typeof(r.c0))
  var f {.noInit.}: typeof(r.c0)

  aa.square2x(a.c0)
  bb.square2x(a.c1)
  cc.square2x(a.c2)
  ab.prod2x(a.c0, a.c1)
  bc.prod2x(a.c1, a.c2)
  ac.prod2x(a.c0, a.c2)

  # A <- a₀² - β a₁ a₂
  t.prod2x(bc, NonResidue)
  t.diff2xMod(aa, t)
  A.redc2x(t)

  # B <- β a₂² - a₀ a₁
  t.prod2x(cc, NonResidue)
  t.diff2xMod(t, ab)
  B.redc2x(t)

  # C <- a₁² - a₀ a₂
  t.diff2xMod(bb, ac)
  C.redc2x(t)

  # F in t
  # F <- β a₁ C + a₀ A + β a₂ B
  t.prod2x(B, a.c2)
  t2.prod2x(C, a.c1)
  t.sum2xMod(t, t2)
  t.prod2x(t, NonResidue)
  t2.prod2x(A, a.c0)
  t.sum2xUnr(t, t2)
  f.redc2x(t)

  f.inv()

  # (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
  r.c0.prod(A, f)
  r.c1.prod(B, f)
  r.c2.prod(C, f)

# Dispatch
# ----------------------------------------------------------------------

{.push inline.}

func square*(r: var CubicExt, a: CubicExt) =
  ## Returns r = a²
  when CubicExt.C.has_large_NR_norm() or CubicExt.C.has_large_field_elem():
    square_Chung_Hasan_SQR3(r, a)
  else:
    var d {.noInit.}: doublePrec(typeof(a))
    d.square2x_Chung_Hasan_SQR2(a)
    r.c0.redc2x(d.c0)
    r.c1.redc2x(d.c1)
    r.c2.redc2x(d.c2)

func square*(a: var CubicExt) =
  ## In-place squaring
  a.square(a)

func square2x*(r: var CubicExt2x, a: CubicExt) =
  ## Returns r = a²
  square2x_Chung_Hasan_SQR2(r, a)

func prod*(r: var CubicExt, a, b: CubicExt) =
  ## Out-of-place multiplication
  when CubicExt.C.has_large_field_elem():
    r.prodImpl(a, b)
  else:
    var d {.noInit.}: doublePrec(typeof(r))
    d.prod2x(a, b)
    r.c0.redc2x(d.c0)
    r.c1.redc2x(d.c1)
    r.c2.redc2x(d.c2)

func prod2x*(r: var CubicExt2x, a, b: CubicExt) =
  ## Returns r = ab
  prod2xImpl(r, a, b)

func `*=`*(a: var CubicExt, b: CubicExt) =
  ## In-place multiplication
  a.prod(a, b)

func inv*(r: var CubicExt, a: CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  when true:
    r.invImpl(a)
  else:
    r.inv2xImpl(a)

func inv*(a: var CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  a.invImpl(a)

{.pop.} # inline
{.pop.} # raises no exceptions