Merge pull request #765 from mir-protocol/Fp12

Fp12 multiplication
This commit is contained in:
Dima V 2022-10-28 02:30:44 -07:00 committed by GitHub
commit 84151f083c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 969 additions and 3 deletions

View File

@ -32,6 +32,9 @@ pub(crate) fn combined_kernel() -> Kernel {
include_str!("asm/curve/secp256k1/lift_x.asm"),
include_str!("asm/curve/secp256k1/moddiv.asm"),
include_str!("asm/exp.asm"),
include_str!("asm/fields/fp6_macros.asm"),
include_str!("asm/fields/fp6_mul.asm"),
include_str!("asm/fields/fp12_mul.asm"),
include_str!("asm/halt.asm"),
include_str!("asm/main.asm"),
include_str!("asm/memory/core.asm"),

View File

@ -0,0 +1,166 @@
/// Note: uncomment this to test
/// global test_mul_Fp12:
/// // stack: f, in0 , f', g, in1 , g', in1, out, in0, out
/// DUP7
/// // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0, out
/// %store_fp6
/// // stack: in0 , f', g, in1 , g', in1, out, in0, out
/// %add_const(6)
/// // stack: in0', f', g, in1 , g', in1, out, in0, out
/// %store_fp6
/// // stack: g, in1 , g', in1, out, in0, out
/// DUP7
/// // stack: in1, g, in1 , g', in1, out, in0, out
/// %store_fp6
/// // stack: in1 , g', in1, out, in0, out
/// %add_const(6)
/// // stack: in1', g', in1, out, in0, out
/// %store_fp6
/// // stack: in1, out, in0, out
/// PUSH ret_stack
/// // stack: ret_stack, in1, out, in0, out
/// SWAP3
/// // stack: in0, in1, out, ret_stack, out
/// %jump(mul_Fp12)
/// ret_stack:
/// // stack: out
/// DUP1 %add_const(6)
/// // stack: out', out
/// %load_fp6
/// // stack: h', out
/// DUP7
/// // stack: out, h', out
/// %load_fp6
/// // stack: h, h', out
/// %jump(0xdeadbeef)
/// mul_Fp12: multiplies two Fp12 elements read through %load_fp6 from the
/// pointers in0 and in1, and writes the product through %store_fp6 to out.
/// A return address is expected directly beneath `out`; it is consumed by the
/// final JUMP. The stack comments below omit it.
/// Addresses 100..105, 106..111 and 112..117 are used as scratch space for
/// sh(f'g'), f'g' and fg respectively.
///
/// fp6 functions:
/// fn | num | ops | cost
/// -------------------------
/// load | 8 | 40 | 320
/// store | 5 | 40 | 200
/// dup | 5 | 6 | 30
/// swap | 4 | 16 | 64
/// add | 4 | 16 | 64
/// subr | 1 | 17 | 17
/// mul | 3 | 157 | 471
/// i9 | 1 | 9 | 9
///
/// lone stack operations:
/// op | num
/// ------------
/// ADD | 3
/// SWAP | 2
/// DUP | 6
/// PUSH | 6
/// POP | 2
/// JUMP | 1
///
/// TOTAL: 1196
/// NOTE(review): the rows above sum to 1195 (1175 for the fp6 functions plus
/// 20 lone operations) -- confirm which figure is intended.
/// inputs:
/// F = f + f'z
/// G = g + g'z
///
/// output:
/// H = h + h'z = FG
///
/// h = fg + sh(f'g')
/// h' = (f+f')(g+g') - fg - f'g'
///
/// memory pointers [ind' = ind+6]
/// {in0: f, in0': f', in1: g, in1':g', out: h, out': h'}
///
/// f, f', g, g' consist of six elements on the stack
global mul_Fp12:
// stack: in0, in1, out
DUP1 %add_const(6)
// stack: in0', in0, in1, out
%load_fp6
// stack: f', in0, in1, out
DUP8 %add_const(6)
// stack: in1', f', in0, in1, out
%load_fp6
// stack: g', f', in0, in1, out
PUSH ret_1
// stack: ret_1, g', f', in0, in1, out
%dup_fp6_7
// stack: f', ret_1, g', f', in0, in1, out
%dup_fp6_7
// stack: g', f', ret_1, g', f', in0, in1, out
%jump(mul_fp6)
ret_1:
// stack: f'g', g' , f', in0, in1, out
%dup_fp6_0
// stack: f'g', f'g', g' , f', in0, in1, out
// one copy is stored shifted (sh), the other unshifted, since both h and h' need f'g'
%store_fp6_sh(100)
// stack: f'g', g' , f', in0, in1, out {100: sh(f'g')}
%store_fp6(106)
// stack: g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
DUP13
// stack: in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
DUP15
// stack: in1, in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%load_fp6
// stack: g , in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%swap_fp6_hole
// stack: g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%dup_fp6_7
// stack: g,g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%add_fp6
// stack: g+g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%swap_fp6_hole
// stack: g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
PUSH ret_2
// stack: ret_2, g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
// SWAP7 puts the return label where mul_fp6 expects retdest and brings in0 to the top
SWAP7
// stack: in0, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%load_fp6
// stack: f, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%jump(mul_fp6)
ret_2:
// stack: fg, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
%store_fp6(112)
// stack: g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%swap_fp6
// stack: f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
PUSH ret_3
// stack: ret_3, f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
// SWAP13 replaces the last remaining copy of in0 with the return label
SWAP13
// stack: in0, f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%load_fp6
// stack: f,f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%add_fp6
// stack: f+f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%jump(mul_fp6)
ret_3:
// stack: (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%load_fp6(112)
// stack: fg, (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%swap_fp6
// stack: (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%dup_fp6_6
// stack: fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%load_fp6(106)
// stack: f'g',fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%add_fp6
// stack: f'g'+fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
// subr_fp6 subtracts the top element from the one beneath it
%subr_fp6
// stack: (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
DUP14 %add_const(6)
// stack: out', (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%store_fp6
// stack: fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%load_fp6(100)
// stack: sh(f'g') , fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%add_fp6
// stack: sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
DUP8
// stack: out, sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%store_fp6
// stack: in1, out {100: sh(f'g'), 106: f'g', 112: fg}
%pop2
// the element left on top is the caller's return address
JUMP

View File

@ -0,0 +1,314 @@
// Reads the six words at ptr, ptr+1, ..., ptr+5 with %mload_kernel_general
// and leaves them on the stack as x0 (top), x1, ..., x5, consuming ptr.
// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro load_fp6
// stack: ptr
DUP1 %add_const(4)
// stack: ind4, ptr
%mload_kernel_general
// stack: x4, ptr
DUP2 %add_const(3)
// stack: ind3, x4, ptr
%mload_kernel_general
// stack: x3, x4, ptr
DUP3 %add_const(2)
// stack: ind2, x3, x4, ptr
%mload_kernel_general
// stack: x2, x3, x4, ptr
DUP4 %add_const(1)
// stack: ind1, x2, x3, x4, ptr
%mload_kernel_general
// stack: x1, x2, x3, x4, ptr
DUP5 %add_const(5)
// stack: ind5, x1, x2, x3, x4, ptr
%mload_kernel_general
// stack: x5, x1, x2, x3, x4, ptr
// x5 is loaded out of order so that one SWAP5 both places it last and brings ptr to the top
SWAP5
// stack: ind0, x1, x2, x3, x4, x5
%mload_kernel_general
// stack: x0, x1, x2, x3, x4, x5
%endmacro
// Constant-address variant of load_fp6: reads the six words at $ptr .. $ptr+5
// with %mload_kernel_general and pushes them as x0 (top), x1, ..., x5.
// The words are loaded from index 5 down to index 0 so that x0 ends up on top.
// cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro load_fp6(ptr)
// stack:
PUSH $ptr %add_const(5)
// stack: ind5
%mload_kernel_general
// stack: x5
PUSH $ptr %add_const(4)
// stack: ind4, x5
%mload_kernel_general
// stack: x4, x5
PUSH $ptr %add_const(3)
// stack: ind3, x4, x5
%mload_kernel_general
// stack: x3, x4, x5
PUSH $ptr %add_const(2)
// stack: ind2, x3, x4, x5
%mload_kernel_general
// stack: x2, x3, x4, x5
PUSH $ptr %add_const(1)
// stack: ind1, x2, x3, x4, x5
%mload_kernel_general
// stack: x1, x2, x3, x4, x5
PUSH $ptr
// stack: ind0, x1, x2, x3, x4, x5
%mload_kernel_general
// stack: x0, x1, x2, x3, x4, x5
%endmacro
// Writes x0, ..., x5 to ptr, ptr+1, ..., ptr+5 with %mstore_kernel_general,
// consuming ptr and all six values.
// cost: 6 stores + 6 swaps/dups + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro store_fp6
// stack: ptr, x0, x1, x2, x3, x4 , x5
// park ptr deeper in the stack so it stays reachable by DUP while x0..x3 are consumed
SWAP5
// stack: x4, x0, x1, x2, x3, ptr, x5
DUP6 %add_const(4)
// stack: ind4, x4, x0, x1, x2, x3, ptr, x5
%mstore_kernel_general
// stack: x0, x1, x2, x3, ptr, x5
DUP5
// stack: ind0, x0, x1, x2, x3, ptr, x5
%mstore_kernel_general
// stack: x1, x2, x3, ptr, x5
DUP4 %add_const(1)
// stack: ind1, x1, x2, x3, ptr, x5
%mstore_kernel_general
// stack: x2, x3, ptr, x5
DUP3 %add_const(2)
// stack: ind2, x2, x3, ptr, x5
%mstore_kernel_general
// stack: x3, ptr, x5
DUP2 %add_const(3)
// stack: ind3, x3, ptr, x5
%mstore_kernel_general
// stack: ptr, x5
// last store: ptr itself is consumed instead of duplicated
%add_const(5)
// stack: ind5, x5
%mstore_kernel_general
// stack:
%endmacro
// Constant-address variant of store_fp6: writes x0, ..., x5 to
// $ptr, $ptr+1, ..., $ptr+5 with %mstore_kernel_general, consuming all six values.
// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro store_fp6(ptr)
// stack: x0, x1, x2, x3, x4, x5
PUSH $ptr
// stack: ind0, x0, x1, x2, x3, x4, x5
%mstore_kernel_general
// stack: x1, x2, x3, x4, x5
PUSH $ptr %add_const(1)
// stack: ind1, x1, x2, x3, x4, x5
%mstore_kernel_general
// stack: x2, x3, x4, x5
PUSH $ptr %add_const(2)
// stack: ind2, x2, x3, x4, x5
%mstore_kernel_general
// stack: x3, x4, x5
PUSH $ptr %add_const(3)
// stack: ind3, x3, x4, x5
%mstore_kernel_general
// stack: x4, x5
PUSH $ptr %add_const(4)
// stack: ind4, x4, x5
%mstore_kernel_general
// stack: x5
PUSH $ptr %add_const(5)
// stack: ind5, x5
%mstore_kernel_general
// stack:
%endmacro
// Stores the "shifted" form of the fp6 element on the stack at $ptr:
// (x0, x1) go to $ptr+2, $ptr+3; (x2, x3) go to $ptr+4, $ptr+5; and the last
// pair (x4, x5) is passed through %i9 before being written to $ptr, $ptr+1.
// All six stack values are consumed.
// cost: store (40) + i9 (9) = 49
%macro store_fp6_sh(ptr)
// stack: x0, x1, x2, x3, x4, x5
PUSH $ptr %add_const(2)
// stack: ind2, x0, x1, x2, x3, x4, x5
%mstore_kernel_general
// stack: x1, x2, x3, x4, x5
PUSH $ptr %add_const(3)
// stack: ind3, x1, x2, x3, x4, x5
%mstore_kernel_general
// stack: x2, x3, x4, x5
PUSH $ptr %add_const(4)
// stack: ind4, x2, x3, x4, x5
%mstore_kernel_general
// stack: x3, x4, x5
PUSH $ptr %add_const(5)
// stack: ind5, x3, x4, x5
%mstore_kernel_general
// stack: x4, x5
%i9
// stack: y5, y4
// %i9 leaves its second output component on top, so it is stored first, at $ptr+1
PUSH $ptr %add_const(1)
// stack: ind1, y5, y4
%mstore_kernel_general
// stack: y4
PUSH $ptr
// stack: ind0, y4
%mstore_kernel_general
// stack:
%endmacro
// Maps the pair (a, b) on the stack to (9b + a, 9a - b), i.e. the product
// (a + bi)(9 + i) = (9a - b) + (a + 9b)i with the two components in swapped order.
// cost: 9; note this returns y, x for the output x + yi
%macro i9
// stack: a , b
DUP2
// stack: b, a, b
DUP2
// stack: a , b, a , b
PUSH 9 MULFP254
// stack: 9a , b, a , b
SUBFP254
// stack: 9a - b, a , b
SWAP2
// stack: b , a, 9a - b
PUSH 9 MULFP254
// stack: 9b , a, 9a - b
ADDFP254
// stack: 9b + a, 9a - b
%endmacro
// Duplicates the six-element value on top of the stack.
// cost: 6
%macro dup_fp6_0
// stack: f: 6
DUP6
DUP6
DUP6
DUP6
DUP6
DUP6
// stack: f: 6, f: 6
%endmacro
// Copies the six-element value lying directly beneath the top six elements
// (stack positions 7..12) onto the top of the stack.
// cost: 6
%macro dup_fp6_6
// stack: f: 6, g: 6
DUP12
DUP12
DUP12
DUP12
DUP12
DUP12
// stack: g: 6, f: 6, g: 6
%endmacro
// Copies the six-element value at stack positions 8..13 onto the top of the
// stack, i.e. the value separated from the top six elements by one extra word X.
// cost: 6
%macro dup_fp6_7
// stack: f: 6, X, g: 6
DUP13
DUP13
DUP13
DUP13
DUP13
DUP13
// stack: g: 6, f: 6, X, g: 6
%endmacro
// Exchanges the two six-element values on top of the stack, one word at a
// time: each SWAPk / SWAP(k+6) / SWAPk triple exchanges f_k with g_k.
// cost: 16
%macro swap_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
SWAP6
// stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5
SWAP1
SWAP7
SWAP1
// stack: g0, g1, f2, f3, f4, f5, f0, f1, g2, g3, g4, g5
SWAP2
SWAP8
SWAP2
// stack: g0, g1, g2, f3, f4, f5, f0, f1, f2, g3, g4, g5
SWAP3
SWAP9
SWAP3
// stack: g0, g1, g2, g3, f4, f5, f0, f1, f2, f3, g4, g5
SWAP4
SWAP10
SWAP4
// stack: g0, g1, g2, g3, g4, f5, f0, f1, f2, f3, f4, g5
SWAP5
SWAP11
SWAP5
// stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5
%endmacro
// cost: 16
// swap two fp6 elements with a stack term separating them
// (f: 6, x, g: 6) -> (g: 6, x, f: 6)
// Each SWAPk / SWAP(k+7) / SWAPk triple exchanges f_k with g_k; X is never moved.
%macro swap_fp6_hole
// stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
SWAP7
// stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
SWAP1
SWAP8
SWAP1
// stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5
SWAP2
SWAP9
SWAP2
// stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5
SWAP3
SWAP10
SWAP3
// stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5
SWAP4
SWAP11
SWAP4
// stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5
SWAP5
SWAP12
SWAP5
// stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
%endmacro
// Component-wise ADDFP254 of the two six-element values on top of the stack:
// h_k = f_k + g_k. Both inputs are consumed and h is left in their place.
// Components 1..5 are each handled by a SWAP7 / ADDFP254 / SWAP6 triple that
// keeps f0 on top; component 0 is added last.
// cost: 16
%macro add_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
SWAP7
ADDFP254
SWAP6
// stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
SWAP7
ADDFP254
SWAP6
// stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
SWAP7
ADDFP254
SWAP6
// stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
SWAP7
ADDFP254
SWAP6
// stack: f0, f5, g0, h1, h2, h3, h4, g5
SWAP7
ADDFP254
SWAP6
// stack: f0, g0, h1, h2, h3, h4, h5
ADDFP254
// stack: h0, h1, h2, h3, h4, h5
%endmacro
// Component-wise SUBFP254 with the operands reversed relative to stack order:
// h_k = g_k - f_k, where f is the value on top of the stack and g the one
// beneath it. Each SWAP7 brings g_k above f_k before subtracting; the final
// SWAP1 does the same for component 0, which accounts for the extra op
// compared to add_fp6.
// *reversed argument subtraction* cost: 17
%macro subr_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
SWAP7
SUBFP254
SWAP6
// stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
SWAP7
SUBFP254
SWAP6
// stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
SWAP7
SUBFP254
SWAP6
// stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
SWAP7
SUBFP254
SWAP6
// stack: f0, f5, g0, h1, h2, h3, h4, g5
SWAP7
SUBFP254
SWAP6
// stack: f0, g0, h1, h2, h3, h4, h5
SWAP1
SUBFP254
// stack: h0, h1, h2, h3, h4, h5
%endmacro

View File

@ -0,0 +1,258 @@
/// mul_fp6: multiplies the two fp6 elements C and D held on the stack and
/// jumps to retdest with the product E on the stack.
/// Each finished output word is parked in the stack slot of an input word
/// that is still needed, and that input word is brought to the top in
/// exchange; the last section (e2_) consumes the remaining inputs in place.
///
/// inputs:
/// C = C0 + C1t + C2t^2
/// = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2
///
/// D = D0 + D1t + D2t^2
/// = (d0 + d0_i) + (d1 + d1_i)t + (d2 + d2_i)t^2
///
/// output:
/// E = E0 + E1t + E2t^2 = CD
/// = (e0 + e0_i) + (e1 + e1_i)t + (e2 + e2_i)t^2
///
/// initial stack: c0, c0_, c1, c1_, c2, c2_, d0, d0_, d1, d1_, d2, d2_, retdest
/// final stack: e0, e0_, e1, e1_, e2, e2_
/// computations:
///
/// E0 = C0D0 + i9(C1D2 + C2D1)
///
/// C0D0 = (c0d0 - c0_d0_) + (c0d0_ + c0_d0)i
///
/// C1D2 = (c1d2 - c1_d2_) + (c1d2_ + c1_d2)i
/// C2D1 = (c2d1 - c2_d1_) + (c2d1_ + c2_d1)i
///
/// CD12 = C1D2 + C2D1
/// = (c1d2 + c2d1 - c1_d2_ - c2_d1_) + (c1d2_ + c1_d2 + c2d1_ + c2_d1)i
///
/// i9(CD12) = (9CD12 - CD12_) + (CD12 + 9CD12_)i
///
/// e0 = 9CD12 - CD12_ + C0D0
/// e0_ = 9CD12_ + CD12 + C0D0_
///
///
/// E1 = C0D1 + C1D0 + i9(C2D2)
///
/// C0D1 = (c0d1 - c0_d1_) + (c0d1_ + c0_d1)i
/// C1D0 = (c1d0 - c1_d0_) + (c1d0_ + c1_d0)i
///
/// CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_)
/// CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0
///
/// C2D2 = (c2d2 - c2_d2_) + (c2d2_ + c2_d2)i
/// i9(C2D2) = (9C2D2 - C2D2_) + (C2D2 + 9C2D2_)i
///
/// e1 = 9C2D2 - C2D2_ + CD01
/// e1_ = C2D2 + 9C2D2_ + CD01_
///
///
/// E2 = C0D2 + C1D1 + C2D0
///
/// C0D2 = (c0d2 - c0_d2_) + (c0d2_ + c0_d2)i
/// C1D1 = (c1d1 - c1_d1_) + (c1d1_ + c1_d1)i
/// C2D0 = (c2d0 - c2_d0_) + (c2d0_ + c2_d0)i
///
/// e2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_)
/// e2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0
// cost: 157
global mul_fp6:
// e2
// make c0_d2_ + c1_d1_ + c2_d0_
DUP8
DUP7
MULFP254
DUP11
DUP6
MULFP254
ADDFP254
DUP13
DUP4
MULFP254
ADDFP254
// make c0d2 + c1d1 + c2d0
DUP12
DUP3
MULFP254
DUP11
DUP6
MULFP254
ADDFP254
DUP9
DUP8
MULFP254
ADDFP254
// stack: c0d2 + c1d1 + c2d0 , c0_d2_ + c1_d1_ + c2_d0_
// SUBFP254 computes (top) - (second)
SUBFP254
// stack: e2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_)
// park e2 in the slot of d2_ and bring d2_ to the top
SWAP12
// stack: d2_, c0, c0_, c1, c1_, c2, c2_, d0, d0_, d1, d1_, d2, e2, retdest
// e0, e0_
// make CD12_ = c1d2_ + c1_d2 + c2d1_ + c2_d1
DUP1
DUP5
MULFP254
DUP13
DUP7
MULFP254
ADDFP254
DUP12
DUP8
MULFP254
ADDFP254
DUP11
DUP9
MULFP254
ADDFP254
// make C0D0_ = c0d0_ + c0_d0
DUP10
DUP4
MULFP254
DUP10
DUP6
MULFP254
ADDFP254
// make CD12 = c1d2 + c2d1 - c1_d2_ - c2_d1_
// the subtracted sum c2_d1_ + c1_d2_ is built first so that it ends up beneath c1d2 + c2d1
DUP13
DUP10
MULFP254
DUP4
DUP9
MULFP254
ADDFP254
DUP15
DUP8
MULFP254
DUP14
DUP11
MULFP254
ADDFP254
SUBFP254
// make C0D0 = c0d0 - c0_d0_
DUP12
DUP7
MULFP254
DUP12
DUP7
MULFP254
SUBFP254
// stack: C0D0 , CD12 , C0D0_, CD12_
DUP4
DUP3
// stack: CD12 , CD12_ , C0D0 , CD12 , C0D0_, CD12_
PUSH 9
MULFP254
SUBFP254
ADDFP254
// stack: e0 = 9CD12 - CD12_ + C0D0 , CD12 , C0D0_, CD12_
// park e0 in the slot of d0_ (bringing d0_ up), then move d0_ beneath the three temporaries
SWAP12
SWAP3
// stack: CD12_ , CD12 , C0D0_
PUSH 9
MULFP254
ADDFP254
ADDFP254
// stack: e0_ = 9CD12_ + CD12 + C0D0_
// park e0_ in the slot of d1 and bring d1 to the top
SWAP11
// stack: d1, d0_, d2_, c0, c0_, c1, c1_, c2, c2_, d0, e0, e0_, d1_, d2, e2, retdest
// e1, e1_
// make C2D2_ = c2d2_ + c2_d2
DUP14
DUP10
MULFP254
DUP4
DUP10
MULFP254
ADDFP254
// make C2D2 = c2d2 - c2_d2_
DUP4
DUP11
MULFP254
DUP16
DUP11
MULFP254
SUBFP254
// make CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_)
DUP4
DUP10
MULFP254
DUP16
DUP9
MULFP254
ADDFP254
DUP13
DUP10
MULFP254
DUP5
DUP9
MULFP254
ADDFP254
SUBFP254
// stack: CD01, C2D2, C2D2_
DUP3
DUP3
// stack: C2D2 , C2D2_ , CD01, C2D2, C2D2_
PUSH 9
MULFP254
SUBFP254
ADDFP254
// stack: e1 = 9C2D2 - C2D2_ + CD01, C2D2, C2D2_
// park e1 in the slot of d1_ (bringing d1_ up), then move d1_ beneath the two temporaries
SWAP15
SWAP2
// stack: C2D2_ , C2D2
PUSH 9
MULFP254
ADDFP254
// stack: 9C2D2_ + C2D2
// make CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0
DUP12
DUP10
MULFP254
DUP5
DUP10
MULFP254
ADDFP254
DUP4
DUP9
MULFP254
ADDFP254
DUP3
DUP8
MULFP254
ADDFP254
// stack: CD01_ , 9C2D2_ + C2D2
ADDFP254
// stack: e1_ = CD01_ + 9C2D2_ + C2D2
// park e1_ in the slot of d2 and bring d2 to the top
SWAP15
// e2_
// the remaining inputs are multiplied in place; beneath them lie e0, e0_, e1, e1_, e2, retdest
// stack: d2, d1_, d1, d0_, d2_, c0, c0_, c1, c1_, c2, c2_, d0
SWAP7
MULFP254
// stack: c1d1_, d1, d0_, d2_, c0, c0_, d2, c1_, c2, c2_, d0
SWAP7
MULFP254
// stack: c1_d1, d0_, d2_, c0, c0_, d2, c1d1_, c2, c2_, d0
SWAP7
MULFP254
// stack: c2d0_, d2_, c0, c0_, d2, c1d1_, c1_d1 , c2_, d0
SWAP2
MULFP254
// stack: c0d2_ , c2d0_, c0_, d2, c1d1_, c1_d1 , c2_, d0
ADDFP254
// stack: c0d2_ + c2d0_, c0_, d2, c1d1_, c1_d1 , c2_, d0
SWAP2
MULFP254
// stack: c0_d2 , c0d2_ + c2d0_ , c1d1_ , c1_d1 , c2_, d0
ADDFP254
ADDFP254
ADDFP254
// stack: c0_d2 + c0d2_ + c2d0_ + c1d1_ + c1_d1 , c2_, d0
SWAP2
MULFP254
ADDFP254
// stack: e2_ = c2_d0 + c0_d2 + c0d2_ + c2d0_ + c1d1_ + c1_d1
// exchange e2_ with retdest, which lies beneath e0, e0_, e1, e1_, e2
SWAP6
// stack: retdest, e0, e0_, e1, e1_, e2, e2_
JUMP

View File

@ -237,9 +237,9 @@ impl<'a> Interpreter<'a> {
0x09 => self.run_mulmod(), // "MULMOD",
0x0a => self.run_exp(), // "EXP",
0x0b => todo!(), // "SIGNEXTEND",
0x0c => todo!(), // "ADDFP254",
0x0d => todo!(), // "MULFP254",
0x0e => todo!(), // "SUBFP254",
0x0c => self.run_addfp254(), // "ADDFP254",
0x0d => self.run_mulfp254(), // "MULFP254",
0x0e => self.run_subfp254(), // "SUBFP254",
0x10 => self.run_lt(), // "LT",
0x11 => self.run_gt(), // "GT",
0x12 => todo!(), // "SLT",
@ -370,6 +370,27 @@ impl<'a> Interpreter<'a> {
self.push(x.overflowing_sub(y).0);
}
// TODO: 107 is hardcoded as a dummy prime for testing
// should be changed to the proper implementation prime
/// Executes `ADDFP254`: pops two operands and pushes their sum modulo the
/// (placeholder) prime 107.
///
/// Both operands are reduced before they are added. The stack can hold
/// arbitrary 256-bit values, and adding two unreduced operands could exceed
/// the range of `U256`.
fn run_addfp254(&mut self) {
    let x = self.pop() % 107;
    let y = self.pop() % 107;
    self.push((x + y) % 107);
}
/// Executes `MULFP254`: pops two operands and pushes their product modulo
/// the (placeholder) prime 107.
///
/// The product is formed at double width with `full_mul`, so it cannot
/// overflow before the reduction.
fn run_mulfp254(&mut self) {
    let lhs = self.pop();
    let rhs = self.pop();
    let reduced = lhs.full_mul(rhs) % 107;
    // The remainder is below 107, so narrowing back to `U256` cannot fail.
    self.push(U256::try_from(reduced).unwrap());
}
/// Executes `SUBFP254`: pops `x` (top of stack) and then `y`, and pushes
/// `x - y` modulo the (placeholder) prime 107.
///
/// Both operands are reduced first. That keeps `107 + x - y` within
/// `[1, 213]`, so the expression can neither underflow (when `y > x + 107`)
/// nor overflow (when `x` is close to the `U256` maximum).
fn run_subfp254(&mut self) {
    let x = self.pop() % 107;
    let y = self.pop() % 107;
    self.push((U256::from(107) + x - y) % 107);
}
fn run_div(&mut self) {
let x = self.pop();
let y = self.pop();

View File

@ -0,0 +1,203 @@
use anyhow::Result;
use ethereum_types::U256;
use rand::{thread_rng, Rng};
use crate::cpu::kernel::aggregator::combined_kernel;
use crate::cpu::kernel::interpreter::run_with_kernel;
// TODO: 107 is hardcoded as a dummy prime for testing
// should be changed to the proper implementation prime
// once the run_{add, mul, sub}fp254 fns are implemented
const P254: u32 = 107;
/// Returns `(x + y) mod P254`.
///
/// The sum is formed in `u64`, so it cannot overflow even when the operands
/// are not already reduced or when `P254` is replaced by a larger modulus.
fn add_fp(x: u32, y: u32) -> u32 {
    let sum = u64::from(x) + u64::from(y);
    // Lossless narrowing: the remainder is strictly below `P254`, a `u32`.
    (sum % u64::from(P254)) as u32
}
/// Returns `(x + y + z) mod P254`.
///
/// The sum is formed in `u64`, so it cannot overflow even when the operands
/// are not already reduced or when `P254` is replaced by a larger modulus.
fn add3_fp(x: u32, y: u32, z: u32) -> u32 {
    let sum = u64::from(x) + u64::from(y) + u64::from(z);
    // Lossless narrowing: the remainder is strictly below `P254`, a `u32`.
    (sum % u64::from(P254)) as u32
}
/// Returns `(x * y) mod P254`.
///
/// The product is formed in `u64`: a `u32` product overflows as soon as both
/// operands exceed 16 bits, which would happen with unreduced inputs or a
/// larger modulus than the current placeholder.
fn mul_fp(x: u32, y: u32) -> u32 {
    let product = u64::from(x) * u64::from(y);
    // Lossless narrowing: the remainder is strictly below `P254`, a `u32`.
    (product % u64::from(P254)) as u32
}
fn sub_fp(x: u32, y: u32) -> u32 {
(P254 + x - y) % P254
}
/// Adds two Fp2 elements component-wise; each element is `[real, imaginary]`.
fn add_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
    let re = add_fp(a[0], b[0]);
    let im = add_fp(a[1], b[1]);
    [re, im]
}
/// Adds three Fp2 elements component-wise; each element is `[real, imaginary]`.
fn add3_fp2(a: [u32; 2], b: [u32; 2], c: [u32; 2]) -> [u32; 2] {
    let re = add3_fp(a[0], b[0], c[0]);
    let im = add3_fp(a[1], b[1], c[1]);
    [re, im]
}
// fn sub_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
// let [a, a_] = a;
// let [b, b_] = b;
// [sub_fp(a, b), sub_fp(a_, b_)]
// }
/// Multiplies two Fp2 elements, using `i^2 = -1`:
/// `(x + x_ i)(y + y_ i) = (x y - x_ y_) + (x y_ + x_ y) i`.
fn mul_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
    let (x, x_) = (a[0], a[1]);
    let (y, y_) = (b[0], b[1]);
    let re = sub_fp(mul_fp(x, y), mul_fp(x_, y_));
    let im = add_fp(mul_fp(x, y_), mul_fp(x_, y));
    [re, im]
}
/// Multiplies an Fp2 element by `9 + i`:
/// `(x + y i)(9 + i) = (9x - y) + (x + 9y) i`.
fn i9(a: [u32; 2]) -> [u32; 2] {
    let (x, y) = (a[0], a[1]);
    let re = sub_fp(mul_fp(9, x), y);
    let im = add_fp(x, mul_fp(9, y));
    [re, im]
}
// fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
// let [c0, c1, c2] = c;
// let [d0, d1, d2] = d;
// let e0 = add_fp2(c0, d0);
// let e1 = add_fp2(c1, d1);
// let e2 = add_fp2(c2, d2);
// [e0, e1, e2]
// }
// fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
// let [c0, c1, c2] = c;
// let [d0, d1, d2] = d;
// let e0 = sub_fp2(c0, d0);
// let e1 = sub_fp2(c1, d1);
// let e2 = sub_fp2(c2, d2);
// [e0, e1, e2]
// }
/// Reference multiplication of two Fp6 elements `c0 + c1 t + c2 t^2` with Fp2
/// coefficients. Terms of degree three and four are folded back through `i9`:
///
/// * `e0 = c0 d0 + i9(c1 d2 + c2 d1)`
/// * `e1 = c0 d1 + c1 d0 + i9(c2 d2)`
/// * `e2 = c0 d2 + c1 d1 + c2 d0`
fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
    // `term(j, k)` is the Fp2 product of the j-th coefficient of `c` and the
    // k-th coefficient of `d`.
    let term = |j: usize, k: usize| mul_fp2(c[j], d[k]);

    let folded = add_fp2(term(1, 2), term(2, 1));
    let e0 = add_fp2(term(0, 0), i9(folded));
    let e1 = add3_fp2(term(0, 1), term(1, 0), i9(term(2, 2)));
    let e2 = add3_fp2(term(0, 2), term(1, 1), term(2, 0));
    [e0, e1, e2]
}
// fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
// let [c0, c1, c2] = c;
// [i9(c2), c0, c1]
// }
// fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
// let [f0, f1] = f;
// let [g0, g1] = g;
// let h0 = mul_fp6(f0, g0);
// let h1 = mul_fp6(f1, g1);
// let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
// [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
// }
/// Draws a random Fp6 element: three Fp2 coefficients whose components are
/// sampled from `0..P254` using the thread-local RNG.
fn gen_fp6() -> [[u32; 2]; 3] {
    let mut rng = thread_rng();
    let mut coeffs = [[0u32; 2]; 3];
    // Fill in row-major order, one draw per component.
    for pair in coeffs.iter_mut() {
        for component in pair.iter_mut() {
            *component = rng.gen_range(0..P254);
        }
    }
    coeffs
}
/// Converts `xs` to `U256` words in reverse order, so that the first element
/// of `xs` becomes the last element of the returned vector.
fn as_stack(xs: Vec<u32>) -> Vec<U256> {
    let mut words: Vec<U256> = xs.into_iter().map(U256::from).collect();
    words.reverse();
    words
}
/// Runs the kernel routine `mul_fp6` on two random Fp6 elements and checks the
/// resulting stack against the Rust reference implementation `mul_fp6`.
#[test]
fn test_fp6() -> Result<()> {
    let c = gen_fp6();
    let d = gen_fp6();

    // Input layout: the six words of c, the six words of d, then the return address.
    let input: Vec<u32> = c
        .into_iter()
        .chain(d)
        .flatten()
        .chain([0xdeadbeef])
        .collect();

    let kernel = combined_kernel();
    let entry = kernel.global_labels["mul_fp6"];
    let interpreter = run_with_kernel(&kernel, entry, as_stack(input))?;
    let final_stack: Vec<U256> = interpreter.stack().to_vec();

    let expected = as_stack(mul_fp6(c, d).into_iter().flatten().collect());
    assert_eq!(final_stack, expected);

    Ok(())
}
// fn make_initial_stack(
// f0: [[u32; 2]; 3],
// f1: [[u32; 2]; 3],
// g0: [[u32; 2]; 3],
// g1: [[u32; 2]; 3],
// ) -> Vec<U256> {
// // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out
// let f0: Vec<u32> = f0.into_iter().flatten().collect();
// let f1: Vec<u32> = f1.into_iter().flatten().collect();
// let g0: Vec<u32> = g0.into_iter().flatten().collect();
// let g1: Vec<u32> = g1.into_iter().flatten().collect();
// let mut input = f0;
// input.extend(vec![0]);
// input.extend(f1);
// input.extend(g0);
// input.extend(vec![12]);
// input.extend(g1);
// input.extend(vec![12, 24, 0, 24]);
// as_stack(input)
// }
// #[test]
// fn test_fp12() -> Result<()> {
// let f0 = gen_fp6();
// let f1 = gen_fp6();
// let g0 = gen_fp6();
// let g1 = gen_fp6();
// let kernel = combined_kernel();
// let initial_offset = kernel.global_labels["test_mul_Fp12"];
// let initial_stack: Vec<U256> = make_initial_stack(f0, f1, g0, g1);
// let final_stack: Vec<U256> = run_with_kernel(&kernel, initial_offset, initial_stack)?
// .stack()
// .to_vec();
// let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
// .into_iter()
// .flatten()
// .flatten()
// .collect();
// output.extend(vec![24]);
// let expected = as_stack(output);
// assert_eq!(final_stack, expected);
// Ok(())
// }

View File

@ -2,6 +2,7 @@ mod core;
mod curve_ops;
mod ecrecover;
mod exp;
mod fields;
mod hash;
mod mpt;
mod packing;