diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 8fe49739..194a93c8 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -32,6 +32,9 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/secp256k1/lift_x.asm"), include_str!("asm/curve/secp256k1/moddiv.asm"), include_str!("asm/exp.asm"), + include_str!("asm/fields/fp6_macros.asm"), + include_str!("asm/fields/fp6_mul.asm"), + include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), include_str!("asm/main.asm"), include_str!("asm/memory/core.asm"), diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm new file mode 100644 index 00000000..2f4b9024 --- /dev/null +++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm @@ -0,0 +1,166 @@ +/// Note: uncomment this to test + +/// global test_mul_Fp12: +/// // stack: f, in0 , f', g, in1 , g', in1, out, in0, out +/// DUP7 +/// // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0, out +/// %store_fp6 +/// // stack: in0 , f', g, in1 , g', in1, out, in0, out +/// %add_const(6) +/// // stack: in0', f', g, in1 , g', in1, out, in0, out +/// %store_fp6 +/// // stack: g, in1 , g', in1, out, in0, out +/// DUP7 +/// // stack: in1, g, in1 , g', in1, out, in0, out +/// %store_fp6 +/// // stack: in1 , g', in1, out, in0, out +/// %add_const(6) +/// // stack: in1', g', in1, out, in0, out +/// %store_fp6 +/// // stack: in1, out, in0, out +/// PUSH ret_stack +/// // stack: ret_stack, in1, out, in0, out +/// SWAP3 +/// // stack: in0, in1, out, ret_stack, out +/// %jump(mul_Fp12) +/// ret_stack: +/// // stack: out +/// DUP1 %add_const(6) +/// // stack: out', out +/// %load_fp6 +/// // stack: h', out +/// DUP7 +/// // stack: out, h', out +/// %load_fp6 +/// // stack: h, h', out +/// %jump(0xdeadbeef) + + +/// fp6 functions: +/// fn | num | ops | cost +/// ------------------------- +/// load | 8 | 40 | 320 +/// store | 5 | 40 | 200 +/// dup | 5 | 6 | 30 
+/// swap  | 4   | 16  | 64
+/// add   | 4   | 16  | 64
+/// subr  | 1   | 17  | 17
+/// mul   | 3   | 157 | 471
+/// i9    | 1   | 9   | 9
+///
+/// lone stack operations:
+/// op    | num
+/// ------------
+/// ADD   | 3
+/// SWAP  | 2
+/// DUP   | 6
+/// PUSH  | 6
+/// POP   | 2
+/// JUMP  | 1
+///
+/// TOTAL: 1196
+
+/// inputs:
+///     F = f + f'z
+///     G = g + g'z
+///
+/// output:
+///     H = h + h'z = FG
+///
+///     h  = fg + sh(f'g')   (sh is the shifted form written by %store_fp6_sh)
+///     h' = (f+f')(g+g') - fg - f'g'
+///
+/// memory pointers [ind' = ind+6]
+///     {in0: f, in0': f', in1: g, in1': g', out: h, out': h'}
+///
+/// f, f', g, g' consist of six elements on the stack
+
+global mul_Fp12:
+    // stack: in0, in1, out, retdest   (retdest stays below out and is omitted from the comments until the end)
+    DUP1 %add_const(6)
+    // stack: in0', in0, in1, out
+    %load_fp6
+    // stack: f', in0, in1, out
+    DUP8 %add_const(6)
+    // stack: in1', f', in0, in1, out
+    %load_fp6
+    // stack: g', f', in0, in1, out
+    PUSH ret_1
+    // stack: ret_1, g', f', in0, in1, out
+    %dup_fp6_7
+    // stack: f', ret_1, g', f', in0, in1, out
+    %dup_fp6_7
+    // stack: g', f', ret_1, g', f', in0, in1, out
+    %jump(mul_fp6)
+ret_1:
+    // stack: f'g', g' , f', in0, in1, out
+    %dup_fp6_0
+    // stack: f'g', f'g', g' , f', in0, in1, out
+    %store_fp6_sh(100)
+    // stack: f'g', g' , f', in0, in1, out {100: sh(f'g')}
+    %store_fp6(106)
+    // stack: g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    DUP13
+    // stack: in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    DUP15
+    // stack: in1, in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %load_fp6
+    // stack: g , in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %swap_fp6_hole
+    // stack: g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %dup_fp6_7
+    // stack: g,g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %add_fp6
+    // stack: g+g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %swap_fp6_hole
+    // stack: g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    PUSH ret_2
+    // stack: ret_2, g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    SWAP7
+    // stack: in0, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %load_fp6
+    // stack: f, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %jump(mul_fp6)
+ret_2:
+    // stack: fg, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    %store_fp6(112)
+    // stack: g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %swap_fp6
+    // stack: f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    PUSH ret_3
+    // stack: ret_3, f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    SWAP13
+    // stack: in0, f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %load_fp6
+    // stack: f,f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %add_fp6
+    // stack: f+f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %jump(mul_fp6)
+ret_3:
+    // stack: (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %load_fp6(112)
+    // stack: fg, (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %swap_fp6
+    // stack: (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %dup_fp6_6
+    // stack: fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %load_fp6(106)
+    // stack: f'g',fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %add_fp6
+    // stack: f'g'+fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %subr_fp6
+    // stack: (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    DUP14 %add_const(6)
+    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %store_fp6
+    // stack: fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %load_fp6(100)
+    // stack: sh(f'g') , fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %add_fp6
+    // stack: sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    DUP8
+    // stack: out, sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    %store_fp6
+    // stack: in1, out, retdest {100: sh(f'g'), 106: f'g', 112: fg}
+    %pop2
+    JUMP
diff --git
a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm b/evm/src/cpu/kernel/asm/fields/fp6_macros.asm
new file mode 100644
index 00000000..b575c234
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/fields/fp6_macros.asm
@@ -0,0 +1,314 @@
+// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
+%macro load_fp6
+    // stack:       ptr
+    DUP1 %add_const(4)
+    // stack: ind4, ptr
+    %mload_kernel_general
+    // stack: x4, ptr
+    DUP2 %add_const(3)
+    // stack: ind3, x4, ptr
+    %mload_kernel_general
+    // stack: x3, x4, ptr
+    DUP3 %add_const(2)
+    // stack: ind2, x3, x4, ptr
+    %mload_kernel_general
+    // stack: x2, x3, x4, ptr
+    DUP4 %add_const(1)
+    // stack: ind1, x2, x3, x4, ptr
+    %mload_kernel_general
+    // stack: x1, x2, x3, x4, ptr
+    DUP5 %add_const(5)
+    // stack: ind5, x1, x2, x3, x4, ptr
+    %mload_kernel_general
+    // stack: x5, x1, x2, x3, x4, ptr
+    SWAP5
+    // stack: ind0, x1, x2, x3, x4, x5
+    %mload_kernel_general
+    // stack: x0, x1, x2, x3, x4, x5
+%endmacro
+
+// cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
+%macro load_fp6(ptr)
+    // stack:
+    PUSH $ptr %add_const(5)
+    // stack: ind5
+    %mload_kernel_general
+    // stack: x5
+    PUSH $ptr %add_const(4)
+    // stack: ind4, x5
+    %mload_kernel_general
+    // stack: x4, x5
+    PUSH $ptr %add_const(3)
+    // stack: ind3, x4, x5
+    %mload_kernel_general
+    // stack: x3, x4, x5
+    PUSH $ptr %add_const(2)
+    // stack: ind2, x3, x4, x5
+    %mload_kernel_general
+    // stack: x2, x3, x4, x5
+    PUSH $ptr %add_const(1)
+    // stack: ind1, x2, x3, x4, x5
+    %mload_kernel_general
+    // stack: x1, x2, x3, x4, x5
+    PUSH $ptr
+    // stack: ind0, x1, x2, x3, x4, x5
+    %mload_kernel_general
+    // stack: x0, x1, x2, x3, x4, x5
+%endmacro
+
+// cost: 6 stores + 6 swaps/dups + 5 adds = 6*4 + 6*1 + 5*2 = 40
+%macro store_fp6
+    // stack: ptr, x0, x1, x2, x3, x4 , x5
+    SWAP5
+    // stack: x4, x0, x1, x2, x3, ptr, x5
+    DUP6 %add_const(4)
+    // stack: ind4, x4, x0, x1, x2, x3, ptr, x5
+    %mstore_kernel_general
+    // stack: x0, x1, x2, x3, ptr, x5
+    DUP5
+    // stack: ind0, x0, x1, x2, x3, ptr, x5
+    %mstore_kernel_general
+    // stack: x1, x2, x3, ptr, x5
+    DUP4 %add_const(1)
+    // stack: ind1, x1, x2, x3, ptr, x5
+    %mstore_kernel_general
+    // stack: x2, x3, ptr, x5
+    DUP3 %add_const(2)
+    // stack: ind2, x2, x3, ptr, x5
+    %mstore_kernel_general
+    // stack: x3, ptr, x5
+    DUP2 %add_const(3)
+    // stack: ind3, x3, ptr, x5
+    %mstore_kernel_general
+    // stack: ptr, x5
+    %add_const(5)
+    // stack: ind5, x5
+    %mstore_kernel_general
+    // stack:
+%endmacro
+
+// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
+%macro store_fp6(ptr)
+    // stack: x0, x1, x2, x3, x4, x5
+    PUSH $ptr
+    // stack: ind0, x0, x1, x2, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x1, x2, x3, x4, x5
+    PUSH $ptr %add_const(1)
+    // stack: ind1, x1, x2, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x2, x3, x4, x5
+    PUSH $ptr %add_const(2)
+    // stack: ind2, x2, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x3, x4, x5
+    PUSH $ptr %add_const(3)
+    // stack: ind3, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x4, x5
+    PUSH $ptr %add_const(4)
+    // stack: ind4, x4, x5
+    %mstore_kernel_general
+    // stack: x5
+    PUSH $ptr %add_const(5)
+    // stack: ind5, x5
+    %mstore_kernel_general
+    // stack:
+%endmacro
+
+// cost: store (40) + i9 (9) = 49; writes x0..x3 to ptr+2..ptr+5 and i9(x4, x5) to ptr, ptr+1
+%macro store_fp6_sh(ptr)
+    // stack: x0, x1, x2, x3, x4, x5
+    PUSH $ptr %add_const(2)
+    // stack: ind2, x0, x1, x2, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x1, x2, x3, x4, x5
+    PUSH $ptr %add_const(3)
+    // stack: ind3, x1, x2, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x2, x3, x4, x5
+    PUSH $ptr %add_const(4)
+    // stack: ind4, x2, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x3, x4, x5
+    PUSH $ptr %add_const(5)
+    // stack: ind5, x3, x4, x5
+    %mstore_kernel_general
+    // stack: x4, x5
+    %i9
+    // stack: y5, y4
+    PUSH $ptr %add_const(1)
+    // stack: ind1, y5, y4
+    %mstore_kernel_general
+    // stack: y4
+    PUSH $ptr
+    // stack: ind0, y4
+    %mstore_kernel_general
+    // stack:
+%endmacro
+
+// cost: 9; maps a + bi to (9a - b) + (a + 9bi), returned imaginary part first: the stack ends as y, x for the output x + yi
+%macro i9
+    // stack:          a , b
+    DUP2
+    // stack:      b,  a , b
+    DUP2
+    // stack:  a , b,  a , b
+    PUSH 9 MULFP254
+    // stack: 9a , b,  a , b
+    SUBFP254
+    // stack: 9a - b,  a , b
+    SWAP2
+    // stack:  b ,  a, 9a - b
+    PUSH 9 MULFP254
+    // stack: 9b ,  a, 9a - b
+    ADDFP254
+    // stack: 9b + a, 9a - b
+%endmacro
+
+// cost: 6
+%macro dup_fp6_0
+    // stack: f: 6
+    DUP6
+    DUP6
+    DUP6
+    DUP6
+    DUP6
+    DUP6
+    // stack: f: 6, f: 6
+%endmacro
+
+// cost: 6
+%macro dup_fp6_6
+    // stack: f: 6, g: 6
+    DUP12
+    DUP12
+    DUP12
+    DUP12
+    DUP12
+    DUP12
+    // stack: g: 6, f: 6, g: 6
+%endmacro
+
+// cost: 6
+%macro dup_fp6_7
+    // stack: f: 6, X, g: 6
+    DUP13
+    DUP13
+    DUP13
+    DUP13
+    DUP13
+    DUP13
+    // stack: g: 6, f: 6, X, g: 6
+%endmacro
+
+// cost: 16
+%macro swap_fp6
+    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
+    SWAP6
+    // stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5
+    SWAP1
+    SWAP7
+    SWAP1
+    // stack: g0, g1, f2, f3, f4, f5, f0, f1, g2, g3, g4, g5
+    SWAP2
+    SWAP8
+    SWAP2
+    // stack: g0, g1, g2, f3, f4, f5, f0, f1, f2, g3, g4, g5
+    SWAP3
+    SWAP9
+    SWAP3
+    // stack: g0, g1, g2, g3, f4, f5, f0, f1, f2, f3, g4, g5
+    SWAP4
+    SWAP10
+    SWAP4
+    // stack: g0, g1, g2, g3, g4, f5, f0, f1, f2, f3, f4, g5
+    SWAP5
+    SWAP11
+    SWAP5
+    // stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5
+%endmacro
+
+// cost: 16
+// swap two fp6 elements with a stack term separating them
+// (f: 6, x, g: 6) -> (g: 6, x, f: 6)
+%macro swap_fp6_hole
+    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
+    SWAP7
+    // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
+    SWAP1
+    SWAP8
+    SWAP1
+    // stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5
+    SWAP2
+    SWAP9
+    SWAP2
+    // stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5
+    SWAP3
+    SWAP10
+    SWAP3
+    // stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5
+    SWAP4
+    SWAP11
+    SWAP4
+    // stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5
+    SWAP5
+    SWAP12
+    SWAP5
+    // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
+%endmacro
+
+// cost: 16
+%macro add_fp6
+    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
+    SWAP7
+    ADDFP254
+    SWAP6
+    // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
+    SWAP7
+    ADDFP254
+    SWAP6
+    // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
+    SWAP7
+    ADDFP254
+    SWAP6
+    // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
+    SWAP7
+    ADDFP254
+    SWAP6
+    // stack: f0, f5, g0, h1, h2, h3, h4, g5
+    SWAP7
+    ADDFP254
+    SWAP6
+    // stack: f0, g0, h1, h2, h3, h4, h5
+    ADDFP254
+    // stack: h0, h1, h2, h3, h4, h5
+%endmacro
+
+// *reversed argument subtraction* (h = g - f for the stack f, g) cost: 17
+%macro subr_fp6
+    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
+    SWAP7
+    SUBFP254
+    SWAP6
+    // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
+    SWAP7
+    SUBFP254
+    SWAP6
+    // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
+    SWAP7
+    SUBFP254
+    SWAP6
+    // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
+    SWAP7
+    SUBFP254
+    SWAP6
+    // stack: f0, f5, g0, h1, h2, h3, h4, g5
+    SWAP7
+    SUBFP254
+    SWAP6
+    // stack: f0, g0, h1, h2, h3, h4, h5
+    SWAP1
+    SUBFP254
+    // stack: h0, h1, h2, h3, h4, h5
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
new file mode 100644
index 00000000..0fc6dbdf
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
@@ -0,0 +1,258 @@
+/// inputs:
+///     C = C0 + C1t + C2t^2
+///       = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2
+///
+///     D = D0 + D1t + D2t^2
+///       = (d0 + d0_i) + (d1 + d1_i)t + (d2 + d2_i)t^2
+///
+/// output:
+///     E = E0 + E1t + E2t^2 = CD
+///       = (e0 + e0_i) + (e1 + e1_i)t + (e2 + e2_i)t^2
+///
+/// initial stack: c0, c0_, c1, c1_, c2, c2_, d0, d0_, d1, d1_, d2, d2_, retdest
+/// final stack: e0, e0_, e1, e1_, e2, e2_
+
+/// computations:
+///
+/// E0 = C0D0 + i9(C1D2 + C2D1)
+///
+///     C0D0 = (c0d0 - c0_d0_) + (c0d0_ + c0_d0)i
+///
+///     C1D2 = (c1d2 - c1_d2_) + (c1d2_ + c1_d2)i
+///     C2D1 = (c2d1 - c2_d1_) + (c2d1_ + c2_d1)i
+///
+///     CD12 = C1D2 + C2D1
+///          = (c1d2 + c2d1 - c1_d2_ - c2_d1_) +
(c1d2_ + c1_d2 + c2d1_ + c2_d1)i
+///
+///     i9(CD12) = (9CD12 - CD12_) + (CD12 + 9CD12_)i
+///
+///     e0  = 9CD12 - CD12_ + C0D0
+///     e0_ = 9CD12_ + CD12 + C0D0_
+///
+///
+/// E1 = C0D1 + C1D0 + i9(C2D2)
+///
+///     C0D1 = (c0d1 - c0_d1_) + (c0d1_ + c0_d1)i
+///     C1D0 = (c1d0 - c1_d0_) + (c1d0_ + c1_d0)i
+///
+///     CD01  = c0d1 + c1d0 - (c0_d1_ + c1_d0_)
+///     CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0
+///
+///     C2D2 = (c2d2 - c2_d2_) + (c2d2_ + c2_d2)i
+///     i9(C2D2) = (9C2D2 - C2D2_) + (C2D2 + 9C2D2_)i
+///
+///     e1  = 9C2D2 - C2D2_ + CD01
+///     e1_ = C2D2 + 9C2D2_ + CD01_
+///
+///
+/// E2 = C0D2 + C1D1 + C2D0
+///
+///     C0D2 = (c0d2 - c0_d2_) + (c0d2_ + c0_d2)i
+///     C1D1 = (c1d1 - c1_d1_) + (c1d1_ + c1_d1)i
+///     C2D0 = (c2d0 - c2_d0_) + (c2d0_ + c2_d0)i
+///
+///     e2  = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_)
+///     e2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0
+
+
+// cost: 157
+global mul_fp6:
+    // e2 (real part of E2)
+    // make c0_d2_ + c1_d1_ + c2_d0_
+    DUP8
+    DUP7
+    MULFP254
+    DUP11
+    DUP6
+    MULFP254
+    ADDFP254
+    DUP13
+    DUP4
+    MULFP254
+    ADDFP254
+    // make c0d2 + c1d1 + c2d0
+    DUP12
+    DUP3
+    MULFP254
+    DUP11
+    DUP6
+    MULFP254
+    ADDFP254
+    DUP9
+    DUP8
+    MULFP254
+    ADDFP254
+    // stack: c0d2 + c1d1 + c2d0 , c0_d2_ + c1_d1_ + c2_d0_
+    SUBFP254
+    // stack: e2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_); the SWAP12 below parks it in d2_'s slot and brings d2_ to the top
+    SWAP12
+
+    // e0, e0_ (real and imaginary parts of E0)
+    // make CD12_ = c1d2_ + c1_d2 + c2d1_ + c2_d1
+    DUP1
+    DUP5
+    MULFP254
+    DUP13
+    DUP7
+    MULFP254
+    ADDFP254
+    DUP12
+    DUP8
+    MULFP254
+    ADDFP254
+    DUP11
+    DUP9
+    MULFP254
+    ADDFP254
+    // make C0D0_ = c0d0_ + c0_d0
+    DUP10
+    DUP4
+    MULFP254
+    DUP10
+    DUP6
+    MULFP254
+    ADDFP254
+    // make CD12 = c1d2 + c2d1 - c1_d2_ - c2_d1_
+    DUP13
+    DUP10
+    MULFP254
+    DUP4
+    DUP9
+    MULFP254
+    ADDFP254
+    DUP15
+    DUP8
+    MULFP254
+    DUP14
+    DUP11
+    MULFP254
+    ADDFP254
+    SUBFP254
+    // make C0D0 = c0d0 - c0_d0_
+    DUP12
+    DUP7
+    MULFP254
+    DUP12
+    DUP7
+    MULFP254
+    SUBFP254
+    // stack: C0D0 , CD12 , C0D0_, CD12_
+    DUP4
+    DUP3
+    // stack: CD12 , CD12_ , C0D0 , CD12 , C0D0_, CD12_
+    PUSH 9
+    MULFP254
+    SUBFP254
+    ADDFP254
+    // stack: e0 = 9CD12 - CD12_ + C0D0 , CD12 , C0D0_, CD12_
+    SWAP12
+    SWAP3
+    // stack: CD12_ , CD12 , C0D0_
+    PUSH 9
+    MULFP254
+    ADDFP254
+    ADDFP254
+    // stack: e0_ = 9CD12_ + CD12 + C0D0_
+    SWAP11
+
+    // e1, e1_ (real and imaginary parts of E1)
+    // make C2D2_ = c2d2_ + c2_d2
+    DUP14
+    DUP10
+    MULFP254
+    DUP4
+    DUP10
+    MULFP254
+    ADDFP254
+    // make C2D2 = c2d2 - c2_d2_
+    DUP4
+    DUP11
+    MULFP254
+    DUP16
+    DUP11
+    MULFP254
+    SUBFP254
+    // make CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_)
+    DUP4
+    DUP10
+    MULFP254
+    DUP16
+    DUP9
+    MULFP254
+    ADDFP254
+    DUP13
+    DUP10
+    MULFP254
+    DUP5
+    DUP9
+    MULFP254
+    ADDFP254
+    SUBFP254
+    // stack: CD01, C2D2, C2D2_
+    DUP3
+    DUP3
+    // stack: C2D2 , C2D2_ , CD01, C2D2, C2D2_
+    PUSH 9
+    MULFP254
+    SUBFP254
+    ADDFP254
+    // stack: e1 = 9C2D2 - C2D2_ + CD01, C2D2, C2D2_
+    SWAP15
+    SWAP2
+    // stack: C2D2_ , C2D2
+    PUSH 9
+    MULFP254
+    ADDFP254
+    // stack: 9C2D2_ + C2D2
+    // make CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0
+    DUP12
+    DUP10
+    MULFP254
+    DUP5
+    DUP10
+    MULFP254
+    ADDFP254
+    DUP4
+    DUP9
+    MULFP254
+    ADDFP254
+    DUP3
+    DUP8
+    MULFP254
+    ADDFP254
+    // stack: CD01_ , 9C2D2_ + C2D2
+    ADDFP254
+    // stack: e1_ = CD01_ + 9C2D2_ + C2D2
+    SWAP15
+
+    // e2_ (imaginary part of E2)
+    // stack: d2, d1_, d1, d0_, d2_, c0, c0_, c1, c1_, c2, c2_, d0
+    SWAP7
+    MULFP254
+    // stack: c1d1_, d1, d0_, d2_, c0, c0_, d2, c1_, c2, c2_, d0
+    SWAP7
+    MULFP254
+    // stack: c1_d1, d0_, d2_, c0, c0_, d2, c1d1_, c2, c2_, d0
+    SWAP7
+    MULFP254
+    // stack: c2d0_, d2_, c0, c0_, d2, c1d1_, c1_d1 , c2_, d0
+    SWAP2
+    MULFP254
+    // stack: c0d2_ , c2d0_, c0_, d2, c1d1_, c1_d1 , c2_, d0
+    ADDFP254
+    // stack: c0d2_ + c2d0_, c0_, d2, c1d1_, c1_d1 , c2_, d0
+    SWAP2
+    MULFP254
+    // stack: c0_d2 , c0d2_ + c2d0_ , c1d1_ , c1_d1 , c2_, d0
+    ADDFP254
+    ADDFP254
+    ADDFP254
+    // stack: c0_d2 + c0d2_ + c2d0_ + c1d1_ + c1_d1 , c2_, d0
+    SWAP2
+    MULFP254
+    ADDFP254
+    // stack: e2_ = c2_d0 + c0_d2 + c0d2_ + c2d0_ + c1d1_ + c1_d1
+    SWAP6
+
+    // stack: retdest, e0, e0_, e1, e1_, e2, e2_
+    JUMP
diff --git
a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 8057792b..fe95d04c 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -237,9 +237,9 @@ impl<'a> Interpreter<'a> {
             0x09 => self.run_mulmod(), // "MULMOD",
             0x0a => self.run_exp(), // "EXP",
             0x0b => todo!(), // "SIGNEXTEND",
-            0x0c => todo!(), // "ADDFP254",
-            0x0d => todo!(), // "MULFP254",
-            0x0e => todo!(), // "SUBFP254",
+            0x0c => self.run_addfp254(), // "ADDFP254",
+            0x0d => self.run_mulfp254(), // "MULFP254",
+            0x0e => self.run_subfp254(), // "SUBFP254",
             0x10 => self.run_lt(), // "LT",
             0x11 => self.run_gt(), // "GT",
             0x12 => todo!(), // "SLT",
@@ -370,6 +370,30 @@ impl<'a> Interpreter<'a> {
         self.push(x.overflowing_sub(y).0);
     }
 
+    // TODO: 107 is hardcoded as a dummy prime for testing
+    // should be changed to the proper implementation prime
+
+    /// Pops `x` then `y` and pushes `(x + y) mod 107`; operands are reduced first so the sum cannot overflow.
+    fn run_addfp254(&mut self) {
+        let x = self.pop() % 107;
+        let y = self.pop() % 107;
+        self.push((x + y) % 107);
+    }
+
+    /// Pops `x` then `y` and pushes `(x * y) mod 107`, using the full-width product before reducing.
+    fn run_mulfp254(&mut self) {
+        let x = self.pop();
+        let y = self.pop();
+        self.push(U256::try_from(x.full_mul(y) % 107).unwrap());
+    }
+
+    /// Pops `x` then `y` and pushes `(x - y) mod 107`; operands are reduced first so `107 + x - y` cannot underflow.
+    fn run_subfp254(&mut self) {
+        let x = self.pop() % 107;
+        let y = self.pop() % 107;
+        self.push((U256::from(107) + x - y) % 107);
+    }
+
     fn run_div(&mut self) {
         let x = self.pop();
         let y = self.pop();
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
new file mode 100644
index 00000000..b8a38887
--- /dev/null
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -0,0 +1,203 @@
+use anyhow::Result;
+use ethereum_types::U256;
+use rand::{thread_rng, Rng};
+
+use crate::cpu::kernel::aggregator::combined_kernel;
+use crate::cpu::kernel::interpreter::run_with_kernel;
+
+// TODO: 107 is hardcoded as a dummy prime for testing
+// should be changed to the proper implementation prime
+// once the run_{add, mul, sub}fp254 fns are implemented
+const P254: u32 = 107;
+
+fn add_fp(x: u32, y: u32) -> u32 {
+    (x + y) % P254
+}
+
+fn add3_fp(x: u32, y: u32, z: u32) -> u32 {
+    (x + y + z) % P254
+}
+
+fn mul_fp(x: u32, y: u32) -> u32 {
+    (x * y) % P254
+}
+
+fn sub_fp(x: u32, y: u32) -> u32 {
+    (P254 + x - y) % P254
+}
+
+fn add_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    [add_fp(a, b), add_fp(a_, b_)]
+}
+
+fn add3_fp2(a: [u32; 2], b: [u32; 2], c: [u32; 2]) -> [u32; 2] {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    let [c, c_] = c;
+    [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
+}
+
+// fn sub_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
+//     let [a, a_] = a;
+//     let [b, b_] = b;
+//     [sub_fp(a, b), sub_fp(a_, b_)]
+// }
+
+fn mul_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    [
+        sub_fp(mul_fp(a, b), mul_fp(a_, b_)),
+        add_fp(mul_fp(a, b_), mul_fp(a_, b)),
+    ]
+}
+
+fn i9(a: [u32; 2]) -> [u32; 2] {
+    let [a, a_] = a;
+    [sub_fp(mul_fp(9, a), a_), add_fp(a, mul_fp(9, a_))]
+}
+
+// fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+//     let [c0, c1, c2] = c;
+//     let [d0, d1, d2] = d;
+
+//     let e0 = add_fp2(c0, d0);
+//     let e1 = add_fp2(c1, d1);
+//     let e2 = add_fp2(c2, d2);
+//     [e0, e1, e2]
+// }
+
+// fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+//     let [c0, c1, c2] = c;
+//     let [d0, d1, d2] = d;
+
+//     let e0 = sub_fp2(c0, d0);
+//     let e1 = sub_fp2(c1, d1);
+//     let e2 = sub_fp2(c2, d2);
+//     [e0, e1, e2]
+// }
+
+fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+    let [c0, c1, c2] = c;
+    let [d0, d1, d2] = d;
+
+    let c0d0 = mul_fp2(c0, d0);
+    let c0d1 = mul_fp2(c0, d1);
+    let c0d2 = mul_fp2(c0, d2);
+    let c1d0 = mul_fp2(c1, d0);
+    let c1d1 = mul_fp2(c1, d1);
+    let c1d2 = mul_fp2(c1, d2);
+    let c2d0 = mul_fp2(c2, d0);
+    let c2d1 = mul_fp2(c2, d1);
+    let c2d2 = mul_fp2(c2, d2);
+    let cd12 = add_fp2(c1d2, c2d1);
+
+    [
+        add_fp2(c0d0, i9(cd12)),
+        add3_fp2(c0d1, c1d0, i9(c2d2)),
+        add3_fp2(c0d2, c1d1, c2d0),
+    ]
+}
+
+// fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+//     let [c0, c1, c2] = c;
+//     [i9(c2), c0, c1]
+// }
+
+// fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
+//     let [f0, f1] = f;
+//     let [g0, g1] = g;
+
+//     let h0 = mul_fp6(f0, g0);
+//     let h1 = mul_fp6(f1, g1);
+//     let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
+//     [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
+// }
+
+fn gen_fp6() -> [[u32; 2]; 3] {
+    let mut rng = thread_rng();
+    [
+        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
+        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
+        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
+    ]
+}
+
+fn as_stack(xs: Vec<u32>) -> Vec<U256> {
+    xs.iter().map(|&x| U256::from(x)).rev().collect()
+}
+
+#[test]
+fn test_fp6() -> Result<()> {
+    let c = gen_fp6();
+    let d = gen_fp6();
+
+    let mut input: Vec<u32> = [c, d].into_iter().flatten().flatten().collect();
+    input.push(0xdeadbeef);
+
+    let kernel = combined_kernel();
+    let initial_offset = kernel.global_labels["mul_fp6"];
+    let initial_stack: Vec<U256> = as_stack(input);
+    let final_stack: Vec<U256> = run_with_kernel(&kernel, initial_offset, initial_stack)?
+        .stack()
+        .to_vec();
+
+    let output: Vec<u32> = mul_fp6(c, d).into_iter().flatten().collect();
+    let expected = as_stack(output);
+
+    assert_eq!(final_stack, expected);
+
+    Ok(())
+}
+
+// fn make_initial_stack(
+//     f0: [[u32; 2]; 3],
+//     f1: [[u32; 2]; 3],
+//     g0: [[u32; 2]; 3],
+//     g1: [[u32; 2]; 3],
+// ) -> Vec<U256> {
+//     // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out
+//     let f0: Vec<u32> = f0.into_iter().flatten().collect();
+//     let f1: Vec<u32> = f1.into_iter().flatten().collect();
+//     let g0: Vec<u32> = g0.into_iter().flatten().collect();
+//     let g1: Vec<u32> = g1.into_iter().flatten().collect();
+
+//     let mut input = f0;
+//     input.extend(vec![0]);
+//     input.extend(f1);
+//     input.extend(g0);
+//     input.extend(vec![12]);
+//     input.extend(g1);
+//     input.extend(vec![12, 24, 0, 24]);
+
+//     as_stack(input)
+// }
+
+// #[test]
+// fn test_fp12() -> Result<()> {
+//     let f0 = gen_fp6();
+//     let f1 = gen_fp6();
+//     let g0 = gen_fp6();
+//     let g1 = gen_fp6();
+
+//     let kernel = combined_kernel();
+//     let initial_offset = kernel.global_labels["test_mul_Fp12"];
+//     let initial_stack: Vec<U256> = make_initial_stack(f0, f1, g0, g1);
+//     let final_stack: Vec<U256> = run_with_kernel(&kernel, initial_offset, initial_stack)?
+//         .stack()
+//         .to_vec();
+
+//     let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
+//         .into_iter()
+//         .flatten()
+//         .flatten()
+//         .collect();
+//     output.extend(vec![24]);
+//     let expected = as_stack(output);
+
+//     assert_eq!(final_stack, expected);
+
+//     Ok(())
+// }
diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index 9148d6a4..91f3f229 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -2,6 +2,7 @@ mod core;
 mod curve_ops;
 mod ecrecover;
 mod exp;
+mod fields;
 mod hash;
 mod mpt;
 mod packing;