diff --git a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm b/evm/src/cpu/kernel/asm/fields/field_macros.asm
similarity index 68%
rename from evm/src/cpu/kernel/asm/fields/fp6_macros.asm
rename to evm/src/cpu/kernel/asm/fields/field_macros.asm
index b575c234..02651a1b 100644
--- a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/field_macros.asm
@@ -1,3 +1,7 @@
+%macro offset_fp6
+    %add_const(6) // ptr -> ptr + 6: address of the second fp6 half of an fp12 value (ind' = ind + 6)
+%endmacro
+
 // cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp6
     // stack: ptr
@@ -87,6 +91,51 @@
     // stack:
 %endmacro
 
+// cost: 6 stores + 7 swaps/dups + 5 adds + 6 doubles = 6*4 + 7*1 + 5*2 + 6*2 = 53 (stores 2*x0..2*x5 at ptr..ptr+5)
+%macro store_fp6_double
+    // stack: ptr, x0, x1, x2, x3, x4, x5
+    SWAP6
+    // stack: x5, x0, x1, x2, x3, x4, ptr   (ptr is parked at the bottom and re-DUPed for every store)
+    PUSH 2 MULFP254
+    // stack: 2*x5, x0, x1, x2, x3, x4, ptr
+    DUP7 %add_const(5)
+    // stack: ind5, 2*x5, x0, x1, x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack: x0, x1, x2, x3, x4, ptr
+    PUSH 2 MULFP254
+    // stack: 2*x0, x1, x2, x3, x4, ptr
+    DUP6
+    // stack: ind0, 2*x0, x1, x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack: x1, x2, x3, x4, ptr
+    PUSH 2 MULFP254
+    // stack: 2*x1, x2, x3, x4, ptr
+    DUP5 %add_const(1)
+    // stack: ind1, 2*x1, x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack: x2, x3, x4, ptr
+    PUSH 2 MULFP254
+    // stack: 2*x2, x3, x4, ptr
+    DUP4 %add_const(2)
+    // stack: ind2, 2*x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack: x3, x4, ptr
+    PUSH 2 MULFP254
+    // stack: 2*x3, x4, ptr
+    DUP3 %add_const(3)
+    // stack: ind3, 2*x3, x4, ptr
+    %mstore_kernel_general
+    // stack: x4, ptr
+    PUSH 2 MULFP254
+    // stack: 2*x4, ptr
+    SWAP1
+    // stack: ptr, 2*x4   (last element: ptr is consumed instead of duplicated)
+    %add_const(4)
+    // stack: ind4, 2*x4
+    %mstore_kernel_general
+    // stack:
+%endmacro
+
 // cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro store_fp6(ptr)
     // stack: x0, x1, x2, x3, x4, x5
@@ -175,31 +224,55 @@
     DUP6
     DUP6
     DUP6
-    // stack: f: 6, g: 6
+    // stack: f: 6, f: 6
+%endmacro
+
+// cost: 6 (copy the fp6 element lying beneath two stack words to the top)
+%macro dup_fp6_2
+    // stack: X: 2, f: 6
+    DUP8
+    DUP8
+    DUP8
+    DUP8
+    DUP8
+    DUP8
+    // stack: f: 6, X: 2, f: 6
 %endmacro
 
 // cost: 6
 %macro dup_fp6_6
-    // stack: f: 6, g: 6
+    // stack: X: 6, f: 6
     DUP12
     DUP12
     DUP12
     DUP12
     DUP12
     DUP12
-    // stack: g: 6, f: 6, g: 6
+    // stack: f: 6, X: 6, f: 6
 %endmacro
 
 // cost: 6
 %macro dup_fp6_7
-    // stack: f: 6, g: 6
+    // stack: X: 7, f: 6
     DUP13
     DUP13
     DUP13
     DUP13
     DUP13
     DUP13
-    // stack: g: 6, f: 6, g: 6
+    // stack: f: 6, X: 7, f: 6
+%endmacro
+
+// cost: 6 (copy the fp6 element lying beneath eight stack words to the top)
+%macro dup_fp6_8
+    // stack: X: 8, f: 6
+    DUP14
+    DUP14
+    DUP14
+    DUP14
+    DUP14
+    DUP14
+    // stack: f: 6, X: 8, f: 6
 %endmacro
 
 // cost: 16
@@ -231,7 +304,7 @@
 
 // cost: 16
 // swap two fp6 elements with a stack term separating them
-// (f: 6, x, g: 6) -> (g: 6, x, f: 6)
+// (f: 6, X, g: 6) -> (g: 6, X, f: 6)
 %macro swap_fp6_hole
     // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
     SWAP7
@@ -258,6 +331,35 @@
     // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
 %endmacro
 
+// cost: 16
+// swap two fp6 elements with two stack terms separating them
+// (f: 6, X: 2, g: 6) -> (g: 6, X: 2, f: 6)
+%macro swap_fp6_hole_2
+    // stack: f0, f1, f2, f3, f4, f5, X: 2, g0, g1, g2, g3, g4, g5
+    SWAP8
+    // stack: g0, f1, f2, f3, f4, f5, X: 2, f0, g1, g2, g3, g4, g5
+    SWAP1
+    SWAP9
+    SWAP1
+    // stack: g0, g1, f2, f3, f4, f5, X: 2, f0, f1, g2, g3, g4, g5
+    SWAP2
+    SWAP10
+    SWAP2
+    // stack: g0, g1, g2, f3, f4, f5, X: 2, f0, f1, f2, g3, g4, g5
+    SWAP3
+    SWAP11
+    SWAP3
+    // stack: g0, g1, g2, g3, f4, f5, X: 2, f0, f1, f2, f3, g4, g5
+    SWAP4
+    SWAP12
+    SWAP4
+    // stack: g0, g1, g2, g3, g4, f5, X: 2, f0, f1, f2, f3, f4, g5
+    SWAP5
+    SWAP13
+    SWAP5
+    // stack: g0, g1, g2, g3, g4, g5, X: 2, f0, f1, f2, f3, f4, f5
+%endmacro
+
 // cost: 16
 %macro add_fp6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
@@ -285,6 +387,37 @@
     // stack: h0, h1, h2, h3, h4, h5
 %endmacro
 
+// cost: 18
+// add two fp6 elements with a to-be-popped stack term separating them
+// (f: 6, X, g: 6) -> (f + g: 6); in the stack comments below h_i = f_i + g_i
+%macro add_fp6_hole
+    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0, f2, f3, f4, f5, X, g0, h1, g2, g3, g4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0, f3, f4, f5, X, g0, h1, h2, g3, g4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0, f4, f5, X, g0, h1, h2, h3, g4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0, f5, X, g0, h1, h2, h3, h4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0, X, g0, h1, h2, h3, h4, h5
+    SWAP1
+    POP // discard the separating term X
+    ADDFP254
+    // stack: h0, h1, h2, h3, h4, h5
+%endmacro
+
 // *reversed argument subtraction* cost: 17
 %macro subr_fp6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 2f4b9024..253103da 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -1,31 +1,31 @@
 /// Note: uncomment this to test
 
-/// global test_mul_Fp12:
-///     // stack: f, in0 , f', g, in1 , g', in1, out, in0, out
+/// global test_mul_fp12:
+///     // stack: f, inA , f', g, inB , g', inB, out, inA, out
 ///     DUP7
-///     // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0, out
+///     // stack: inA, f, inA , f', g, inB , g', inB, out, inA, out
 ///     %store_fp6
-///     // stack: in0 , f', g, in1 , g', in1, out, in0, out
-///     %add_const(6)
-///     // stack: in0', f', g, in1 , g', in1, out, in0, out
+///     // stack: inA , f', g, inB , g', inB, out, inA, out
+///     %offset_fp6
+///     // stack: inA', f', g, inB , g', inB, out, inA, out
 ///     %store_fp6
-///     // stack: g, in1 , g', in1, out, in0, out
+///     // stack: g, inB , g', inB, out, inA, out
 ///     DUP7
-///     // stack: in1, g, in1 , g', in1, out, in0, out
+///     // stack: inB, g, inB , g', inB, out, inA, out
 ///     %store_fp6
-///     // stack: in1 , g', in1, out, in0, out
-///     %add_const(6)
-///     // stack: in1', g', in1, out, in0, out
+///     // stack: inB , g', inB, out, inA, out
+///     %offset_fp6
+///     // stack: inB', g', inB, out, inA, out
 ///     %store_fp6
-///     // stack: in1, out, in0, out
+///     // stack: inB, out,
inA, out
 ///     PUSH ret_stack
-///     // stack: ret_stack, in1, out, in0, out
+///     // stack: ret_stack, inB, out, inA, out
 ///     SWAP3
-///     // stack: in0, in1, out, ret_stack, out
-///     %jump(mul_Fp12)
+///     // stack: inA, inB, out, ret_stack, out
+///     %jump(mul_fp12)
 /// ret_stack:
 ///     // stack: out
-///     DUP1 %add_const(6)
+///     DUP1 %offset_fp6
 ///     // stack: out', out
 ///     %load_fp6
 ///     // stack: h', out
@@ -56,9 +56,9 @@
 /// DUP | 6
 /// PUSH | 6
 /// POP | 2
-/// JUMP | 1
+/// JUMP | 6
 ///
-/// TOTAL: 1196
+/// TOTAL: 1201
 
 /// inputs:
 /// F = f + f'z
@@ -71,96 +71,365 @@
 /// h' = (f+f')(g+g') - fg - f'g'
 ///
 /// memory pointers [ind' = ind+6]
-/// {in0: f, in0: f', in1: g, in1':g', out: h, out': h'}
+/// {inA: f, inA': f', inB: g, inB': g', out: h, out': h'}
 ///
 /// f, f' consist of six elements on the stack
 
-global mul_Fp12:
-    // stack: in0, in1, out
-    DUP1 %add_const(6)
-    // stack: in0', in0, in1, out
+global mul_fp12:
+    // stack: inA, inB, out   (the final JUMP returns to the word directly below out, cf. ret_stack in the test above)
+    DUP1 %offset_fp6
+    // stack: inA', inA, inB, out
     %load_fp6
-    // stack: f', in0, in1, out
-    DUP8 %add_const(6)
-    // stack: in1', f', in0, in1, out
+    // stack: f', inA, inB, out
+    DUP8 %offset_fp6
+    // stack: inB', f', inA, inB, out
     %load_fp6
-    // stack: g', f', in0, in1, out
+    // stack: g', f', inA, inB, out
     PUSH ret_1
-    // stack: ret_1, g', f', in0, in1, out
+    // stack: ret_1, g', f', inA, inB, out
     %dup_fp6_7
-    // stack: f', ret_1, g', f', in0, in1, out
+    // stack: f', ret_1, g', f', inA, inB, out
     %dup_fp6_7
-    // stack: g', f', ret_1, g', f', in0, in1, out
+    // stack: g', f', ret_1, g', f', inA, inB, out
     %jump(mul_fp6)
 ret_1:
-    // stack: f'g', g' , f', in0, in1, out
+    // stack: f'g', g' , f', inA, inB, out
     %dup_fp6_0
-    // stack: f'g', f'g', g' , f', in0, in1, out
-    %store_fp6_sh(100)
-    // stack: f'g', g' , f', in0, in1, out {100: sh(f'g')}
-    %store_fp6(106)
-    // stack: g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: f'g', f'g', g' , f', inA, inB, out
+    %store_fp6_sh(0)
+    // stack: f'g', g' , f', inA, inB, out {0: sh(f'g')}
+    %store_fp6(6)
+    // stack: g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     DUP13
-    // stack: in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: inA, g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     DUP15
-    // stack: in1, in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: inB, inA, g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     %load_fp6
-    // stack: g , in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: g , inA, g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     %swap_fp6_hole
-    // stack: g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: g', inA, g , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     %dup_fp6_7
-    // stack: g,g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: g,g', inA, g , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     %add_fp6
-    // stack: g+g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: g+g', inA, g , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     %swap_fp6_hole
-    // stack: g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: g, inA, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     PUSH ret_2
-    // stack: ret_2, g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: ret_2, g, inA, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     SWAP7
-    // stack: in0, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: inA, g, ret_2, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     %load_fp6
-    // stack: f, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
+    // stack: f, g, ret_2, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
     %jump(mul_fp6)
 ret_2:
-    // stack: fg, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
-    %store_fp6(112)
-    // stack: g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: fg, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
+    %store_fp6(12)
+    // stack: g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %swap_fp6
-    // stack: f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: f', g+g', inA, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     PUSH ret_3
-    // stack: ret_3, f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: ret_3, f', g+g', inA, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     SWAP13
-    // stack: in0, f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: inA, f', g+g', ret_3, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp6
-    // stack: f,f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: f,f', g+g', ret_3, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
-    // stack: f+f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: f+f', g+g', ret_3, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %jump(mul_fp6)
 ret_3:
-    // stack: (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(112)
-    // stack: fg, (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: (f+f')(g+g'), inB, out {0: sh(f'g'), 6: f'g', 12: fg}
+    %load_fp6(12)
+    // stack: fg, (f+f')(g+g'), inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %swap_fp6
-    // stack: (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %dup_fp6_6
-    // stack: fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(106)
-    // stack: f'g',fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: fg, (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
+    %load_fp6(6)
+    // stack: f'g',fg, (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
-    // stack: f'g'+fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: f'g'+fg, (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %subr_fp6
-    // stack: (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
-    DUP14 %add_const(6)
-    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: (f+f')(g+g') - (f'g'+fg), fg, inB, out
{0: sh(f'g'), 6: f'g', 12: fg}
+    DUP14 %offset_fp6
+    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp6
-    // stack: fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(100)
-    // stack: sh(f'g') , fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
+    %load_fp6(0)
+    // stack: sh(f'g') , fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
-    // stack: sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: sh(f'g') + fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     DUP8
-    // stack: out, sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: out, sh(f'g') + fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp6
-    // stack: in1, out {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack: inB, out {0: sh(f'g'), 6: f'g', 12: fg}
     %pop2
     JUMP
+
+
+/// fp6 functions:
+/// fn | num | ops | cost
+/// -------------------------
+/// load | 2 | 40 | 80
+/// store | 2 | 40 | 80
+/// dup | 2 | 6 | 12
+/// swap | 2 | 16 | 32
+/// add | 1 | 16 | 16
+/// mul | 1 | 157 | 157
+/// sq | 2 | |
+/// dbl | 1 | 13 | 13
+///
+/// lone stack operations:
+/// op | num
+/// ------------
+/// ADD | 3
+/// SWAP | 4
+/// DUP | 5
+/// PUSH | 6
+/// POP | 3
+/// JUMP | 4
+///
+/// TOTAL:
+
+/// input:
+/// F = f + f'z
+///
+/// output:
+/// H = h + h'z = FF
+///
+/// h = ff + sh(f'f')
+/// h' = 2ff'
+///
+/// memory pointers [ind' = ind+6]
+/// {inp: f, inp': f', out: h, out': h'}
+///
+/// f, f' consist of six elements on the stack
+
+global square_fp12:
+    // stack: inp, out   (the final JUMP returns to the word directly below out, as in mul_fp12)
+    DUP1 // f is stored at inp itself; applying %offset_fp6 here would load f' from inp' instead
+    // stack: inp, inp, out
+    %load_fp6
+    // stack: f, inp, out
+    PUSH post_sq2
+    // stack: post_sq2, f, inp, out
+    SWAP7
+    // stack: inp, f, post_sq2, out
+    PUSH post_sq1
+    // stack: post_sq1, inp, f, post_sq2, out
+    %dup_fp6_2
+    // stack: f, post_sq1, inp, f, post_sq2, out
+    DUP16 %offset_fp6
+    // stack: out', f, post_sq1, inp, f, post_sq2, out
+    PUSH post_mul
+    // stack: post_mul, out', f, post_sq1, inp, f, post_sq2, out
+    DUP10 %offset_fp6
+    // stack: inp', post_mul, out', f, post_sq1, inp, f, post_sq2, out
+    %load_fp6
+    // stack: f', post_mul, out', f, post_sq1, inp, f, post_sq2, out
+    %swap_fp6_hole_2
+    // stack: f, post_mul, out', f', post_sq1, inp, f, post_sq2, out
+    %dup_fp6_8
+    // stack: f', f, post_mul, out', f', post_sq1, inp, f, post_sq2, out
+    %jump(mul_fp6)
+post_mul:
+    // stack: f'f, out', f', post_sq1, inp, f, post_sq2, out
+    DUP7
+    // stack: out', f'f, out', f', post_sq1, inp, f, post_sq2, out
+    %store_fp6_double
+    // stack: out', f', post_sq1, inp, f, post_sq2, out {out': h' = 2ff'}
+    POP
+    // stack: f', post_sq1, inp, f, post_sq2, out
+    %jump(square_fp6)
+post_sq1:
+    // stack: f'f', inp, f, post_sq2, out
+    %store_fp6_sh(0) // park sh(f'f') in scratch words 0..5, the same slot mul_fp12 uses for sh(f'g')
+    // stack: inp, f, post_sq2, out {0: sh(f'f')}
+    SWAP7 SWAP6 SWAP5 SWAP4 SWAP3 SWAP2 SWAP1 // exchange inp with post_sq2, then sink post_sq2 beneath f
+    // stack: f, post_sq2, inp, out {0: sh(f'f')}
+    %jump(square_fp6) // NOTE(review): assumes square_fp6, like mul_fp6, leaves scratch words 0..5 untouched - confirm once it is written
+post_sq2:
+    // stack: ff, inp, out {0: sh(f'f')}
+    %load_fp6(0) %add_fp6
+    // stack: ff + sh(f'f'), inp, out
+    DUP8
+    // stack: out, ff + sh(f'f'), inp, out
+    %store_fp6
+    // stack: inp, out {out: h = ff + sh(f'f')}
+    %pop2
+    JUMP
+
+/// fp6 functions:
+/// fn | num | ops | cost
+/// -------------------------
+/// load | 2 | 40 | 80
+/// store | 2 | 40 | 80
+/// dup | 2 | 6 | 12
+/// swap | 2 | 16 | 32
+/// add | 1 | 16 | 16
+/// mul | 1 | 157 | 157
+/// sq | 2 | |
+/// dbl | 1 | 13 | 13
+///
+/// lone stack operations:
+/// op | num
+/// ------------
+/// ADD | 3
+/// SWAP | 4
+/// DUP | 5
+/// PUSH | 6
+/// POP | 3
+/// JUMP | 4
+///
+/// TOTAL:
+
+/// input:
+/// F = f + f'z
+/// G = g0 + (G1)t + (G2)tz
+///
+/// output:
+/// H = h + h'z = FG
+/// = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
+///
+/// h = g0 * f + G1 * sh(f ) + G2 * sh2(f')
+/// h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
+///
+/// memory pointers [ind' = ind+6, inB2 = inB1 + 2 = inB + 3]
+/// { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
+///
+/// f, f' consist of six elements; G1, G2 consist of two elements; and g0 of one element
+
+
+global mul_fp12_sparse:
+    // stack: inA, inB, out   (the final JUMP returns to the word directly below out, as in mul_fp12)
+    // NOTE(review): stack effects assumed below, taken from these comments because the macros are not defined in the hunks shown:
+    //   %mul_fp_fp12: (g0, f: 6) -> 6,  %load_fp2: ptr -> 2,  %mul_fp2_fp12_sh / _sh2: (G: 2, f: 6) -> 6
+    DUP1 %offset_fp6
+    // stack: inA', inA, inB, out
+    %load_fp6
+    // stack: f', inA, inB, out
+    DUP8
+    // stack: inB, f', inA, inB, out
+    DUP8
+    // stack: inA, inB, f', inA, inB, out
+    %load_fp6
+    // stack: f, inB, f', inA, inB, out
+    DUP16
+    // stack: out, f, inB, f', inA, inB, out
+    %dup_fp6_8
+    // stack: f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack: inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_8
+    // stack: f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7
+    // stack: inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_8
+    // stack: f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_7
+    // stack: f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13
+    // stack: inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mload_kernel_general
+    // stack: g0, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp_fp12
+    // stack: g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %swap_fp6
+    // stack: f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13 %add_const(3)
+    // stack: inB2, f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp2
+    // stack: G2, f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp2_fp12_sh2
+    // stack: G2 * sh2(f'), g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %add_fp6
+    // stack: G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack: f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7 %add_const(1)
+    // stack: inB1, f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp2
+    // stack: G1, f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp2_fp12_sh
+    // stack: G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %add_fp6_hole
+    // stack: G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %store_fp6
+    // stack: inB, f', out, f, inB, f', inA, inB, out {out: h}
+    POP // only inB is disposable here: f' sits directly beneath it, so a %pop2 would also drop f'0
+    // stack: f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack: inB, f', out, f, inB, f', inA, inB, out
+    %mload_kernel_general
+    // stack: g0, f', out, f, inB, f', inA, inB, out
+    %mul_fp_fp12
+    // stack: g0 * f', out, f, inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack: f, out, g0 * f', inB, f', inA, inB, out
+    DUP14 %add_const(3)
+    // stack: inB2, f, out, g0 * f', inB, f', inA, inB, out
+    %load_fp2
+    // stack: G2, f, out, g0 * f', inB, f', inA, inB, out
+    %mul_fp2_fp12_sh
+    // stack: G2 * sh(f), out, g0 * f', inB, f', inA, inB, out
+    %add_fp6_hole // the leftover copy of out is the hole and is popped here
+    // stack: G2 * sh(f) + g0 * f', inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack: f', inB, G2 * sh(f) + g0 * f', inA, inB, out
+    DUP7 %add_const(1)
+    // stack: inB1, f', inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %load_fp2
+    // stack: G1, f', inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %mul_fp2_fp12_sh
+    // stack: G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %add_fp6_hole
+    // stack: G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    DUP9 %offset_fp6
+    // stack: out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    %store_fp6
+    // stack: inA, inB, out {out: h, out': h'}
+    %pop3
+    JUMP // return to the caller instead of falling through into whatever code is assembled next
+
+/// global mul_fp12_sparse_fast:   (draft only: DUP26 / DUP20 below exceed DUP16, and the clone_* macros are not defined in the hunks shown)
+///     // stack: inA, inB, out
+///     DUP2
+///     // stack: inB, inA, inB, out
+///     %load_fp12_sparse
+///     // stack: g0, G1, G1', inA, inB, out
+///     DUP6 %offset_fp6
+///     // stack: inA', g0, G1, G1', inA, inB, out
+///     %load_fp6
+///     // stack: f', g0, G1, G1', inA, inB, out
+///     DUP12
+///     // stack: inA, f', g0, G1, G1', inA, inB, out
+///     %load_fp6
+///     // stack: f, f', g0, G1, G1', inA, inB, out
+///     %clone_mul_fp_fp6
+///     // stack: (g0)f, f, f', g0, G1, G1', inA, inB, out
+///     %clone_mul_fp2_fp6_sh
+///     // stack: (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
+///     %add_fp6
+///     // stack: (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
+///     %clone_mul_fp2_fp6_sh2
+///     // stack: (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
+///     %add_fp6
+///     // stack: (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
+///     DUP26
+///     // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
+///     %store_fp6
+///     // stack: f, f', g0, G1, G1', inA, inB, out
+///     %semiclone_mul_fp2_fp6_sh
+///     // stack: (G1')sh(f), f', g0, G1, G1', inA, inB, out
+///     %clone_mul_fp2_fp6_sh
+///     // stack: (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out
+///     %add_fp6
+///     // stack: (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
+///     %clone_mul_fp_fp6
+///     // stack: (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
+///     %add_fp6
+///     // stack: (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
+///     DUP20 %offset_fp6
+///     // stack: out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
+///     %store_fp6
+///     // stack: f', g0, G1, G1', inA, inB, out
+///     %pop14
diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
index 0fc6dbdf..9be87aac 100644
--- a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
@@ -256,3 +256,7 @@ global mul_fp6:
 
     // stack: retdest, e0, e0_, e1, e1_, e2, e2_
     JUMP
+
+
+global square_fp6: // NOTE(review): empty stub - square_fp12 jumps here twice, and with no body execution runs on into whatever is assembled after this file
+    
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/fields/frobenius.asm b/evm/src/cpu/kernel/asm/fields/frobenius.asm
new file mode 100644
index 00000000..e69de29b