mirror of
https://github.com/logos-storage/plonky2.git
synced 2026-01-15 04:03:13 +00:00
sparse mul finished
This commit is contained in:
parent
215e9a6e04
commit
5d2a9b3d53
@ -1,3 +1,7 @@
|
||||
// Advance the pointer on top of the stack by 6 via %add_const(6).
// Elsewhere in this file the convention is written as ind' = ind + 6, i.e. the primed
// pointer addresses the second group of six elements of an fp12 value.
// stack: ptr -> ptr + 6
%macro offset_fp6
|
||||
%add_const(6)
|
||||
%endmacro
|
||||
|
||||
// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
|
||||
%macro load_fp6
|
||||
// stack: ptr
|
||||
@ -87,6 +91,51 @@
|
||||
// stack:
|
||||
%endmacro
|
||||
|
||||
// Store the double of an fp6 element: for i in 0..5, 2*xi is written at ptr + i
// using %mstore_kernel_general. Each coefficient is doubled with `PUSH 2 MULFP254`.
// The pointer is consumed; nothing is left on the stack.
// Order of stores: x5 first (it is swapped to the top together with moving ptr to the
// bottom), then x0, x1, x2, x3, x4.
// cost: 6 stores + 7 swaps/dups + 5 adds + 6 doubles = 6*4 + 7*1 + 5*2 + 6*2 = 53
|
||||
%macro store_fp6_double
|
||||
// stack: ptr, x0, x1, x2, x3, x4, x5
|
||||
// park ptr underneath the six coefficients so it can be DUPed for every store
SWAP6
|
||||
// stack: x5, x0, x1, x2, x3, x4, ptr
|
||||
PUSH 2 MULFP254
|
||||
// stack: 2*x5, x0, x1, x2, x3, x4, ptr
|
||||
DUP7 %add_const(5)
|
||||
// stack: ind5, 2*x5, x0, x1, x2, x3, x4, ptr
|
||||
%mstore_kernel_general
|
||||
// stack: x0, x1, x2, x3, x4, ptr
|
||||
PUSH 2 MULFP254
|
||||
// stack: 2*x0, x1, x2, x3, x4, ptr
|
||||
// offset 0: ptr is used as-is, no %add_const needed
DUP6
|
||||
// stack: ind0, 2*x0, x1, x2, x3, x4, ptr
|
||||
%mstore_kernel_general
|
||||
// stack: x1, x2, x3, x4, ptr
|
||||
PUSH 2 MULFP254
|
||||
// stack: 2*x1, x2, x3, x4, ptr
|
||||
DUP5 %add_const(1)
|
||||
// stack: ind1, 2*x1, x2, x3, x4, ptr
|
||||
%mstore_kernel_general
|
||||
// stack: x2, x3, x4, ptr
|
||||
PUSH 2 MULFP254
|
||||
// stack: 2*x2, x3, x4, ptr
|
||||
DUP4 %add_const(2)
|
||||
// stack: ind2, 2*x2, x3, x4, ptr
|
||||
%mstore_kernel_general
|
||||
// stack: x3, x4, ptr
|
||||
PUSH 2 MULFP254
|
||||
// stack: 2*x3, x4, ptr
|
||||
DUP3 %add_const(3)
|
||||
// stack: ind3, 2*x3, x4, ptr
|
||||
%mstore_kernel_general
|
||||
// stack: x4, ptr
|
||||
PUSH 2 MULFP254
|
||||
// stack: 2*x4, ptr
|
||||
// last store: consume ptr itself instead of duplicating it
SWAP1
|
||||
// stack: ptr, 2*x4
|
||||
%add_const(4)
|
||||
// stack: ind4, 2*x4
|
||||
%mstore_kernel_general
|
||||
// stack:
|
||||
%endmacro
|
||||
|
||||
// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
|
||||
%macro store_fp6(ptr)
|
||||
// stack: x0, x1, x2, x3, x4, x5
|
||||
@ -175,31 +224,55 @@
|
||||
DUP6
|
||||
DUP6
|
||||
DUP6
|
||||
// stack: f: 6, g: 6
|
||||
// stack: f: 6, f: 6
|
||||
%endmacro
|
||||
|
||||
// Duplicate an fp6 element that sits beneath two other stack items.
// With X: 2 on top, the last coefficient of f is the 8th stack item; every DUP8 pushes
// one item, so the same depth reaches the next coefficient on each repetition.
// cost: 6
|
||||
%macro dup_fp6_2
|
||||
// stack: X: 2, f: 6
|
||||
DUP8
|
||||
DUP8
|
||||
DUP8
|
||||
DUP8
|
||||
DUP8
|
||||
DUP8
|
||||
// stack: f: 6, X: 2, f: 6
|
||||
%endmacro
|
||||
|
||||
// cost: 6
|
||||
%macro dup_fp6_6
|
||||
// stack: f: 6, g: 6
|
||||
// stack: X: 6, f: 6
|
||||
DUP12
|
||||
DUP12
|
||||
DUP12
|
||||
DUP12
|
||||
DUP12
|
||||
DUP12
|
||||
// stack: g: 6, f: 6, g: 6
|
||||
// stack: f: 6, X: 6, f: 6
|
||||
%endmacro
|
||||
|
||||
// cost: 6
|
||||
%macro dup_fp6_7
|
||||
// stack: f: 6, g: 6
|
||||
// stack: X: 7, f: 6
|
||||
DUP13
|
||||
DUP13
|
||||
DUP13
|
||||
DUP13
|
||||
DUP13
|
||||
DUP13
|
||||
// stack: g: 6, f: 6, g: 6
|
||||
// stack: f: 6, X: 7, f: 6
|
||||
%endmacro
|
||||
|
||||
// Duplicate an fp6 element that sits beneath eight other stack items.
// With X: 8 on top, the last coefficient of f is the 14th stack item; every DUP14 pushes
// one item, so the same depth reaches the next coefficient on each repetition.
// cost: 6
|
||||
%macro dup_fp6_8
|
||||
// stack: X: 8, f: 6
|
||||
DUP14
|
||||
DUP14
|
||||
DUP14
|
||||
DUP14
|
||||
DUP14
|
||||
DUP14
|
||||
// stack: f: 6, X: 8, f: 6
|
||||
%endmacro
|
||||
|
||||
// cost: 16
|
||||
@ -231,7 +304,7 @@
|
||||
|
||||
// cost: 16
|
||||
// swap two fp6 elements with a stack term separating them
|
||||
// (f: 6, x, g: 6) -> (g: 6, x, f: 6)
|
||||
// (f: 6, X, g: 6) -> (g: 6, X, f: 6)
|
||||
%macro swap_fp6_hole
|
||||
// stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
|
||||
SWAP7
|
||||
@ -258,6 +331,35 @@
|
||||
// stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
|
||||
%endmacro
|
||||
|
||||
// cost: 16
|
||||
// swap two fp6 elements with two stack terms separating them
|
||||
// (f: 6, X: 2, g: 6) -> (g: 6, X: 2, f: 6)
|
||||
// The two separating items (written `X: 2` below) are left untouched.
// Coefficient i is exchanged by bringing f_i to the top (SWAP i), swapping it with g_i,
// which sits 8 slots deeper (SWAP 8+i), and putting the result back in place (SWAP i).
// For i = 0 the outer swaps are unnecessary, hence 1 + 5*3 = 16 operations.
%macro swap_fp6_hole_2
|
||||
// stack: f0, f1, f2, f3, f4, f5, X: 2, g0, g1, g2, g3, g4, g5
|
||||
SWAP8
|
||||
// stack: g0, f1, f2, f3, f4, f5, X: 2, f0, g1, g2, g3, g4, g5
|
||||
SWAP1
|
||||
SWAP9
|
||||
SWAP1
|
||||
// stack: g0, g1, f2, f3, f4, f5, X: 2, f0, f1, g2, g3, g4, g5
|
||||
SWAP2
|
||||
SWAP10
|
||||
SWAP2
|
||||
// stack: g0, g1, g2, f3, f4, f5, X: 2, f0, f1, f2, g3, g4, g5
|
||||
SWAP3
|
||||
SWAP11
|
||||
SWAP3
|
||||
// stack: g0, g1, g2, g3, f4, f5, X: 2, f0, f1, f2, f3, g4, g5
|
||||
SWAP4
|
||||
SWAP12
|
||||
SWAP4
|
||||
// stack: g0, g1, g2, g3, g4, f5, X: 2, f0, f1, f2, f3, f4, g5
|
||||
SWAP5
|
||||
SWAP13
|
||||
SWAP5
|
||||
// stack: g0, g1, g2, g3, g4, g5, X: 2, f0, f1, f2, f3, f4, f5
|
||||
%endmacro
|
||||
|
||||
// cost: 16
|
||||
%macro add_fp6
|
||||
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
|
||||
@ -285,6 +387,37 @@
|
||||
// stack: h0, h1, h2, h3, h4, h5
|
||||
%endmacro
|
||||
|
||||
// cost: 18
|
||||
// add two fp6 elements with a to-be-popped stack term separating them
|
||||
// (f: 6, X, g: 6) -> (f + g: 6)
|
||||
// h_i denotes f_i + g_i. f0 is kept on top as a pivot: each SWAP8 / ADDFP254 / SWAP7
// triple exchanges f0 with the next g_i, adds it to the f_i now directly beneath,
// and swaps the sum h_i back into the slot where f0 was parked. After five rounds
// only f0, X and g0 remain in front of h1..h5; X is discarded and h0 is formed last.
// 5*3 + 3 = 18 operations.
%macro add_fp6_hole
|
||||
// stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
|
||||
SWAP8
|
||||
ADDFP254
|
||||
SWAP7
|
||||
// stack: f0, f2, f3, f4, f5, X, g0, h1, g2, g3, g4, g5
|
||||
SWAP8
|
||||
ADDFP254
|
||||
SWAP7
|
||||
// stack: f0, f3, f4, f5, X, g0, h1, h2, g3, g4, g5
|
||||
SWAP8
|
||||
ADDFP254
|
||||
SWAP7
|
||||
// stack: f0, f4, f5, X, g0, h1, h2, h3, g4, g5
|
||||
SWAP8
|
||||
ADDFP254
|
||||
SWAP7
|
||||
// stack: f0, f5, X, g0, h1, h2, h3, h4, g5
|
||||
SWAP8
|
||||
ADDFP254
|
||||
SWAP7
|
||||
// stack: f0, X, g0, h1, h2, h3, h4, h5
|
||||
// bring the separator to the top and drop it, leaving f0 and g0 adjacent
SWAP1
|
||||
POP
|
||||
ADDFP254
|
||||
// stack: h0, h1, h2, h3, h4, h5
|
||||
%endmacro
|
||||
|
||||
// *reversed argument subtraction* cost: 17
|
||||
%macro subr_fp6
|
||||
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
|
||||
@ -1,31 +1,31 @@
|
||||
/// Note: uncomment this to test
|
||||
|
||||
/// global test_mul_Fp12:
|
||||
/// // stack: f, in0 , f', g, in1 , g', in1, out, in0, out
|
||||
/// global test_mul_fp12:
|
||||
/// // stack: f, inA , f', g, inB , g', inB, out, inA, out
|
||||
/// DUP7
|
||||
/// // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0, out
|
||||
/// // stack: inA, f, inA , f', g, inB , g', inB, out, inA, out
|
||||
/// %store_fp6
|
||||
/// // stack: in0 , f', g, in1 , g', in1, out, in0, out
|
||||
/// %add_const(6)
|
||||
/// // stack: in0', f', g, in1 , g', in1, out, in0, out
|
||||
/// // stack: inA , f', g, inB , g', inB, out, inA, out
|
||||
/// %offset_fp6
|
||||
/// // stack: inA', f', g, inB , g', inB, out, inA, out
|
||||
/// %store_fp6
|
||||
/// // stack: g, in1 , g', in1, out, in0, out
|
||||
/// // stack: g, inB , g', inB, out, inA, out
|
||||
/// DUP7
|
||||
/// // stack: in1, g, in1 , g', in1, out, in0, out
|
||||
/// // stack: inB, g, inB , g', inB, out, inA, out
|
||||
/// %store_fp6
|
||||
/// // stack: in1 , g', in1, out, in0, out
|
||||
/// %add_const(6)
|
||||
/// // stack: in1', g', in1, out, in0, out
|
||||
/// // stack: inB , g', inB, out, inA, out
|
||||
/// %offset_fp6
|
||||
/// // stack: inB', g', inB, out, inA, out
|
||||
/// %store_fp6
|
||||
/// // stack: in1, out, in0, out
|
||||
/// // stack: inB, out, inA, out
|
||||
/// PUSH ret_stack
|
||||
/// // stack: ret_stack, in1, out, in0, out
|
||||
/// // stack: ret_stack, inB, out, inA, out
|
||||
/// SWAP3
|
||||
/// // stack: in0, in1, out, ret_stack, out
|
||||
/// %jump(mul_Fp12)
|
||||
/// // stack: inA, inB, out, ret_stack, out
|
||||
/// %jump(mul_fp12)
|
||||
/// ret_stack:
|
||||
/// // stack: out
|
||||
/// DUP1 %add_const(6)
|
||||
/// DUP1 %offset_fp6
|
||||
/// // stack: out', out
|
||||
/// %load_fp6
|
||||
/// // stack: h', out
|
||||
@ -56,9 +56,9 @@
|
||||
/// DUP | 6
|
||||
/// PUSH | 6
|
||||
/// POP | 2
|
||||
/// JUMP | 1
|
||||
/// JUMP | 6
|
||||
///
|
||||
/// TOTAL: 1196
|
||||
/// TOTAL: 1201
|
||||
|
||||
/// inputs:
|
||||
/// F = f + f'z
|
||||
@ -71,96 +71,365 @@
|
||||
/// h' = (f+f')(g+g') - fg - f'g'
|
||||
///
|
||||
/// memory pointers [ind' = ind+6]
|
||||
/// {in0: f, in0: f', in1: g, in1':g', out: h, out': h'}
|
||||
/// {inA: f, inA': f', inB: g, inB': g', out: h, out': h'}
|
||||
///
|
||||
/// f, f', g, g' consist of six elements on the stack
|
||||
|
||||
global mul_Fp12:
|
||||
// stack: in0, in1, out
|
||||
DUP1 %add_const(6)
|
||||
// stack: in0', in0, in1, out
|
||||
global mul_fp12:
|
||||
// stack: inA, inB, out
|
||||
DUP1 %offset_fp6
|
||||
// stack: inA', inA, inB, out
|
||||
%load_fp6
|
||||
// stack: f', in0, in1, out
|
||||
DUP8 %add_const(6)
|
||||
// stack: in1', f', in0, in1, out
|
||||
// stack: f', inA, inB, out
|
||||
DUP8 %offset_fp6
|
||||
// stack: inB', f', inA, inB, out
|
||||
%load_fp6
|
||||
// stack: g', f', in0, in1, out
|
||||
// stack: g', f', inA, inB, out
|
||||
PUSH ret_1
|
||||
// stack: ret_1, g', f', in0, in1, out
|
||||
// stack: ret_1, g', f', inA, inB, out
|
||||
%dup_fp6_7
|
||||
// stack: f', ret_1, g', f', in0, in1, out
|
||||
// stack: f', ret_1, g', f', inA, inB, out
|
||||
%dup_fp6_7
|
||||
// stack: g', f', ret_1, g', f', in0, in1, out
|
||||
// stack: g', f', ret_1, g', f', inA, inB, out
|
||||
%jump(mul_fp6)
|
||||
ret_1:
|
||||
// stack: f'g', g' , f', in0, in1, out
|
||||
// stack: f'g', g' , f', inA, inB, out
|
||||
%dup_fp6_0
|
||||
// stack: f'g', f'g', g' , f', in0, in1, out
|
||||
%store_fp6_sh(100)
|
||||
// stack: f'g', g' , f', in0, in1, out {100: sh(f'g')}
|
||||
%store_fp6(106)
|
||||
// stack: g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: f'g', f'g', g' , f', inA, inB, out
|
||||
%store_fp6_sh(0)
|
||||
// stack: f'g', g' , f', inA, inB, out {0: sh(f'g')}
|
||||
%store_fp6(6)
|
||||
// stack: g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
DUP13
|
||||
// stack: in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: inA, g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
DUP15
|
||||
// stack: in1, in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: inB, inA, g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%load_fp6
|
||||
// stack: g , in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: g , inA, g' , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%swap_fp6_hole
|
||||
// stack: g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: g', inA, g , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%dup_fp6_7
|
||||
// stack: g,g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: g,g', inA, g , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%add_fp6
|
||||
// stack: g+g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: g+g', inA, g , f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%swap_fp6_hole
|
||||
// stack: g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: g, inA, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
PUSH ret_2
|
||||
// stack: ret_2, g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: ret_2, g, inA, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
SWAP7
|
||||
// stack: in0, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: inA, g, ret_2, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%load_fp6
|
||||
// stack: f, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
// stack: f, g, ret_2, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%jump(mul_fp6)
|
||||
ret_2:
|
||||
// stack: fg, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'}
|
||||
%store_fp6(112)
|
||||
// stack: g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: fg, g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g'}
|
||||
%store_fp6(12)
|
||||
// stack: g+g', f', inA, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%swap_fp6
|
||||
// stack: f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: f', g+g', inA, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
PUSH ret_3
|
||||
// stack: ret_3, f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: ret_3, f', g+g', inA, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
SWAP13
|
||||
// stack: in0, f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: inA, f', g+g', ret_3, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%load_fp6
|
||||
// stack: f,f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: f,f', g+g', ret_3, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%add_fp6
|
||||
// stack: f+f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: f+f', g+g', ret_3, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%jump(mul_fp6)
|
||||
ret_3:
|
||||
// stack: (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
%load_fp6(112)
|
||||
// stack: fg, (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: (f+f')(g+g'), inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%load_fp6(12)
|
||||
// stack: fg, (f+f')(g+g'), inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%swap_fp6
|
||||
// stack: (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%dup_fp6_6
|
||||
// stack: fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
%load_fp6(106)
|
||||
// stack: f'g',fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: fg, (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%load_fp6(6)
|
||||
// stack: f'g',fg, (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%add_fp6
|
||||
// stack: f'g'+fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: f'g'+fg, (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%subr_fp6
|
||||
// stack: (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
DUP14 %add_const(6)
|
||||
// stack: out', (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: (f+f')(g+g') - (f'g'+fg), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
DUP14 %offset_fp6
|
||||
// stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%store_fp6
|
||||
// stack: fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
%load_fp6(100)
|
||||
// stack: sh(f'g') , fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%load_fp6(0)
|
||||
// stack: sh(f'g') , fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%add_fp6
|
||||
// stack: sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: sh(f'g') + fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
DUP8
|
||||
// stack: out, sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: out, sh(f'g') + fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%store_fp6
|
||||
// stack: in1, out {100: sh(f'g'), 106: f'g', 112: fg}
|
||||
// stack: inB, out {0: sh(f'g'), 6: f'g', 12: fg}
|
||||
%pop2
|
||||
JUMP
|
||||
|
||||
|
||||
/// fp6 functions:
|
||||
/// fn | num | ops | cost
|
||||
/// -------------------------
|
||||
/// load | 2 | 40 | 80
|
||||
/// store | 2 | 40 | 80
|
||||
/// dup | 2 | 6 | 12
|
||||
/// swap | 2 | 16 | 32
|
||||
/// add | 1 | 16 | 16
|
||||
/// mul | 1 | 157 | 157
|
||||
/// sq | 2 | |
|
||||
/// dbl | 1 | 13 | 13
|
||||
///
|
||||
/// lone stack operations:
|
||||
/// op | num
|
||||
/// ------------
|
||||
/// ADD | 3
|
||||
/// SWAP | 4
|
||||
/// DUP | 5
|
||||
/// PUSH | 6
|
||||
/// POP | 3
|
||||
/// JUMP | 4
|
||||
///
|
||||
/// TOTAL:
|
||||
|
||||
/// input:
|
||||
/// F = f + f'z
|
||||
///
|
||||
/// output:
|
||||
/// H = h + h'z = FF
|
||||
///
|
||||
/// h = ff + sh(f'f')
|
||||
/// h' = 2ff'
|
||||
///
|
||||
/// memory pointers [ind' = ind+6]
|
||||
/// {inp: f, inp': f', out: h, out': h'}
|
||||
///
|
||||
/// f, f' consist of six elements on the stack
|
||||
|
||||
global square_fp12:
    // Square an fp12 element F = f + f'z, with f stored at inp and f' at inp' = inp + 6.
    // Writes h' = 2ff' to out' = out + 6 and h = ff + sh(f'f') to out, then pops inp and
    // out and JUMPs to the value directly beneath `out` (the caller's return address).
    // Uses three fp6 calls: mul_fp6 (f'f), square_fp6 (f'f') and square_fp6 (ff).
    //
    // stack:                                                  inp, out
    DUP1
    // Fix: f has to be read from inp itself. The previous `DUP1 %offset_fp6` read from
    // inp', so both loads produced f' and the routine computed f'f' where ff'/ff were
    // required (the stack comments already described the intended `inp, inp, out`).
    // stack:                                             inp, inp, out
    %load_fp6
    // stack:                                               f, inp, out
    PUSH post_sq2
    // stack:                                     post_sq2, f, inp, out
    SWAP7
    // stack:                                     inp, f, post_sq2, out
    PUSH post_sq1
    // stack:                           post_sq1, inp, f, post_sq2, out
    %dup_fp6_2
    // stack:                        f, post_sq1, inp, f, post_sq2, out
    DUP16 %offset_fp6
    // stack:                  out', f, post_sq1, inp, f, post_sq2, out
    PUSH post_mul
    // stack:        post_mul, out', f, post_sq1, inp, f, post_sq2, out
    DUP10 %offset_fp6
    // stack:  inp', post_mul, out', f, post_sq1, inp, f, post_sq2, out
    %load_fp6
    // stack:    f', post_mul, out', f, post_sq1, inp, f, post_sq2, out
    %swap_fp6_hole_2
    // stack:    f, post_mul, out', f', post_sq1, inp, f, post_sq2, out
    %dup_fp6_8
    // stack: f', f, post_mul, out', f', post_sq1, inp, f, post_sq2, out
    %jump(mul_fp6)

post_mul:
    // stack:            f'f, out', f', post_sq1, inp, f, post_sq2, out
    DUP7
    // stack:      out', f'f, out', f', post_sq1, inp, f, post_sq2, out
    %store_fp6_double
    // h' = 2ff' is now stored at out'
    // stack:                 out', f', post_sq1, inp, f, post_sq2, out
    POP
    // stack:                       f', post_sq1, inp, f, post_sq2, out
    %jump(square_fp6)

post_sq1:
    // stack:                               f'f', inp, f, post_sq2, out
    // Fix: the documented result is h = ff + sh(f'f'), but the shift was never applied.
    // Round-trip through scratch slots 0..5, the same idiom mul_fp12 uses for sh(f'g').
    // NOTE(review): assumes out' (already written above) does not overlap slots 0..5
    // and that square_fp6 does not rely on them - confirm against the callers.
    %store_fp6_sh(0)
    // stack:                                     inp, f, post_sq2, out  {0: sh(f'f')}
    %load_fp6(0)
    // stack:                           sh(f'f'), inp, f, post_sq2, out
    %swap_fp6_hole
    // stack:                           f, inp, sh(f'f'), post_sq2, out
    // exchange inp and post_sq2 so the return label sits directly under f
    SWAP6 SWAP13 SWAP6
    // stack:                           f, post_sq2, sh(f'f'), inp, out
    %jump(square_fp6)

post_sq2:
    // stack:                                  ff, sh(f'f'), inp, out
    %add_fp6
    // stack:                                ff + sh(f'f'), inp, out
    DUP8
    // stack:                           out, ff + sh(f'f'), inp, out
    %store_fp6
    // stack:                                               inp, out
    %pop2
    JUMP
|
||||
|
||||
/// fp6 functions:
|
||||
/// fn | num | ops | cost
|
||||
/// -------------------------
|
||||
/// load | 2 | 40 | 80
|
||||
/// store | 2 | 40 | 80
|
||||
/// dup | 2 | 6 | 12
|
||||
/// swap | 2 | 16 | 32
|
||||
/// add | 1 | 16 | 16
|
||||
/// mul | 1 | 157 | 157
|
||||
/// sq | 2 | |
|
||||
/// dbl | 1 | 13 | 13
|
||||
///
|
||||
/// lone stack operations:
|
||||
/// op | num
|
||||
/// ------------
|
||||
/// ADD | 3
|
||||
/// SWAP | 4
|
||||
/// DUP | 5
|
||||
/// PUSH | 6
|
||||
/// POP | 3
|
||||
/// JUMP | 4
|
||||
///
|
||||
/// TOTAL:
|
||||
|
||||
/// input:
|
||||
/// F = f + f'z
|
||||
/// G = g0 + (G1)t + (G2)tz
|
||||
///
|
||||
/// output:
|
||||
/// H = h + h'z = FG
|
||||
/// = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
|
||||
///
|
||||
/// h = g0 * f + G1 * sh(f ) + G2 * sh2(f')
|
||||
/// h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
|
||||
///
|
||||
/// memory pointers [ind' = ind+6, inB2 = inB1 + 2 = inB + 3]
|
||||
/// { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
|
||||
///
|
||||
/// f, f' consist of six elements; G1, G2 consist of two elements; and g0 of one element
|
||||
|
||||
|
||||
global mul_fp12_sparse:
    // Multiply F = f + f'z (f at inA, f' at inA' = inA + 6) by the sparse element
    // G = g0 + (G1)t + (G2)tz, with g0 at inB, G1 at inB1 = inB + 1, G2 at inB2 = inB + 3.
    //   h  = g0 * f  + G1 * sh(f ) + G2 * sh2(f')   -> stored at out
    //   h' = g0 * f' + G1 * sh(f') + G2 * sh (f )   -> stored at out' = out + 6
    // On exit inA, inB and out are popped and control JUMPs to the value directly
    // beneath `out` (the caller's return address), matching mul_fp12 and square_fp12.
    //
    // ---- set-up: load f and f' once and lay out every copy both halves will consume ----
    // stack:                                                                   inA, inB, out
    DUP1 %offset_fp6
    // stack:                                                             inA', inA, inB, out
    %load_fp6
    // stack:                                                               f', inA, inB, out
    DUP8
    // stack:                                                          inB, f', inA, inB, out
    DUP8
    // stack:                                                     inA, inB, f', inA, inB, out
    %load_fp6
    // stack:                                                       f, inB, f', inA, inB, out
    DUP16
    // stack:                                                  out, f, inB, f', inA, inB, out
    %dup_fp6_8
    // stack:                                              f', out, f, inB, f', inA, inB, out
    DUP14
    // stack:                                         inB, f', out, f, inB, f', inA, inB, out
    %dup_fp6_8
    // stack:                                      f, inB, f', out, f, inB, f', inA, inB, out
    DUP7
    // stack:                                 inB, f, inB, f', out, f, inB, f', inA, inB, out
    %dup_fp6_8
    // stack:                             f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %dup_fp6_7
    // stack:                          f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out

    // ---- h = g0 * f + G2 * sh2(f') + G1 * sh(f) ----
    DUP13
    // stack:                     inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %mload_kernel_general
    // stack:                      g0, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %mul_fp_fp12
    // stack:                     g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %swap_fp6
    // stack:                     f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    DUP13 %add_const(3)
    // stack:               inB2, f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %load_fp2
    // stack:                 G2, f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %mul_fp2_fp12_sh2
    // stack:           G2 * sh2(f'), g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %add_fp6
    // stack:          G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %swap_fp6_hole
    // stack:          f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    DUP7 %add_const(1)
    // stack:    inB1, f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    %load_fp2
    // stack:      G1, f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    %mul_fp2_fp12_sh
    // stack: G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    %add_fp6_hole
    // stack:                                      h, inB, f', out, f, inB, f', inA, inB, out
    DUP14
    // stack:                                 out, h, inB, f', out, f, inB, f', inA, inB, out
    %store_fp6
    // stack:                                         inB, f', out, f, inB, f', inA, inB, out

    // ---- h' = g0 * f' + G2 * sh(f) + G1 * sh(f') ----
    // Fix: the set-up above leaves `inB, f', out, f` here, not `inB, out, f', f` as the
    // old code assumed; its `%pop2` therefore discarded inB together with the first
    // coefficient of f'. The leftover inB is now consumed directly as the g0 pointer
    // and the leftover `out` is treated as the hole of the hole-aware swap/add macros.
    %mload_kernel_general
    // stack:                                          g0, f', out, f, inB, f', inA, inB, out
    %mul_fp_fp12
    // stack:                                         g0 * f', out, f, inB, f', inA, inB, out
    %swap_fp6_hole
    // stack:                                         f, out, g0 * f', inB, f', inA, inB, out
    DUP14 %add_const(3)
    // stack:                                   inB2, f, out, g0 * f', inB, f', inA, inB, out
    %load_fp2
    // stack:                                     G2, f, out, g0 * f', inB, f', inA, inB, out
    %mul_fp2_fp12_sh
    // stack:                                G2 * sh(f), out, g0 * f', inB, f', inA, inB, out
    %add_fp6_hole
    // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
    %swap_fp6_hole
    // stack:                                     f', inB, G2 * sh(f) + g0 * f', inA, inB, out
    DUP7 %add_const(1)
    // stack:                               inB1, f', inB, G2 * sh(f) + g0 * f', inA, inB, out
    %load_fp2
    // stack:                                 G1, f', inB, G2 * sh(f) + g0 * f', inA, inB, out
    %mul_fp2_fp12_sh
    // stack:                           G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
    %add_fp6_hole
    // stack:                                                              h', inA, inB, out
    DUP9 %offset_fp6
    // stack:                                                        out', h', inA, inB, out
    %store_fp6
    // stack:                                                                  inA, inB, out
    %pop3
    // Fix: return to the caller. Without this JUMP, execution fell through into
    // whatever code is assembled after this routine.
    JUMP
|
||||
|
||||
|
||||
/// global mul_fp12_sparse_fast:
|
||||
/// // stack: inA, inB, out
|
||||
/// DUP2
|
||||
/// // stack: inB, inA, inB, out
|
||||
/// %load_fp12_sparse
|
||||
/// // stack: g0, G1, G1', inA, inB, out
|
||||
/// DUP6 %offset_fp6
|
||||
/// // stack: inA', g0, G1, G1', inA, inB, out
|
||||
/// %load_fp6
|
||||
/// // stack: f', g0, G1, G1', inA, inB, out
|
||||
/// DUP12
|
||||
/// // stack: inA, f', g0, G1, G1', inA, inB, out
|
||||
/// %load_fp6
|
||||
/// // stack: f, f', g0, G1, G1', inA, inB, out
|
||||
/// %clone_mul_fp_fp6
|
||||
/// // stack: (g0)f, f, f', g0, G1, G1', inA, inB, out
|
||||
/// %clone_mul_fp2_fp6_sh
|
||||
/// // stack: (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
|
||||
/// %add_fp6
|
||||
/// // stack: (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
|
||||
/// %clone_mul_fp2_fp6_sh2
|
||||
/// // stack: (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
|
||||
/// %add_fp6
|
||||
/// // stack: (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
|
||||
/// DUP26
|
||||
/// // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
|
||||
/// %store_fp6
|
||||
/// // stack: f, f', g0, G1, G1', inA, inB, out
|
||||
/// %semiclone_mul_fp2_fp6_sh
|
||||
/// // stack: (G1')sh(f), f', g0, G1, G1', inA, inB, out
|
||||
/// %clone_mul_fp2_fp6_sh
|
||||
/// // stack: (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out
|
||||
/// %add_fp6
|
||||
/// // stack: (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
|
||||
/// %clone_mul_fp_fp6
|
||||
/// // stack: (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
|
||||
/// %add_fp6
|
||||
/// // stack: (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
|
||||
/// DUP20 %offset_fp6
|
||||
/// // stack: out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
|
||||
/// %store_fp6
|
||||
/// // stack: f', g0, G1, G1', inA, inB, out
|
||||
/// %pop14
|
||||
|
||||
@ -256,3 +256,7 @@ global mul_fp6:
|
||||
|
||||
// stack: retdest, e0, e0_, e1, e1_, e2, e2_
|
||||
JUMP
|
||||
|
||||
|
||||
global square_fp6:
|
||||
|
||||
0
evm/src/cpu/kernel/asm/fields/frobenius.asm
Normal file
0
evm/src/cpu/kernel/asm/fields/frobenius.asm
Normal file
Loading…
x
Reference in New Issue
Block a user