From 7d4cec55fba9572df4cb859d4750dcf78022f449 Mon Sep 17 00:00:00 2001 From: Dmitry Vagner Date: Thu, 20 Oct 2022 16:18:41 -0400 Subject: [PATCH] fp6 mul --- evm/src/cpu/kernel/aggregator.rs | 5 +- .../asm/fields/{fp12.asm => fp12_mul.asm} | 21 +- .../asm/fields/{fp6.asm => fp6_macros.asm} | 238 ------------------ evm/src/cpu/kernel/asm/fields/fp6_mul.asm | 236 +++++++++++++++++ 4 files changed, 255 insertions(+), 245 deletions(-) rename evm/src/cpu/kernel/asm/fields/{fp12.asm => fp12_mul.asm} (90%) rename evm/src/cpu/kernel/asm/fields/{fp6.asm => fp6_macros.asm} (61%) create mode 100644 evm/src/cpu/kernel/asm/fields/fp6_mul.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 17adb07e..5488ab67 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -32,8 +32,9 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/secp256k1/lift_x.asm"), include_str!("asm/curve/secp256k1/moddiv.asm"), include_str!("asm/exp.asm"), - include_str!("asm/fields/fp6.asm"), - include_str!("asm/fields/fp12.asm"), + include_str!("asm/fields/fp6_macros.asm"), + include_str!("asm/fields/fp6_mul.asm"), + include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), include_str!("asm/main.asm"), include_str!("asm/memory/core.asm"), diff --git a/evm/src/cpu/kernel/asm/fields/fp12.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm similarity index 90% rename from evm/src/cpu/kernel/asm/fields/fp12.asm rename to evm/src/cpu/kernel/asm/fields/fp12_mul.asm index ff7f064b..6462f2cb 100644 --- a/evm/src/cpu/kernel/asm/fields/fp12.asm +++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm @@ -1,11 +1,13 @@ global test_mul_Fp12: - // stack: f, f', g, g' + // stack: f, f', g, g', in2, out, in1 %store_fp6(0) %store_fp6(6) %store_fp6(12) %store_fp6(18) + // stack: in2, out, in1 PUSH return_on_stack - // stack: return_on_stack + SWAP3 + // stack: in1, in2, out, return_on_stack %jump(mul_Fp12) return_on_stack: // stack: 
@@ -14,7 +16,7 @@ return_on_stack: // stack: h, h' %jump(0xdeadbeef) - +/// fp6 macros: /// macro | num | ops | cost /// ------------------------- /// load | 8 | 40 | 320 @@ -25,9 +27,18 @@ return_on_stack: /// sub | 2 | 17 | 34 /// mul | 3 | 156 | 468 /// i9 | 1 | 9 | 9 -/// jump | 1 | 1 | 1 /// -/// TOTAL: 1174 +/// lone stack operations: +/// op | num +/// ------------ +/// ADD | 3 +/// SWAP | 2 +/// DUP | 6 +/// PUSH | 6 +/// POP | 2 +/// JUMP | 1 +/// +/// TOTAL: 1194 /// F = f + f'z diff --git a/evm/src/cpu/kernel/asm/fields/fp6.asm b/evm/src/cpu/kernel/asm/fields/fp6_macros.asm similarity index 61% rename from evm/src/cpu/kernel/asm/fields/fp6.asm rename to evm/src/cpu/kernel/asm/fields/fp6_macros.asm index b6f1950e..be213dc3 100644 --- a/evm/src/cpu/kernel/asm/fields/fp6.asm +++ b/evm/src/cpu/kernel/asm/fields/fp6_macros.asm @@ -314,241 +314,3 @@ SUBFP254 // stack: h0, h1, h2, h3, h4, h5 %endmacro - -// cost: 156 -%macro mul_fp6 - /// C = C0 + C1t + C2t^2 - /// = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2 - /// - /// D = D0 + D1t + D2t^2 - /// = (d0 + d0_i) + (d1 + d1_i)t + (d2 + d2_i)t^2 - /// - /// E = E0 + E1t + E2t^2 = CD - /// - /// initial stack: c0, c0_, c1, c1_, c2, c2_, d0, d0_, d1, d1_, d2, d2_ - /// final stack: e0, e0_, e1, e1_, e2, e2_ - - /// E0 = C0D0 + i9(C1D2 + C2D1) - /// - /// C0D0 = (c0d0 - c0_d0_) + (c0d0_ + c0_d0)i - /// - /// C1D2 = (c1d2 - c1_d2_) + (c1d2_ + c1_d2)i - /// C2D1 = (c2d1 - c2_d1_) + (c2d1_ + c2_d1)i - /// - /// CDX = C1D2 + C2D1 - /// = (c1d2 + c2d1 - c1_d2_ - c2_d1_) + (c1d2_ + c1_d2 + c2d1_ + c2_d1)i - /// - /// i9(CDX) = (9CDX - CDX_) + (CDX + 9CDX_)i - /// - /// E0 = 9CDX - CDX_ + C0D0 - /// E0_ = 9CDX_ + CDX + C0D0_ - - // make CDX_ = c1d2_ + c1_d2 + c2d1_ + c2_d1 - DUP12 - DUP4 - MULFP254 - DUP12 - DUP6 - MULFP254 - ADDFP254 - DUP11 - DUP7 - MULFP254 - ADDFP254 - DUP10 - DUP8 - MULFP254 - ADDFP254 - // make C0D0_ = c0d0_ + c0_d0 - DUP9 - DUP3 - MULFP254 - DUP9 - DUP5 - MULFP254 - ADDFP254 - // make CDX = c1d2 + 
c2d1 - c1_d2_ - c2_d1_ - DUP12 - DUP9 - MULFP254 - DUP15 - DUP8 - MULFP254 - ADDFP254 - DUP14 - DUP7 - MULFP254 - DUP13 - DUP10 - MULFP254 - ADDFP254 - SUBFP254 - // make C0D0 = c0d0 - c0_d0_ - DUP11 - DUP6 - MULFP254 - DUP11 - DUP6 - MULFP254 - SUBFP254 - - // stack: C0D0 , CDX , C0D0_, CDX_ - DUP4 - DUP3 - // stack: CDX , CDX_ , C0D0 , CDX , C0D0_, CDX_ - PUSH 9 - MULFP254 - SUBFP254 - ADDFP254 - // stack: E0 = 9CDX - CDX_ + C0D0 , CDX , C0D0_, CDX_ - SWAP15 - SWAP3 - // stack: CDX_ , CDX , C0D0_ - PUSH 9 - MULFP254 - ADDFP254 - ADDFP254 - // stack: E0_ = 9CDX_ + CDX + C0D0_ - SWAP9 - - /// E1 = C0D1 + C1D0 + i9(C2D2) - /// - /// C0D1 = (c0d1 - c0_d1_) + (c0d1_ + c0_d1)i - /// C1D0 = (c1d0 - c1_d0_) + (c1d0_ + c1_d0)i - /// - /// CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_) - /// CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0 - /// - /// C2D2 = (c2d2 - c2_d2_) + (c2d2_ + c2_d2)i - /// i9(C2D2) = (9C2D2 - C2D2_) + (C2D2 + 9C2D2_)i - /// - /// E1 = 9C2D2 - C2D2_ + CD01 - /// E1_ = C2D2 + 9C2D2_ + CD01_ - - // make C2D2_ = c2d2_ + c2_d2 - DUP13 - DUP9 - MULFP254 - DUP3 - DUP9 - MULFP254 - ADDFP254 - // make C2D2 = c2d2 - c2_d2_ - DUP3 - DUP10 - MULFP254 - DUP15 - DUP10 - MULFP254 - SUBFP254 - // make C0D0 = c0d1 + c1d0 - (c0_d1_ + c1_d0_) - DUP3 - DUP9 - MULFP254 - DUP15 - DUP8 - MULFP254 - ADDFP254 - DUP12 - DUP9 - MULFP254 - DUP15 - DUP8 - MULFP254 - ADDFP254 - SUBFP254 - // stack: C0D0, C2D2, C2D2_ - DUP3 - DUP3 - // stack: C2D2 , C2D2_ , C0D0, C2D2, C2D2_ - PUSH 9 - MULFP254 - SUBFP254 - ADDFP254 - // stack: E1 = 9C2D2 - C2D2_ + C0D0, C2D2, C2D2_ - SWAP13 - SWAP2 - // stack: C2D2_, C2D2 - PUSH 9 - MULFP254 - ADDFP254 - // stack: 9C2D2_ + C2D2 - // make CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0 - DUP11 - DUP9 - MULFP254 - DUP4 - DUP9 - MULFP254 - ADDFP254 - DUP3 - DUP8 - MULFP254 - ADDFP254 - DUP15 - DUP7 - MULFP254 - ADDFP254 - // stack: CD01_ , 9C2D2_ + C2D2 - ADDFP254 - // stack: E1_ = CD01_ + 9C2D2_ + C2D2 - SWAP13 - - /// E2 = C0D2 + C1D1 + C2D0 - /// - /// C0D2 = (c0d2 - 
c0_d2_) + (c0d2_ + c0_d2)i - /// C1D1 = (c1d1 - c1_d1_) + (c1d1_ + c1_d1)i - /// C2D0 = (c2d0 - c2_d0_) + (c2d0_ + c2_d0)i - /// - /// E2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_) - /// E2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 - - // make c0_d2_ + c1_d1_ + c2_d0_ - DUP3 - DUP11 - MULFP254 - DUP2 - DUP10 - MULFP254 - ADDFP254 - DUP5 - DUP8 - MULFP254 - ADDFP254 - // make c0d2 + c1d1 + c2d0 - DUP16 - DUP7 - MULFP254 - DUP4 - DUP10 - MULFP254 - ADDFP254 - DUP13 - DUP12 - MULFP254 - ADDFP254 - // stack: c0d2 + c1d1 + c2d0 , c0_d2_ + c1_d1_ + c2_d0_ - SUBFP254 - // stack: E2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_) - SWAP15 - // make c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 - SWAP7 - MULFP254 - SWAP7 - MULFP254 - SWAP7 - MULFP254 - SWAP2 - MULFP254 - ADDFP254 - SWAP2 - MULFP254 - ADDFP254 - ADDFP254 - ADDFP254 - SWAP2 - MULFP254 - ADDFP254 - // stack: E2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 - SWAP5 -%endmacro diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm new file mode 100644 index 00000000..d4d92689 --- /dev/null +++ b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm @@ -0,0 +1,236 @@ +// Computes E = CD for two fp6 elements C, D laid out on the stack as listed below. NOTE(review): the added file ends at the last SWAP5 with no JUMP or return address in sight, confirm how control leaves this label. cost: 156 +global mul_fp6: + /// C = C0 + C1t + C2t^2 + /// = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2 + /// + /// D = D0 + D1t + D2t^2 + /// = (d0 + d0_i) + (d1 + d1_i)t + (d2 + d2_i)t^2 + /// + /// E = E0 + E1t + E2t^2 = CD + /// + /// initial stack: c0, c0_, c1, c1_, c2, c2_, d0, d0_, d1, d1_, d2, d2_ + /// final stack: e0, e0_, e1, e1_, e2, e2_ + + /// E0 = C0D0 + i9(C1D2 + C2D1) + /// + /// C0D0 = (c0d0 - c0_d0_) + (c0d0_ + c0_d0)i + /// + /// C1D2 = (c1d2 - c1_d2_) + (c1d2_ + c1_d2)i + /// C2D1 = (c2d1 - c2_d1_) + (c2d1_ + c2_d1)i + /// + /// CDX = C1D2 + C2D1 + /// = (c1d2 + c2d1 - c1_d2_ - c2_d1_) + (c1d2_ + c1_d2 + c2d1_ + c2_d1)i + /// + /// i9(CDX) = (9CDX - CDX_) + (CDX + 9CDX_)i + /// + /// E0 = 9CDX - CDX_ + C0D0 + /// E0_ = 9CDX_ + CDX + C0D0_ + + // make CDX_ = c1d2_ + c1_d2 + 
c2d1_ + c2_d1 + DUP12 + DUP4 + MULFP254 + DUP12 + DUP6 + MULFP254 + ADDFP254 + DUP11 + DUP7 + MULFP254 + ADDFP254 + DUP10 + DUP8 + MULFP254 + ADDFP254 + // make C0D0_ = c0d0_ + c0_d0 + DUP9 + DUP3 + MULFP254 + DUP9 + DUP5 + MULFP254 + ADDFP254 + // make CDX = c1d2 + c2d1 - c1_d2_ - c2_d1_ + DUP12 + DUP9 + MULFP254 + DUP15 + DUP8 + MULFP254 + ADDFP254 + DUP14 + DUP7 + MULFP254 + DUP13 + DUP10 + MULFP254 + ADDFP254 + SUBFP254 + // make C0D0 = c0d0 - c0_d0_ + DUP11 + DUP6 + MULFP254 + DUP11 + DUP6 + MULFP254 + SUBFP254 + + // stack: C0D0 , CDX , C0D0_, CDX_ + DUP4 + DUP3 + // stack: CDX , CDX_ , C0D0 , CDX , C0D0_, CDX_ + PUSH 9 + MULFP254 + SUBFP254 + ADDFP254 + // stack: E0 = 9CDX - CDX_ + C0D0 , CDX , C0D0_, CDX_ + SWAP15 + SWAP3 + // stack: CDX_ , CDX , C0D0_ + PUSH 9 + MULFP254 + ADDFP254 + ADDFP254 + // stack: E0_ = 9CDX_ + CDX + C0D0_ + SWAP9 + + /// E1 = C0D1 + C1D0 + i9(C2D2) + /// + /// C0D1 = (c0d1 - c0_d1_) + (c0d1_ + c0_d1)i + /// C1D0 = (c1d0 - c1_d0_) + (c1d0_ + c1_d0)i + /// + /// CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_) + /// CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0 + /// + /// C2D2 = (c2d2 - c2_d2_) + (c2d2_ + c2_d2)i + /// i9(C2D2) = (9C2D2 - C2D2_) + (C2D2 + 9C2D2_)i + /// + /// E1 = 9C2D2 - C2D2_ + CD01 + /// E1_ = C2D2 + 9C2D2_ + CD01_ + + // make C2D2_ = c2d2_ + c2_d2 + DUP13 + DUP9 + MULFP254 + DUP3 + DUP9 + MULFP254 + ADDFP254 + // make C2D2 = c2d2 - c2_d2_ + DUP3 + DUP10 + MULFP254 + DUP15 + DUP10 + MULFP254 + SUBFP254 + // make CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_) + DUP3 + DUP9 + MULFP254 + DUP15 + DUP8 + MULFP254 + ADDFP254 + DUP12 + DUP9 + MULFP254 + DUP15 + DUP8 + MULFP254 + ADDFP254 + SUBFP254 + // stack: CD01, C2D2, C2D2_ + DUP3 + DUP3 + // stack: C2D2 , C2D2_ , CD01, C2D2, C2D2_ + PUSH 9 + MULFP254 + SUBFP254 + ADDFP254 + // stack: E1 = 9C2D2 - C2D2_ + CD01, C2D2, C2D2_ + SWAP13 + SWAP2 + // stack: C2D2_, C2D2 + PUSH 9 + MULFP254 + ADDFP254 + // stack: 9C2D2_ + C2D2 + // make CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0 + DUP11 + DUP9 + MULFP254 
+ DUP4 + DUP9 + MULFP254 + ADDFP254 + DUP3 + DUP8 + MULFP254 + ADDFP254 + DUP15 + DUP7 + MULFP254 + ADDFP254 + // stack: CD01_ , 9C2D2_ + C2D2 + ADDFP254 + // stack: E1_ = CD01_ + 9C2D2_ + C2D2 + SWAP13 + + /// E2 = C0D2 + C1D1 + C2D0 + /// + /// C0D2 = (c0d2 - c0_d2_) + (c0d2_ + c0_d2)i + /// C1D1 = (c1d1 - c1_d1_) + (c1d1_ + c1_d1)i + /// C2D0 = (c2d0 - c2_d0_) + (c2d0_ + c2_d0)i + /// + /// E2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_) + /// E2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 + + // make c0_d2_ + c1_d1_ + c2_d0_ + DUP3 + DUP11 + MULFP254 + DUP2 + DUP10 + MULFP254 + ADDFP254 + DUP5 + DUP8 + MULFP254 + ADDFP254 + // make c0d2 + c1d1 + c2d0 + DUP16 + DUP7 + MULFP254 + DUP4 + DUP10 + MULFP254 + ADDFP254 + DUP13 + DUP12 + MULFP254 + ADDFP254 + // stack: c0d2 + c1d1 + c2d0 , c0_d2_ + c1_d1_ + c2_d0_ + SUBFP254 + // stack: E2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_) + SWAP15 + // make c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 (this last step uses only SWAP, MULFP254 and ADDFP254, no DUPs) + SWAP7 + MULFP254 + SWAP7 + MULFP254 + SWAP7 + MULFP254 + SWAP2 + MULFP254 + ADDFP254 + SWAP2 + MULFP254 + ADDFP254 + ADDFP254 + ADDFP254 + SWAP2 + MULFP254 + ADDFP254 + // stack: E2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 + SWAP5