diff --git a/evm/src/cpu/kernel/asm/fields/fp6.asm b/evm/src/cpu/kernel/asm/fields/fp6.asm index a6e93a71..d12322b3 100644 --- a/evm/src/cpu/kernel/asm/fields/fp6.asm +++ b/evm/src/cpu/kernel/asm/fields/fp6.asm @@ -180,7 +180,7 @@ // stack: h0, h1, h2, h3, h4, h5 %endmacro -// *backwards order subtraction* cost: 17 +// *reversed argument subtraction* cost: 17 %macro bus_fp6 // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5 SWAP7 @@ -210,14 +210,14 @@ // cost: 156 %macro mul_fp6 - /// E = E0 + E1t + E2t^2 = CD - /// /// C = C0 + C1t + C2t^2 /// = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2 /// /// D = D0 + D1t + D2t^2 /// = (d0 + d0_i) + (d1 + d1_i)t + (d2 + d2_i)t^2 /// + /// E = E0 + E1t + E2t^2 = CD + /// /// initial stack: c0, c0_, c1, c1_, c2, c2_, d0, d0_, d1, d1_, d2, d2_ /// final stack: e0, e0_, e1, e1_, e2, e2_ @@ -236,7 +236,7 @@ /// E0 = 9CDX - CDX_ + C0D0 /// E0_ = 9CDX_ + CDX + C0D0_ - // CDX_ = c1d2_ + c1_d2 + c2d1_ + c2_d1 + // make CDX_ = c1d2_ + c1_d2 + c2d1_ + c2_d1 DUP12 DUP4 MULFP254 @@ -252,7 +252,7 @@ DUP8 MULFP254 ADDFP254 - // C0D0_ = c0d0_ + c0_d0 + // make C0D0_ = c0d0_ + c0_d0 DUP9 DUP3 MULFP254 @@ -260,7 +260,7 @@ DUP5 MULFP254 ADDFP254 - // CDX = c1d2 + c2d1 - c1_d2_ - c2_d1_ + // make CDX = c1d2 + c2d1 - c1_d2_ - c2_d1_ DUP12 DUP9 MULFP254 @@ -276,7 +276,7 @@ MULFP254 ADDFP254 SUBFP254 - // C0D0 = c0d0 - c0_d0_ + // make C0D0 = c0d0 - c0_d0_ DUP11 DUP6 MULFP254 @@ -284,23 +284,24 @@ DUP6 MULFP254 SUBFP254 - // stack: C0D0 , CDX , C0D0_, CDX_ + + // stack: C0D0 , CDX , C0D0_, CDX_ DUP4 DUP3 - // stack: CDX , CDX_ , C0D0 , CDX , C0D0_, CDX_ + // stack: CDX , CDX_ , C0D0 , CDX , C0D0_, CDX_ PUSH 9 MULFP254 SUBFP254 ADDFP254 - // stack: 9CDX - CDX_ + C0D0 , CDX , C0D0_, CDX_ + // stack: E0 = 9CDX - CDX_ + C0D0 , CDX , C0D0_, CDX_ SWAP15 SWAP3 - // stack: CDX_ , CDX , C0D0_ + // stack: CDX_ , CDX , C0D0_ PUSH 9 MULFP254 ADDFP254 ADDFP254 - // stack: 9CDX_ + CDX + C0D0_ + // stack: E0_ = 9CDX_ + CDX + C0D0_ SWAP9 /// E1 = C0D1 + C1D0 
+ i9(C2D2) @@ -308,13 +309,16 @@ /// C0D1 = (c0d1 - c0_d1_) + (c0d1_ + c0_d1)i /// C1D0 = (c1d0 - c1_d0_) + (c1d0_ + c1_d0)i /// + /// CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_) + /// CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0 + /// /// C2D2 = (c2d2 - c2_d2_) + (c2d2_ + c2_d2)i /// i9(C2D2) = (9C2D2 - C2D2_) + (C2D2 + 9C2D2_)i /// - /// E1 = 9C2D2 - C2D2_ + c0d1 + c1d0 - (c0_d1_ + c1_d0_) - /// E1_ = C2D2 + 9C2D2_ + c0d1_ + c0_d1 + c1d0_ + c1_d0 + /// E1 = 9C2D2 - C2D2_ + CD01 + /// E1_ = C2D2 + 9C2D2_ + CD01_ - // C2D2_ = c2d2_ + c2_d2 + // make C2D2_ = c2d2_ + c2_d2 DUP13 DUP9 MULFP254 @@ -322,7 +326,7 @@ DUP9 MULFP254 ADDFP254 - // C2D2 = c2d2 - c2_d2_ + // make C2D2 = c2d2 - c2_d2_ DUP3 DUP10 MULFP254 @@ -330,8 +334,7 @@ DUP10 MULFP254 SUBFP254 - // stack: C2D2, C2D2_ - // c0d1 + c1d0 - (c0_d1_ + c1_d0_) + // make CD01 = c0d1 + c1d0 - (c0_d1_ + c1_d0_) DUP3 DUP9 MULFP254 @@ -347,23 +350,23 @@ MULFP254 ADDFP254 SUBFP254 - // stack: c0d1 + c1d0 - (c0_d1_ + c1_d0_), C2D2, C2D2_ + // stack: CD01, C2D2, C2D2_ DUP3 DUP3 - // stack: C2D2 , C2D2_ , c0d1 + c1d0 - (c0_d1_ + c1_d0_), C2D2, C2D2_ + // stack: C2D2 , C2D2_ , CD01, C2D2, C2D2_ PUSH 9 MULFP254 SUBFP254 ADDFP254 - // stack: 9C2D2 - C2D2_ + c0d1 + c1d0 - (c0_d1_ + c1_d0_), C2D2, C2D2_ + // stack: E1 = 9C2D2 - C2D2_ + CD01, C2D2, C2D2_ SWAP13 SWAP2 - // stack: C2D2_ , C2D2 + // stack: C2D2_, C2D2 PUSH 9 MULFP254 ADDFP254 // stack: 9C2D2_ + C2D2 - // c0d1_ + c0_d1 + c1d0_ + c1_d0 + // make CD01_ = c0d1_ + c0_d1 + c1d0_ + c1_d0 DUP11 DUP9 MULFP254 @@ -379,7 +382,9 @@ DUP7 MULFP254 ADDFP254 + // stack: CD01_ , 9C2D2_ + C2D2 ADDFP254 + // stack: E1_ = CD01_ + 9C2D2_ + C2D2 SWAP13 /// E2 = C0D2 + C1D1 + C2D0 @@ -391,7 +396,7 @@ /// E2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_) /// E2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 - // c0_d2_ + c1_d1_ + c2_d0_ + // make c0_d2_ + c1_d1_ + c2_d0_ DUP3 DUP11 MULFP254 @@ -403,7 +408,7 @@ DUP8 MULFP254 ADDFP254 - // c0d2 + c1d1 + c2d0 + // make c0d2 + c1d1 + c2d0 DUP16 DUP7 
MULFP254 @@ -415,10 +420,11 @@ DUP12 MULFP254 ADDFP254 - // stack: c0d2 + c1d1 + c2d0, c0_d2_ + c1_d1_ + c2_d0_ + // stack: c0d2 + c1d1 + c2d0 , c0_d2_ + c1_d1_ + c2_d0_ SUBFP254 + // stack: E2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_) SWAP15 - // c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 + // make c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 SWAP7 MULFP254 SWAP7 @@ -436,5 +442,6 @@ SWAP2 MULFP254 ADDFP254 + // stack: E2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 SWAP5 %endmacro