all but inv

This commit is contained in:
Dmitry Vagner 2022-12-16 17:35:52 -08:00
parent cefbe248fa
commit d6c7e319bd
3 changed files with 246 additions and 39 deletions

View File

@ -28,13 +28,13 @@ pub(crate) fn combined_kernel() -> Kernel {
include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/power.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
include_str!("asm/curve/bn254/field_arithmetic/power.asm"),
include_str!("asm/curve/common.asm"),
include_str!("asm/curve/secp256k1/curve_mul.asm"),
include_str!("asm/curve/secp256k1/curve_add.asm"),

View File

@ -16,13 +16,13 @@
/// return y2 * y1 * y0
global power:
// stack: sqr, out, retdest
// stack: sqr, out, retdest
PUSH 1 DUP1 DUP1
// stack: 1, 1, 1, sqr, out, retdest
// stack: 1, 1, 1, sqr, out, retdest
%mstore_kernel_general(200) %mstore_kernel_general(212) %mstore_kernel_general(224)
// stack: sqr, out, retdest {200: y0, 212: y1, 224: y2}
// stack: sqr, out, retdest {200: y0, 212: y1, 224: y2}
PUSH power_loop_2 PUSH power_loop_1 PUSH power_return
// stack: power_return, power_loop_1, power_loop_2, sqr, out, retdest {200: y0, 212: y1, 224: y2}
// stack: power_return, power_loop_1, power_loop_2, sqr, out, retdest {200: y0, 212: y1, 224: y2}
SWAP3
// stack: sqr, power_loop_1, power_loop_2, power_return, out, retdest {200: y0, 212: y1, 224: y2}
PUSH 65 PUSH 62 PUSH 65

View File

@ -7,27 +7,68 @@ use rand::{thread_rng, Rng};
use crate::cpu::kernel::aggregator::KERNEL;
use crate::cpu::kernel::interpreter::{run_interpreter, BN_BASE};
type Fp = U256;
type Fp2 = [U256; 2];
type Fp6 = [Fp2; 3];
type Fp12 = [Fp6; 2];
fn add_fp(x: U256, y: U256) -> U256 {
const zero: Fp = U256::from(0);
/// Lifts a base-field element into `Fp2` by placing it in the first slot and
/// leaving the second slot at `zero`.
fn embed_fp2(x: Fp) -> Fp2 {
    let mut lifted: Fp2 = [zero; 2];
    lifted[0] = x;
    lifted
}
/// Lifts an `Fp2` element into `Fp6`: `a` becomes the first coefficient and
/// the remaining two coefficients are the embedded `zero`.
fn embed_fp2_fp6(a: Fp2) -> Fp6 {
    let nil: Fp2 = embed_fp2(zero);
    [a, nil, nil]
}
/// Lifts a base-field element into `Fp6` by going through `Fp2` first.
fn embed_fp6(x: Fp) -> Fp6 {
    let in_fp2 = embed_fp2(x);
    embed_fp2_fp6(in_fp2)
}
/// Lifts a base-field element into `Fp12`: the first `Fp6` half carries `x`,
/// the second half is the embedded `zero`.
fn embed_fp12(x: Fp) -> Fp12 {
    let first_half = embed_fp6(x);
    let second_half = embed_fp6(zero);
    [first_half, second_half]
}
/// Produces a pseudo-random element of `Fp`.
///
/// A single random `u64` is drawn and repeated in all four limbs of a `U256`,
/// which is then reduced modulo `BN_BASE`.
fn gen_fp() -> Fp {
    // `Rng::gen` borrows the generator mutably, so the binding must be `mut`.
    let mut rng = thread_rng();
    let x64 = rng.gen::<u64>();
    U256([x64, x64, x64, x64]) % BN_BASE
}
fn gen_fp6() -> Fp6 {
[
[gen_fp(), gen_fp()],
[gen_fp(), gen_fp()],
[gen_fp(), gen_fp()],
]
}
/// Produces a pseudo-random `Fp12` element from two independent `gen_fp6` draws.
fn gen_fp12() -> Fp12 {
    let first_half = gen_fp6();
    let second_half = gen_fp6();
    [first_half, second_half]
}
fn gen_fp12_sparse() -> Fp12 {
sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
}
/// Returns `x + y` reduced modulo `BN_BASE`.
fn add_fp(x: Fp, y: Fp) -> Fp {
    let sum = x + y;
    sum % BN_BASE
}
fn add3_fp(x: U256, y: U256, z: U256) -> U256 {
/// Returns `x + y + z` reduced modulo `BN_BASE`; the reduction happens once,
/// after both additions.
fn add3_fp(x: Fp, y: Fp, z: Fp) -> Fp {
    let partial = x + y;
    let total = partial + z;
    total % BN_BASE
}
fn mul_fp(x: U256, y: U256) -> U256 {
/// Returns `x * y` reduced modulo `BN_BASE`.
///
/// The product is formed with `full_mul` so it is not truncated to 256 bits
/// before the reduction. The final conversion back to `U256` is unwrapped and
/// would panic if it failed (presumably impossible, since the remainder is
/// smaller than `BN_BASE`).
fn mul_fp(x: Fp, y: Fp) -> Fp {
    let wide_product = x.full_mul(y);
    let reduced = wide_product % BN_BASE;
    U256::try_from(reduced).unwrap()
}
fn sub_fp(x: U256, y: U256) -> U256 {
/// Returns `x - y` reduced modulo `BN_BASE`.
///
/// `BN_BASE` is added to `x` before subtracting so the unsigned intermediate
/// does not go below zero when `y > x` (assuming `y` is already reduced).
fn sub_fp(x: Fp, y: Fp) -> Fp {
    let shifted = BN_BASE + x;
    let difference = shifted - y;
    difference % BN_BASE
}
fn neg_fp(x: U256) -> U256 {
/// Returns the additive inverse of `x` modulo `BN_BASE`.
///
/// The trailing reduction maps the `x == 0` case (`BN_BASE - 0`) back to zero.
fn neg_fp(x: Fp) -> Fp {
    let complement = BN_BASE - x;
    complement % BN_BASE
}
@ -119,10 +160,9 @@ fn sh(c: Fp6) -> Fp6 {
/// Builds a sparse `Fp12` element from five base-field coefficients.
///
/// Layout of the result:
/// * first half:  `[embed(g0), [g1, g1_], 0]`
/// * second half: `[0, [g2, g2_], 0]`
///
/// An `Fp12` has exactly two `Fp6` halves, so the array literal holds exactly
/// two rows.
fn sparse_embed(x: [U256; 5]) -> Fp12 {
    let [g0, g1, g1_, g2, g2_] = x;
    // Local zero keeps this function independent of any module-level constant.
    let zero = U256::from(0);
    [
        [embed_fp2(g0), [g1, g1_], embed_fp2(zero)],
        [embed_fp2(zero), [g2, g2_], embed_fp2(zero)],
    ]
}
@ -136,28 +176,6 @@ fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 {
[add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
}
/// Produces a pseudo-random field element: one random `u64` repeated across
/// all four limbs of a `U256`, reduced modulo `BN_BASE`.
fn gen_fp() -> U256 {
    let limb: u64 = thread_rng().gen();
    U256([limb; 4]) % BN_BASE
}
/// Produces a pseudo-random `Fp6` element from six `gen_fp` draws, taken in
/// coefficient order.
fn gen_fp6() -> Fp6 {
    let c0 = [gen_fp(), gen_fp()];
    let c1 = [gen_fp(), gen_fp()];
    let c2 = [gen_fp(), gen_fp()];
    [c0, c1, c2]
}
/// Produces a pseudo-random `Fp12` element as a pair of `gen_fp6` draws.
fn gen_fp12() -> Fp12 {
    let lo = gen_fp6();
    let hi = gen_fp6();
    [lo, hi]
}
/// Produces a pseudo-random sparse `Fp12` element: five `gen_fp` draws passed
/// through `sparse_embed`.
fn gen_fp12_sparse() -> Fp12 {
    let g0 = gen_fp();
    let g1 = gen_fp();
    let g1_ = gen_fp();
    let g2 = gen_fp();
    let g2_ = gen_fp();
    sparse_embed([g0, g1, g1_, g2, g2_])
}
fn frob_t1(n: usize) -> Fp2 {
match n {
0 => [
@ -320,12 +338,203 @@ fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
/// Applies the `n`-indexed Frobenius helper to an `Fp12` element.
///
/// Both halves of `f` go through `frob_fp6(n, ·)`; the second half is then
/// multiplied by `frob_z(n)` embedded into `Fp6`.
fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
    let [f0, f1] = f;
    // Single definition of the scaling factor; the hand-built array form it
    // replaced was immediately shadowed and never read.
    let scale = embed_fp2_fp6(frob_z(n));
    [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
}
/// Selector table for the first loop of `fast_exp`.
///
/// Entry `i` is consumed while the running square equals the input raised to
/// `2^i`; its three flags say whether that square is multiplied into the
/// `y4`, `y2` and `y0` accumulators respectively.
const EXPS4: [(bool, bool, bool); 65] = [
    (true, true, true),
    (true, true, false),
    (true, true, true),
    (true, true, true),
    (false, false, false),
    (false, false, true),
    (true, false, true),
    (false, true, false),
    (true, false, true),
    (true, true, false),
    (true, false, true),
    (false, true, false),
    (true, true, false),
    (true, true, false),
    (true, true, false),
    (false, true, false),
    (false, true, false),
    (false, false, true),
    (true, false, true),
    (true, true, false),
    (false, true, false),
    (true, true, false),
    (true, true, false),
    (true, true, false),
    (false, false, true),
    (false, false, true),
    (true, false, true),
    (true, false, true),
    (true, true, false),
    (true, false, false),
    (true, true, false),
    (false, true, false),
    (true, true, false),
    (true, false, false),
    (false, true, false),
    (false, false, false),
    (true, false, false),
    (true, false, false),
    (true, false, true),
    (false, false, true),
    (false, true, true),
    (false, false, true),
    (false, true, true),
    (false, true, true),
    (false, false, false),
    (true, true, true),
    (true, false, true),
    (true, false, true),
    (false, true, true),
    (true, false, true),
    (false, true, true),
    (false, true, true),
    (true, true, false),
    (true, true, false),
    (true, true, false),
    (true, false, false),
    (false, false, true),
    (true, false, false),
    (false, false, true),
    (true, false, true),
    (true, true, false),
    (true, true, true),
    (false, true, true),
    (false, true, false),
    (true, true, true),
];
/// Selector table for the second loop of `fast_exp`.
///
/// It continues from the running square left by the `EXPS4` loop; the two
/// flags of each entry say whether the current square is multiplied into the
/// `y2` and `y0` accumulators respectively.
const EXPS2: [(bool, bool); 62] = [
    (true, false),
    (true, true),
    (false, false),
    (true, false),
    (true, false),
    (true, true),
    (true, false),
    (true, true),
    (true, false),
    (false, true),
    (false, true),
    (true, true),
    (true, true),
    (false, false),
    (true, true),
    (false, false),
    (false, false),
    (false, true),
    (false, true),
    (true, true),
    (true, true),
    (true, true),
    (false, true),
    (true, true),
    (false, false),
    (true, true),
    (true, false),
    (true, true),
    (false, false),
    (true, true),
    (true, true),
    (true, false),
    (false, false),
    (false, true),
    (false, false),
    (true, true),
    (false, true),
    (false, false),
    (true, false),
    (false, true),
    (false, true),
    (true, false),
    (false, true),
    (false, false),
    (false, false),
    (false, false),
    (false, true),
    (true, false),
    (true, true),
    (false, true),
    (true, true),
    (true, false),
    (false, true),
    (false, false),
    (true, false),
    (false, true),
    (true, false),
    (true, true),
    (true, false),
    (true, true),
    (false, true),
    (true, true),
];
/// Selector table for the third loop of `fast_exp`.
///
/// It continues from the running square left by the `EXPS2` loop; each flag
/// says whether the current square is multiplied into the `y0` accumulator.
/// The element type is a plain `bool`, matching how `fast_exp` iterates it.
const EXPS0: [bool; 65] = [
    false, false, true, false, false, true, true, false, true, false, true, true, true, false,
    true, false, false, false, true, false, false, true, false, true, false, true, true, false,
    false, false, false, false, true, false, true, false, true, true, true, false, false, true,
    true, true, true, false, true, false, true, true, false, false, true, false, false, false,
    true, true, true, true, false, false, true, true, false,
];
/// Combines powers of `f` selected by the tables `EXPS4`, `EXPS2` and `EXPS0`.
///
/// A running square `sq` starts at `f` and is squared once per table entry,
/// across all three loops without being reset. Three accumulators (`y4`, `y2`,
/// `y0`) start at one and are multiplied by `sq` whenever their flag is set.
/// After the loops the accumulators are combined and passed through
/// `frob_fp12` before the final product is returned.
///
/// The inversion of `y0` is still a TODO, so the returned value does not
/// include it.
fn fast_exp(f: Fp12) -> Fp12 {
let mut sq: Fp12 = f;
let mut y0: Fp12 = embed_fp12(U256::from(1));
let mut y2: Fp12 = embed_fp12(U256::from(1));
let mut y4: Fp12 = embed_fp12(U256::from(1));
// First table: every entry may feed all three accumulators.
for (a, b, c) in EXPS4 {
if a {
y4 = mul_fp12(y4, sq);
}
if b {
y2 = mul_fp12(y2, sq);
}
if c {
y0 = mul_fp12(y0, sq);
}
sq = mul_fp12(sq, sq);
}
// NOTE(review): this squares the accumulator itself. If the intent is to
// apply an implicit top bit of y4's exponent, the factor would be `sq` — confirm.
y4 = mul_fp12(y4, y4);
// Second table: y4 is finished; entries feed y2 and y0 only.
for (a, b) in EXPS2 {
if a {
y2 = mul_fp12(y2, sq);
}
if b {
y0 = mul_fp12(y0, sq);
}
sq = mul_fp12(sq, sq);
}
// NOTE(review): same pattern as for y4 above — confirm `y2 * y2` vs `y2 * sq`.
y2 = mul_fp12(y2, y2);
// Third table: only y0 is still being accumulated.
for a in EXPS0 {
if a {
y0 = mul_fp12(y0, sq);
}
sq = mul_fp12(sq, sq);
}
// NOTE(review): same pattern again — confirm `y0 * y0` vs `y0 * sq`.
y0 = mul_fp12(y0, y0);
// TODO: y0 = inv_fp12(y0);
// y4 <- y4 * y2 * y2 * y0 (y2 is multiplied in twice).
y4 = mul_fp12(y4, y2);
y4 = mul_fp12(y4, y2);
y4 = mul_fp12(y4, y0);
// Frobenius with index 1 on y4 and index 2 on y2; y0 is used as-is.
y4 = frob_fp12(1, y4);
y2 = frob_fp12(2, y2);
mul_fp12(mul_fp12(y4, y2), y0)
}
fn make_mul_stack(
in0: usize,
in1: usize,
@ -352,7 +561,6 @@ fn make_mul_stack(
input.extend(g);
input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
input.reverse();
input
}
@ -403,7 +611,6 @@ fn make_frob_stack(f: Fp12) -> Vec<U256> {
input.extend(f);
input.extend(vec![ptr]);
input.reverse();
input
}