update fp12

This commit is contained in:
Dmitry Vagner 2022-11-14 16:41:36 -08:00
parent 69ce4f99fe
commit a3dfea9cf8
2 changed files with 236 additions and 175 deletions

View File

@ -1,5 +1,6 @@
/// Note: uncomment this to test
/// cost: 220
global test_mul_fp12:
// stack: f, inA , f', g, inB , g', inB, out, inA, out
DUP7
@ -22,7 +23,7 @@ global test_mul_fp12:
// stack: ret_stack, inB, out, inA, out
SWAP3
// stack: inA, inB, out, ret_stack, out
%jump(mul_fp12)
%jump(mul_fp12_sparse)
ret_stack:
// stack: out
DUP1 %offset_fp6
@ -35,6 +36,11 @@ ret_stack:
// stack: h, h', out
%jump(0xdeadbeef)
///////////////////////////////////////
///// GENERAL FP12 MULTIPLICATION /////
///////////////////////////////////////
/// cost: 1063
/// fp6 functions:
/// fn | num | ops | cost
@ -166,6 +172,189 @@ ret_3:
JUMP
//////////////////////////////////////
///// SPARSE FP12 MULTIPLICATION /////
//////////////////////////////////////
/// cost: 646
/// fp6 functions:
/// fn | num | ops | cost
/// ---------------------------
/// load | 2 | 40 | 80
/// store | 2 | 40 | 80
/// dup | 4 | 6 | 24
/// swap | 4 | 16 | 64
/// add | 4 | 16 | 64
/// mul_fp | 2 | 21 | 42
/// mul_fp2 | 4 | 59 | 236
///
/// lone stack operations:
/// op | num
/// ------------
/// ADD | 6
/// DUP | 9
/// PUSH | 6
/// POP | 5
///
/// TOTAL: 618
/// input:
/// F = f + f'z
/// G = g0 + (G1)t + (G2)tz
///
/// output:
/// H = h + h'z = FG
/// = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
///
/// h = g0 * f + G1 * sh(f ) + G2 * sh2(f')
/// h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
///
/// memory pointers [ind' = ind+6, inB1 = inB + 2, inB2 = inB1 + 6 = inB + 8]
/// { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
///
/// f, f' consist of six elements; G1, G2 consist of two elements; and g0 of one element
global mul_fp12_sparse:
// Multiplies the fp12 value stored at inA (f at inA, f' at inA + offset_fp6)
// by the sparse fp12 value stored at inB (g0 at inB, G1 at inB + 2,
// G2 at inB + 8) and stores the product as h at out and h' at out + offset_fp6.
// On exit inA, inB and out are popped and control jumps to the value the
// caller left directly beneath them on the stack.
// stack: inA, inB, out
// --- Load both halves of F from memory ---
DUP1 %offset_fp6
// stack: inA', inA, inB, out
%load_fp6
// stack: f', inA, inB, out
DUP8
// stack: inB, f', inA, inB, out
DUP8
// stack: inA, inB, f', inA, inB, out
%load_fp6
// stack: f, inB, f', inA, inB, out
// --- Stage working copies of f, f', inB and out for the computation of h;
// --- the originals stay lower on the stack for the computation of h' ---
DUP16
// stack: out, f, inB, f', inA, inB, out
%dup_fp6_8
// stack: f', out, f, inB, f', inA, inB, out
DUP14
// stack: inB, f', out, f, inB, f', inA, inB, out
%dup_fp6_8
// stack: f, inB, f', out, f, inB, f', inA, inB, out
DUP7
// stack: inB, f, inB, f', out, f, inB, f', inA, inB, out
%dup_fp6_8
// stack: f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%dup_fp6_7
// stack: f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
// --- h = g0 * f + G2 * sh2(f') + G1 * sh(f) ---
DUP13
// stack: inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%mload_kernel_general
// stack: g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%mul_fp_fp6
// stack: g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%swap_fp6
// stack: f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
// G2 is read from inB + 8
DUP13 %add_const(8)
// stack: inB2, f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%load_fp2
// stack: G2 , f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%mul_fp2_fp6_sh2
// stack: G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%add_fp6
// stack: G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%swap_fp6_hole
// stack: f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
// G1 is read from inB + 2
DUP7 %add_const(2)
// stack: inB1, f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
%load_fp2
// stack: G1 , f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
%mul_fp2_fp6_sh
// stack: G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
%add_fp6_hole
// stack: G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
// --- Store h at out ---
DUP14
// NOTE(review): the annotation below lists "inB, out, f', f" after h, while the
// annotation just above lists "inB, f', out, f". DUP14 only reaches `out` under
// the ordering above; under that same ordering the %pop2 further down would
// remove inB and the first element of f' rather than inB and out. Confirm the
// real layout against the macro definitions before relying on either annotation.
// stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, out, f', f, inB, f', inA, inB, out
%store_fp6
// stack: inB, out, f', f, inB, f', inA, inB, out
%pop2
// stack: f', f, inB, f', inA, inB, out
// --- h' = g0 * f' + G2 * sh(f) + G1 * sh(f') ---
DUP13
// stack: inB, f', f, inB, f', inA, inB, out
%mload_kernel_general
// stack: g0 , f', f, inB, f', inA, inB, out
%mul_fp_fp6
// stack: g0 * f', f, inB, f', inA, inB, out
%swap_fp6
// stack: f , g0 * f', inB, f', inA, inB, out
// G2 is read from inB + 8
DUP13 %add_const(8)
// stack: inB2, f , g0 * f', inB, f', inA, inB, out
%load_fp2
// stack: G2 , f , g0 * f', inB, f', inA, inB, out
%mul_fp2_fp6_sh
// stack: G2 * sh(f) , g0 * f', inB, f', inA, inB, out
%add_fp6
// stack: G2 * sh(f) + g0 * f', inB, f', inA, inB, out
%swap_fp6_hole
// stack: f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
// G1 is read from inB + 2
DUP7 %add_const(2)
// stack: inB1, f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
%load_fp2
// stack: G1 , f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
%mul_fp2_fp6_sh
// stack: G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
%add_fp6_hole
// stack: G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
// --- Store h' at out' = out + offset_fp6 ---
DUP9 %offset_fp6
// stack: out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
%store_fp6
// stack: inA, inB, out
// Drop the three pointer arguments, then jump to the value left beneath them.
%pop3
JUMP
/// global mul_fp12_sparse_fast:
/// // stack: inA, inB, out
/// DUP2
/// // stack: inB, inA, inB, out
/// %load_fp12_sparse
/// // stack: g0, G1, G1', inA, inB, out
/// DUP6 %offset_fp6
/// // stack: inA', g0, G1, G1', inA, inB, out
/// %load_fp6
/// // stack: f', g0, G1, G1', inA, inB, out
/// DUP12
/// // stack: inA, f', g0, G1, G1', inA, inB, out
/// %load_fp6
/// // stack: f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp_fp6
/// // stack: (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh
/// // stack: (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh2
/// // stack: (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// DUP26
/// // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %store_fp6
/// // stack: f, f', g0, G1, G1', inA, inB, out
/// %semiclone_mul_fp2_fp6_sh
/// // stack: (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh
/// // stack: (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp_fp6
/// // stack: (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// DUP20 %offset_fp6
/// // stack: out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %store_fp6
/// // stack: f', g0, G1, G1', inA, inB, out
/// %pop14
/////////////////////////
///// FP12 SQUARING /////
/////////////////////////
/// fp6 functions:
/// fn | num | ops | cost
/// -------------------------
@ -257,176 +446,3 @@ post_sq2:
// stack: inp, out
%pop2
JUMP
/// fp6 functions:
/// fn | num | ops | cost
/// ---------------------------
/// load | 2 | 40 | 80
/// store | 2 | 40 | 80
/// dup | 4 | 6 | 24
/// swap | 4 | 16 | 64
/// add | 4 | 16 | 64
/// mul_fp | 2 | 21 | 42
/// mul_fp2 | 4 | 59 | 236
///
/// lone stack operations:
/// op | num
/// ------------
/// ADD | 6
/// DUP | 9
/// PUSH | 6
/// POP | 5
///
/// TOTAL: 618
/// input:
/// F = f + f'z
/// G = g0 + (G1)t + (G2)tz
///
/// output:
/// H = h + h'z = FG
/// = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
///
/// h = g0 * f + G1 * sh(f ) + G2 * sh2(f')
/// h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
///
/// memory pointers [ind' = ind+6, inB2 = inB1 + 2 = inB + 3]
/// { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
///
/// f, f' consist of six elements; G1, G1' consist of two elements; and g0 of one element
global mul_fp12_sparse:
// stack: inA, inB, out
DUP1 %offset_fp6
// stack: inA', inA, inB, out
%load_fp6
// stack: f', inA, inB, out
DUP8
// stack: inB, f', inA, inB, out
DUP8
// stack: inA, inB, f', inA, inB, out
%load_fp6
// stack: f, inB, f', inA, inB, out
DUP16
// stack: out, f, inB, f', inA, inB, out
%dup_fp6_8
// stack: f', out, f, inB, f', inA, inB, out
DUP14
// stack: inB, f', out, f, inB, f', inA, inB, out
%dup_fp6_8
// stack: f, inB, f', out, f, inB, f', inA, inB, out
DUP7
// stack: inB, f, inB, f', out, f, inB, f', inA, inB, out
%dup_fp6_8
// stack: f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%dup_fp6_7
// stack: f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
DUP13
// stack: inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%mload_kernel_general
// stack: g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%mul_fp_fp6
// stack: g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
%swap_fp6
// stack: f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
DUP13 %add_const(3)
// stack: inB2, f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%load_fp2
// stack: G2 , f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%mul_fp2_fp6_sh2
// stack: G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%add_fp6
// stack: G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
%swap_fp6_hole
// stack: f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
DUP7 %add_const(1)
// stack: inB1, f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
%load_fp2
// stack: G1 , f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
%mul_fp2_fp6_sh
// stack: G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
%add_fp6_hole
// stack: G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
DUP14
// stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, out, f', f, inB, f', inA, inB, out
%store_fp6
// stack: inB, out, f', f, inB, f', inA, inB, out
%pop2
// stack: f', f, inB, f', inA, inB, out
DUP13
// stack: inB, f', f, inB, f', inA, inB, out
%mload_kernel_general
// stack: g0 , f', f, inB, f', inA, inB, out
%mul_fp_fp6
// stack: g0 * f', f, inB, f', inA, inB, out
%swap_fp6
// stack: f , g0 * f', inB, f', inA, inB, out
DUP13 %add_const(3)
// stack: inB2, f , g0 * f', inB, f', inA, inB, out
%load_fp2
// stack: G2 , f , g0 * f', inB, f', inA, inB, out
%mul_fp2_fp6_sh
// stack: G2 * sh(f) , g0 * f', inB, f', inA, inB, out
%add_fp6
// stack: G2 * sh(f) + g0 * f', inB, f', inA, inB, out
%swap_fp6_hole
// stack: f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
DUP7 %add_const(1)
// stack: inB1, f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
%load_fp2
// stack: G1 , f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
%mul_fp2_fp6_sh
// stack: G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
%add_fp6_hole
// stack: G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
DUP9 %offset_fp6
// stack: out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
%store_fp6
// stack: inA, inB, out
%pop3
/// global mul_fp12_sparse_fast:
/// // stack: inA, inB, out
/// DUP2
/// // stack: inB, inA, inB, out
/// %load_fp12_sparse
/// // stack: g0, G1, G1', inA, inB, out
/// DUP6 %offset_fp6
/// // stack: inA', g0, G1, G1', inA, inB, out
/// %load_fp6
/// // stack: f', g0, G1, G1', inA, inB, out
/// DUP12
/// // stack: inA, f', g0, G1, G1', inA, inB, out
/// %load_fp6
/// // stack: f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp_fp6
/// // stack: (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh
/// // stack: (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh2
/// // stack: (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// DUP26
/// // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %store_fp6
/// // stack: f, f', g0, G1, G1', inA, inB, out
/// %semiclone_mul_fp2_fp6_sh
/// // stack: (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh
/// // stack: (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp_fp6
/// // stack: (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// DUP20 offset_fp6
/// // stack: out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %store_fp6
/// // stack: f', g0, G1, G1', inA, inB, out
/// %pop14

View File

@ -106,6 +106,11 @@ fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
[i9(c2), c0, c1]
}
/// Expands five coefficients into the nested `2 x 3 x 2` array layout.
///
/// `x[0]` lands at `[0][0][0]`, the pair `(x[1], x[2])` at `[0][1]`, and the
/// pair `(x[3], x[4])` at `[1][1]`; every other entry is zero.
fn sparse_embed(x: [u32; 5]) -> [[[u32; 2]; 3]; 2] {
    const ZERO: [u32; 2] = [0, 0];
    let lower = [[x[0], 0], [x[1], x[2]], ZERO];
    let upper = [ZERO, [x[3], x[4]], ZERO];
    [lower, upper]
}
fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
let [f0, f1] = f;
let [g0, g1] = g;
@ -125,6 +130,17 @@ fn gen_fp6() -> [[u32; 2]; 3] {
]
}
/// Draws five random coefficients in `0..P254` and embeds them with
/// `sparse_embed`, yielding a random sparse element in the nested-array layout.
fn gen_fp12_sparse() -> [[[u32; 2]; 3]; 2] {
    let mut rng = thread_rng();
    let mut coeffs = [0u32; 5];
    // Fill front to back so the draws happen in the same order as the slots.
    for slot in coeffs.iter_mut() {
        *slot = rng.gen_range(0..P254);
    }
    sparse_embed(coeffs)
}
/// Converts each `u32` into a `U256` and returns the values in reverse order.
// NOTE(review): the reversal presumably puts the first input on top of the
// interpreter stack — confirm against `run_interpreter`.
fn as_stack(xs: Vec<u32>) -> Vec<U256> {
    let mut stack: Vec<U256> = xs.into_iter().map(|word| U256::from(word)).collect();
    stack.reverse();
    stack
}
@ -177,7 +193,7 @@ fn make_initial_stack(
as_stack(input)
}
#[test]
// #[test]
fn test_fp12() -> Result<()> {
let in1 = 64;
let in2 = 76;
@ -206,3 +222,32 @@ fn test_fp12() -> Result<()> {
Ok(())
}
/// Runs the kernel code at the `test_mul_fp12` label on a random dense operand
/// and a random sparse operand, then checks that the interpreter's final stack
/// equals the flattened reference product from `mul_fp12` followed by `out`.
#[test]
fn test_fp12_sparse() -> Result<()> {
    // Memory locations handed to the kernel routine for the operands and result.
    let in1 = 64;
    let in2 = 76;
    let out = 88;

    let f0 = gen_fp6();
    let f1 = gen_fp6();
    let [g0, g1] = gen_fp12_sparse();

    let initial_offset = KERNEL.global_labels["test_mul_fp12"];
    let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, g0, g1);
    let interpreter = run_interpreter(initial_offset, initial_stack)?;
    let final_stack: Vec<U256> = interpreter.stack().to_vec();

    // Flatten the reference product coefficient by coefficient, then append `out`.
    let product = mul_fp12([f0, f1], [g0, g1]);
    let mut output: Vec<u32> = Vec::new();
    for fp6 in product {
        for fp2 in fp6 {
            output.extend(fp2);
        }
    }
    output.push(out);
    let expected = as_stack(output);

    assert_eq!(final_stack, expected);

    Ok(())
}