diff --git a/README.md b/README.md index 5fe5482..93507aa 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ Initially, at least, this largely follows the [go-kzg](https://github.com/protol Done so far: - Rough and ready FFT and inverse FFT over the finite field. + - Ditto for FFTs over the G1 group ## Installation diff --git a/src/Makefile b/src/Makefile index 6c64ba0..56fedf1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,4 +1,4 @@ -tests = fft_fr_test fft_util_test +tests = fft_util_test fft_fr_test fft_g1_test .PRECIOUS: %.o @@ -9,6 +9,10 @@ fft_fr_test: fft_fr.o fft_fr_test.c test_util.o fft_util.o clang -Wall -o $@ $@.c test_util.o fft_fr.o fft_util.o -L../lib -lblst ./$@ +fft_g1_test: fft_g1.o fft_g1_test.c test_util.o fft_util.o + clang -Wall -o $@ $@.c test_util.o fft_g1.o fft_util.o -L../lib -lblst + ./$@ + %_test: %.o %_test.c test_util.o clang -Wall -o $@ $@.c test_util.o $*.o -L../lib -lblst ./$@ diff --git a/src/fft_fr.c b/src/fft_fr.c index 942d6ac..5656d3a 100644 --- a/src/fft_fr.c +++ b/src/fft_fr.c @@ -17,44 +17,43 @@ #include "fft_fr.h" // Slow Fourier Transform (simple, good for small sizes) -void fft_fr_slow(blst_fr *out, blst_fr *in, uint64_t offset, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { - blst_fr v, last, tmp; +void fft_fr_slow(blst_fr *out, blst_fr *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { + blst_fr v, last, jv, r; for (uint64_t i = 0; i < l; i++) { - blst_fr jv = in[offset]; - blst_fr r = roots[0]; - blst_fr_mul(&v, &jv, &r); - last = v; + blst_fr_mul(&last, &in[0], &roots[0]); for (uint64_t j = 1; j < l; j++) { - jv = in[offset + j * stride]; + jv = in[j * stride]; r = roots[((i * j) % l) * roots_stride]; blst_fr_mul(&v, &jv, &r); - tmp = last; - blst_fr_add(&last, &tmp, &v); + blst_fr_add(&last, &last, &v); } out[i] = last; } } // Fast Fourier Transform -void fft_fr_fast(blst_fr *out, blst_fr *in, uint64_t offset, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { +void fft_fr_fast(blst_fr *out, blst_fr *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { uint64_t half = l / 2; - fft_fr_helper(out, in, offset, stride * 2, roots, roots_stride * 2, l / 2); - fft_fr_helper(out + half, in, offset + stride, stride * 2, roots, roots_stride * 2, l / 2); - for (uint64_t i = 0; i < half; i++) { - blst_fr y_times_root; - blst_fr x = out[i]; - blst_fr_mul(&y_times_root, &out[i + half], &roots[i * roots_stride]); - blst_fr_add(&out[i], &x, &y_times_root); - blst_fr_sub(&out[i + half], &x, &y_times_root); + if (half > 0) { + fft_fr_helper(out, in, stride * 2, roots, roots_stride * 2, half); + fft_fr_helper(out + half, in + stride, stride * 2, roots, roots_stride * 2, half); + for (uint64_t i = 0; i < half; i++) { + blst_fr y_times_root; + blst_fr x = out[i]; + blst_fr_mul(&y_times_root, &out[i + half], &roots[i * roots_stride]); + blst_fr_add(&out[i], &x, &y_times_root); + blst_fr_sub(&out[i + half], &x, &y_times_root); + } + } else { + blst_fr_mul(out, in, roots); } } -void fft_fr_helper(blst_fr *out, blst_fr *in, uint64_t offset, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { - // TODO: Tunable parameter - if (l <= 4) { - fft_fr_slow(out, in, offset, stride, roots, roots_stride, l); +void fft_fr_helper(blst_fr *out, blst_fr *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { + if (l <= 4) { // TODO: Tunable parameter + fft_fr_slow(out, in, stride, roots, roots_stride, l); } else { - fft_fr_fast(out, in, offset, stride, roots, roots_stride, l); + fft_fr_fast(out, in, stride, roots, roots_stride, l); } } @@ -67,11 +66,11 @@ void fft_fr (blst_fr *out, blst_fr *in, FFTSettings *fs, bool inv, uint64_t n) { blst_fr inv_len; fr_from_uint64(&inv_len, n); blst_fr_eucl_inverse(&inv_len, &inv_len); - fft_fr_helper(out, in, 0, 1, fs->reverse_roots_of_unity, stride, fs->max_width); + fft_fr_helper(out, in, 1, fs->reverse_roots_of_unity, stride, fs->max_width); for (uint64_t i = 0; i < fs->max_width; i++) { blst_fr_mul(&out[i], &out[i], &inv_len); } } else { - fft_fr_helper(out, in, 0, 1, fs->expanded_roots_of_unity, stride, fs->max_width); + fft_fr_helper(out, in, 1, fs->expanded_roots_of_unity, stride, fs->max_width); } } diff --git a/src/fft_fr.h b/src/fft_fr.h index e09a490..6408561 100644 --- a/src/fft_fr.h +++ b/src/fft_fr.h @@ -17,7 +17,7 @@ #include "c-kzg.h" #include "fft_util.h" -void fft_fr_slow(blst_fr *out, blst_fr *in, uint64_t offset, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); -void fft_fr_fast(blst_fr *out, blst_fr *in, uint64_t offset, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); -void fft_fr_helper(blst_fr *out, blst_fr *in, uint64_t offset, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); +void fft_fr_slow(blst_fr *out, blst_fr *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); +void fft_fr_fast(blst_fr *out, blst_fr *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); +void fft_fr_helper(blst_fr *out, blst_fr *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); void fft_fr (blst_fr *out, blst_fr *in, FFTSettings *fs, bool inv, uint64_t n); diff --git a/src/fft_fr_test.c b/src/fft_fr_test.c index d8f2945..de45f7d 100644 --- a/src/fft_fr_test.c +++ b/src/fft_fr_test.c @@ -40,7 +40,7 @@ const uint64_t inv_fft_expected[][4] = void compare_sft_fft(void) { // Initialise: ascending values of i (could be anything), and arbitrary size - unsigned int size = 8; + unsigned int size = 12; FFTSettings fs = new_fft_settings(size); blst_fr data[fs.max_width], out0[fs.max_width], out1[fs.max_width]; for (int i = 0; i < fs.max_width; i++) { @@ -48,8 +48,8 @@ void compare_sft_fft(void) { } // Do both fast and slow transforms - fft_fr_slow(out0, data, 0, 1, fs.expanded_roots_of_unity, 1, fs.max_width); - fft_fr_fast(out1, data, 0, 1, fs.expanded_roots_of_unity, 1, fs.max_width); + fft_fr_slow(out0, data, 1, fs.expanded_roots_of_unity, 1, fs.max_width); + fft_fr_fast(out1, data, 1, fs.expanded_roots_of_unity, 1, fs.max_width); // Verify the results are identical for (int i = 0; i < fs.max_width; i++) { diff --git a/src/fft_g1.c b/src/fft_g1.c new file mode 100644 index 0000000..baaa009 --- /dev/null +++ b/src/fft_g1.c @@ -0,0 +1,91 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fft_g1.h" +#include "test_util.h" + +void p1_mul(blst_p1 *out, const blst_p1 *a, const blst_fr *b) { + blst_scalar s; + blst_scalar_from_fr(&s, b); + //blst_p1_mult(out, a, s.b, 8 * sizeof(blst_scalar)); + blst_p1_mult(out, a, s.b, 256); +} + +void p1_sub(blst_p1 *out, const blst_p1 *a, const blst_p1 *b) { + blst_p1 bneg = *b; + blst_p1_cneg(&bneg, true); + blst_p1_add_or_double(out, a, &bneg); +} + +// Slow Fourier Transform (simple, good for small sizes) +void fft_g1_slow(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { + blst_p1 v, last, jv; + blst_fr r; + for (uint64_t i = 0; i < l; i++) { + p1_mul(&last, &in[0], &roots[0]); + for (uint64_t j = 1; j < l; j++) { + jv = in[j * stride]; + r = roots[((i * j) % l) * roots_stride]; + p1_mul(&v, &jv, &r); + blst_p1_add_or_double(&last, &last, &v); + } + out[i] = last; + } +} + +// Fast Fourier Transform +void fft_g1_fast(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { + uint64_t half = l / 2; + if (half > 0) { + fft_g1_helper(out, in, stride * 2, roots, roots_stride * 2, half); + fft_g1_helper(out + half, in + stride, stride * 2, roots, roots_stride * 2, half); + for (uint64_t i = 0; i < half; i++) { + blst_p1 y_times_root; + blst_p1 x = out[i]; + p1_mul(&y_times_root, &out[i + half], &roots[i * roots_stride]); + blst_p1_add_or_double(&out[i], &x, &y_times_root); + p1_sub(&out[i + half], &x, &y_times_root); + } + } else { + p1_mul(out, in, roots); + } +} + +void fft_g1_helper(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) { + if (l <= 4) { // TODO: Tunable parameter + fft_g1_slow(out, in, stride, roots, roots_stride, l); + } else { + fft_g1_fast(out, in, stride, roots, roots_stride, l); + } +} + +// The main entry point for forward and reverse FFTs +void fft_g1 (blst_p1 *out, blst_p1 *in, FFTSettings *fs, bool inv, uint64_t n) { + uint64_t stride = fs->max_width / n; + assert(n <= fs->max_width); + assert(is_power_of_two(n)); + if (inv) { + blst_fr inv_len; + fr_from_uint64(&inv_len, n); + blst_fr_eucl_inverse(&inv_len, &inv_len); + fft_g1_helper(out, in, 1, fs->reverse_roots_of_unity, stride, fs->max_width); + for (uint64_t i = 0; i < fs->max_width; i++) { + p1_mul(&out[i], &out[i], &inv_len); + } + } else { + fft_g1_helper(out, in, 1, fs->expanded_roots_of_unity, stride, fs->max_width); + } +} diff --git a/src/fft_g1.h b/src/fft_g1.h new file mode 100644 index 0000000..488c1d8 --- /dev/null +++ b/src/fft_g1.h @@ -0,0 +1,25 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c-kzg.h" +#include "fft_util.h" + +void p1_mul(blst_p1 *out, const blst_p1 *a, const blst_fr *b); +void p1_sub(blst_p1 *out, const blst_p1 *a, const blst_p1 *b); +void fft_g1_slow(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); +void fft_g1_fast(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); +void fft_g1_helper(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l); +void fft_g1 (blst_p1 *out, blst_p1 *in, FFTSettings *fs, bool inv, uint64_t n); diff --git a/src/fft_g1_test.c b/src/fft_g1_test.c new file mode 100644 index 0000000..eaba292 --- /dev/null +++ b/src/fft_g1_test.c @@ -0,0 +1,108 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../inc/acutest.h" +#include "test_util.h" +#include "fft_g1.h" + +// The G1 subgroup size minus 1 +const uint64_t r_minus_1[] = {0xffffffff00000000L, 0x53bda402fffe5bfeL, 0x3339d80809a1d805L, 0x73eda753299d7d48L}; + +void make_data(blst_p1 *out, uint64_t n) { + // Multiples of g1_gen + assert(n > 0); + blst_p1_from_affine(out + 0, &BLS12_381_G1); + for (int i = 1; i < n; i++) { + blst_p1_add_affine(out + i, out + i - 1, &BLS12_381_G1); + } +} + +void p1_mul_works(void) { + blst_fr rm1; + blst_p1 g1_gen, g1_gen_neg, res; + + // Multiply the generator by the group order minus one + blst_p1_from_affine(&g1_gen, &BLS12_381_G1); + blst_fr_from_uint64(&rm1, r_minus_1); + p1_mul(&res, &g1_gen, &rm1); + + // We should end up with negative the generator + blst_p1_from_affine(&g1_gen_neg, &BLS12_381_NEG_G1); + + TEST_CHECK(blst_p1_is_equal(&res, &g1_gen_neg)); +} + +void p1_sub_works(void) { + blst_p1 g1_gen, g1_gen_neg; + blst_p1 tmp, res; + + blst_p1_from_affine(&g1_gen, &BLS12_381_G1); + blst_p1_from_affine(&g1_gen_neg, &BLS12_381_NEG_G1); + + // 2 * g1_gen = g1_gen - g1_gen_neg + blst_p1_double(&tmp, &g1_gen); + p1_sub(&res, &g1_gen, &g1_gen_neg); + + TEST_CHECK(blst_p1_is_equal(&tmp, &res)); +} + +void compare_sft_fft(void) { + // Initialise: arbitrary size + unsigned int size = 6; + FFTSettings fs = new_fft_settings(size); + blst_p1 data[fs.max_width], slow[fs.max_width], fast[fs.max_width]; + make_data(data, fs.max_width); + + // Do both fast and slow transforms + fft_g1_slow(slow, data, 1, fs.expanded_roots_of_unity, 1, fs.max_width); + fft_g1_fast(fast, data, 1, fs.expanded_roots_of_unity, 1, fs.max_width); + + // Verify the results are identical + for (int i = 0; i < fs.max_width; i++) { + TEST_CHECK(blst_p1_is_equal(slow + i, fast + i)); + } + + free_fft_settings(&fs); +} + +void roundtrip_fft(void) { + // Initialise: arbitrary size + unsigned int size = 10; + FFTSettings fs = new_fft_settings(size); + blst_p1 expected[fs.max_width], data[fs.max_width], coeffs[fs.max_width]; + make_data(expected, fs.max_width); + make_data(data, fs.max_width); + + // Forward and reverse FFT + fft_g1(coeffs, data, &fs, false, fs.max_width); + fft_g1(data, coeffs, &fs, true, fs.max_width); + + // Verify that the result is still ascending values of i + for (int i = 0; i < fs.max_width; i++) { + TEST_CHECK(blst_p1_is_equal(expected + i, data + i)); + } + + free_fft_settings(&fs); +} + +TEST_LIST = + { + {"p1_mul_works", p1_mul_works}, + {"p1_sub_works", p1_sub_works}, + {"compare_sft_fft", compare_sft_fft}, + {"roundtrip_fft", roundtrip_fft}, + { NULL, NULL } /* zero record marks the end of the list */ + }; diff --git a/src/test_util.c b/src/test_util.c index 1bb6b83..a8e1c48 100644 --- a/src/test_util.c +++ b/src/test_util.c @@ -56,16 +56,28 @@ bool fr_equal(blst_fr *aa, blst_fr *bb) { // G1 and G2 utilities // +void print_p1_bytes(byte p1[96]) { + printf("[0x"); + print_bytes_as_hex(p1, 0, 48); + printf(",0x"); + print_bytes_as_hex(p1, 48, 48); + printf("]\n"); +} + +/* "Pretty" print a point in G1 */ +void print_p1(const blst_p1 *p1) { + byte *p1_bytes = (byte *)malloc(96); + blst_p1_serialize(p1_bytes, p1); + print_p1_bytes(p1_bytes); + free(p1_bytes); +} + /* "Pretty" print an affine point in G1 */ void print_p1_affine(const blst_p1_affine *p1) { - byte *p1_hex = (byte *)malloc(96); - blst_p1_affine_serialize(p1_hex, p1); - printf("[0x"); - print_bytes_as_hex(p1_hex, 0, 48); - printf(",0x"); - print_bytes_as_hex(p1_hex, 48, 48); - printf("]\n"); - free(p1_hex); + byte *p1_bytes = (byte *)malloc(96); + blst_p1_affine_serialize(p1_bytes, p1); + print_p1_bytes(p1_bytes); + free(p1_bytes); } /* "Pretty" print an affine point in G2 */ diff --git a/src/test_util.h b/src/test_util.h index c857428..44036c3 100644 --- a/src/test_util.h +++ b/src/test_util.h @@ -27,5 +27,7 @@ void print_fr(const blst_fr *a); bool fr_equal(blst_fr *aa, blst_fr *bb); // G1 and G2 utilities +void print_p1_bytes(byte p1[96]); +void print_p1(const blst_p1 *p1); void print_p1_affine(const blst_p1_affine *p1); void print_p2_affine(const blst_p2_affine *p2);