Use Blst's multiscalar multiplication for commitments

2021-06-22 19:52:44 +01:00 · 2021-06-22 19:52:44 +01:00 · 37021b9653
parent 18f7d8f72e
commit 37021b9653
8 changed files with 169 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@ -8,8 +8,8 @@
 *.log
 tmp/
 doc/
-inc/blst.h
-inc/blst_aux.h
+inc/blst.h*
+inc/blst_aux.h*
 .vscode/
 *.json
 .clang-format
--- a/README.md
+++ b/README.md
@ -39,6 +39,8 @@ cp ../blst/libblast.a lib/
 cp ../blst/bindings/*.h inc/
 ```

+This version of c-kzg is tested with Blst's master branch, commit `d4b40c3`. Blst release 0.3.4 is not sufficient since we make use of the more recently implemented Pippenger multiscalar multiplication for the polynomial commitments.
+
 ## Build

 Build the `libckzg.a` library:
--- a/src/Makefile
+++ b/src/Makefile
@ -1,6 +1,6 @@
 TESTS = bls12_381_test das_extension_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \
 	fk20_proofs_test kzg_proofs_test poly_test recover_test utility_test zero_poly_test
-BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench
+BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench kzg_proofs_bench
 LIB_SRC = bls12_381.c c_kzg_util.c das_extension.c fft_common.c fft_fr.c fft_g1.c fk20_proofs.c kzg_proofs.c poly.c recover.c utility.c zero_poly.c
 LIB_OBJ = $(LIB_SRC:.c=.o)

--- a/src/bls12_381.c
+++ b/src/bls12_381.c
@ -24,6 +24,8 @@

 #ifdef BLST

+#include <stdlib.h> // malloc(), free(), NULL
+
 /**
 * Fast log base 2 of a byte.
 *
@ -377,16 +379,50 @@ void g2_dbl(g2_t *out, const g2_t *a) {
 * @param[in]  coeffs Array of field elements, length @p len
 * @param[in]  len    The number of group/field elements
 *
- * @todo This could be substantially improved with an optimised multi-scalar multiplication. (1) Benchmark and see if
- * this is a bottleneck. (2) If so, look into optimised routines. [Notes from
- * Mamy](https://github.com/vacp2p/research/issues/7#issuecomment-690083000) on the topic.
+ * For the benefit of future generations (since Blst has no documentation to speak of),
+ * there are two ways to pass the arrays of scalars and points into `blst_p1s_mult_pippenger()`.
+ *
+ * 1. Pass `points` as an array of pointers to the points, and pass `scalars` as an array of pointers to the scalars,
+ * each of length @p len.
+ * 2. Pass an array where the first element is a pointer to the contiguous array of points and the second is null, and
+ * similarly for scalars.
+ *
+ * We do the second of these to save memory here.
 */
 void g1_linear_combination(g1_t *out, const g1_t *p, const fr_t *coeffs, const uint64_t len) {
-    g1_t tmp;
-    *out = g1_identity;
-    for (uint64_t i = 0; i < len; i++) {
-        g1_mul(&tmp, &p[i], &coeffs[i]);
-        blst_p1_add_or_double(out, out, &tmp);
+
+    if (len < 8) { // Tunable parameter: must be at least 2 since Blst fails for 0 or 1
+        // Direct approach: accumulate coeffs[i] * p[i] one term at a time
+        g1_t tmp;
+        *out = g1_identity;
+        for (uint64_t i = 0; i < len; i++) {
+            g1_mul(&tmp, &p[i], &coeffs[i]);
+            blst_p1_add_or_double(out, out, &tmp);
+        }
+    } else {
+        // Blst's implementation of the Pippenger method. NOTE(review): the malloc() results below are unchecked
+        void *scratch = malloc(blst_p1s_mult_pippenger_scratch_sizeof(len));
+        blst_p1_affine *p_affine = malloc(len * sizeof(blst_p1_affine));
+        blst_scalar *scalars = malloc(len * sizeof(blst_scalar));
+
+        // Transform the points to affine representation
+        const blst_p1 *p_arg[2] = {p, NULL};
+        blst_p1s_to_affine(p_affine, p_arg, len);
+
+        // Transform the field elements to 256-bit scalars (index type matches `len` to avoid signed overflow)
+        for (uint64_t i = 0; i < len; i++) {
+            blst_scalar_from_fr(&scalars[i], &coeffs[i]);
+        }
+
+        // Call the Pippenger implementation, passing each contiguous array as {pointer, NULL}
+        const byte *scalars_arg[2] = {(byte *)scalars, NULL};
+        const blst_p1_affine *points_arg[2] = {p_affine, NULL};
+        blst_p1s_mult_pippenger(out, points_arg, len, scalars_arg, 256, scratch);
+
+        // Tidy up
+        free(scratch);
+        free(p_affine);
+        free(scalars);
     }
 }

--- a/src/bls12_381_test.c
+++ b/src/bls12_381_test.c
@ -181,6 +181,29 @@ void g1_make_linear_combination(void) {
    TEST_CHECK(g1_equal(&exp, &res));
 }

+void g1_random_linear_combination(void) {
+    uint64_t len = 8192;
+    fr_t coeffs[len];
+    g1_t p[len], p1tmp = g1_generator;
+    for (uint64_t i = 0; i < len; i++) {
+        coeffs[i] = rand_fr();
+        p[i] = p1tmp;
+        blst_p1_double(&p1tmp, &p1tmp);
+    }
+
+    // Expected result, computed term by term with multiply-and-add
+    g1_t exp = g1_identity;
+    for (uint64_t i = 0; i < len; i++) {
+        g1_mul(&p1tmp, &p[i], &coeffs[i]);
+        blst_p1_add_or_double(&exp, &exp, &p1tmp);
+    }
+
+    // Result under test: the same combination via g1_linear_combination()
+    g1_t res;
+    g1_linear_combination(&res, p, coeffs, len);
+    TEST_CHECK(g1_equal(&exp, &res));
+}
+
 void pairings_work(void) {
    // Verify that e([3]g1, [5]g2) = e([5]g1, [3]g2)
    fr_t three, five;
@ -219,6 +242,7 @@ TEST_LIST = {
    {"g1_identity_is_infinity", g1_identity_is_infinity},
    {"g1_identity_is_identity", g1_identity_is_identity},
    {"g1_make_linear_combination", g1_make_linear_combination},
+    {"g1_random_linear_combination", g1_random_linear_combination},
    {"pairings_work", pairings_work},
    {NULL, NULL} /* zero record marks the end of the list */
 };
--- a/src/kzg_proofs.c
+++ b/src/kzg_proofs.c
@ -27,6 +27,7 @@
 #include "kzg_proofs.h"
 #include "c_kzg_util.h"
 #include "utility.h"
+#include <assert.h>

 /**
 * Make a KZG commitment to a polynomial.
@ -82,7 +83,7 @@ C_KZG_RET check_proof_single(bool *out, const g1_t *commitment, const g1_t *proo
 }

 /**
- * Compute KZG proof for polynomial at positions x * w^y where w is an n-th root of unity.
+ * Compute KZG proof for polynomial at positions x0 * w^y where w is an n-th root of unity.
 *
 * This constitutes the proof for one data availability sample, which consists
 * of several polynomial evaluations.
@ -103,7 +104,7 @@ C_KZG_RET compute_proof_multi(g1_t *out, const poly *p, const fr_t *x0, uint64_t

    CHECK(is_power_of_two(n));

-    // Construct x^n - x0^n = (x - w^0)(x - w^1)...(x - w^(n-1))
+    // Construct x^n - x0^n = (x - x0.w^0)(x - x0.w^1)...(x - x0.w^(n-1))
    TRY(new_poly(&divisor, n + 1));

    // -(x0^n)
--- a/src/kzg_proofs_bench.c
+++ b/src/kzg_proofs_bench.c
@ -0,0 +1,92 @@
+/*
+ * Copyright 2021 Benjamin Edgington
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h> // malloc(), free(), atoi(), exit(), EXIT_SUCCESS/FAILURE
+#include <stdio.h>  // printf()
+#include <assert.h> // assert()
+#include <unistd.h> // NOTE(review): not the source of EXIT_SUCCESS/FAILURE (those are in <stdlib.h>)
+#include "bench_util.h"
+#include "test_util.h"
+#include "kzg_proofs.h"
+
+// Time repeated commit_to_poly() calls for about `max_seconds`; return the mean time per call in nanoseconds.
+long run_bench(int scale, int max_seconds) {
+    timespec_t t0, t1;
+    unsigned long total_time = 0, nits = 0;
+    FFTSettings fs;
+    KZGSettings ks;
+
+    assert(C_KZG_OK == new_fft_settings(&fs, scale)); // NOTE(review): asserted calls here vanish under NDEBUG
+
+    // Allocate the setup arrays on the heap to avoid stack overflow for large sizes; freed before returning
+    g1_t *s1 = malloc(fs.max_width * sizeof(g1_t));
+    g2_t *s2 = malloc(fs.max_width * sizeof(g2_t));
+
+    generate_trusted_setup(s1, s2, &secret, fs.max_width); // `secret` presumably comes from test_util.h - TODO confirm
+    assert(C_KZG_OK == new_kzg_settings(&ks, s1, s2, fs.max_width, &fs));
+
+    poly p;
+    assert(C_KZG_OK == new_poly(&p, fs.max_width));
+    for (int i = 0; i < fs.max_width; i++) {
+        p.coeffs[i] = rand_fr();
+    }
+
+    while (total_time < max_seconds * NANO) { // only the commit_to_poly() call itself is timed
+        g1_t commitment;
+        clock_gettime(CLOCK_REALTIME, &t0);
+
+        commit_to_poly(&commitment, &p, &ks);
+
+        clock_gettime(CLOCK_REALTIME, &t1);
+        nits++;
+        total_time += tdiff(t0, t1);
+    }
+
+    free_poly(&p);
+    free(s1);
+    free(s2);
+    free_kzg_settings(&ks);
+    free_fft_settings(&fs);
+
+    return total_time / nits;
+}
+
+int main(int argc, char *argv[]) {
+    int nsec = 0; // stays 0 when no usable duration is given, which triggers the usage message
+
+    switch (argc) {
+    case 1:
+        nsec = NSEC; // no argument: fall back to NSEC
+        break;
+    case 2:
+        nsec = atoi(argv[1]); // atoi() yields 0 for non-numeric input
+        break;
+    default:
+        break;
+    }
+
+    if (nsec <= 0) { // reject negative durations too, as the usage text promises "> 0"
+        printf("Usage: %s [test time in seconds > 0]\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    printf("*** Benchmarking Polynomial Commitment, %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
+    for (int scale = 1; scale <= 15; scale++) {
+        printf("commit_to_poly/scale_%d %ld ns/op\n", scale, run_bench(scale, nsec)); // run_bench() returns long
+    }
+
+    return EXIT_SUCCESS;
+}
--- a/src/kzg_proofs_test.c
+++ b/src/kzg_proofs_test.c
@ -78,7 +78,7 @@ void proof_multi(void) {
    bool result;

    // Compute proof at 2^coset_scale points
-    int coset_scale = 7, coset_len = (1 << coset_scale);
+    int coset_scale = 3, coset_len = (1 << coset_scale);
    fr_t y[coset_len];

    uint64_t secrets_len = poly_len > coset_len ? poly_len + 1 : coset_len + 1;