Use scratch space dependent batching in ecmult_multi

This commit is contained in:
Jonas Nick 2017-11-05 19:40:18 +00:00
parent 355a38f113
commit 36b22c9337
3 changed files with 192 additions and 55 deletions

View File

@ -34,6 +34,10 @@ typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge
/** /**
* Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai. * Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai.
* Chooses the right algorithm for a given number of points and scratch space
* size. Resets and overwrites the given scratch space. If the points do not
* fit in the scratch space the algorithm is repeatedly run with batches of
* points.
* Returns: 1 on success (including when inp_g_sc is NULL and n is 0) * Returns: 1 on success (including when inp_g_sc is NULL and n is 0)
* 0 if there is not enough scratch space for a single point or * 0 if there is not enough scratch space for a single point or
* callback returns 0 * callback returns 0

View File

@ -8,6 +8,7 @@
#define SECP256K1_ECMULT_IMPL_H #define SECP256K1_ECMULT_IMPL_H
#include <string.h> #include <string.h>
#include <stdint.h>
#include "group.h" #include "group.h"
#include "scalar.h" #include "scalar.h"
@ -55,6 +56,8 @@
#define PIPPENGER_SCRATCH_OBJECTS 6 #define PIPPENGER_SCRATCH_OBJECTS 6
#define STRAUSS_SCRATCH_OBJECTS 6 #define STRAUSS_SCRATCH_OBJECTS 6
#define PIPPENGER_MAX_BUCKET_WINDOW 12
/* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */ /* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */
#ifdef USE_ENDOMORPHISM #ifdef USE_ENDOMORPHISM
#define ECMULT_PIPPENGER_THRESHOLD 96 #define ECMULT_PIPPENGER_THRESHOLD 96
@ -62,6 +65,12 @@
#define ECMULT_PIPPENGER_THRESHOLD 156 #define ECMULT_PIPPENGER_THRESHOLD 156
#endif #endif
/* Cap on the number of points processed in a single batch. It is applied in
 * secp256k1_ecmult_multi_var to the value obtained from
 * secp256k1_pippenger_max_points. The cap is 5000000 when USE_ENDOMORPHISM is
 * defined and 10000000 otherwise. */
#ifdef USE_ENDOMORPHISM
#define ECMULT_MAX_POINTS_PER_BATCH 5000000
#else
#define ECMULT_MAX_POINTS_PER_BATCH 10000000
#endif
/** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
* the values [1*a,3*a,...,(2*n-1)*a], so it needs space for n values. zr[0] will * the values [1*a,3*a,...,(2*n-1)*a], so it needs space for n values. zr[0] will
* contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z. * contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
@ -545,6 +554,10 @@ static int secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context
return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
} }
/* Number of points a Strauss batch may use with the given scratch space:
 * the value reported by secp256k1_scratch_max_allocation for
 * STRAUSS_SCRATCH_OBJECTS objects, divided (rounding down) by
 * secp256k1_strauss_scratch_size(1). */
static size_t secp256k1_strauss_max_points(secp256k1_scratch *scratch) {
    const size_t available = secp256k1_scratch_max_allocation(scratch, STRAUSS_SCRATCH_OBJECTS);
    const size_t per_point = secp256k1_strauss_scratch_size(1);

    return available / per_point;
}
/** Convert a number to WNAF notation. /** Convert a number to WNAF notation.
* The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val. * The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val.
* It has the following guarantees: * It has the following guarantees:
@ -724,7 +737,7 @@ static int secp256k1_pippenger_bucket_window(size_t n) {
} else if (n <= 28600) { } else if (n <= 28600) {
return 11; return 11;
} else { } else {
return 12; return PIPPENGER_MAX_BUCKET_WINDOW;
} }
#else #else
if (n <= 2) { if (n <= 2) {
@ -750,11 +763,48 @@ static int secp256k1_pippenger_bucket_window(size_t n) {
} else if (n <= 35000) { } else if (n <= 35000) {
return 11; return 11;
} else { } else {
return 12; return PIPPENGER_MAX_BUCKET_WINDOW;
} }
#endif #endif
} }
/**
 * Maps a bucket_window to the largest number of points for which that window
 * is the one to use.
 *
 * Returns SIZE_MAX for PIPPENGER_MAX_BUCKET_WINDOW (no upper limit) and 0 for
 * any bucket_window outside 1..PIPPENGER_MAX_BUCKET_WINDOW.
 */
static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) {
    /* Entry i holds the limit for bucket_window i; entry 0 is a placeholder. */
#ifdef USE_ENDOMORPHISM
    static const size_t window_limit[] = {
        0, 4, 8, 40, 117, 280, 480, 2560, 2560, 9200, 17400, 28600
    };
#else
    static const size_t window_limit[] = {
        0, 2, 9, 42, 100, 280, 610, 1920, 3400, 10240, 19000, 35000
    };
#endif
    const int n_entries = (int)(sizeof(window_limit) / sizeof(window_limit[0]));

    if (bucket_window == PIPPENGER_MAX_BUCKET_WINDOW) {
        return SIZE_MAX;
    }
    if (bucket_window < 1 || bucket_window >= n_entries) {
        return 0;
    }
    return window_limit[bucket_window];
}
#ifdef USE_ENDOMORPHISM #ifdef USE_ENDOMORPHISM
SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) { SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) {
secp256k1_scalar tmp = *s1; secp256k1_scalar tmp = *s1;
@ -865,11 +915,53 @@ static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_ecmult_contex
return secp256k1_ecmult_pippenger_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); return secp256k1_ecmult_pippenger_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
} }
#define MAX_BATCH_SIZE 1024 /**
* Returns the maximum number of points in addition to G that can be used with
* a given scratch space. The function ensures that fewer points may also be
* used.
*/
static size_t secp256k1_pippenger_max_points(secp256k1_scratch *scratch) {
    const size_t max_alloc = secp256k1_scratch_max_allocation(scratch, PIPPENGER_SCRATCH_OBJECTS);
    size_t best = 0;
    int w;

    /* Try bucket windows from smallest to largest and keep the largest point
     * count that fits into max_alloc. */
    for (w = 1; w <= PIPPENGER_MAX_BUCKET_WINDOW; w++) {
        const size_t window_cap = secp256k1_pippenger_bucket_window_inv(w);
        size_t per_point;
        size_t fixed_cost;
        size_t fit;

        /* Bytes needed per point for this window. */
        per_point = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(w+1)+1)*sizeof(int);
#ifdef USE_ENDOMORPHISM
        per_point = 2*per_point;
#endif
        /* Bytes needed regardless of the point count: the buckets, one
         * additional entry and the state struct. */
        fixed_cost = ((1<<w) * sizeof(secp256k1_gej) + per_point + sizeof(struct secp256k1_pippenger_state));
        if (fixed_cost > max_alloc) {
            break;
        }

        fit = (max_alloc - fixed_cost) / per_point;
        if (fit > window_cap) {
            fit = window_cap;
        }
        if (best < fit) {
            best = fit;
        }
        if (fit < window_cap) {
            /* A larger bucket_window might allow more points, but then the
             * caller could no longer safely use any smaller number than the
             * one returned here. */
            break;
        }
    }
    return best;
}
typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t); typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t);
static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
size_t i; size_t i;
int (*f)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t);
size_t max_points;
size_t n_batches; size_t n_batches;
size_t n_batch_points; size_t n_batch_points;
@ -883,25 +975,36 @@ static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp2
return 1; return 1;
} }
if(n <= ECMULT_PIPPENGER_THRESHOLD) { max_points = secp256k1_pippenger_max_points(scratch);
if(!secp256k1_ecmult_strauss_batch(ctx, scratch, r, inp_g_sc, cb, cbdata, n, 0)) { if (max_points == 0) {
return 0;
} else if (max_points > ECMULT_MAX_POINTS_PER_BATCH) {
max_points = ECMULT_MAX_POINTS_PER_BATCH;
}
n_batches = (n+max_points-1)/max_points;
n_batch_points = (n+n_batches-1)/n_batches;
if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) {
f = secp256k1_ecmult_pippenger_batch;
} else {
max_points = secp256k1_strauss_max_points(scratch);
if (max_points == 0) {
return 0; return 0;
} }
} else { n_batches = (n+max_points-1)/max_points;
n_batches = (n+MAX_BATCH_SIZE-1)/MAX_BATCH_SIZE;
n_batch_points = (n+n_batches-1)/n_batches; n_batch_points = (n+n_batches-1)/n_batches;
f = secp256k1_ecmult_strauss_batch;
}
for(i = 0; i < n_batches; i++) { for(i = 0; i < n_batches; i++) {
size_t nbp = n < n_batch_points ? n : n_batch_points; size_t nbp = n < n_batch_points ? n : n_batch_points;
size_t offset = n_batch_points*i; size_t offset = n_batch_points*i;
secp256k1_gej tmp; secp256k1_gej tmp;
if(!secp256k1_ecmult_pippenger_batch(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { if (!f(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) {
return 0; return 0;
} }
secp256k1_gej_add_var(r, r, &tmp, NULL); secp256k1_gej_add_var(r, r, &tmp, NULL);
n -= nbp; n -= nbp;
} }
}
return 1; return 1;
} }

View File

@ -2783,8 +2783,56 @@ void test_ecmult_multi(secp256k1_scratch *scratch, secp256k1_ecmult_multi_func e
} }
} }
/* Checks that secp256k1_pippenger_bucket_window_inv is consistent with
 * secp256k1_pippenger_bucket_window: the returned point count maps back to
 * the same window, and one more point maps to a strictly larger window. */
void test_secp256k1_pippenger_bucket_window_inv(void) {
    int window;

    CHECK(secp256k1_pippenger_bucket_window_inv(0) == 0);
    for (window = 1; window <= PIPPENGER_MAX_BUCKET_WINDOW; window++) {
        size_t limit;
#ifdef USE_ENDOMORPHISM
        /* Bucket_window of 8 is not used with endo */
        const int window_in_use = (window != 8);
#else
        const int window_in_use = 1;
#endif
        if (window_in_use) {
            limit = secp256k1_pippenger_bucket_window_inv(window);
            CHECK(secp256k1_pippenger_bucket_window(limit) == window);
            if (window != PIPPENGER_MAX_BUCKET_WINDOW) {
                CHECK(secp256k1_pippenger_bucket_window(limit+1) > window);
            }
        }
    }
}
/**
 * Probabilistically test the function returning the maximum number of possible points
 * for a given scratch space.
 *
 * Scratch sizes are scanned in steps of 256 bytes, starting from the value
 * returned by secp256k1_rand_int(256), up to the size needed for 512 points
 * more than the limit of the second-largest bucket window. For every size
 * where secp256k1_pippenger_max_points reports a non-zero count, the scratch
 * space must be resizable to what that count actually requires. The scan must
 * end with the largest bucket window selected.
 */
void test_ecmult_multi_pippenger_max_points(void) {
    size_t scratch_size = secp256k1_rand_int(256);
    /* Use the named constant rather than a literal 12 so that the upper bound
     * of the scan stays in sync with the final CHECK below. */
    size_t max_size = secp256k1_pippenger_scratch_size(secp256k1_pippenger_bucket_window_inv(PIPPENGER_MAX_BUCKET_WINDOW-1)+512, PIPPENGER_MAX_BUCKET_WINDOW);
    secp256k1_scratch *scratch;
    size_t n_points_supported;
    int bucket_window = 0;

    for(; scratch_size < max_size; scratch_size+=256) {
        scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size);
        CHECK(scratch != NULL);
        n_points_supported = secp256k1_pippenger_max_points(scratch);
        if (n_points_supported == 0) {
            /* Scratch space too small for any point: nothing to verify. */
            secp256k1_scratch_destroy(scratch);
            continue;
        }
        bucket_window = secp256k1_pippenger_bucket_window(n_points_supported);
        /* The reported point count must really fit into the scratch space. */
        CHECK(secp256k1_scratch_resize(scratch, secp256k1_pippenger_scratch_size(n_points_supported, bucket_window), PIPPENGER_SCRATCH_OBJECTS));
        secp256k1_scratch_destroy(scratch);
    }
    /* The largest scratch sizes tried must have selected the largest window. */
    CHECK(bucket_window == PIPPENGER_MAX_BUCKET_WINDOW);
}
/**
* Run secp256k1_ecmult_multi_var with num points and a scratch space restricted to
* 1 <= i <= num points.
*/
void test_ecmult_multi_batching(void) { void test_ecmult_multi_batching(void) {
static const int n_points = 3*MAX_BATCH_SIZE; static const int n_points = 2*ECMULT_PIPPENGER_THRESHOLD;
secp256k1_scalar scG; secp256k1_scalar scG;
secp256k1_scalar szero; secp256k1_scalar szero;
secp256k1_scalar *sc = (secp256k1_scalar *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_scalar) * n_points); secp256k1_scalar *sc = (secp256k1_scalar *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_scalar) * n_points);
@ -2795,18 +2843,21 @@ void test_ecmult_multi_batching(void) {
int i; int i;
secp256k1_scratch *scratch; secp256k1_scratch *scratch;
int test_n_points[] = { MAX_BATCH_SIZE, MAX_BATCH_SIZE + 1, MAX_BATCH_SIZE + 2, 2*MAX_BATCH_SIZE, 2*MAX_BATCH_SIZE+1, 3*MAX_BATCH_SIZE };
secp256k1_gej_set_infinity(&r2); secp256k1_gej_set_infinity(&r2);
secp256k1_scalar_set_int(&szero, 0); secp256k1_scalar_set_int(&szero, 0);
/* Get random scalars and group elements */ /* Get random scalars and group elements and compute result */
random_scalar_order(&scG); random_scalar_order(&scG);
secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r2, &szero, &scG); secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r2, &szero, &scG);
for(i = 0; i < n_points; i++) { for(i = 0; i < n_points; i++) {
secp256k1_ge ptg; secp256k1_ge ptg;
secp256k1_gej ptgj;
random_group_element_test(&ptg); random_group_element_test(&ptg);
secp256k1_gej_set_ge(&ptgj, &ptg);
pt[i] = ptg; pt[i] = ptg;
random_scalar_order(&sc[i]); random_scalar_order(&sc[i]);
secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i], NULL);
secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL);
} }
data.sc = sc; data.sc = sc;
data.pt = pt; data.pt = pt;
@ -2822,10 +2873,8 @@ void test_ecmult_multi_batching(void) {
CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1)); CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1));
secp256k1_scratch_destroy(scratch); secp256k1_scratch_destroy(scratch);
/* Run secp256k1_ecmult_multi_var with i points and a scratch space secp256k1_gej_neg(&r2, &r2);
* restricted to i points. */ for(i = 1; i <= n_points; i++) {
for(i = 1; i <= ECMULT_PIPPENGER_THRESHOLD+2; i++) {
secp256k1_gej ptgj;
if (i > ECMULT_PIPPENGER_THRESHOLD) { if (i > ECMULT_PIPPENGER_THRESHOLD) {
int bucket_window = secp256k1_pippenger_bucket_window(i); int bucket_window = secp256k1_pippenger_bucket_window(i);
size_t scratch_size = secp256k1_pippenger_scratch_size(i, bucket_window); size_t scratch_size = secp256k1_pippenger_scratch_size(i, bucket_window);
@ -2834,37 +2883,11 @@ void test_ecmult_multi_batching(void) {
size_t scratch_size = secp256k1_strauss_scratch_size(i); size_t scratch_size = secp256k1_strauss_scratch_size(i);
scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT);
} }
CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, i)); CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, n_points));
/* compute running result */
secp256k1_gej_set_ge(&ptgj, &pt[i-1]);
secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i-1], NULL);
secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL);
secp256k1_gej_neg(&r, &r);
secp256k1_gej_add_var(&r, &r, &r2, NULL); secp256k1_gej_add_var(&r, &r, &r2, NULL);
CHECK(secp256k1_gej_is_infinity(&r)); CHECK(secp256k1_gej_is_infinity(&r));
secp256k1_scratch_destroy(scratch); secp256k1_scratch_destroy(scratch);
} }
scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_strauss_scratch_size(n_points) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT);
for(i = 0; i < (int)(sizeof(test_n_points) / sizeof(test_n_points[0])); i++) {
secp256k1_gej ptgj;
CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i]-1));
secp256k1_gej_set_infinity(&r2);
secp256k1_gej_add_var(&r2, &r2, &r, NULL);
CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i]));
secp256k1_gej_set_ge(&ptgj, &pt[test_n_points[i]-1]);
secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[test_n_points[i]-1], NULL);
secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL);
secp256k1_gej_neg(&r, &r);
secp256k1_gej_add_var(&r, &r, &r2, NULL);
CHECK(secp256k1_gej_is_infinity(&r));
}
secp256k1_scratch_destroy(scratch);
free(sc); free(sc);
free(pt); free(pt);
} }
@ -2872,10 +2895,17 @@ void test_ecmult_multi_batching(void) {
void run_ecmult_multi_tests(void) { void run_ecmult_multi_tests(void) {
secp256k1_scratch *scratch; secp256k1_scratch *scratch;
test_secp256k1_pippenger_bucket_window_inv();
test_ecmult_multi_pippenger_max_points();
scratch = secp256k1_scratch_create(&ctx->error_callback, 0, 819200); scratch = secp256k1_scratch_create(&ctx->error_callback, 0, 819200);
test_ecmult_multi(scratch, &secp256k1_ecmult_multi_var); test_ecmult_multi(scratch, secp256k1_ecmult_multi_var);
test_ecmult_multi(scratch, &secp256k1_ecmult_pippenger_batch_single); test_ecmult_multi(scratch, secp256k1_ecmult_pippenger_batch_single);
test_ecmult_multi(scratch, &secp256k1_ecmult_strauss_batch_single); test_ecmult_multi(scratch, secp256k1_ecmult_strauss_batch_single);
secp256k1_scratch_destroy(scratch);
/* Run test_ecmult_multi with space for exactly one point */
scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_strauss_scratch_size(1) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT);
test_ecmult_multi(scratch, secp256k1_ecmult_multi_var);
secp256k1_scratch_destroy(scratch); secp256k1_scratch_destroy(scratch);
test_ecmult_multi_batching(); test_ecmult_multi_batching();