2025-11-04 10:58:02 +01:00

228 lines
7.0 KiB
C

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "goldilocks.h"
#include "ntt.h"
// -----------------------------------------------------------------------------
// Recursive forward NTT kernel of size `N = 2^m` that performs no allocation.
//
//   m          - log2 of the transform size
//   src_stride - distance (in elements) between consecutive inputs in `src`
//   gpows      - twiddle table; read at indices `k*src_stride` for `k < N/2`
//   src        - input, read at `src[0], src[src_stride], src[2*src_stride], ...`
//   buf        - scratch space; this level uses `buf[0..N)` and hands `buf + N`
//                to the recursive calls (fewer than `2*N` elements in total)
//   tgt        - output, `N` contiguous elements
void goldilocks_ntt_forward_noalloc(int m, int src_stride, const uint64_t *gpows, const uint64_t *src, uint64_t *buf, uint64_t *tgt) {
  // N = 1: nothing to transform
  if (m == 0) {
    tgt[0] = src[0];
    return;
  }

  // N = 2: a single butterfly, no twiddle factor needed
  if (m == 1) {
    const uint64_t *second = src + src_stride;
    tgt[0] = goldilocks_add( src[0] , *second );      // x + y
    tgt[1] = goldilocks_sub( src[0] , *second );      // x - y
    return;
  }

  const int size       = (1 <<  m   );
  const int half       = (1 << (m-1));
  const int sub_stride = src_stride << 1;

  uint64_t *even    = buf;            // transform of the even-position inputs
  uint64_t *odd     = buf + half;     // transform of the odd-position inputs
  uint64_t *scratch = buf + size;     // scratch for the two half-size transforms

  goldilocks_ntt_forward_noalloc( m-1 , sub_stride , gpows , src              , scratch , even );
  goldilocks_ntt_forward_noalloc( m-1 , sub_stride , gpows , src + src_stride , scratch , odd  );

  // combine the halves: tgt[k] = u[k] + g*v[k] , tgt[k+half] = u[k] - g*v[k]
  for (int k = 0; k < half; k++) {
    const uint64_t twisted = goldilocks_mul( odd[k] , gpows[k*src_stride] );   //   g*v[k]
    const uint64_t negated = goldilocks_neg( twisted );                        // - g*v[k]
    tgt[k       ] = goldilocks_add( twisted , even[k] );
    tgt[k + half] = goldilocks_add( negated , even[k] );
  }
}
// forward number-theoretical transform (evaluation of a polynomial)
// `src` and `tgt` should be `N = 2^m` sized arrays of field elements
// `gen` should be the generator of the multiplicative subgroup sized `N`
//
// Temporary buffers are heap-allocated and released before returning;
// allocation failure is caught by `assert`.
// `m` must be small enough that `2^m` fits in an `int`, because the
// recursive kernel indexes with `int`.
void goldilocks_ntt_forward(int m, const uint64_t gen, const uint64_t *src, uint64_t *tgt) {
  // sizes are computed in size_t so that the byte counts cannot overflow `int`
  const size_t N     = (size_t)1 << m;
  const size_t halfN = N >> 1;

  // precalculate [1,g,g^2,g^3...] (halfN entries)
  // For m = 0 the table is empty; we still allocate one slot so that
  // malloc is never asked for 0 bytes (which may legally return NULL).
  const size_t npows = (halfN > 0) ? halfN : 1;
  uint64_t *gpows = malloc( npows * sizeof *gpows );
  assert( gpows != NULL );
  gpows[0] = 1;
  if (halfN > 1) {
    // only write the second entry when the table actually has one
    gpows[1] = gen;
  }
  uint64_t x = gen;
  for (size_t i = 2; i < halfN; i++) {
    x = goldilocks_mul( x , gen );
    gpows[i] = x;
  }

  // scratch space for the recursive kernel
  uint64_t *buf = malloc( 2 * N * sizeof *buf );
  assert( buf != NULL );

  goldilocks_ntt_forward_noalloc( m, 1, gpows, src, buf, tgt );

  free(buf);
  free(gpows);
}
// it's like `ntt_forward` but we pre-multiply the coefficients with `eta^k`
// resulting in evaluating f(eta*x) instead of f(x)
//
// `src` and `tgt` should be `N = 2^m` sized arrays; `src` is not modified
// (the scaled coefficients go into a temporary heap buffer, which is freed
// before returning).
void goldilocks_ntt_forward_shifted(const uint64_t eta, int m, const uint64_t gen, const uint64_t *src, uint64_t *tgt) {
  // element count in size_t so that the byte count cannot overflow `int`
  const size_t N = (size_t)1 << m;

  uint64_t *shifted = malloc( N * sizeof *shifted );
  assert( shifted != NULL );

  // shifted[i] = src[i] * eta^i
  uint64_t x = 1;
  for (size_t i = 0; i < N; i++) {
    shifted[i] = goldilocks_mul( src[i] , x );
    x = goldilocks_mul( x , eta );
  }

  goldilocks_ntt_forward( m, gen, shifted, tgt );
  free(shifted);
}
// it's like `ntt_forward` but asymmetric, evaluating on a larger target subgroup
//
// `src` holds `N_src = 2^m_src` coefficients, `tgt` receives `2^m_tgt` values
// (requires `m_tgt >= m_src`).
// `gen_src` / `gen_tgt` are the generators of the source / target subgroups.
// NOTE(review): the output layout assumes `gen_src == gen_tgt^K` with
// `K = 2^(m_tgt - m_src)` -- confirm against callers.
//
// The target domain is processed as `K` cosets: for each `k < K` the
// coefficients are scaled by powers of `eta = gen_tgt^k`, a size-`N_src`
// transform is run, and its i-th output is stored at `tgt[k + i*K]`.
void goldilocks_ntt_forward_asymmetric(int m_src, int m_tgt, const uint64_t gen_src, const uint64_t gen_tgt, const uint64_t *src, uint64_t *tgt) {
  assert( m_tgt >= m_src );

  // sizes are computed in size_t so that the byte counts cannot overflow `int`
  const size_t N_src     = (size_t)1 << m_src;
  const size_t halfN_src = N_src >> 1;
  const size_t K         = (size_t)1 << (m_tgt - m_src);

  // precalculate [1,g,g^2,g^3...] (halfN_src entries)
  // At least one slot is allocated so malloc is never asked for 0 bytes.
  const size_t npows = (halfN_src > 0) ? halfN_src : 1;
  uint64_t *gpows = malloc( npows * sizeof *gpows );
  assert( gpows != NULL );
  gpows[0] = 1;
  if (halfN_src > 1) {
    // only write the second entry when the table actually has one
    gpows[1] = gen_src;
  }
  uint64_t x = gen_src;
  for (size_t i = 2; i < halfN_src; i++) {
    x = goldilocks_mul( x , gen_src );
    gpows[i] = x;
  }

  // coefficients scaled by powers of the current shift
  uint64_t *shifted = malloc( N_src * sizeof *shifted );
  assert( shifted != NULL );

  // scratch space for the recursive kernel
  uint64_t *buf = malloc( 2 * N_src * sizeof *buf );
  assert( buf != NULL );

  // temporary target buffer (we could replace this by adding `tgt_stride`)
  uint64_t *tgt_small = malloc( N_src * sizeof *tgt_small );
  assert( tgt_small != NULL );

  // eta will be the shift
  uint64_t eta = 1;
  for (size_t k = 0; k < K; k++) {
    if (k == 0) {
      // shift is 1: the coefficients are used unchanged
      memcpy( shifted, src, N_src * sizeof *shifted );
    }
    else {
      eta = goldilocks_mul( eta , gen_tgt );          // eta = gen_tgt^k
      uint64_t pw = 1;
      for (size_t i = 0; i < N_src; i++) {
        shifted[i] = goldilocks_mul( src[i] , pw );   // src[i] * eta^i
        pw = goldilocks_mul( pw , eta );
      }
    }

    goldilocks_ntt_forward_noalloc( m_src, 1, gpows, shifted, buf, tgt_small );

    // interleave the results of this coset into the target with stride K
    for (size_t i = 0; i < N_src; i++) {
      tgt[k + i*K] = tgt_small[i];
    }
  }

  free(tgt_small);
  free(buf);
  free(gpows);
  free(shifted);
}
// -----------------------------------------------------------------------------
// inverse of 2 in the field (which is the same as `(p+1)/2`, where `p` is the field prime)
const uint64_t goldilocks_oneHalf = 0x7fffffff80000001ull;
// Recursive inverse NTT kernel of size `N = 2^m` that performs no allocation.
//
//   m          - log2 of the transform size
//   tgt_stride - distance (in elements) between consecutive outputs in `tgt`
//   gpows      - scaling table; read at indices `k*tgt_stride` for `k < N/2`
//                (`goldilocks_ntt_inverse` fills it with `gen^-k / 2`)
//   src        - input, `N` contiguous elements
//   buf        - scratch space; this level uses `buf[0..N)` and hands `buf + N`
//                to the recursive calls (fewer than `2*N` elements in total)
//   tgt        - output, written at `tgt[0], tgt[tgt_stride], tgt[2*tgt_stride], ...`
void goldilocks_ntt_inverse_noalloc(int m, int tgt_stride, const uint64_t *gpows, const uint64_t *src, uint64_t *buf, uint64_t *tgt) {
  // N = 1: nothing to transform
  if (m == 0) {
    tgt[0] = src[0];
    return;
  }

  // N = 2: a single halved butterfly, no table lookup needed
  if (m == 1) {
    uint64_t *out0 = tgt;
    uint64_t *out1 = tgt + tgt_stride;
    *out0 = goldilocks_add( src[0] , src[1] );      // x + y
    *out1 = goldilocks_sub( src[0] , src[1] );      // x - y
    *out0 = goldilocks_div_by_2( *out0 );           // (x + y)/2
    *out1 = goldilocks_div_by_2( *out1 );           // (x - y)/2
    return;
  }

  const int size       = (1 <<  m   );
  const int half       = (1 << (m-1));
  const int sub_stride = tgt_stride << 1;

  uint64_t *sums    = buf;            // feeds the outputs at even positions
  uint64_t *diffs   = buf + half;     // feeds the outputs at odd positions
  uint64_t *scratch = buf + size;     // scratch for the two half-size transforms

  for (int k = 0; k < half; k++) {
    const uint64_t scale = gpows[k*tgt_stride];
    sums [k] = goldilocks_add( src[k] , src[k+half] );     // x + y
    diffs[k] = goldilocks_sub( src[k] , src[k+half] );     // x - y
    sums [k] = goldilocks_div_by_2( sums[k] );             // (x + y) / 2
    diffs[k] = goldilocks_mul( diffs[k] , scale );         // (x - y) / (2*g^k)
  }

  goldilocks_ntt_inverse_noalloc( m-1 , sub_stride , gpows , sums  , scratch , tgt              );
  goldilocks_ntt_inverse_noalloc( m-1 , sub_stride , gpows , diffs , scratch , tgt + tgt_stride );
}
// inverse number-theoretical transform (interpolation of a polynomial)
// `src` and `tgt` should be `N = 2^m` sized arrays of field elements
// `gen` should be the generator of the multiplicative subgroup sized `N`
//
// Temporary buffers are heap-allocated and released before returning;
// allocation failure is caught by `assert`.
// `m` must be small enough that `2^m` fits in an `int`, because the
// recursive kernel indexes with `int`.
void goldilocks_ntt_inverse(int m, const uint64_t gen, const uint64_t *src, uint64_t *tgt) {
  // sizes are computed in size_t so that the byte counts cannot overflow `int`
  const size_t N     = (size_t)1 << m;
  const size_t halfN = N >> 1;

  // precalculate [1/2,g^{-1}/2,g^{-2}/2,g^{-3}/2...] (halfN entries)
  // For m = 0 the table is empty; we still allocate one slot so that
  // malloc is never asked for 0 bytes (which may legally return NULL).
  const size_t npows = (halfN > 0) ? halfN : 1;
  uint64_t *gpows = malloc( npows * sizeof *gpows );
  assert( gpows != NULL );
  uint64_t       x    = goldilocks_oneHalf;    // 1/2
  const uint64_t ginv = goldilocks_inv(gen);   // gen^-1
  for (size_t i = 0; i < halfN; i++) {
    gpows[i] = x;
    x = goldilocks_mul( x , ginv );
  }

  // scratch space for the recursive kernel
  uint64_t *buf = malloc( 2 * N * sizeof *buf );
  assert( buf != NULL );

  goldilocks_ntt_inverse_noalloc( m, 1, gpows, src, buf, tgt );

  free(buf);
  free(gpows);
}
// it's like `ntt_inverse` but we post-multiply the resulting coefficients with `eta^k`
// resulting in interpolating an f such that f(eta^-1 * omega^k) = y_k
//
// `src` and `tgt` should be `N = 2^m` sized arrays; the unscaled coefficients
// are computed into a temporary heap buffer, which is freed before returning.
void goldilocks_ntt_inverse_shifted(const uint64_t eta, int m, const uint64_t gen, const uint64_t *src, uint64_t *tgt) {
  // element count in size_t so that the byte count cannot overflow `int`
  const size_t N = (size_t)1 << m;

  uint64_t *unshifted = malloc( N * sizeof *unshifted );
  assert( unshifted != NULL );

  goldilocks_ntt_inverse( m, gen, src, unshifted );

  // tgt[i] = unshifted[i] * eta^i
  uint64_t x = 1;
  for (size_t i = 0; i < N; i++) {
    tgt[i] = goldilocks_mul( unshifted[i] , x );
    x = goldilocks_mul( x , eta );
  }

  free(unshifted);
}
// -----------------------------------------------------------------------------