create a "bundle" from all the C files (better inlining opportunities for the C compiler); also add a simple benchmark for the C FFT implementation

This commit is contained in:
Balazs Komuves 2025-11-05 17:06:24 +01:00
parent 0eb39eb5c9
commit 0edb80b6b6
No known key found for this signature in database
GPG Key ID: F63B7AEF18435562
18 changed files with 127 additions and 17 deletions

View File

@ -268,6 +268,7 @@ instance Binary FriProof where
--------------------------------------------------------------------------------
{-
estimateFriProofSize :: FriConfig -> Int
estimateFriProofSize friConfig@(MkFriConfig{..}) = total where
@ -308,3 +309,5 @@ data FriProof = MkFriProof
, proofQueryRounds :: [FriQueryRound] -- ^ query rounds
, proofPowWitness :: F -- ^ witness showing that the prover did PoW
}
-}

View File

@ -1,14 +1,14 @@
{-# LANGUAGE CPP #-}
#ifdef USE_NAIVE_HASKELL
-- #ifdef USE_NAIVE_HASKELL
module NTT.FFT ( module NTT.FFT.Slow ) where
import NTT.FFT.Slow
#else
module NTT.FFT ( module NTT.FFT.Fast ) where
import NTT.FFT.Fast
#endif
-- #else
--
-- module NTT.FFT ( module NTT.FFT.Fast ) where
-- import NTT.FFT.Fast
--
-- #endif

View File

@ -16,7 +16,7 @@ import System.IO.Unsafe
import Data.Flat
import NTT.Poly
import NTT.Poly.Flat
import NTT.Subgroup
import Field.Goldilocks
import Misc

View File

@ -1,13 +1,13 @@
{-# LANGUAGE CPP #-}
#ifdef USE_NAIVE_HASKELL
-- #ifdef USE_NAIVE_HASKELL
module NTT.Poly ( module NTT.Poly.Naive ) where
import NTT.Poly.Naive
#else
module NTT.Poly ( module NTT.Poly.Flat ) where
import NTT.Poly.Flat
#endif
-- #else
--
-- module NTT.Poly ( module NTT.Poly.Flat ) where
-- import NTT.Poly.Flat
--
-- #endif

View File

@ -26,7 +26,13 @@ import Data.Flat as L
--------------------------------------------------------------------------------
newtype Poly a = MkPoly (L.FlatArray a)
newtype Poly a
= MkPoly (L.FlatArray a)
deriving Show
-- TEMPORARY HACK: two polynomials are considered equal exactly when
-- the values returned by 'coeffs' for them are equal
instance (Eq a, Flat a) => Eq (Poly a) where
  lhs == rhs = coeffs lhs == coeffs rhs
pattern XPoly n arr = MkPoly (L.MkFlatArray n arr)

1
reference/src/cbits/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
a.out

View File

@ -0,0 +1,59 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
//#include "goldilocks.h"
//#include "ntt.h"
#include "bundle.h"
//------------------------------------------------------------------------------
// Benchmark parameters.
//
// These are enum constants rather than `const int` objects: in ISO C a
// `const int` variable is not an integer constant expression, so a file-scope
// initializer such as `(1<<LOGN)` is only accepted as a compiler extension.
// Enum constants are plain `int` values, so every use below is unchanged.
enum {
  LOGN    = 20,            // log2 of the transform size
  N       = (1 << LOGN),   // number of 64-bit words in one transform
  NREPEAT = 20             // number of transforms timed per measurement
};

// main() raises this to the power 2^(32-LOGN) to obtain the `gen` argument of
// the NTT calls. Presumably a generator of the multiplicative subgroup of
// size 2^32 of the Goldilocks field -- TODO confirm against the field code.
const uint64_t generator_size_32 = 0x185629dcda58878cULL;
//------------------------------------------------------------------------------
// Prints how long NREPEAT transforms of size 2^LOGN took, together with the
// resulting throughput.
//
//   prefix : label inserted into the message (e.g. "forward NTT")
//   start  : clock() value sampled before the timed loop
//   end    : clock() value sampled after the timed loop
//
// Every transform is counted as 8*N bytes and a megabyte as 1024*1024 bytes.
// The byte count is computed in floating point so that raising LOGN cannot
// overflow `int` arithmetic.
void print_time(const char *prefix, clock_t start, clock_t end) {
  double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
  double megabytes     = (8.0 * N * NREPEAT) / (1024.0 * 1024.0);

  // An interval below the clock() resolution gives 0 seconds; report a
  // throughput of 0 in that case instead of dividing by zero.
  double megabyte_per_sec = (cpu_time_used > 0.0) ? (megabytes / cpu_time_used) : 0.0;

  printf("repeating %d %s of size 2^%d took %0.4f seconds (%0.0f MB/sec)\n", NREPEAT, prefix, LOGN, cpu_time_used, megabyte_per_sec);
}
// Benchmark driver: times NREPEAT forward NTTs and then NREPEAT inverse NTTs
// of size 2^LOGN, and prints one summary line for each via print_time().
//
// Two buffers of N words are used in ping-pong fashion: every iteration of a
// timed loop transforms src into tgt and then tgt back into src, so each loop
// runs NREPEAT/2 times to perform NREPEAT transforms in total.
//
// Returns 0 on success, EXIT_FAILURE if the buffers cannot be allocated.
int main(void) {
  clock_t start, end;

  const size_t nbytes = (size_t) N * sizeof(uint64_t);
  uint64_t *src = (uint64_t*) malloc( nbytes );
  uint64_t *tgt = (uint64_t*) malloc( nbytes );
  if (src == NULL || tgt == NULL) {
    fprintf(stderr, "failed to allocate two buffers of %zu bytes\n", nbytes);
    free(src);   // free(NULL) is a no-op, so this is safe for either failure
    free(tgt);
    return EXIT_FAILURE;
  }

  // deterministic, non-trivial input data
  for(int i=0; i<N; i++) {
    src[i] = 101 + 7 * (uint64_t) i;
  }

  // value passed as the `gen` argument of the size-2^LOGN transforms
  uint64_t gen = goldilocks_pow( generator_size_32 , 1<<(32-LOGN) );

  start = clock();
  for(int k=0; k<(NREPEAT/2); k++) {
    goldilocks_ntt_forward( LOGN, gen, src, tgt );
    goldilocks_ntt_forward( LOGN, gen, tgt, src );
  }
  end = clock();
  print_time("forward NTT", start, end);

  start = clock();
  for(int k=0; k<(NREPEAT/2); k++) {
    goldilocks_ntt_inverse( LOGN, gen, src, tgt );
    goldilocks_ntt_inverse( LOGN, gen, tgt, src );
  }
  end = clock();
  print_time("inverse NTT", start, end);

  free(tgt);
  free(src);
  return 0;
}

View File

@ -0,0 +1,9 @@
// gcc can do better inlining if everything is in the same translation unit;
// this can give a speedup of around 33% for the naive FFT algorithm...
#include "goldilocks.c"
#include "goldilocks_ext.c"
#include "monolith.c"
#include "ntt.c"
#include "short_dft.c"

View File

@ -0,0 +1,6 @@
#include "goldilocks.h"
#include "goldilocks_ext.h"
#include "monolith.h"
#include "ntt.h"
#include "short_dft.h"

View File

@ -5,4 +5,6 @@ gcc -c -O2 goldilocks_ext.c
gcc -c -O2 monolith.c
gcc -c -O2 ntt.c
gcc -c -O2 short_dft.c
gcc -c -O2 bundle.c
gcc -O2 bench_fft.c bundle.c

View File

@ -1,6 +1,8 @@
// the "Goldilocks" prime field of size `p = 2^64 - 2^32 + 1`
#ifndef _GOLDILOCKS_H_INCLUDED_
// The guard macro must be defined here: without this #define the #ifndef
// above is always true and the header is not protected against being
// included more than once.
#define _GOLDILOCKS_H_INCLUDED_
#include <stdint.h>
//------------------------------------------------------------------------------
@ -44,3 +46,5 @@ void goldilocks_convert_31_bytes_to_4_field_elements ( const uint8_t *ptr,
void goldilocks_convert_bytes_to_field_elements ( int rate, const uint8_t *ptr, uint64_t *felts );
//------------------------------------------------------------------------------
#endif // _GOLDILOCKS_H_INCLUDED_

View File

@ -1,6 +1,8 @@
// quadratic field extension F[x] = F(x) / (x^2 - 7) over the Goldilocks field
#ifndef _GOLDILOCKS_EXT_H_INCLUDED_
// The guard macro must be defined here: without this #define the #ifndef
// above is always true and the header is not protected against being
// included more than once.
#define _GOLDILOCKS_EXT_H_INCLUDED_
#include <stdint.h>
#include "goldilocks.h"
@ -21,3 +23,4 @@ void goldilocks_ext_pow(const uint64_t *b , int e , uint64_t *out);
//------------------------------------------------------------------------------
#endif // _GOLDILOCKS_EXT_H_INCLUDED_

View File

@ -1,4 +1,6 @@
// Monolith permutation and hash function
#include <assert.h>
#include <string.h>
@ -242,3 +244,4 @@ void goldilocks_monolith_bytes_digest(int rate, int N, const uint8_t *input, uin
}
//------------------------------------------------------------------------------

View File

@ -1,4 +1,7 @@
#ifndef _MONOLITH_H_INCLUDED_
// The guard macro must be defined here: without this #define the #ifndef
// above is always true and the header is not protected against being
// included more than once.
#define _MONOLITH_H_INCLUDED_
#include <stdint.h>
//------------------------------------------------------------------------------
@ -11,3 +14,5 @@ void goldilocks_monolith_bytes_digest (int rate, int N, const uint8_t *input
void goldilocks_monolith_felts_digest (int rate, int N, const uint64_t *input, uint64_t *hash);
//------------------------------------------------------------------------------
#endif // _MONOLITH_H_INCLUDED_

View File

@ -1,4 +1,5 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

View File

@ -1,4 +1,6 @@
#ifndef _NTT_H_INCLUDED_
// The guard macro must be defined here: without this #define the #ifndef
// above is always true and the header is not protected against being
// included more than once.
#define _NTT_H_INCLUDED_
#include <stdint.h>
//------------------------------------------------------------------------------
@ -12,3 +14,5 @@ void goldilocks_ntt_inverse ( int m, uint64_t gen, const
void goldilocks_ntt_inverse_shifted (uint64_t eta, int m, uint64_t gen, const uint64_t *src, uint64_t *tgt);
//------------------------------------------------------------------------------
#endif // _NTT_H_INCLUDED_

View File

@ -1,4 +1,6 @@
#ifndef _SHORT_DFT_H_INCLUDED_
// The guard macro must be defined here: without this #define the #ifndef
// above is always true and the header is not protected against being
// included more than once.
#define _SHORT_DFT_H_INCLUDED_
#include <stdint.h>
//------------------------------------------------------------------------------
@ -22,3 +24,5 @@ void short_inv_DFT_size_16_rescaled( int src_stride, int tgt_stride, uint64_t *s
// void short_inv_DFT_size_4_ext_rescaled( int src_stride, int tgt_stride, uint64_t *src, uint64_t *tgt );
//------------------------------------------------------------------------------
#endif // _SHORT_DFT_H_INCLUDED_

View File

@ -1,3 +1,3 @@
#!/bin/bash
ghci testMain.hs cbits/goldilocks.o cbits/goldilocks_ext.o cbits/monolith.o cbits/ntt.o cbits/short_dft.o
ghci testMain.hs cbits/bundle.o