diff --git a/.gitignore b/.gitignore index c999eff..f5bc502 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ test_c_kzg_4844 -test_c_kzg_4844_cov +test_c_kzg_4844_* coverage.html *.profraw *.profdata +*.prof +*.pdf *.o *.s *.a diff --git a/bindings/go/main.go b/bindings/go/main.go index a1c4e47..dcc6a37 100644 --- a/bindings/go/main.go +++ b/bindings/go/main.go @@ -42,6 +42,10 @@ var ( settings = C.KZGSettings{} ) +/////////////////////////////////////////////////////////////////////////////// +// Public functions +/////////////////////////////////////////////////////////////////////////////// + /* LoadTrustedSetup is the binding for: @@ -231,3 +235,24 @@ func VerifyAggregateKZGProof(blobs []Blob, commitmentsBytes []Bytes48, aggregate &settings) return bool(result), CKzgRet(ret) } + +/////////////////////////////////////////////////////////////////////////////// +// Private functions +/////////////////////////////////////////////////////////////////////////////// + +/* +sha256 is the binding for: + + void blst_sha256( + byte out[32], + const byte *msg, + size_t msg_len); +*/ +func sha256(msg []byte) Bytes32 { + var out Bytes32 + C.blst_sha256( + (*C.byte)(unsafe.Pointer(&out)), + *(**C.byte)(unsafe.Pointer(&msg)), + (C.size_t)(len(msg))) + return out +} diff --git a/bindings/go/main_test.go b/bindings/go/main_test.go index 08ccda1..f2a1d55 100644 --- a/bindings/go/main_test.go +++ b/bindings/go/main_test.go @@ -86,6 +86,23 @@ func GetRandBlob(seed int64) Blob { return blob } +/* +HumanBytes will convert an integer to a human-readable value. Adapted from: +https://programming.guide/go/formatting-byte-size-to-human-readable-format.html +*/ +func HumanBytes(b int64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%dB", b) + } + div, exp := int64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%v%cB", float64(b)/float64(div), "KMGTPE"[exp]) +} + /////////////////////////////////////////////////////////////////////////////// // Tests /////////////////////////////////////////////////////////////////////////////// @@ -161,6 +178,10 @@ func Benchmark(b *testing.B) { trustedProof, _ := ComputeAggregateKZGProof(blobs[:1]) proof := Bytes48(trustedProof) + /////////////////////////////////////////////////////////////////////////// + // Public functions + /////////////////////////////////////////////////////////////////////////// + b.Run("BlobToKZGCommitment", func(b *testing.B) { for n := 0; n < b.N; n++ { _, ret := BlobToKZGCommitment(blobs[0]) @@ -199,4 +220,19 @@ func Benchmark(b *testing.B) { } }) } + + /////////////////////////////////////////////////////////////////////////// + // Private functions + /////////////////////////////////////////////////////////////////////////// + + for i := 2; i <= 20; i += 2 { + var numBytes = int64(1 << i) + var bytes = make([]byte, numBytes) + b.Run(fmt.Sprintf("sha256(size=%v)", HumanBytes(numBytes)), func(b *testing.B) { + b.SetBytes(numBytes) + for n := 0; n < b.N; n++ { + sha256(bytes) + } + }) + } } diff --git a/src/Makefile b/src/Makefile index f3c0d65..71be0f7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -29,6 +29,13 @@ BLST = -L../lib -lblst # COVERAGE = -fprofile-instr-generate -fcoverage-mapping +# +# Settings for performance profiling. +# +PROFILE = -DPROFILE +PROFILER = -lprofiler +PROFILER_OPTS = CPUPROFILE_FREQUENCY=1000000000 + # # Platform specific options. # @@ -37,6 +44,8 @@ ifneq ($(OS),Windows_NT) UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Darwin) XCRUN = xcrun + PROFILE += -L$(shell brew --prefix gperftools)/lib + PROFILE += -I$(shell brew --prefix gperftools)/include endif endif @@ -50,10 +59,13 @@ all: c_kzg_4844.o @$(CC) $(CFLAGS) -c $< test_c_kzg_4844: test_c_kzg_4844.c c_kzg_4844.c - @$(CC) $(CFLAGS) -o $@ $< $(BLST) + @$(CC) $(CFLAGS) -o $@ $< $(BLST) test_c_kzg_4844_cov: test_c_kzg_4844.c c_kzg_4844.c - @$(CC) $(CFLAGS) $(COVERAGE) -o $@ $< $(BLST) + @$(CC) $(CFLAGS) $(COVERAGE) -o $@ $< $(BLST) + +test_c_kzg_4844_prof: test_c_kzg_4844.c c_kzg_4844.c + @$(CC) $(CFLAGS) $(PROFILE) -o $@ $< $(BLST) $(PROFILER) .PHONY: blst blst: @@ -68,17 +80,35 @@ test: test_c_kzg_4844 .PHONY: test_cov test_cov: test_c_kzg_4844_cov - @LLVM_PROFILE_FILE="ckzg.profraw" ./test_c_kzg_4844_cov + @LLVM_PROFILE_FILE="ckzg.profraw" ./$< @$(XCRUN) llvm-profdata merge --sparse ckzg.profraw -o ckzg.profdata @$(XCRUN) llvm-cov show --instr-profile=ckzg.profdata --format=html \ $< c_kzg_4844.c > coverage.html @$(XCRUN) llvm-cov report --instr-profile=ckzg.profdata \ --show-functions $< c_kzg_4844.c +.PHONY: run_profiler +run_profiler: test_c_kzg_4844_prof + @$(PROFILER_OPTS) ./$< + +.PHONY: profile_% +profile_%: run_profiler + @echo Profiling $*... + @pprof --pdf --nodefraction=0.00001 --edgefraction=0.00001 \ + ./test_c_kzg_4844_prof $*.prof > $*.pdf + +.PHONY: profile +profile: \ + profile_blob_to_kzg_commitment \ + profile_verify_kzg_proof \ + profile_verify_aggregate_kzg_proof \ + profile_compute_kzg_proof \ + profile_compute_aggregate_kzg_proof + .PHONY: clean clean: - @rm -f *.o *.profraw *.profdata *.html \ - test_c_kzg_4844 test_c_kzg_4844_cov + @rm -f *.o *.profraw *.profdata *.html xray-log.* *.prof *.pdf \ + test_c_kzg_4844 test_c_kzg_4844_cov test_c_kzg_4844_prof .PHONY: format format: diff --git a/src/PROFILE.md b/src/PROFILE.md new file mode 100644 index 0000000..e6ffa96 --- /dev/null +++ b/src/PROFILE.md @@ -0,0 +1,81 @@ +# Profiling + +We use [`gperftools`](https://github.com/gperftools/gperftools) (Google +Performance Tools) for profiling. Note, we also considered using +[`llvm-xray`](https://llvm.org/docs/XRay.html) but found it lacking in +comparison. This will not tell you how long (wall clock time) each function +took, but it will help you determine which functions are the most expensive. + +## Prequisities + +On Linux (Debian), you need to install: +``` +sudo apt install gperftools graphviz +``` + +On macOS, you need to install (via [homebrew](https://brew.sh)): +``` +brew install gperftools ghostscript graphviz +``` + +## How to run + +### Generating profiling graphs + +There is a Makefile rule that should just auto-magically work: +``` +make profile +``` + +For each profiled function, this will produce two files (a PROF and PDF +file). The PROF file is the raw profiling data and the PDF is the +human-friendly graph that generated from that profiling data. + +#### Errors on macOS + +Note, on macOS there may a lot of "errors" like: +``` +otool-classic: can't open file: /usr/lib/libc++.1.dylib +``` + +In my experience, you can ignore these. It's somewhat a known issue and may be +resolved later. The PDFs should still generate successfully. I think it's the +reason some function names are a hexadecimal address though. + +### Viewing profiling graphs + +On Linux, you can open an individual PDF file like: +``` +xdg-open blob_to_kzg_commitment.pdf +``` + +On macOS, you can open an individual PDF file like: +``` +open blob_to_kzg_commitment.pdf +``` + +Or, you can open all the PDF files like: +``` +open *.pdf +``` + +### Interpreting the profiling graphs + +These might not make much sense without guidance. From a high-level, this works +by polling the instruction pointer (what's being executed) at a specific rate +(like once every 5 nanoseconds) and tracking this information. From this, you +can infer the relative time each function uses by counting the number of samples +that are in each function. + +Given a box containing: +``` +my_func 189 (0.6%) of 28758 (96.8%) +``` + +* Each box is a unique function. +* Bigger boxes are more expensive. +* Lines between boxes are function calls. +* 189 is the number of profiling samples in this function. +* 0.6% is the percentage of profiling samples in the functions. +* 28758 is the number of profiling samples in this function and its callees. +* 96.8% is the percentage of profiling samples in this function and its callees. \ No newline at end of file diff --git a/src/test_c_kzg_4844.c b/src/test_c_kzg_4844.c index 6b862c1..499669e 100644 --- a/src/test_c_kzg_4844.c +++ b/src/test_c_kzg_4844.c @@ -8,6 +8,10 @@ #include #include +#ifdef PROFILE +#include +#endif + /////////////////////////////////////////////////////////////////////////////// // Globals /////////////////////////////////////////////////////////////////////////////// @@ -590,6 +594,96 @@ static void test_compute_and_verify_kzg_proof__succeeds_within_domain(void) { } } +/////////////////////////////////////////////////////////////////////////////// +// Profiling Functions +/////////////////////////////////////////////////////////////////////////////// + +#ifdef PROFILE +static void profile_blob_to_kzg_commitment(void) { + Blob blob; + Bytes32 field_element; + KZGCommitment c; + + get_rand_field_element(&field_element); + memset(&blob, 0, sizeof(blob)); + memcpy(blob.bytes, field_element.bytes, BYTES_PER_FIELD_ELEMENT); + + ProfilerStart("blob_to_kzg_commitment.prof"); + for (int i = 0; i < 1000; i++) { + blob_to_kzg_commitment(&c, &blob, &s); + } + ProfilerStop(); +} + +static void profile_verify_kzg_proof(void) { + Bytes32 z, y; + Bytes48 commitment, proof; + bool out; + + get_rand_g1_bytes(&commitment); + get_rand_field_element(&z); + get_rand_field_element(&y); + get_rand_g1_bytes(&proof); + + ProfilerStart("verify_kzg_proof.prof"); + for (int i = 0; i < 5000; i++) { + verify_kzg_proof(&out, &commitment, &z, &y, &proof, &s); + } + ProfilerStop(); +} + +static void profile_verify_aggregate_kzg_proof(void) { + int n = 16; + Blob blobs[n]; + Bytes48 commitments[n]; + Bytes48 proof; + bool out; + + for (int i = 0; i < n; i++) { + get_rand_g1_bytes(&commitments[i]); + get_rand_blob(&blobs[i]); + } + get_rand_g1_bytes(&proof); + + ProfilerStart("verify_aggregate_kzg_proof.prof"); + for (int i = 0; i < 1000; i++) { + verify_aggregate_kzg_proof(&out, blobs, commitments, n, &proof, &s); + } + ProfilerStop(); +} + +static void profile_compute_kzg_proof(void) { + Blob blob; + Bytes32 z; + KZGProof out; + + get_rand_blob(&blob); + get_rand_field_element(&z); + + ProfilerStart("compute_kzg_proof.prof"); + for (int i = 0; i < 100; i++) { + compute_kzg_proof(&out, &blob, &z, &s); + } + ProfilerStop(); +} + +static void profile_compute_aggregate_kzg_proof(void) { + int n = 16; + Blob blobs[n]; + KZGProof out; + + for (int i = 0; i < n; i++) { + get_rand_blob(&blobs[i]); + } + + ProfilerStart("compute_aggregate_kzg_proof.prof"); + for (int i = 0; i < 10; i++) { + compute_aggregate_kzg_proof(&out, blobs, n, &s); + } + ProfilerStop(); +} +#endif /* PROFILE */ + /////////////////////////////////////////////////////////////////////////////// // Main logic /////////////////////////////////////////////////////////////////////////////// @@ -638,6 +732,20 @@ int main(void) { RUN(test_log_2_byte__expected_values); RUN(test_compute_and_verify_kzg_proof__succeeds_round_trip); RUN(test_compute_and_verify_kzg_proof__succeeds_within_domain); + + /* + * These functions are only executed if we're profiling. To me, it makes + * sense to put these in the testing file so we can re-use the helper + * functions. Additionally, it checks that whatever performance changes + * haven't broken the library. + */ +#ifdef PROFILE + profile_blob_to_kzg_commitment(); + profile_verify_kzg_proof(); + profile_verify_aggregate_kzg_proof(); + profile_compute_kzg_proof(); + profile_compute_aggregate_kzg_proof(); +#endif teardown(); return TEST_REPORT();