Add profiling (#124)

* Start to add profiling

* Add include flags for macos

* Add profiling funcs for the other funcs

* Add profiling readme

* Move sentence

* Add warning section

* Make run_profiler phony

* Delete extra empty lines

* Add line about box size

* No more dropped nodes/edges

* Remove function focus

* Add sha256 benchmark

* Benchmark sha256 with different sizes

* Clean things up a little
This commit is contained in:
Justin Traglia 2023-02-10 14:33:32 -06:00 committed by GitHub
parent 5a3e294beb
commit 5c1128e380
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 288 additions and 6 deletions

4
.gitignore vendored
View File

@ -1,8 +1,10 @@
test_c_kzg_4844
test_c_kzg_4844_cov
test_c_kzg_4844_*
coverage.html
*.profraw
*.profdata
*.prof
*.pdf
*.o
*.s
*.a

View File

@ -42,6 +42,10 @@ var (
settings = C.KZGSettings{}
)
///////////////////////////////////////////////////////////////////////////////
// Public functions
///////////////////////////////////////////////////////////////////////////////
/*
LoadTrustedSetup is the binding for:
@ -231,3 +235,24 @@ func VerifyAggregateKZGProof(blobs []Blob, commitmentsBytes []Bytes48, aggregate
&settings)
return bool(result), CKzgRet(ret)
}
///////////////////////////////////////////////////////////////////////////////
// Private functions
///////////////////////////////////////////////////////////////////////////////
/*
sha256 is the binding for:
void blst_sha256(
byte out[32],
const byte *msg,
size_t msg_len);
*/
func sha256(msg []byte) Bytes32 {
var out Bytes32
C.blst_sha256(
(*C.byte)(unsafe.Pointer(&out)),
*(**C.byte)(unsafe.Pointer(&msg)),
(C.size_t)(len(msg)))
return out
}

View File

@ -86,6 +86,23 @@ func GetRandBlob(seed int64) Blob {
return blob
}
/*
HumanBytes will convert an integer to a human-readable value. Adapted from:
https://programming.guide/go/formatting-byte-size-to-human-readable-format.html
*/
func HumanBytes(b int64) string {
const unit = 1024
if b < unit {
return fmt.Sprintf("%dB", b)
}
div, exp := int64(unit), 0
for n := b / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%v%cB", float64(b)/float64(div), "KMGTPE"[exp])
}
///////////////////////////////////////////////////////////////////////////////
// Tests
///////////////////////////////////////////////////////////////////////////////
@ -161,6 +178,10 @@ func Benchmark(b *testing.B) {
trustedProof, _ := ComputeAggregateKZGProof(blobs[:1])
proof := Bytes48(trustedProof)
///////////////////////////////////////////////////////////////////////////
// Public functions
///////////////////////////////////////////////////////////////////////////
b.Run("BlobToKZGCommitment", func(b *testing.B) {
for n := 0; n < b.N; n++ {
_, ret := BlobToKZGCommitment(blobs[0])
@ -199,4 +220,19 @@ func Benchmark(b *testing.B) {
}
})
}
///////////////////////////////////////////////////////////////////////////
// Private functions
///////////////////////////////////////////////////////////////////////////
for i := 2; i <= 20; i += 2 {
var numBytes = int64(1 << i)
var bytes = make([]byte, numBytes)
b.Run(fmt.Sprintf("sha256(size=%v)", HumanBytes(numBytes)), func(b *testing.B) {
b.SetBytes(numBytes)
for n := 0; n < b.N; n++ {
sha256(bytes)
}
})
}
}

View File

@ -29,6 +29,13 @@ BLST = -L../lib -lblst
#
COVERAGE = -fprofile-instr-generate -fcoverage-mapping
#
# Settings for performance profiling.
#
PROFILE = -DPROFILE
PROFILER = -lprofiler
PROFILER_OPTS = CPUPROFILE_FREQUENCY=1000000000
#
# Platform specific options.
#
@ -37,6 +44,8 @@ ifneq ($(OS),Windows_NT)
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
XCRUN = xcrun
PROFILE += -L$(shell brew --prefix gperftools)/lib
PROFILE += -I$(shell brew --prefix gperftools)/include
endif
endif
@ -50,10 +59,13 @@ all: c_kzg_4844.o
@$(CC) $(CFLAGS) -c $<
test_c_kzg_4844: test_c_kzg_4844.c c_kzg_4844.c
@$(CC) $(CFLAGS) -o $@ $< $(BLST)
@$(CC) $(CFLAGS) -o $@ $< $(BLST)
test_c_kzg_4844_cov: test_c_kzg_4844.c c_kzg_4844.c
@$(CC) $(CFLAGS) $(COVERAGE) -o $@ $< $(BLST)
@$(CC) $(CFLAGS) $(COVERAGE) -o $@ $< $(BLST)
test_c_kzg_4844_prof: test_c_kzg_4844.c c_kzg_4844.c
@$(CC) $(CFLAGS) $(PROFILE) -o $@ $< $(BLST) $(PROFILER)
.PHONY: blst
blst:
@ -68,17 +80,35 @@ test: test_c_kzg_4844
.PHONY: test_cov
test_cov: test_c_kzg_4844_cov
@LLVM_PROFILE_FILE="ckzg.profraw" ./test_c_kzg_4844_cov
@LLVM_PROFILE_FILE="ckzg.profraw" ./$<
@$(XCRUN) llvm-profdata merge --sparse ckzg.profraw -o ckzg.profdata
@$(XCRUN) llvm-cov show --instr-profile=ckzg.profdata --format=html \
$< c_kzg_4844.c > coverage.html
@$(XCRUN) llvm-cov report --instr-profile=ckzg.profdata \
--show-functions $< c_kzg_4844.c
.PHONY: run_profiler
run_profiler: test_c_kzg_4844_prof
@$(PROFILER_OPTS) ./$<
.PHONY: profile_%
profile_%: run_profiler
@echo Profiling $*...
@pprof --pdf --nodefraction=0.00001 --edgefraction=0.00001 \
./test_c_kzg_4844_prof $*.prof > $*.pdf
.PHONY: profile
profile: \
profile_blob_to_kzg_commitment \
profile_verify_kzg_proof \
profile_verify_aggregate_kzg_proof \
profile_compute_kzg_proof \
profile_compute_aggregate_kzg_proof
.PHONY: clean
clean:
@rm -f *.o *.profraw *.profdata *.html \
test_c_kzg_4844 test_c_kzg_4844_cov
@rm -f *.o *.profraw *.profdata *.html xray-log.* *.prof *.pdf \
test_c_kzg_4844 test_c_kzg_4844_cov test_c_kzg_4844_prof
.PHONY: format
format:

81
src/PROFILE.md Normal file
View File

@ -0,0 +1,81 @@
# Profiling
We use [`gperftools`](https://github.com/gperftools/gperftools) (Google
Performance Tools) for profiling. Note, we also considered using
[`llvm-xray`](https://llvm.org/docs/XRay.html) but found it lacking in
comparison. This will not tell you how long (wall clock time) each function
took, but it will help you determine which functions are the most expensive.
## Prequisities
On Linux (Debian), you need to install:
```
sudo apt install gperftools graphviz
```
On macOS, you need to install (via [homebrew](https://brew.sh)):
```
brew install gperftools ghostscript graphviz
```
## How to run
### Generating profiling graphs
There is a Makefile rule that should just auto-magically work:
```
make profile
```
For each profiled function, this will produce two files (a PROF and PDF
file). The PROF file is the raw profiling data and the PDF is the
human-friendly graph that generated from that profiling data.
#### Errors on macOS
Note, on macOS there may a lot of "errors" like:
```
otool-classic: can't open file: /usr/lib/libc++.1.dylib
```
In my experience, you can ignore these. It's somewhat a known issue and may be
resolved later. The PDFs should still generate successfully. I think it's the
reason some function names are a hexadecimal address though.
### Viewing profiling graphs
On Linux, you can open an individual PDF file like:
```
xdg-open blob_to_kzg_commitment.pdf
```
On macOS, you can open an individual PDF file like:
```
open blob_to_kzg_commitment.pdf
```
Or, you can open all the PDF files like:
```
open *.pdf
```
### Interpreting the profiling graphs
These might not make much sense without guidance. From a high-level, this works
by polling the instruction pointer (what's being executed) at a specific rate
(like once every 5 nanoseconds) and tracking this information. From this, you
can infer the relative time each function uses by counting the number of samples
that are in each function.
Given a box containing:
```
my_func 189 (0.6%) of 28758 (96.8%)
```
* Each box is a unique function.
* Bigger boxes are more expensive.
* Lines between boxes are function calls.
* 189 is the number of profiling samples in this function.
* 0.6% is the percentage of profiling samples in the functions.
* 28758 is the number of profiling samples in this function and its callees.
* 96.8% is the percentage of profiling samples in this function and its callees.

View File

@ -8,6 +8,10 @@
#include <stdio.h>
#include <string.h>
#ifdef PROFILE
#include <gperftools/profiler.h>
#endif
///////////////////////////////////////////////////////////////////////////////
// Globals
///////////////////////////////////////////////////////////////////////////////
@ -590,6 +594,96 @@ static void test_compute_and_verify_kzg_proof__succeeds_within_domain(void) {
}
}
///////////////////////////////////////////////////////////////////////////////
// Profiling Functions
///////////////////////////////////////////////////////////////////////////////
#ifdef PROFILE
static void profile_blob_to_kzg_commitment(void) {
Blob blob;
Bytes32 field_element;
KZGCommitment c;
get_rand_field_element(&field_element);
memset(&blob, 0, sizeof(blob));
memcpy(blob.bytes, field_element.bytes, BYTES_PER_FIELD_ELEMENT);
ProfilerStart("blob_to_kzg_commitment.prof");
for (int i = 0; i < 1000; i++) {
blob_to_kzg_commitment(&c, &blob, &s);
}
ProfilerStop();
}
static void profile_verify_kzg_proof(void) {
Bytes32 z, y;
Bytes48 commitment, proof;
bool out;
get_rand_g1_bytes(&commitment);
get_rand_field_element(&z);
get_rand_field_element(&y);
get_rand_g1_bytes(&proof);
ProfilerStart("verify_kzg_proof.prof");
for (int i = 0; i < 5000; i++) {
verify_kzg_proof(&out, &commitment, &z, &y, &proof, &s);
}
ProfilerStop();
}
static void profile_verify_aggregate_kzg_proof(void) {
int n = 16;
Blob blobs[n];
Bytes48 commitments[n];
Bytes48 proof;
bool out;
for (int i = 0; i < n; i++) {
get_rand_g1_bytes(&commitments[i]);
get_rand_blob(&blobs[i]);
}
get_rand_g1_bytes(&proof);
ProfilerStart("verify_aggregate_kzg_proof.prof");
for (int i = 0; i < 1000; i++) {
verify_aggregate_kzg_proof(&out, blobs, commitments, n, &proof, &s);
}
ProfilerStop();
}
static void profile_compute_kzg_proof(void) {
Blob blob;
Bytes32 z;
KZGProof out;
get_rand_blob(&blob);
get_rand_field_element(&z);
ProfilerStart("compute_kzg_proof.prof");
for (int i = 0; i < 100; i++) {
compute_kzg_proof(&out, &blob, &z, &s);
}
ProfilerStop();
}
static void profile_compute_aggregate_kzg_proof(void) {
int n = 16;
Blob blobs[n];
KZGProof out;
for (int i = 0; i < n; i++) {
get_rand_blob(&blobs[i]);
}
ProfilerStart("compute_aggregate_kzg_proof.prof");
for (int i = 0; i < 10; i++) {
compute_aggregate_kzg_proof(&out, blobs, n, &s);
}
ProfilerStop();
}
#endif /* PROFILE */
///////////////////////////////////////////////////////////////////////////////
// Main logic
///////////////////////////////////////////////////////////////////////////////
@ -638,6 +732,20 @@ int main(void) {
RUN(test_log_2_byte__expected_values);
RUN(test_compute_and_verify_kzg_proof__succeeds_round_trip);
RUN(test_compute_and_verify_kzg_proof__succeeds_within_domain);
/*
* These functions are only executed if we're profiling. To me, it makes
* sense to put these in the testing file so we can re-use the helper
* functions. Additionally, it checks that whatever performance changes
* haven't broken the library.
*/
#ifdef PROFILE
profile_blob_to_kzg_commitment();
profile_verify_kzg_proof();
profile_verify_aggregate_kzg_proof();
profile_compute_kzg_proof();
profile_compute_aggregate_kzg_proof();
#endif
teardown();
return TEST_REPORT();