Add profiling (#124)
* Start to add profiling * Add include flags for macos * Add profiling funcs for the other funcs * Add profiling readme * Move sentence * Add warning section * Make run_profiler phony * Delete extra empty lines * Add line about box size * No more dropped nodes/edges * Remove function focus * Add sha256 benchmark * Benchmark sha256 with different sizes * Clean things up a little
This commit is contained in:
parent
5a3e294beb
commit
5c1128e380
|
@ -1,8 +1,10 @@
|
|||
test_c_kzg_4844
|
||||
test_c_kzg_4844_cov
|
||||
test_c_kzg_4844_*
|
||||
coverage.html
|
||||
*.profraw
|
||||
*.profdata
|
||||
*.prof
|
||||
*.pdf
|
||||
*.o
|
||||
*.s
|
||||
*.a
|
||||
|
|
|
@ -42,6 +42,10 @@ var (
|
|||
settings = C.KZGSettings{}
|
||||
)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Public functions
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
LoadTrustedSetup is the binding for:
|
||||
|
||||
|
@ -231,3 +235,24 @@ func VerifyAggregateKZGProof(blobs []Blob, commitmentsBytes []Bytes48, aggregate
|
|||
&settings)
|
||||
return bool(result), CKzgRet(ret)
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Private functions
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
sha256 is the binding for:
|
||||
|
||||
void blst_sha256(
|
||||
byte out[32],
|
||||
const byte *msg,
|
||||
size_t msg_len);
|
||||
*/
|
||||
func sha256(msg []byte) Bytes32 {
|
||||
var out Bytes32
|
||||
C.blst_sha256(
|
||||
(*C.byte)(unsafe.Pointer(&out)),
|
||||
*(**C.byte)(unsafe.Pointer(&msg)),
|
||||
(C.size_t)(len(msg)))
|
||||
return out
|
||||
}
|
||||
|
|
|
@ -86,6 +86,23 @@ func GetRandBlob(seed int64) Blob {
|
|||
return blob
|
||||
}
|
||||
|
||||
/*
|
||||
HumanBytes will convert an integer to a human-readable value. Adapted from:
|
||||
https://programming.guide/go/formatting-byte-size-to-human-readable-format.html
|
||||
*/
|
||||
func HumanBytes(b int64) string {
|
||||
const unit = 1024
|
||||
if b < unit {
|
||||
return fmt.Sprintf("%dB", b)
|
||||
}
|
||||
div, exp := int64(unit), 0
|
||||
for n := b / unit; n >= unit; n /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%v%cB", float64(b)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Tests
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -161,6 +178,10 @@ func Benchmark(b *testing.B) {
|
|||
trustedProof, _ := ComputeAggregateKZGProof(blobs[:1])
|
||||
proof := Bytes48(trustedProof)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Public functions
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
b.Run("BlobToKZGCommitment", func(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, ret := BlobToKZGCommitment(blobs[0])
|
||||
|
@ -199,4 +220,19 @@ func Benchmark(b *testing.B) {
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Private functions
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
for i := 2; i <= 20; i += 2 {
|
||||
var numBytes = int64(1 << i)
|
||||
var bytes = make([]byte, numBytes)
|
||||
b.Run(fmt.Sprintf("sha256(size=%v)", HumanBytes(numBytes)), func(b *testing.B) {
|
||||
b.SetBytes(numBytes)
|
||||
for n := 0; n < b.N; n++ {
|
||||
sha256(bytes)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
40
src/Makefile
40
src/Makefile
|
@ -29,6 +29,13 @@ BLST = -L../lib -lblst
|
|||
#
|
||||
COVERAGE = -fprofile-instr-generate -fcoverage-mapping
|
||||
|
||||
#
|
||||
# Settings for performance profiling.
|
||||
#
|
||||
PROFILE = -DPROFILE
|
||||
PROFILER = -lprofiler
|
||||
PROFILER_OPTS = CPUPROFILE_FREQUENCY=1000000000
|
||||
|
||||
#
|
||||
# Platform specific options.
|
||||
#
|
||||
|
@ -37,6 +44,8 @@ ifneq ($(OS),Windows_NT)
|
|||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
XCRUN = xcrun
|
||||
PROFILE += -L$(shell brew --prefix gperftools)/lib
|
||||
PROFILE += -I$(shell brew --prefix gperftools)/include
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -50,10 +59,13 @@ all: c_kzg_4844.o
|
|||
@$(CC) $(CFLAGS) -c $<
|
||||
|
||||
test_c_kzg_4844: test_c_kzg_4844.c c_kzg_4844.c
|
||||
@$(CC) $(CFLAGS) -o $@ $< $(BLST)
|
||||
@$(CC) $(CFLAGS) -o $@ $< $(BLST)
|
||||
|
||||
test_c_kzg_4844_cov: test_c_kzg_4844.c c_kzg_4844.c
|
||||
@$(CC) $(CFLAGS) $(COVERAGE) -o $@ $< $(BLST)
|
||||
@$(CC) $(CFLAGS) $(COVERAGE) -o $@ $< $(BLST)
|
||||
|
||||
test_c_kzg_4844_prof: test_c_kzg_4844.c c_kzg_4844.c
|
||||
@$(CC) $(CFLAGS) $(PROFILE) -o $@ $< $(BLST) $(PROFILER)
|
||||
|
||||
.PHONY: blst
|
||||
blst:
|
||||
|
@ -68,17 +80,35 @@ test: test_c_kzg_4844
|
|||
|
||||
.PHONY: test_cov
|
||||
test_cov: test_c_kzg_4844_cov
|
||||
@LLVM_PROFILE_FILE="ckzg.profraw" ./test_c_kzg_4844_cov
|
||||
@LLVM_PROFILE_FILE="ckzg.profraw" ./$<
|
||||
@$(XCRUN) llvm-profdata merge --sparse ckzg.profraw -o ckzg.profdata
|
||||
@$(XCRUN) llvm-cov show --instr-profile=ckzg.profdata --format=html \
|
||||
$< c_kzg_4844.c > coverage.html
|
||||
@$(XCRUN) llvm-cov report --instr-profile=ckzg.profdata \
|
||||
--show-functions $< c_kzg_4844.c
|
||||
|
||||
.PHONY: run_profiler
|
||||
run_profiler: test_c_kzg_4844_prof
|
||||
@$(PROFILER_OPTS) ./$<
|
||||
|
||||
.PHONY: profile_%
|
||||
profile_%: run_profiler
|
||||
@echo Profiling $*...
|
||||
@pprof --pdf --nodefraction=0.00001 --edgefraction=0.00001 \
|
||||
./test_c_kzg_4844_prof $*.prof > $*.pdf
|
||||
|
||||
.PHONY: profile
|
||||
profile: \
|
||||
profile_blob_to_kzg_commitment \
|
||||
profile_verify_kzg_proof \
|
||||
profile_verify_aggregate_kzg_proof \
|
||||
profile_compute_kzg_proof \
|
||||
profile_compute_aggregate_kzg_proof
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
@rm -f *.o *.profraw *.profdata *.html \
|
||||
test_c_kzg_4844 test_c_kzg_4844_cov
|
||||
@rm -f *.o *.profraw *.profdata *.html xray-log.* *.prof *.pdf \
|
||||
test_c_kzg_4844 test_c_kzg_4844_cov test_c_kzg_4844_prof
|
||||
|
||||
.PHONY: format
|
||||
format:
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
# Profiling
|
||||
|
||||
We use [`gperftools`](https://github.com/gperftools/gperftools) (Google
|
||||
Performance Tools) for profiling. Note, we also considered using
|
||||
[`llvm-xray`](https://llvm.org/docs/XRay.html) but found it lacking in
|
||||
comparison. This will not tell you how long (wall clock time) each function
|
||||
took, but it will help you determine which functions are the most expensive.
|
||||
|
||||
## Prequisities
|
||||
|
||||
On Linux (Debian), you need to install:
|
||||
```
|
||||
sudo apt install gperftools graphviz
|
||||
```
|
||||
|
||||
On macOS, you need to install (via [homebrew](https://brew.sh)):
|
||||
```
|
||||
brew install gperftools ghostscript graphviz
|
||||
```
|
||||
|
||||
## How to run
|
||||
|
||||
### Generating profiling graphs
|
||||
|
||||
There is a Makefile rule that should just auto-magically work:
|
||||
```
|
||||
make profile
|
||||
```
|
||||
|
||||
For each profiled function, this will produce two files (a PROF and PDF
|
||||
file). The PROF file is the raw profiling data and the PDF is the
|
||||
human-friendly graph that generated from that profiling data.
|
||||
|
||||
#### Errors on macOS
|
||||
|
||||
Note, on macOS there may a lot of "errors" like:
|
||||
```
|
||||
otool-classic: can't open file: /usr/lib/libc++.1.dylib
|
||||
```
|
||||
|
||||
In my experience, you can ignore these. It's somewhat a known issue and may be
|
||||
resolved later. The PDFs should still generate successfully. I think it's the
|
||||
reason some function names are a hexadecimal address though.
|
||||
|
||||
### Viewing profiling graphs
|
||||
|
||||
On Linux, you can open an individual PDF file like:
|
||||
```
|
||||
xdg-open blob_to_kzg_commitment.pdf
|
||||
```
|
||||
|
||||
On macOS, you can open an individual PDF file like:
|
||||
```
|
||||
open blob_to_kzg_commitment.pdf
|
||||
```
|
||||
|
||||
Or, you can open all the PDF files like:
|
||||
```
|
||||
open *.pdf
|
||||
```
|
||||
|
||||
### Interpreting the profiling graphs
|
||||
|
||||
These might not make much sense without guidance. From a high-level, this works
|
||||
by polling the instruction pointer (what's being executed) at a specific rate
|
||||
(like once every 5 nanoseconds) and tracking this information. From this, you
|
||||
can infer the relative time each function uses by counting the number of samples
|
||||
that are in each function.
|
||||
|
||||
Given a box containing:
|
||||
```
|
||||
my_func 189 (0.6%) of 28758 (96.8%)
|
||||
```
|
||||
|
||||
* Each box is a unique function.
|
||||
* Bigger boxes are more expensive.
|
||||
* Lines between boxes are function calls.
|
||||
* 189 is the number of profiling samples in this function.
|
||||
* 0.6% is the percentage of profiling samples in the functions.
|
||||
* 28758 is the number of profiling samples in this function and its callees.
|
||||
* 96.8% is the percentage of profiling samples in this function and its callees.
|
|
@ -8,6 +8,10 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef PROFILE
|
||||
#include <gperftools/profiler.h>
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Globals
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -590,6 +594,96 @@ static void test_compute_and_verify_kzg_proof__succeeds_within_domain(void) {
|
|||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Profiling Functions
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef PROFILE
|
||||
static void profile_blob_to_kzg_commitment(void) {
|
||||
Blob blob;
|
||||
Bytes32 field_element;
|
||||
KZGCommitment c;
|
||||
|
||||
get_rand_field_element(&field_element);
|
||||
memset(&blob, 0, sizeof(blob));
|
||||
memcpy(blob.bytes, field_element.bytes, BYTES_PER_FIELD_ELEMENT);
|
||||
|
||||
ProfilerStart("blob_to_kzg_commitment.prof");
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
blob_to_kzg_commitment(&c, &blob, &s);
|
||||
}
|
||||
ProfilerStop();
|
||||
}
|
||||
|
||||
static void profile_verify_kzg_proof(void) {
|
||||
Bytes32 z, y;
|
||||
Bytes48 commitment, proof;
|
||||
bool out;
|
||||
|
||||
get_rand_g1_bytes(&commitment);
|
||||
get_rand_field_element(&z);
|
||||
get_rand_field_element(&y);
|
||||
get_rand_g1_bytes(&proof);
|
||||
|
||||
ProfilerStart("verify_kzg_proof.prof");
|
||||
for (int i = 0; i < 5000; i++) {
|
||||
verify_kzg_proof(&out, &commitment, &z, &y, &proof, &s);
|
||||
}
|
||||
ProfilerStop();
|
||||
}
|
||||
|
||||
static void profile_verify_aggregate_kzg_proof(void) {
|
||||
int n = 16;
|
||||
Blob blobs[n];
|
||||
Bytes48 commitments[n];
|
||||
Bytes48 proof;
|
||||
bool out;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
get_rand_g1_bytes(&commitments[i]);
|
||||
get_rand_blob(&blobs[i]);
|
||||
}
|
||||
get_rand_g1_bytes(&proof);
|
||||
|
||||
ProfilerStart("verify_aggregate_kzg_proof.prof");
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
verify_aggregate_kzg_proof(&out, blobs, commitments, n, &proof, &s);
|
||||
}
|
||||
ProfilerStop();
|
||||
}
|
||||
|
||||
static void profile_compute_kzg_proof(void) {
|
||||
Blob blob;
|
||||
Bytes32 z;
|
||||
KZGProof out;
|
||||
|
||||
get_rand_blob(&blob);
|
||||
get_rand_field_element(&z);
|
||||
|
||||
ProfilerStart("compute_kzg_proof.prof");
|
||||
for (int i = 0; i < 100; i++) {
|
||||
compute_kzg_proof(&out, &blob, &z, &s);
|
||||
}
|
||||
ProfilerStop();
|
||||
}
|
||||
|
||||
static void profile_compute_aggregate_kzg_proof(void) {
|
||||
int n = 16;
|
||||
Blob blobs[n];
|
||||
KZGProof out;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
get_rand_blob(&blobs[i]);
|
||||
}
|
||||
|
||||
ProfilerStart("compute_aggregate_kzg_proof.prof");
|
||||
for (int i = 0; i < 10; i++) {
|
||||
compute_aggregate_kzg_proof(&out, blobs, n, &s);
|
||||
}
|
||||
ProfilerStop();
|
||||
}
|
||||
#endif /* PROFILE */
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Main logic
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -638,6 +732,20 @@ int main(void) {
|
|||
RUN(test_log_2_byte__expected_values);
|
||||
RUN(test_compute_and_verify_kzg_proof__succeeds_round_trip);
|
||||
RUN(test_compute_and_verify_kzg_proof__succeeds_within_domain);
|
||||
|
||||
/*
|
||||
* These functions are only executed if we're profiling. To me, it makes
|
||||
* sense to put these in the testing file so we can re-use the helper
|
||||
* functions. Additionally, it checks that whatever performance changes
|
||||
* haven't broken the library.
|
||||
*/
|
||||
#ifdef PROFILE
|
||||
profile_blob_to_kzg_commitment();
|
||||
profile_verify_kzg_proof();
|
||||
profile_verify_aggregate_kzg_proof();
|
||||
profile_compute_kzg_proof();
|
||||
profile_compute_aggregate_kzg_proof();
|
||||
#endif
|
||||
teardown();
|
||||
|
||||
return TEST_REPORT();
|
||||
|
|
Loading…
Reference in New Issue