diff --git a/.github/actions/compile-witness-generator/action.yml b/.github/actions/compile-witness-generator/action.yml index c3f3d15..f439b50 100644 --- a/.github/actions/compile-witness-generator/action.yml +++ b/.github/actions/compile-witness-generator/action.yml @@ -104,6 +104,16 @@ runs: if [ "$OS" = "macos" ]; then SED_I="sed -i ''"; fi $SED_I ':a;N;$!ba;s/\n}\n\n*$/\n return 0;\n}/' "${CIRCUIT_CPP_PATH}/main.cpp" + # circom's --c backend emits an empty ~Circom_CalcWit destructor (the binary + # exits after one witness); linked as a library it leaks the signal buffer on + # every call. Patch the destructor to free the per-call allocations. + - name: Patch ${{ inputs.circuit-name-display }} CalcWit Leak + shell: bash + env: + WITNESS_GENERATOR_RESOURCES_PATH: ${{ steps.parse-circuit-path.outputs.WITNESS_GENERATOR_RESOURCES_PATH }} + CIRCUIT_CPP_PATH: ${{ steps.parse-circuit-path.outputs.CIRCUIT_CPP_PATH }} + run: sh "${WITNESS_GENERATOR_RESOURCES_PATH}/fix_calcwit_leak.sh" "${CIRCUIT_CPP_PATH}" + - name: Install llvm-objcopy (macOS) if: ${{ inputs.os == 'macos' }} shell: bash diff --git a/.github/resources/witness-generator/fix_calcwit_leak.sh b/.github/resources/witness-generator/fix_calcwit_leak.sh new file mode 100755 index 0000000..7f65abe --- /dev/null +++ b/.github/resources/witness-generator/fix_calcwit_leak.sh @@ -0,0 +1,50 @@ +#!/bin/sh +# Patch the circom-generated `Circom_CalcWit` destructor to free the per-call +# allocations made in its constructor. +# +# circom's `--c` backend emits an EMPTY `~Circom_CalcWit()` destructor on +# purpose: the generated witness-generator *binary* computes a single witness +# and then exits, so the OS reclaims everything. We link the generated code as +# an in-process *library* and call it once per proof, so that empty destructor +# leaks the entire signal buffer (`signalValues`, ~total_signals * sizeof(Fr) = +# megabytes), plus `componentMemory` (and its per-component sub-buffers) and +# `inputSignalAssigned`, on EVERY call. +# +# This rewrites the destructor to free those allocations. The per-component +# frees mirror circom's own `release_memory_component` (guarded `if(ptr) +# delete[]`), so they are safe even if the generated run already released some +# components. +# +# Usage: fix_calcwit_leak.sh +set -eu + +DIR="${1:?usage: fix_calcwit_leak.sh }" +CALCWIT="$DIR/calcwit.cpp" + +[ -f "$CALCWIT" ] || { echo "fix_calcwit_leak: $CALCWIT not found" >&2; exit 1; } + +if grep -q "logos: free per-call allocations" "$CALCWIT"; then + echo "fix_calcwit_leak: $CALCWIT already patched, skipping" + exit 0 +fi + +perl -0777 -i -pe ' + my $body = q{Circom_CalcWit::~Circom_CalcWit() { + // logos: free per-call allocations. circom leaves this destructor empty + // because the generated binary exits after one witness; we call the + // generated code in-process as a library, so without these frees every + // witness-generation call leaks signalValues (megabytes), componentMemory + // and inputSignalAssigned. These three are allocated once in the constructor + // and freed nowhere else. NOTE: the per-component sub-buffers + // (subcomponents/outputIsSet/mutexes/...) are already released during the + // witness computation by circoms own release_memory_component, so they must + // NOT be freed here -- doing so double-frees. + delete[] signalValues; + delete[] inputSignalAssigned; + delete[] componentMemory; +}}; + my $n = (s/Circom_CalcWit::~Circom_CalcWit\s*\(\s*\)\s*\{.*?\n\}/$body/s); + die "fix_calcwit_leak: could not locate ~Circom_CalcWit() destructor\n" unless $n == 1; +' "$CALCWIT" + +echo "fix_calcwit_leak: patched $CALCWIT" diff --git a/justfile b/justfile index d6efac5..9949283 100644 --- a/justfile +++ b/justfile @@ -1,6 +1,7 @@ cargo_root := justfile_directory() + "/rust" src := justfile_directory() + "/src" ci_makefile := justfile_directory() + "/.github/resources/witness-generator/Makefile" +calcwit_fix := justfile_directory() + "/.github/resources/witness-generator/fix_calcwit_leak.sh" circom_version := "2.2.2" # This version must match the version used in the CI os := `uname -s` @@ -34,6 +35,8 @@ poq: check-circom circom blend/poq.circom --c --r1cs --no_asm --O2 --output blend # circom-generated main() has no return on the success path; patch it before -O3 turns it into an infinite loop {{sed_i}} ':a;N;$!ba;s/\n}\n\n*$/\n return 0;\n}/' blend/poq_cpp/main.cpp + # circom's empty ~Circom_CalcWit destructor leaks per call when linked as a library; free the per-call allocations + sh {{calcwit_fix}} blend/poq_cpp cp -r {{src}}/poq blend/poq_cpp/poq cp {{src}}/circom_adapter.cpp {{src}}/circom_adapter.hpp {{src}}/circom_fwd.hpp {{src}}/types.hpp {{src}}/assert.h blend/poq_cpp/ cp {{ci_makefile}} blend/poq_cpp/Makefile @@ -50,6 +53,8 @@ pol: check-circom circom mantle/pol.circom --c --r1cs --no_asm --O2 --output mantle # circom-generated main() has no return on the success path; patch it before -O3 turns it into an infinite loop {{sed_i}} ':a;N;$!ba;s/\n}\n\n*$/\n return 0;\n}/' mantle/pol_cpp/main.cpp + # circom's empty ~Circom_CalcWit destructor leaks per call when linked as a library; free the per-call allocations + sh {{calcwit_fix}} mantle/pol_cpp cp -r {{src}}/pol mantle/pol_cpp/pol cp {{src}}/circom_adapter.cpp {{src}}/circom_adapter.hpp {{src}}/circom_fwd.hpp {{src}}/types.hpp {{src}}/assert.h mantle/pol_cpp/ cp {{ci_makefile}} mantle/pol_cpp/Makefile @@ -66,6 +71,8 @@ poc: check-circom circom mantle/poc.circom --c --r1cs --no_asm --O2 --output mantle # circom-generated main() has no return on the success path; patch it before -O3 turns it into an infinite loop {{sed_i}} ':a;N;$!ba;s/\n}\n\n*$/\n return 0;\n}/' mantle/poc_cpp/main.cpp + # circom's empty ~Circom_CalcWit destructor leaks per call when linked as a library; free the per-call allocations + sh {{calcwit_fix}} mantle/poc_cpp cp -r {{src}}/poc mantle/poc_cpp/poc cp {{src}}/circom_adapter.cpp {{src}}/circom_adapter.hpp {{src}}/circom_fwd.hpp {{src}}/types.hpp {{src}}/assert.h mantle/poc_cpp/ cp {{ci_makefile}} mantle/poc_cpp/Makefile @@ -82,6 +89,8 @@ signature: check-circom circom mantle/signature.circom --c --r1cs --no_asm --O2 --output mantle # circom-generated main() has no return on the success path; patch it before -O3 turns it into an infinite loop {{sed_i}} ':a;N;$!ba;s/\n}\n\n*$/\n return 0;\n}/' mantle/signature_cpp/main.cpp + # circom's empty ~Circom_CalcWit destructor leaks per call when linked as a library; free the per-call allocations + sh {{calcwit_fix}} mantle/signature_cpp cp -r {{src}}/signature mantle/signature_cpp/signature cp {{src}}/circom_adapter.cpp {{src}}/circom_adapter.hpp {{src}}/circom_fwd.hpp {{src}}/types.hpp {{src}}/assert.h mantle/signature_cpp/ cp {{ci_makefile}} mantle/signature_cpp/Makefile diff --git a/src/circom_adapter.cpp b/src/circom_adapter.cpp index c2a590d..0560753 100644 --- a/src/circom_adapter.cpp +++ b/src/circom_adapter.cpp @@ -4,6 +4,17 @@ #include #include #include +#include + +Circom_Circuit* getCachedCircuit(const ConstBytes& circuit_bytes) { + // The circuit is immutable, compiled-in data and has no destructor that + // frees its internal buffers, so load it once and keep it for the process + // lifetime instead of allocating (and leaking) it on every call. + static Circom_Circuit* cached = nullptr; + static std::once_flag once; + std::call_once(once, [&]() { cached = loadCircuit(circuit_bytes); }); + return cached; +} Circom_Circuit* loadCircuit(const ConstBytes& circuit_bytes) { Circom_Circuit* circuit = new Circom_Circuit; diff --git a/src/circom_adapter.hpp b/src/circom_adapter.hpp index e90e0c6..bd4798f 100644 --- a/src/circom_adapter.hpp +++ b/src/circom_adapter.hpp @@ -7,6 +7,16 @@ // Return value Circom_Circuit* loadCircuit(const ConstBytes& circuit); + +// Returns a process-wide, lazily-loaded circuit for this library. +// +// `loadCircuit` allocates the circuit's internal buffers (input hash map, +// witness->signal list, constants, IO field defs) but `Circom_Circuit` has no +// destructor, so `delete circuit` leaks all of them. The circuit is immutable +// data derived from the compiled-in `.dat`, so we load it exactly once and +// reuse it for every (including concurrent) witness-generation call. +Circom_Circuit* getCachedCircuit(const ConstBytes& circuit); + void loadJson(Circom_CalcWit *ctx, const char* inputs_json); void writeBinWitness(Circom_CalcWit *ctx, Bytes* output_witness); diff --git a/src/poc/ffi.cpp b/src/poc/ffi.cpp index d5a25d4..5ec2244 100644 --- a/src/poc/ffi.cpp +++ b/src/poc/ffi.cpp @@ -89,26 +89,23 @@ static Status validate_witness_arguments(const WitnessInput* input, const Bytes* static Status generate_witness_impl(const WitnessInput* input, Bytes* output) { const ConstBytes& circuit_bytes = input->dat; - Circom_Circuit* circuit = loadCircuit(circuit_bytes); + Circom_Circuit* circuit = getCachedCircuit(circuit_bytes); Circom_CalcWit* ctx = new Circom_CalcWit(circuit); try { loadJson(ctx, input->inputs_json); } catch (...) { delete ctx; - delete circuit; throw; } if (ctx->getRemaingInputsToBeSet()!=0) { const std::string message = "Not all inputs have been set. Only " + std::to_string(get_main_input_signal_no()-ctx->getRemaingInputsToBeSet()) + " out of " + std::to_string(get_main_input_signal_no()) + "."; delete ctx; - delete circuit; return status_new(StatusCode_InvalidInput, message.c_str()); } writeBinWitness(ctx, output); delete ctx; - delete circuit; return status_ok(); } diff --git a/src/pol/ffi.cpp b/src/pol/ffi.cpp index 4a9cac2..176f3f9 100644 --- a/src/pol/ffi.cpp +++ b/src/pol/ffi.cpp @@ -89,26 +89,23 @@ static Status validate_witness_arguments(const WitnessInput* input, const Bytes* static Status generate_witness_impl(const WitnessInput* input, Bytes* output) { const ConstBytes& circuit_bytes = input->dat; - Circom_Circuit* circuit = loadCircuit(circuit_bytes); + Circom_Circuit* circuit = getCachedCircuit(circuit_bytes); Circom_CalcWit* ctx = new Circom_CalcWit(circuit); try { loadJson(ctx, input->inputs_json); } catch (...) { delete ctx; - delete circuit; throw; } if (ctx->getRemaingInputsToBeSet()!=0) { const std::string message = "Not all inputs have been set. Only " + std::to_string(get_main_input_signal_no()-ctx->getRemaingInputsToBeSet()) + " out of " + std::to_string(get_main_input_signal_no()) + "."; delete ctx; - delete circuit; return status_new(StatusCode_InvalidInput, message.c_str()); } writeBinWitness(ctx, output); delete ctx; - delete circuit; return status_ok(); } diff --git a/src/poq/ffi.cpp b/src/poq/ffi.cpp index 7715c89..44c69ab 100644 --- a/src/poq/ffi.cpp +++ b/src/poq/ffi.cpp @@ -89,26 +89,23 @@ static Status validate_witness_arguments(const WitnessInput* input, const Bytes* static Status generate_witness_impl(const WitnessInput* input, Bytes* output) { const ConstBytes& circuit_bytes = input->dat; - Circom_Circuit* circuit = loadCircuit(circuit_bytes); + Circom_Circuit* circuit = getCachedCircuit(circuit_bytes); Circom_CalcWit* ctx = new Circom_CalcWit(circuit); try { loadJson(ctx, input->inputs_json); } catch (...) { delete ctx; - delete circuit; throw; } if (ctx->getRemaingInputsToBeSet()!=0) { const std::string message = "Not all inputs have been set. Only " + std::to_string(get_main_input_signal_no()-ctx->getRemaingInputsToBeSet()) + " out of " + std::to_string(get_main_input_signal_no()) + "."; delete ctx; - delete circuit; return status_new(StatusCode_InvalidInput, message.c_str()); } writeBinWitness(ctx, output); delete ctx; - delete circuit; return status_ok(); } diff --git a/src/signature/ffi.cpp b/src/signature/ffi.cpp index 94a7990..43a792b 100644 --- a/src/signature/ffi.cpp +++ b/src/signature/ffi.cpp @@ -89,26 +89,23 @@ static Status validate_witness_arguments(const WitnessInput* input, const Bytes* static Status generate_witness_impl(const WitnessInput* input, Bytes* output) { const ConstBytes& circuit_bytes = input->dat; - Circom_Circuit* circuit = loadCircuit(circuit_bytes); + Circom_Circuit* circuit = getCachedCircuit(circuit_bytes); Circom_CalcWit* ctx = new Circom_CalcWit(circuit); try { loadJson(ctx, input->inputs_json); } catch (...) { delete ctx; - delete circuit; throw; } if (ctx->getRemaingInputsToBeSet()!=0) { const std::string message = "Not all inputs have been set. Only " + std::to_string(get_main_input_signal_no()-ctx->getRemaingInputsToBeSet()) + " out of " + std::to_string(get_main_input_signal_no()) + "."; delete ctx; - delete circuit; return status_new(StatusCode_InvalidInput, message.c_str()); } writeBinWitness(ctx, output); delete ctx; - delete circuit; return status_ok(); }