diff --git a/README.md b/README.md
index 101795d..3efbbc2 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,8 @@ Done so far:
   - FFTs over the G1 group
   - Polynomial single commitment and verification
   - Polynomial multi commitment and verification
-  - [FK20](https://github.com/khovratovich/Kate/blob/master/Kate_amortized.pdf) single proof method
+  - [FK20](https://github.com/khovratovich/Kate/blob/master/Kate_amortized.pdf) single proof method (normal, and optimised for data availability)
+  - FK20 multi proof method (normal, and optimised for data availability)
 
 ## Install
 
diff --git a/src/c_kzg_util.c b/src/c_kzg_util.c
index e3be39a..d8c7287 100644
--- a/src/c_kzg_util.c
+++ b/src/c_kzg_util.c
@@ -25,16 +25,58 @@
 /**
  * Wrapped `malloc()` that reports failures to allocate.
  *
- * @param[out] p Pointer to the allocated space
+ * @param[out] x Pointer to the allocated space
  * @param[in]  n The number of bytes to be allocated
  * @retval C_CZK_OK      All is well
  * @retval C_CZK_MALLOC  Memory allocation failed
  */
-C_KZG_RET c_kzg_malloc(void **p, size_t n) {
+C_KZG_RET c_kzg_malloc(void **x, size_t n) {
     if (n > 0) {
-        *p = malloc(n);
-        return *p != NULL ? C_KZG_OK : C_KZG_MALLOC;
+        *x = malloc(n);
+        return *x != NULL ? C_KZG_OK : C_KZG_MALLOC;
     }
-    *p = NULL;
+    *x = NULL;
     return C_KZG_OK;
 }
+
+/**
+ * Allocate memory for an array of `blst_fr`.
+ *
+ * @remark Free the space later using `free()`. A byte count that would overflow is clamped so the request fails.
+ *
+ * @param[out] x Pointer to the allocated space
+ * @param[in]  n The number of blst_fr to be allocated
+ * @retval C_KZG_OK      All is well
+ * @retval C_KZG_MALLOC  Memory allocation failed, or `n * sizeof(blst_fr)` does not fit in `size_t`
+ */
+C_KZG_RET new_fr(blst_fr **x, size_t n) {
+    return c_kzg_malloc((void **)x, n <= (size_t)-1 / sizeof **x ? n * sizeof **x : (size_t)-1);
+}
+
+/**
+ * Allocate memory for an array of `blst_p1`.
+ *
+ * @remark Free the space later using `free()`. A byte count that would overflow is clamped so the request fails.
+ *
+ * @param[out] x Pointer to the allocated space
+ * @param[in]  n The number of blst_p1 to be allocated
+ * @retval C_KZG_OK      All is well
+ * @retval C_KZG_MALLOC  Memory allocation failed, or `n * sizeof(blst_p1)` does not fit in `size_t`
+ */
+C_KZG_RET new_p1(blst_p1 **x, size_t n) {
+    return c_kzg_malloc((void **)x, n <= (size_t)-1 / sizeof **x ? n * sizeof **x : (size_t)-1);
+}
+
+/**
+ * Allocate memory for an array of `blst_p2`.
+ *
+ * @remark Free the space later using `free()`. A byte count that would overflow is clamped so the request fails.
+ *
+ * @param[out] x Pointer to the allocated space
+ * @param[in]  n The number of blst_p2 to be allocated
+ * @retval C_KZG_OK      All is well
+ * @retval C_KZG_MALLOC  Memory allocation failed, or `n * sizeof(blst_p2)` does not fit in `size_t`
+ */
+C_KZG_RET new_p2(blst_p2 **x, size_t n) {
+    return c_kzg_malloc((void **)x, n <= (size_t)-1 / sizeof **x ? n * sizeof **x : (size_t)-1);
+}
diff --git a/src/c_kzg_util.h b/src/c_kzg_util.h
index ca54b20..31d7900 100644
--- a/src/c_kzg_util.h
+++ b/src/c_kzg_util.h
@@ -20,3 +20,6 @@
 #include "c_kzg.h"
 
 C_KZG_RET c_kzg_malloc(void **p, size_t n);
+C_KZG_RET new_fr(blst_fr **x, size_t n);
+C_KZG_RET new_p1(blst_p1 **x, size_t n);
+C_KZG_RET new_p2(blst_p2 **x, size_t n);
diff --git a/src/c_kzg_util_test.c b/src/c_kzg_util_test.c
index 081b6c4..4d59337 100644
--- a/src/c_kzg_util_test.c
+++ b/src/c_kzg_util_test.c
@@ -22,6 +22,7 @@
 void malloc_works(void) {
     int *p;
     TEST_CHECK(C_KZG_OK == c_kzg_malloc((void **)&p, 4));
+    free(p);
 }
 
 void malloc_huge_fails(void) {
diff --git a/src/fft_common.c b/src/fft_common.c
index 5322f78..41f7af0 100644
--- a/src/fft_common.c
+++ b/src/fft_common.c
@@ -90,8 +90,8 @@ C_KZG_RET new_fft_settings(FFTSettings *fs, unsigned int max_scale) {
     blst_fr_from_uint64(&fs->root_of_unity, scale2_root_of_unity[max_scale]);
 
     // Allocate space for the roots of unity
-    TRY(c_kzg_malloc((void **)&fs->expanded_roots_of_unity, (fs->max_width + 1) * sizeof *fs->expanded_roots_of_unity));
-    TRY(c_kzg_malloc((void **)&fs->reverse_roots_of_unity, (fs->max_width + 1) * sizeof *fs->reverse_roots_of_unity));
+    TRY(new_fr(&fs->expanded_roots_of_unity, fs->max_width + 1));
+    TRY(new_fr(&fs->reverse_roots_of_unity, fs->max_width + 1));
 
     // Populate the roots of unity
     TRY(expand_root_of_unity(fs->expanded_roots_of_unity, &fs->root_of_unity, fs->max_width));
diff --git a/src/fft_fr.c b/src/fft_fr.c
index 893397e..46918a4 100644
--- a/src/fft_fr.c
+++ b/src/fft_fr.c
@@ -113,37 +113,3 @@ C_KZG_RET fft_fr(blst_fr *out, const blst_fr *in, bool inverse, uint64_t n, cons
     }
     return C_KZG_OK;
 }
-
-/**
- * Wrapper for #fft_fr that allocates memory for the output.
- *
- * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling the
- * corresponding `free_` function. In this case, #free_fft_fr.
- *
- * @param[out] out     The results (array of length @p n)
- * @param[in]  in      The input data (array of length @p n)
- * @param[in]  inverse `false` for forward transform, `true` for inverse transform
- * @param[in]  n       Length of the FFT, must be a power of two
- * @param[in]  fs      Pointer to previously initialised FFTSettings structure with `max_width` at least @p n.
- * @retval C_CZK_OK      All is well
- * @retval C_CZK_BADARGS Invalid parameters were supplied
- * @retval C_CZK_MALLOC  Memory allocation failed
- */
-C_KZG_RET new_fft_fr(blst_fr **out, const blst_fr *in, bool inverse, uint64_t n, const FFTSettings *fs) {
-    C_KZG_RET ret;
-    TRY(c_kzg_malloc((void **)out, n * sizeof **out));
-    ret = fft_fr(*out, in, inverse, n, fs);
-    if (ret == C_KZG_BADARGS) {
-        free_fft_fr(*out);
-    }
-    return ret;
-}
-
-/**
- * Recover memory allocated by #new_fft_fr.
- *
- * @param x The array to be freed
- */
-void free_fft_fr(blst_fr *x) {
-    free(x);
-}
diff --git a/src/fft_fr.h b/src/fft_fr.h
index 4ca9f0b..f9a2048 100644
--- a/src/fft_fr.h
+++ b/src/fft_fr.h
@@ -23,5 +23,3 @@ void fft_fr_slow(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr
 void fft_fr_fast(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr *roots, uint64_t roots_stride,
                  uint64_t n);
 C_KZG_RET fft_fr(blst_fr *out, const blst_fr *in, bool inverse, uint64_t n, const FFTSettings *fs);
-C_KZG_RET new_fft_fr(blst_fr **out, const blst_fr *in, bool inverse, uint64_t n, const FFTSettings *fs);
-void free_fft_fr(blst_fr *x);
diff --git a/src/fft_g1.c b/src/fft_g1.c
index 907eb8a..0c74ea7 100644
--- a/src/fft_g1.c
+++ b/src/fft_g1.c
@@ -114,37 +114,3 @@ C_KZG_RET fft_g1(blst_p1 *out, const blst_p1 *in, bool inverse, uint64_t n, cons
     }
     return C_KZG_OK;
 }
-
-/**
- * Wrapper for #fft_g1 that allocates memory for the output.
- *
- * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling the
- * corresponding `free_` function. In this case, #free_fft_g1.
- *
- * @param[out] out     The results (array of length @p n)
- * @param[in]  in      The input data (array of length @p n)
- * @param[in]  inverse `false` for forward transform, `true` for inverse transform
- * @param[in]  n       Length of the FFT, must be a power of two
- * @param[in]  fs      Pointer to previously initialised FFTSettings structure with `max_width` at least @p n.
- * @retval C_CZK_OK      All is well
- * @retval C_CZK_BADARGS Invalid parameters were supplied
- * @retval C_CZK_MALLOC  Memory allocation failed
- */
-C_KZG_RET new_fft_g1(blst_p1 **out, const blst_p1 *in, bool inverse, uint64_t n, const FFTSettings *fs) {
-    C_KZG_RET ret;
-    TRY(c_kzg_malloc((void **)out, n * sizeof **out));
-    ret = fft_g1(*out, in, inverse, n, fs);
-    if (ret == C_KZG_BADARGS) {
-        free_fft_g1(*out);
-    }
-    return ret;
-}
-
-/**
- * Recover memory allocated by #new_fft_g1.
- *
- * @param x The array to be freed
- */
-void free_fft_g1(blst_p1 *x) {
-    free(x);
-}
diff --git a/src/fft_g1.h b/src/fft_g1.h
index 49941dd..5b3eb66 100644
--- a/src/fft_g1.h
+++ b/src/fft_g1.h
@@ -23,5 +23,3 @@ void fft_g1_slow(blst_p1 *out, const blst_p1 *in, uint64_t stride, const blst_fr
 void fft_g1_fast(blst_p1 *out, const blst_p1 *in, uint64_t stride, const blst_fr *roots, uint64_t roots_stride,
                  uint64_t n);
 C_KZG_RET fft_g1(blst_p1 *out, const blst_p1 *in, bool inverse, uint64_t n, const FFTSettings *fs);
-C_KZG_RET new_fft_g1(blst_p1 **out, const blst_p1 *in, bool inverse, uint64_t n, const FFTSettings *fs);
-void free_fft_g1(blst_p1 *x);
diff --git a/src/fk20_proofs.c b/src/fk20_proofs.c
index bc7daa1..5b3688e 100644
--- a/src/fk20_proofs.c
+++ b/src/fk20_proofs.c
@@ -19,6 +19,8 @@
  *
  * Implements amortised KZG proofs as per the [FK20
  * paper](https://github.com/khovratovich/Kate/blob/master/Kate_amortized.pdf).
+ *
+ * @todo Split this out into smaller files.
  */
 
 #include <string.h> // memcpy()
@@ -120,7 +122,7 @@ C_KZG_RET toeplitz_part_1(blst_p1 *out, const blst_p1 *x, uint64_t n, const FFTS
     uint64_t n2 = n * 2;
     blst_p1 *x_ext;
 
-    TRY(c_kzg_malloc((void **)&x_ext, n2 * sizeof *x_ext));
+    TRY(new_p1(&x_ext, n2));
     for (uint64_t i = 0; i < n; i++) {
         x_ext[i] = x[i];
     }
@@ -134,108 +136,48 @@ C_KZG_RET toeplitz_part_1(blst_p1 *out, const blst_p1 *x, uint64_t n, const FFTS
     return C_KZG_OK;
 }
 
-/**
- * Wrapper for #toeplitz_part_1 that allocates memory for the output.
- *
- * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling the
- * corresponding `free_` function. In this case, #free_toeplitz_part_1.
- *
- * @param[out] out The FFT of the extension of @p x, size @p n * 2
- * @param[in]  x   The input vector, size @p n
- * @param[in]  n   The length of the input vector @p x
- * @param[in]  fs  The FFT settings previously initialised with #new_fft_settings
- * @retval C_CZK_OK      All is well
- * @retval C_CZK_ERROR   An internal error occurred
- * @retval C_CZK_MALLOC  Memory allocation failed
- */
-C_KZG_RET new_toeplitz_part_1(blst_p1 **out, const blst_p1 *x, uint64_t n, const FFTSettings *fs) {
-    TRY(c_kzg_malloc((void **)out, n * 2 * sizeof **out));
-    TRY(toeplitz_part_1(*out, x, n, fs));
-    return C_KZG_OK;
-}
-
-/**
- * Recover memory allocated by #new_toeplitz_part_1.
- *
- * @param x The array to be freed
- */
-void free_toeplitz_part_1(blst_p1 *x) {
-    free(x);
-}
-
 /**
  * The second part of the Toeplitz matrix multiplication algorithm.
  *
  * @param[out] out Array of G1 group elements, length `n`
  * @param[in]  toeplitz_coeffs Toeplitz coefficients, a polynomial length `n`
- * @param[in]  fk  FK20 single settings previously initialised by #new_fk20_single_settings
+ * @param[in]  x_ext_fft The Fourier transform of the extended `x` vector, length `n`
+ * @param[in]  fs  The FFT settings previously initialised with #new_fft_settings
  * @retval C_CZK_OK      All is well
  * @retval C_CZK_BADARGS Invalid parameters were supplied
  * @retval C_CZK_ERROR   An internal error occurred
  * @retval C_CZK_MALLOC  Memory allocation failed
  */
-C_KZG_RET toeplitz_part_2(blst_p1 *out, const poly *toeplitz_coeffs, const FK20SingleSettings *fk) {
+C_KZG_RET toeplitz_part_2(blst_p1 *out, const poly *toeplitz_coeffs, const blst_p1 *x_ext_fft, const FFTSettings *fs) {
     blst_fr *toeplitz_coeffs_fft;
 
-    ASSERT(toeplitz_coeffs->length == fk->x_ext_fft_len, C_KZG_BADARGS);
+    // TODO: restore the `toeplitz_coeffs->length == x_ext_fft length` check; that length is no longer passed in.
 
-    TRY(new_fft_fr(&toeplitz_coeffs_fft, toeplitz_coeffs->coeffs, false, toeplitz_coeffs->length, fk->ks->fs));
+    TRY(new_fr(&toeplitz_coeffs_fft, toeplitz_coeffs->length));
+    TRY(fft_fr(toeplitz_coeffs_fft, toeplitz_coeffs->coeffs, false, toeplitz_coeffs->length, fs));
 
     for (uint64_t i = 0; i < toeplitz_coeffs->length; i++) {
-        p1_mul(&out[i], &fk->x_ext_fft[i], &toeplitz_coeffs_fft[i]);
+        p1_mul(&out[i], &x_ext_fft[i], &toeplitz_coeffs_fft[i]);
     }
 
-    free_fft_fr(toeplitz_coeffs_fft);
+    free(toeplitz_coeffs_fft);
     return C_KZG_OK;
 }
 
-/**
- * Wrapper for #toeplitz_part_2 that allocates memory for the output.
- *
- * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling the
- * corresponding `free_` function. In this case, #free_toeplitz_part_2.
- *
- * @param[out] out Array of G1 group elements, length `n`
- * @param[in]  toeplitz_coeffs Toeplitz coefficients, a polynomial length `n`
- * @param[in]  fk  FK20 single settings previously initialised by #new_fk20_single_settings
- * @retval C_CZK_OK      All is well
- * @retval C_CZK_BADARGS Invalid parameters were supplied
- * @retval C_CZK_ERROR   An internal error occurred
- * @retval C_CZK_MALLOC  Memory allocation failed
- */
-C_KZG_RET new_toeplitz_part_2(blst_p1 **out, const poly *toeplitz_coeffs, const FK20SingleSettings *fk) {
-    C_KZG_RET ret;
-    TRY(c_kzg_malloc((void **)out, toeplitz_coeffs->length * sizeof **out));
-    ret = toeplitz_part_2(*out, toeplitz_coeffs, fk);
-    if (ret == C_KZG_BADARGS) {
-        free_toeplitz_part_2(*out);
-    }
-    return ret;
-}
-
-/**
- * Recover memory allocated by #new_toeplitz_part_2.
- *
- * @param x The array to be freed
- */
-void free_toeplitz_part_2(blst_p1 *x) {
-    free(x);
-}
-
 /**
  * The third part of the Toeplitz matrix multiplication algorithm: transform back and zero the top half.
  *
  * @param[out] out Array of G1 group elements, length @p n2
  * @param[in]  h_ext_fft FFT of the extended `h` values, length @p n2
  * @param[in]  n2  Size of the arrays
- * @param[in]  fk  FK20 single settings previously initialised by #new_fk20_single_settings
+ * @param[in]  fs  The FFT settings previously initialised with #new_fft_settings
  * @retval C_CZK_OK      All is well
  * @retval C_CZK_ERROR   An internal error occurred
  */
-C_KZG_RET toeplitz_part_3(blst_p1 *out, const blst_p1 *h_ext_fft, uint64_t n2, const FK20SingleSettings *fk) {
+C_KZG_RET toeplitz_part_3(blst_p1 *out, const blst_p1 *h_ext_fft, uint64_t n2, const FFTSettings *fs) {
     uint64_t n = n2 / 2;
 
-    TRY(fft_g1(out, h_ext_fft, true, n2, fk->ks->fs));
+    TRY(fft_g1(out, h_ext_fft, true, n2, fs));
 
     // Zero the second half of h
     for (uint64_t i = n; i < n2; i++) {
@@ -246,81 +188,62 @@ C_KZG_RET toeplitz_part_3(blst_p1 *out, const blst_p1 *h_ext_fft, uint64_t n2, c
 }
 
 /**
- * Wrapper for #toeplitz_part_3 that allocates memory for the output.
+ * Reorder and extend polynomial coefficients for the toeplitz method, strided version.
  *
- * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling the
- * corresponding `free_` function. In this case, #free_toeplitz_part_3.
+ * @remark The upper half of the input polynomial coefficients is treated as being zero.
  *
- * @param[out] out Array of G1 group elements, length @p n2
- * @param[in]  h_ext_fft FFT of the extended `h` values, length @p n2
- * @param[in]  n2  Size of the arrays
- * @param[in]  fk  FK20 single settings previously initialised by #new_fk20_single_settings
+ * @param[out] out The reordered polynomial, size `n * 2 / stride`
+ * @param[in]  in  The input polynomial, size `n`
+ * @param[in]  offset The offset
+ * @param[in]  stride The stride
  * @retval C_CZK_OK      All is well
- * @retval C_CZK_ERROR   An internal error occurred
+ * @retval C_CZK_BADARGS Invalid parameters were supplied
  * @retval C_CZK_MALLOC  Memory allocation failed
  */
-C_KZG_RET new_toeplitz_part_3(blst_p1 **out, const blst_p1 *h_ext_fft, uint64_t n2, const FK20SingleSettings *fk) {
-    TRY(c_kzg_malloc((void **)out, n2 * sizeof **out));
-    TRY(toeplitz_part_3(*out, h_ext_fft, n2, fk));
-    return C_KZG_OK;
-}
+C_KZG_RET toeplitz_coeffs_stride(poly *out, const poly *in, uint64_t offset, uint64_t stride) {
+    uint64_t n = in->length, k, k2;
 
-/**
- * Recover memory allocated by #new_toeplitz_part_3.
- *
- * @param x The array to be freed
- */
-void free_toeplitz_part_3(blst_p1 *x) {
-    free(x);
+    ASSERT(stride > 0 && offset < n, C_KZG_BADARGS); // `offset < n` keeps `n - 1 - offset` from wrapping
+    k = n / stride;
+    k2 = k * 2;
+    ASSERT(k > 0 && out->length >= k2, C_KZG_BADARGS); // `out` must hold all `k2` coefficients written below
+
+    out->coeffs[0] = in->coeffs[n - 1 - offset];
+    for (uint64_t i = 1; i <= k + 1 && i < k2; i++) { // Clamp: for `k == 1` the zero run would pass the end
+        out->coeffs[i] = fr_zero;
+    }
+    for (uint64_t i = k + 2, j = 2 * stride - offset - 1; i < k2; i++, j += stride) {
+        out->coeffs[i] = in->coeffs[j];
+    }
+
+    return C_KZG_OK;
 }
 
 /**
  * Reorder and extend polynomial coefficients for the toeplitz method.
  *
- * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling the
- * corresponding `free_` function. In this case, #free_toeplitz_coeffs_step.
+ * @remark The upper half of the input polynomial coefficients is treated as being zero.
  *
  * @param[out] out The reordered polynomial, size `n * 2`
  * @param[in]  in  The input polynomial, size `n`
  * @retval C_CZK_OK      All is well
  * @retval C_CZK_MALLOC  Memory allocation failed
  */
-C_KZG_RET new_toeplitz_coeffs_step(poly *out, const poly *in) {
-    uint64_t n = in->length, n2 = n * 2;
-
-    TRY(new_poly(out, n2));
-
-    out->coeffs[0] = in->coeffs[n - 1];
-    for (uint64_t i = 1; i <= n + 1; i++) {
-        out->coeffs[i] = fr_zero;
-    }
-    for (uint64_t i = n + 2; i < n2; i++) {
-        out->coeffs[i] = in->coeffs[i - (n + 1)];
-    }
-
-    return C_KZG_OK;
-}
-
-/**
- * Recover memory allocated by #new_toeplitz_coeffs_step.
- *
- * @param p The coefficients to be freed
- */
-void free_toeplitz_coeffs_step(poly *p) {
-    free_poly(p);
+C_KZG_RET toeplitz_coeffs_step(poly *out, const poly *in) {
+    return toeplitz_coeffs_stride(out, in, 0, 1);
 }
 
 /**
  * Optimised version of the FK20 algorithm for use in data availability checks.
  *
- * The upper half of the polynomial coefficients is always 0, so we do not need to extend to twice the size
- * for Toeplitz matrix multiplication.
- *
  * Simultaneously calculates all the KZG proofs for `x_i = w^i` (`0 <= i < 2n`), where `w` is a `(2 * n)`th root of
  * unity. The `2n` comes from the polynomial being extended with zeros to twice the original size.
  *
  * `out[i]` is the proof for `y[i]`, the evaluation of the polynomial at `fs.expanded_roots_of_unity[i]`.
  *
+ * @remark Only the lower half of the polynomial is supplied; the upper, zero, half is assumed. The
+ * #toeplitz_coeffs_step routine does the right thing.
+ *
  * @param[out] out Array size `n * 2`
  * @param[in]  p   Polynomial, size `n`
  * @param[in]  fk  FK20 single settings previously initialised by #new_fk20_single_settings
@@ -337,14 +260,20 @@ C_KZG_RET fk20_single_da_opt(blst_p1 *out, const poly *p, const FK20SingleSettin
     ASSERT(n2 <= fk->ks->fs->max_width, C_KZG_BADARGS);
     ASSERT(is_power_of_two(n), C_KZG_BADARGS);
 
-    TRY(new_toeplitz_coeffs_step(&toeplitz_coeffs, p));
-    TRY(new_toeplitz_part_2(&h_ext_fft, &toeplitz_coeffs, fk));
-    TRY(new_toeplitz_part_3(&h, h_ext_fft, n2, fk));
+    TRY(new_poly(&toeplitz_coeffs, 2 * p->length));
+    TRY(toeplitz_coeffs_step(&toeplitz_coeffs, p));
+
+    TRY(new_p1(&h_ext_fft, toeplitz_coeffs.length));
+    TRY(toeplitz_part_2(h_ext_fft, &toeplitz_coeffs, fk->x_ext_fft, fk->ks->fs));
+
+    TRY(new_p1(&h, n2));
+    TRY(toeplitz_part_3(h, h_ext_fft, n2, fk->ks->fs));
+
     TRY(fft_g1(out, h, false, n2, fk->ks->fs));
 
-    free_toeplitz_coeffs_step(&toeplitz_coeffs);
-    free_toeplitz_part_2(h_ext_fft);
-    free_toeplitz_part_3(h);
+    free_poly(&toeplitz_coeffs);
+    free(h_ext_fft);
+    free(h);
     return C_KZG_OK;
 }
 
@@ -390,14 +319,125 @@ C_KZG_RET da_using_fk20_single(blst_p1 *out, const poly *p, const FK20SingleSett
  * ...
  * proof[i]: w^(i*l + 0), w^(i*l + 1), ... w^(i*l + l - 1)
  * ```
+ *
+ * @param[out] out The proofs, array size @p p->length * 2
+ * @param[in]  p   The polynomial
+ * @param[in]  fk  FK20 multi settings previously initialised by #new_fk20_multi_settings
  */
-void fk20_multi(void) {}
+C_KZG_RET fk20_compute_proof_multi(blst_p1 *out, const poly *p, const FK20MultiSettings *fk) {
+    uint64_t n = p->length, n2 = n * 2;
+    blst_p1 *h_ext_fft, *h_ext_fft_file, *h;
+    poly toeplitz_coeffs;
+
+    ASSERT(fk->ks->fs->max_width >= n2, C_KZG_BADARGS);
+
+    TRY(new_p1(&h_ext_fft, n2)); // Running sum of the per-file results, started at the group identity
+    for (uint64_t i = 0; i < n2; i++) {
+        h_ext_fft[i] = g1_identity;
+    }
+
+    TRY(new_poly(&toeplitz_coeffs, 2 * p->length)); // NOTE(review): earlier buffers look leaked if TRY returns early
+    TRY(new_p1(&h_ext_fft_file, toeplitz_coeffs.length)); // Scratch space, overwritten on every pass of the loop
+    for (uint64_t i = 0; i < fk->chunk_len; i++) {
+        TRY(toeplitz_coeffs_step(&toeplitz_coeffs, p)); // NOTE(review): same result on every pass -- intended?
+        TRY(toeplitz_part_2(h_ext_fft_file, &toeplitz_coeffs, fk->x_ext_fft_files[i], fk->ks->fs));
+        for (uint64_t j = 0; j < n2; j++) { // Add this pass's result into the running sum
+            blst_p1_add(&h_ext_fft[j], &h_ext_fft[j], &h_ext_fft_file[j]);
+        }
+    }
+    free_poly(&toeplitz_coeffs);
+    free(h_ext_fft_file);
+
+    TRY(new_p1(&h, n2));
+    TRY(toeplitz_part_3(h, h_ext_fft, n2, fk->ks->fs)); // Inverse transform, then zero the upper half
+
+    TRY(fft_g1(out, h, false, n2, fk->ks->fs)); // Forward transform of `h` is written to `out`
+
+    free(h_ext_fft);
+    free(h);
+    return C_KZG_OK;
+}
+
+/**
+ * FK20 multi-proof method, optimized for data availability where the top half of polynomial
+ * coefficients is zero.
+ *
+ * @remark Only the lower half of the polynomial is supplied; the upper, zero, half is assumed. The
+ * #toeplitz_coeffs_stride routine does the right thing.
+ *
+ * @param[out] out The proofs, array size `2 * n / fk->chunk_len`
+ * @param[in]  p   The polynomial, length `n`
+ * @param[in]  fk  FK20 multi settings from #new_fk20_multi_settings; `chunk_len` must be in `[1, n]`
+ */
+C_KZG_RET fk20_multi_da_opt(blst_p1 *out, const poly *p, const FK20MultiSettings *fk) {
+    uint64_t n = p->length, n2 = n * 2, k, k2;
+    blst_p1 *h_ext_fft, *h_ext_fft_file, *h;
+    poly toeplitz_coeffs;
+
+    ASSERT(n2 <= fk->ks->fs->max_width, C_KZG_BADARGS);
+    ASSERT(is_power_of_two(n), C_KZG_BADARGS);
+    ASSERT(fk->chunk_len > 0 && fk->chunk_len <= n, C_KZG_BADARGS); // Rules out division by zero and `k == 0`
+
+    k = n / fk->chunk_len;
+    k2 = k * 2;
+
+    TRY(new_p1(&h_ext_fft, k2));
+    for (uint64_t i = 0; i < k2; i++) {
+        h_ext_fft[i] = g1_identity;
+    }
+
+    TRY(new_poly(&toeplitz_coeffs, n2 / fk->chunk_len));
+    TRY(new_p1(&h_ext_fft_file, toeplitz_coeffs.length));
+    for (uint64_t i = 0; i < fk->chunk_len; i++) {
+        TRY(toeplitz_coeffs_stride(&toeplitz_coeffs, p, i, fk->chunk_len));
+        TRY(toeplitz_part_2(h_ext_fft_file, &toeplitz_coeffs, fk->x_ext_fft_files[i], fk->ks->fs));
+        for (uint64_t j = 0; j < k2; j++) {
+            blst_p1_add(&h_ext_fft[j], &h_ext_fft[j], &h_ext_fft_file[j]);
+        }
+    }
+    free_poly(&toeplitz_coeffs);
+    free(h_ext_fft_file);
+
+    // Calculate `h`
+    TRY(new_p1(&h, k2));
+    TRY(toeplitz_part_3(h, h_ext_fft, k2, fk->ks->fs));
+
+    // Overwrite the second half of `h` with zero
+    for (uint64_t i = k; i < k2; i++) {
+        h[i] = g1_identity;
+    }
+
+    TRY(fft_g1(out, h, false, k2, fk->ks->fs));
+
+    free(h_ext_fft);
+    free(h);
+
+    return C_KZG_OK;
+}
+
+/**
+ * Computes all the KZG proofs for data availability checks.
+ *
+ * Runs #fk20_multi_da_opt on @p p, then puts the `2 * n / chunk_len` results in @p out into reverse bit order.
+ * Returns `C_KZG_BADARGS` when `2 * n` exceeds the FFT `max_width` or `n` (the length of @p p) is not a power of two.
+ */
+C_KZG_RET da_using_fk20_multi(blst_p1 *out, const poly *p, const FK20MultiSettings *fk) {
+    uint64_t n = p->length, n2 = n * 2;
+
+    ASSERT(n2 <= fk->ks->fs->max_width, C_KZG_BADARGS);
+    ASSERT(is_power_of_two(n), C_KZG_BADARGS);
+
+    TRY(fk20_multi_da_opt(out, p, fk));
+    TRY(reverse_bit_order(out, sizeof out[0], n2 / fk->chunk_len));
+
+    return C_KZG_OK;
+}
 
 /**
  * Initialise settings for an FK20 single proof.
  *
- * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling the
- * corresponding `free_` function. In this case, #free_fk20_single_settings.
+ * @remark As with all functions prefixed `new_`, this allocates memory that needs to be reclaimed by calling
+ * the corresponding `free_` function. In this case, #free_fk20_single_settings.
  *
  * @param[out] fk The initialised settings
  * @param[in]  n2 The desired size of `x_ext_fft`, a power of two
@@ -418,13 +458,14 @@ C_KZG_RET new_fk20_single_settings(FK20SingleSettings *fk, uint64_t n2, const KZ
     fk->ks = ks;
     fk->x_ext_fft_len = n2;
 
-    TRY(c_kzg_malloc((void **)&x, n * sizeof *x));
+    TRY(new_p1(&x, n));
     for (uint64_t i = 0; i < n - 1; i++) {
         x[i] = ks->secret_g1[n - 2 - i];
     }
     x[n - 1] = g1_identity;
 
-    TRY(new_toeplitz_part_1(&fk->x_ext_fft, x, n, ks->fs));
+    TRY(new_p1(&fk->x_ext_fft, 2 * n));
+    TRY(toeplitz_part_1(fk->x_ext_fft, x, n, ks->fs));
 
     free(x);
     return C_KZG_OK;
@@ -462,9 +503,10 @@ C_KZG_RET new_fk20_multi_settings(FK20MultiSettings *fk, uint64_t n2, uint64_t c
     fk->ks = ks;
     fk->chunk_len = chunk_len;
 
+    // `x_ext_fft_files` is two dimensional. Allocate space for pointers to the rows.
     TRY(c_kzg_malloc((void **)&fk->x_ext_fft_files, chunk_len * sizeof *fk->x_ext_fft_files));
 
-    TRY(c_kzg_malloc((void **)&x, k * sizeof *x));
+    TRY(new_p1(&x, k));
     for (uint64_t offset = 0; offset < chunk_len; offset++) {
         uint64_t start = n - chunk_len - 1 - offset;
         for (uint64_t i = 0, j = start; i + 1 < k; i++, j -= chunk_len) {
@@ -472,7 +514,8 @@ C_KZG_RET new_fk20_multi_settings(FK20MultiSettings *fk, uint64_t n2, uint64_t c
         }
         x[k - 1] = g1_identity;
 
-        TRY(new_toeplitz_part_1(&fk->x_ext_fft_files[offset], x, k, ks->fs));
+        TRY(new_p1(&fk->x_ext_fft_files[offset], 2 * k));
+        TRY(toeplitz_part_1(fk->x_ext_fft_files[offset], x, k, ks->fs));
     }
 
     free(x);
@@ -485,7 +528,7 @@ C_KZG_RET new_fk20_multi_settings(FK20MultiSettings *fk, uint64_t n2, uint64_t c
  * @param fk The settings to be freed
  */
 void free_fk20_single_settings(FK20SingleSettings *fk) {
-    free_toeplitz_part_1(fk->x_ext_fft);
+    free(fk->x_ext_fft);
     fk->x_ext_fft_len = 0;
 }
 
@@ -496,7 +539,7 @@ void free_fk20_single_settings(FK20SingleSettings *fk) {
  */
 void free_fk20_multi_settings(FK20MultiSettings *fk) {
     for (uint64_t i = 0; i < fk->chunk_len; i++) {
-        free_toeplitz_part_1((fk->x_ext_fft_files)[i]);
+        free((fk->x_ext_fft_files)[i]);
     }
     free(fk->x_ext_fft_files);
     fk->chunk_len = 0;
diff --git a/src/fk20_proofs.h b/src/fk20_proofs.h
index ee874e1..ed360c0 100644
--- a/src/fk20_proofs.h
+++ b/src/fk20_proofs.h
@@ -65,18 +65,14 @@ uint32_t reverse_bits(uint32_t a);
 uint32_t reverse_bits_limited(uint32_t n, uint32_t value);
 C_KZG_RET reverse_bit_order(void *values, size_t size, uint64_t n);
 C_KZG_RET toeplitz_part_1(blst_p1 *out, const blst_p1 *x, uint64_t n, const FFTSettings *fs);
-C_KZG_RET new_toeplitz_part_1(blst_p1 **out, const blst_p1 *x, uint64_t n, const FFTSettings *fs);
-void free_toeplitz_part_1(blst_p1 *x);
-C_KZG_RET toeplitz_part_2(blst_p1 *out, const poly *toeplitz_coeffs, const FK20SingleSettings *fk);
-C_KZG_RET new_toeplitz_part_2(blst_p1 **out, const poly *toeplitz_coeffs, const FK20SingleSettings *fk);
-void free_toeplitz_part_2(blst_p1 *x);
-C_KZG_RET toeplitz_part_3(blst_p1 *out, const blst_p1 *h_ext_fft, uint64_t n2, const FK20SingleSettings *fk);
-C_KZG_RET new_toeplitz_part_3(blst_p1 **out, const blst_p1 *h_ext_fft, uint64_t n2, const FK20SingleSettings *fk);
-void free_toeplitz_part_3(blst_p1 *x);
-C_KZG_RET new_toeplitz_coeffs_step(poly *out, const poly *in);
-void free_toeplitz_coeffs_step(poly *p);
+C_KZG_RET toeplitz_part_2(blst_p1 *out, const poly *toeplitz_coeffs, const blst_p1 *x_ext_fft, const FFTSettings *fs);
+C_KZG_RET toeplitz_part_3(blst_p1 *out, const blst_p1 *h_ext_fft, uint64_t n2, const FFTSettings *fs);
+C_KZG_RET toeplitz_coeffs_stride(poly *out, const poly *in, uint64_t offset, uint64_t stride);
+C_KZG_RET toeplitz_coeffs_step(poly *out, const poly *in);
 C_KZG_RET fk20_single_da_opt(blst_p1 *out, const poly *p, const FK20SingleSettings *fk);
 C_KZG_RET da_using_fk20_single(blst_p1 *out, const poly *p, const FK20SingleSettings *fk);
+C_KZG_RET fk20_multi_da_opt(blst_p1 *out, const poly *p, const FK20MultiSettings *fk);
+C_KZG_RET da_using_fk20_multi(blst_p1 *out, const poly *p, const FK20MultiSettings *fk);
 C_KZG_RET new_fk20_single_settings(FK20SingleSettings *fk, uint64_t n2, const KZGSettings *ks);
 C_KZG_RET new_fk20_multi_settings(FK20MultiSettings *fk, uint64_t n2, uint64_t chunk_len, const KZGSettings *ks);
 void free_fk20_single_settings(FK20SingleSettings *fk);
diff --git a/src/fk20_proofs_test.c b/src/fk20_proofs_test.c
index 08b95f9..a6dd048 100644
--- a/src/fk20_proofs_test.c
+++ b/src/fk20_proofs_test.c
@@ -18,6 +18,7 @@
 #include "debug_util.h"
 #include "test_util.h"
 #include "fk20_proofs.h"
+#include "c_kzg_util.h"
 
 void test_reverse_bits_macros(void) {
     TEST_CHECK(128 == rev_byte(1));
@@ -241,6 +242,117 @@ void fk_multi_settings(void) {
     free_fk20_multi_settings(&fk);
 }
 
+void fk_multi_0(void) {
+    FFTSettings fs;
+    KZGSettings ks;
+    FK20MultiSettings fk;
+    uint64_t n, chunk_len, chunk_count;
+    uint64_t secrets_len;
+    blst_p1 *s1;
+    blst_p2 *s2;
+    poly p;
+    uint64_t vv[] = {1, 2, 3, 4, 7, 8, 9, 10, 13, 14, 1, 15, 1, 1000, 134, 33};
+    blst_p1 commitment;
+    blst_p1 *all_proofs;
+    blst_fr *extended_coeffs, *extended_coeffs_fft;
+    blst_fr *ys, *ys2;
+    uint64_t domain_stride;
+
+    chunk_len = 16;
+    chunk_count = 32;
+    n = chunk_len * chunk_count;
+    secrets_len = 2 * n;
+
+    TEST_CHECK(C_KZG_OK == new_p1(&s1, secrets_len));
+    TEST_CHECK(C_KZG_OK == new_p2(&s2, secrets_len));
+
+    generate_trusted_setup(s1, s2, &secret, secrets_len);
+    TEST_CHECK(C_KZG_OK == new_fft_settings(&fs, 4 + 5 + 1));
+    TEST_CHECK(C_KZG_OK == new_kzg_settings(&ks, s1, s2, secrets_len, &fs));
+    TEST_CHECK(C_KZG_OK == new_fk20_multi_settings(&fk, n * 2, chunk_len, &ks));
+
+    // Create a test polynomial: 512 coefficients
+    TEST_CHECK(C_KZG_OK == new_poly(&p, n));
+    for (int i = 0; i < chunk_count; i++) {
+        for (int j = 0; j < chunk_len; j++) {
+            uint64_t v = vv[j];
+            if (j == 3) v += i;
+            if (j == 5) v += i * i;
+            fr_from_uint64(&p.coeffs[i * chunk_len + j], v);
+        }
+        fr_negate(&p.coeffs[i * chunk_len + 12], &p.coeffs[i * chunk_len + 12]);
+        fr_negate(&p.coeffs[i * chunk_len + 14], &p.coeffs[i * chunk_len + 14]);
+    }
+
+    commit_to_poly(&commitment, &p, &ks);
+
+    // Compute the multi proofs, assuming that the polynomial will be extended with zeros
+    TEST_CHECK(C_KZG_OK == new_p1(&all_proofs, 2 * chunk_count));
+    TEST_CHECK(C_KZG_OK == da_using_fk20_multi(all_proofs, &p, &fk));
+
+    // Now actually extend the polynomial with zeros
+    TEST_CHECK(C_KZG_OK == new_fr(&extended_coeffs, 2 * n));
+    for (uint64_t i = 0; i < n; i++) {
+        extended_coeffs[i] = p.coeffs[i];
+    }
+    for (uint64_t i = n; i < 2 * n; i++) {
+        extended_coeffs[i] = fr_zero;
+    }
+    TEST_CHECK(C_KZG_OK == new_fr(&extended_coeffs_fft, 2 * n));
+    TEST_CHECK(C_KZG_OK == fft_fr(extended_coeffs_fft, extended_coeffs, false, 2 * n, &fs));
+    TEST_CHECK(C_KZG_OK == reverse_bit_order(extended_coeffs_fft, sizeof extended_coeffs_fft[0], 2 * n));
+
+    // Verify the proofs
+    TEST_CHECK(C_KZG_OK == new_fr(&ys, chunk_len));
+    TEST_CHECK(C_KZG_OK == new_fr(&ys2, chunk_len));
+    domain_stride = fs.max_width / (2 * n);
+    for (uint64_t pos = 0; pos < 2 * chunk_count; pos++) {
+        uint64_t domain_pos, stride;
+        blst_fr x;
+        bool result;
+
+        domain_pos = reverse_bits_limited(2 * chunk_count, pos);
+        x = fs.expanded_roots_of_unity[domain_pos * domain_stride];
+
+        // The ys from the extended coefficients
+        for (uint64_t i = 0; i < chunk_len; i++) {
+            ys[i] = extended_coeffs_fft[chunk_len * pos + i];
+        }
+        TEST_CHECK(C_KZG_OK == reverse_bit_order(ys, sizeof ys[0], chunk_len));
+
+        // Now recreate the ys by evaluating the polynomial in the sub-domain range
+        stride = fs.max_width / chunk_len;
+        for (uint64_t i = 0; i < chunk_len; i++) {
+            blst_fr z;
+            blst_fr_mul(&z, &x, &fs.expanded_roots_of_unity[i * stride]);
+            eval_poly(&ys2[i], &p, &z);
+        }
+
+        // ys and ys2 should be equal
+        for (uint64_t i = 0; i < chunk_len; i++) {
+            TEST_CHECK(fr_equal(&ys[i], &ys2[i]));
+        }
+
+        // Verify this proof
+        TEST_CHECK(C_KZG_OK == check_proof_multi(&result, &commitment, &all_proofs[pos], &x, ys, chunk_len, &ks));
+        TEST_CHECK(true == result);
+    }
+
+    free_poly(&p);
+    free(all_proofs);
+    free(extended_coeffs);
+    free(extended_coeffs_fft);
+    free(ys);
+    free(ys2);
+    free(s1);
+    free(s2);
+    free_fft_settings(&fs);
+    free_kzg_settings(&ks);
+    free_fk20_multi_settings(&fk);
+}
+
+// TODO: compare results of fk20_multi_da_opt() and fk20_compute_proof_multi()
+
 TEST_LIST = {
     {"FK20_PROOFS_TEST", title},
     {"test_reverse_bits_macros", test_reverse_bits_macros},
@@ -252,5 +364,6 @@ TEST_LIST = {
     {"fk_single", fk_single},
     {"fk_single_strided", fk_single_strided},
     {"fk_multi_settings", fk_multi_settings},
+    {"fk_multi_0", fk_multi_0},
     {NULL, NULL} /* zero record marks the end of the list */
 };
diff --git a/src/kzg_proofs.c b/src/kzg_proofs.c
index 937d77a..f9cb50d 100644
--- a/src/kzg_proofs.c
+++ b/src/kzg_proofs.c
@@ -210,8 +210,8 @@ C_KZG_RET new_kzg_settings(KZGSettings *ks, const blst_p1 *secret_g1, const blst
     ks->length = length;
 
     // Allocate space for the secrets
-    TRY(c_kzg_malloc((void **)&ks->secret_g1, ks->length * sizeof *ks->secret_g1));
-    TRY(c_kzg_malloc((void **)&ks->secret_g2, ks->length * sizeof *ks->secret_g2));
+    TRY(new_p1(&ks->secret_g1, ks->length));
+    TRY(new_p2(&ks->secret_g2, ks->length));
 
     // Populate the secrets
     for (uint64_t i = 0; i < ks->length; i++) {
diff --git a/src/poly.c b/src/poly.c
index 48112ed..fc95bc9 100644
--- a/src/poly.c
+++ b/src/poly.c
@@ -128,7 +128,7 @@ C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor
  */
 C_KZG_RET new_poly(poly *out, uint64_t length) {
     out->length = length;
-    return c_kzg_malloc((void **)&out->coeffs, length * sizeof *out->coeffs);
+    return new_fr(&out->coeffs, length);
 }
 
 /**
@@ -141,6 +141,8 @@ C_KZG_RET new_poly(poly *out, uint64_t length) {
  * @param[in]  length The number of coefficients, which is one more than the polynomial's degree
  * @retval C_CZK_OK      All is well
  * @retval C_CZK_MALLOC  Memory allocation failed
+ *
+ * @todo This is likely not useful. Remove?
  */
 C_KZG_RET new_poly_with_coeffs(poly *out, const blst_fr *coeffs, uint64_t length) {
     TRY(new_poly(out, length));