From 7c97751c137a86eaa4dc03659576bdcf717e2186 Mon Sep 17 00:00:00 2001
From: Jakub Nabaglo
Date: Wed, 18 Aug 2021 16:45:17 -0700
Subject: [PATCH] Optimized transpose (#191)

* Start work on optimized transpose

* Optimized transpose: 12-14% improvement on benchmark

* Transpose: Avoid bounds checks and change order of loops dynamically

* Transpose: re-enable bounds checks
---
 src/util/mod.rs | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/util/mod.rs b/src/util/mod.rs
index 853e09f1..8e32dc04 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -38,12 +38,32 @@ pub(crate) fn transpose_poly_values<F: Field>(polys: Vec<PolynomialValues<F>>) -
 pub fn transpose<F: Field>(matrix: &[Vec<F>]) -> Vec<Vec<F>> {
     let l = matrix.len();
     let w = matrix[0].len();
-    let mut transposed = vec![vec![F::ZERO; l]; w];
+
+    let mut transposed = vec![vec![]; w];
     for i in 0..w {
-        for j in 0..l {
-            transposed[i][j] = matrix[j][i];
+        transposed[i].reserve_exact(l);
+        unsafe {
+            // After .reserve_exact(l), transposed[i] will have capacity at least l. Hence, set_len
+            // will not cause the buffer to overrun.
+            transposed[i].set_len(l);
         }
     }
+
+    // Optimization: ensure the larger loop is outside.
+    if w >= l {
+        for i in 0..w {
+            for j in 0..l {
+                transposed[i][j] = matrix[j][i];
+            }
+        }
+    } else {
+        for j in 0..l {
+            for i in 0..w {
+                transposed[i][j] = matrix[j][i];
+            }
+        }
+    }
+
     transposed
 }
 