From 4ab91e4b28dcdabf28709c0acb7e33858799fdbd Mon Sep 17 00:00:00 2001
From: Balazs Komuves <bkomuves@gmail.com>
Date: Thu, 22 Jan 2026 22:37:06 +0100
Subject: [PATCH] wrote a different bigint squaring routine, but (on this
 particular machine) it's actually slower than the naive multiplication

---
 src/bn254/bigint.rs     | 46 +++++++++++++++++++++++++++++++++++++++--
 src/bn254/montgomery.rs |  9 +++++++-
 src/bn254/platform.rs   | 11 +++++++---
 3 files changed, 60 insertions(+), 6 deletions(-)
diff --git a/src/bn254/bigint.rs b/src/bn254/bigint.rs
index 8a0dccd..aee9673 100644
--- a/src/bn254/bigint.rs
+++ b/src/bn254/bigint.rs
@@ -5,9 +5,11 @@
 
 #![allow(dead_code)]
 #![allow(non_snake_case)]
+#![allow(unused_parens)]
+#![allow(unused_imports)]
 
 use std::fmt;
-use std::cmp::Ordering;
+use std::cmp::{Ordering,min};
 
 use crate::bn254::platform::*;
 
@@ -205,11 +207,51 @@ impl<const N: usize> BigInt<N> {
     BigInt::multiply(big1,big2)
   }
 
-  // TODO: optimize this!
+  // TODO: optimize this?! (the dedicated routine kept commented out below was not faster)
   pub fn sqr(big: &BigInt<N>) -> BigInt<{N+N}> {
     BigInt::multiply(big,big)
   }
 
+  pub fn sqr_naive(big: &BigInt<N>) -> BigInt<{N+N}> {  // squares by multiplying `big` with itself
+    BigInt::multiply(big,big)
+  }
+
+  // -----------------------------------
+
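+/*
+  NOTE: disabled on purpose -- on the author's machine this variant was slower than the naive multiplication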
+  pub fn sqr_isnt_faster(big: &BigInt<N>) -> BigInt<{N+N}> {
+
+    let mut product : [u32; N+N] = [0; N+N];
+    let mut carry   : u64 = 0;
+
+    for k in 0..(N+N-1) {
+
+      let mut sum_lo: u64 = carry;
+      let mut sum_hi: u64 = 0;
+      for i in 0..min(N,k+1) {
+        let j = k - i;
+        if j < N && i <= j {
+          let (lo,hi) = mulExt32( big.limbs[i], big.limbs[j] );
+          sum_lo += (lo as u64);
+          sum_hi += (hi as u64);
+          if i < j {
+            sum_lo += (lo as u64);
+            sum_hi += (hi as u64);
+          }
+        }
+      }
+      let (u,v) = takeApart64(sum_lo);
+      product[k] = u;
+      carry = sum_hi + (v as u64);
+    }
+
+    product[N+N-1] = (carry as u32);
+    BigInt { limbs: product }
+  }
+
+*/
+
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/bn254/montgomery.rs b/src/bn254/montgomery.rs
index 620958a..3f6dc11 100644
--- a/src/bn254/montgomery.rs
+++ b/src/bn254/montgomery.rs
@@ -83,7 +83,7 @@ impl Mont {
 
     for i in 0..8 {
       let mut carry: u32 = 0;
-      let m: u32 = truncMul32( T[i] , MONT_Q );
+      let m: u32 = mulTrunc32( T[i] , MONT_Q );
       for j in 0..8 {
         let (lo,hi) = mulAddAdd32( m, FIELD_PRIME.limbs[j], carry, T[i+j] );
         T[i+j] = lo;
@@ -136,6 +136,13 @@ impl Mont {
     Mont::redc( BigInt { limbs: tmp } )
   }
 
+  // take a small number (a single u32), interpret it as a residue modulo P,
+  // and convert it to Montgomery representation
+  pub fn convert_from_u32(x: u32) -> Mont {
+    let big: Big = BigInt::from_u32(x);  // build a Big from the single word x
+    Mont::unsafe_convert_from_big( &big )  // NOTE(review): presumably "unsafe" = no range check against P -- confirm
+  }
+
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/bn254/platform.rs b/src/bn254/platform.rs
index 550272a..85d9761 100644
--- a/src/bn254/platform.rs
+++ b/src/bn254/platform.rs
@@ -21,12 +21,11 @@ pub fn subBorrow32(x: u32, y: u32, cin: bool) -> (u32,bool) {
   u32::borrowing_sub(x,y,cin)
 }
 
-pub fn truncMul32(x: u32, y: u32) -> u32 {
+pub fn mulTrunc32(x: u32, y: u32) -> u32 {  // product of x and y truncated to 32 bits (wraps on overflow)
   u32::wrapping_mul(x,y)
 }
 
-pub fn extMul32(x: u32, y: u32) -> (u32,u32) {
-  // u32::carrying_mul(x,y,0)
+pub fn mulExt32(x: u32, y: u32) -> (u32,u32) {  // full product of x and y as two 32-bit words; presumably (lo,hi) -- confirm
   u32::widening_mul(x,y)
 }
 
@@ -38,6 +37,12 @@ pub fn mulAddAdd32(x: u32, y: u32, a: u32, b: u32) -> (u32,u32) {
   u32::carrying_mul_add(x,y,a,b)
 }
 
+pub fn takeApart64(x: u64) -> (u32,u32) {  // split a 64-bit word into its (low, high) 32-bit halves
+  let lo: u32 = (x & 0x_FFFF_FFFF) as u32;  // keep the low 32 bits
+  let hi: u32 = (x >> 32         ) as u32;  // shift the high 32 bits down
+  (lo,hi)
+}
+
 //------------------------------------------------------------------------------
 // portable version