From 58756dd824f17c360432e3ffc915a26770c68e26 Mon Sep 17 00:00:00 2001
From: Balazs Komuves <bkomuves@gmail.com>
Date: Tue, 14 Oct 2025 18:09:30 +0200
Subject: [PATCH] fast C implementation for Goldilocks field + tests for field
 implementations

---
 reference/src/Field/Class.hs                 |  55 ++++
 reference/src/Field/Goldilocks.hs            |  14 +-
 reference/src/Field/Goldilocks/Extension.hs  |  12 +
 reference/src/Field/Goldilocks/Fast.hs       | 153 +++++++++
 reference/src/Field/Goldilocks/Slow.hs       |   9 +
 reference/src/Field/Properties.hs            | 321 +++++++++++++++++++
 reference/src/Field/Tests.hs                 |  47 +++
 reference/src/Outsource/Types.hs             |  44 +++
 reference/src/cbits/compile.sh               |   4 +
 reference/src/cbits/goldilocks.c             | 250 +++++++++++++++
 reference/src/cbits/goldilocks.h             |  39 +++
 reference/src/cbits/goldilocks.o             | Bin 0 -> 5184 bytes
 reference/src/cbits/monolith.c               | 238 ++++++++++++++
 reference/src/cbits/monolith.h               |  12 +
 reference/src/cbits/monolith.o               | Bin 0 -> 9648 bytes
 reference/src/cbits/monolith_constants.inc   |  71 ++++
 reference/src/cbits/monolith_conv_uint64.inc | 267 +++++++++++++++
 17 files changed, 1532 insertions(+), 4 deletions(-)
 create mode 100644 reference/src/Field/Class.hs
 create mode 100644 reference/src/Field/Goldilocks/Fast.hs
 create mode 100644 reference/src/Field/Properties.hs
 create mode 100644 reference/src/Field/Tests.hs
 create mode 100644 reference/src/Outsource/Types.hs
 create mode 100755 reference/src/cbits/compile.sh
 create mode 100644 reference/src/cbits/goldilocks.c
 create mode 100644 reference/src/cbits/goldilocks.h
 create mode 100644 reference/src/cbits/goldilocks.o
 create mode 100644 reference/src/cbits/monolith.c
 create mode 100644 reference/src/cbits/monolith.h
 create mode 100644 reference/src/cbits/monolith.o
 create mode 100644 reference/src/cbits/monolith_constants.inc
 create mode 100644 reference/src/cbits/monolith_conv_uint64.inc

diff --git a/reference/src/Field/Class.hs b/reference/src/Field/Class.hs
new file mode 100644
index 0000000..01bce31
--- /dev/null
+++ b/reference/src/Field/Class.hs
@@ -0,0 +1,55 @@
+
+module Field.Class where
+
+--------------------------------------------------------------------------------
+
+import Data.Proxy
+
+import System.Random
+
+import qualified Field.Goldilocks           as Goldi   
+import qualified Field.Goldilocks.Extension as GoldiExt
+
+--------------------------------------------------------------------------------
+
+class (Show a, Eq a, Num a, Fractional a) => Field a where
+  fieldSize :: Proxy a -> Integer
+  zero      :: a
+  one       :: a
+  isZero    :: a -> Bool
+  isOne     :: a -> Bool
+  square    :: a -> a
+  power     :: a -> Integer -> a
+  power_    :: a -> Int     -> a
+  rndIO     :: IO a
+
+inverse :: Field a => a -> a
+inverse = recip
+
+--------------------------------------------------------------------------------
+
+instance Field Goldi.F where
+  fieldSize _ = Goldi.goldilocksPrime
+  zero        = Goldi.zero
+  one         = Goldi.one
+  isZero      = Goldi.isZero
+  isOne       = Goldi.isOne
+  square      = Goldi.sqr
+  power       = Goldi.pow
+  power_      = Goldi.pow_
+  rndIO       = randomIO
+
+--------------------------------------------------------------------------------
+
+instance Field GoldiExt.FExt where
+  fieldSize _ = (Goldi.goldilocksPrime ^ 2)
+  zero        = GoldiExt.zero
+  one         = GoldiExt.one
+  isZero      = GoldiExt.isZero
+  isOne       = GoldiExt.isOne
+  square      = GoldiExt.sqr
+  power       = GoldiExt.pow
+  power_      = GoldiExt.pow_
+  rndIO       = randomIO
+
+--------------------------------------------------------------------------------
diff --git a/reference/src/Field/Goldilocks.hs b/reference/src/Field/Goldilocks.hs
index 35787d0..971ed4e 100644
--- a/reference/src/Field/Goldilocks.hs
+++ b/reference/src/Field/Goldilocks.hs
@@ -1,8 +1,14 @@
 
-module Field.Goldilocks 
-  ( module Field.Goldilocks.Slow
-  )
-  where
+{-# LANGUAGE CPP #-}
 
+#ifdef USE_NAIVE_HASKELL
+
+module Field.Goldilocks ( module Field.Goldilocks.Slow ) where
 import Field.Goldilocks.Slow
 
+#else
+
+module Field.Goldilocks ( module Field.Goldilocks.Fast ) where
+import Field.Goldilocks.Fast
+
+#endif
diff --git a/reference/src/Field/Goldilocks/Extension.hs b/reference/src/Field/Goldilocks/Extension.hs
index 288181c..df35659 100644
--- a/reference/src/Field/Goldilocks/Extension.hs
+++ b/reference/src/Field/Goldilocks/Extension.hs
@@ -17,6 +17,7 @@ import System.Random
 import Data.Binary
 
 import Field.Goldilocks ( F )
+import qualified Field.Goldilocks as Goldi
 
 --------------------------------------------------------------------------------
 
@@ -58,6 +59,17 @@ instance Random F2 where
 
 --------------------------------------------------------------------------------
 
+zero, one, two :: F2
+zero = F2 Goldi.zero Goldi.zero 
+one  = F2 Goldi.one  Goldi.zero 
+two  = F2 Goldi.two  Goldi.zero 
+
+isZero, isOne :: F2 -> Bool
+isZero (F2 r i) = Goldi.isZero r && Goldi.isZero i
+isOne  (F2 r i) = Goldi.isOne  r && Goldi.isZero i
+
+--------------------------------------------------------------------------------
+
 inj :: F -> F2
 inj r = F2 r 0 
 
diff --git a/reference/src/Field/Goldilocks/Fast.hs b/reference/src/Field/Goldilocks/Fast.hs
new file mode 100644
index 0000000..ddeefac
--- /dev/null
+++ b/reference/src/Field/Goldilocks/Fast.hs
@@ -0,0 +1,153 @@
+
+-- | Bindings to a C implementation of the Goldilocks prime field
+
+{-# LANGUAGE ForeignFunctionInterface, BangPatterns, NumericUnderscores #-}
+module Field.Goldilocks.Fast where
+
+--------------------------------------------------------------------------------
+
+import Prelude hiding ( div )
+import qualified Prelude
+
+import Data.Bits
+import Data.Word
+import Data.Ratio
+
+import Foreign.C
+
+import System.Random
+
+import Data.Binary
+import Data.Binary.Get ( getWord64le )
+import Data.Binary.Put ( putWord64le )
+
+import Text.Printf
+
+--------------------------------------------------------------------------------
+
+type F = Goldilocks
+
+fromF :: F -> Word64
+fromF (MkGoldilocks x) = x
+
+unsafeToF :: Word64 -> F
+unsafeToF = MkGoldilocks
+
+toF :: Word64 -> F
+toF = mkGoldilocks . fromIntegral
+
+intToF :: Int -> F
+intToF = mkGoldilocks . fromIntegral
+
+instance Binary F where
+  put x = putWord64le (fromF x)
+  get = toF <$> getWord64le
+
+--------------------------------------------------------------------------------
+
+newtype Goldilocks 
+  = MkGoldilocks Word64
+  deriving Eq
+
+instance Show Goldilocks where
+  show (MkGoldilocks k) = printf "0x%016x" k
+
+zero, one, two :: Goldilocks
+zero = MkGoldilocks 0
+one  = MkGoldilocks 1
+two  = MkGoldilocks 2
+
+isZero, isOne :: Goldilocks -> Bool
+isZero (MkGoldilocks x) = x == 0
+isOne  (MkGoldilocks x) = x == 1
+
+--------------------------------------------------------------------------------
+
+instance Num Goldilocks where
+  fromInteger = mkGoldilocks
+  negate = neg
+  (+)    = add
+  (-)    = sub
+  (*)    = mul
+  abs    = id
+  signum _ = MkGoldilocks 1
+
+instance Fractional Goldilocks where
+  fromRational y = fromInteger (numerator y) `div` fromInteger (denominator y)
+  recip  = inv
+  (/)    = div
+
+instance Random Goldilocks where
+  -- random :: RandomGen g => g -> (a, g) 
+  random  g = let (x,g') = randomR (0,goldilocksPrimeWord64-1) g in (MkGoldilocks x, g')
+  randomR = error "randomR/Goldilocks: doesn't make much sense"
+
+--------------------------------------------------------------------------------
+
+-- | @p = 2^64 - 2^32 + 1@
+goldilocksPrime :: Integer
+goldilocksPrime = 0x_ffff_ffff_0000_0001
+
+goldilocksPrimeWord64 :: Word64
+goldilocksPrimeWord64 = 0x_ffff_ffff_0000_0001
+
+modp :: Integer -> Integer
+modp a = mod a goldilocksPrime
+
+mkGoldilocks :: Integer -> Goldilocks
+mkGoldilocks = MkGoldilocks . fromInteger . modp
+
+-- | A fixed generator of the multiplicative subgroup of the field
+theMultiplicativeGenerator :: Goldilocks
+theMultiplicativeGenerator = mkGoldilocks 7
+
+--------------------------------------------------------------------------------
+
+foreign import ccall unsafe "goldilocks_neg" c_goldilocks_neg :: Word64 -> Word64
+foreign import ccall unsafe "goldilocks_add" c_goldilocks_add :: Word64 -> Word64 -> Word64
+foreign import ccall unsafe "goldilocks_sub" c_goldilocks_sub :: Word64 -> Word64 -> Word64
+foreign import ccall unsafe "goldilocks_sqr" c_goldilocks_sqr :: Word64 -> Word64
+foreign import ccall unsafe "goldilocks_mul" c_goldilocks_mul :: Word64 -> Word64 -> Word64
+foreign import ccall unsafe "goldilocks_inv" c_goldilocks_inv :: Word64 -> Word64
+foreign import ccall unsafe "goldilocks_div" c_goldilocks_div :: Word64 -> Word64 -> Word64
+foreign import ccall unsafe "goldilocks_pow" c_goldilocks_pow :: Word64 -> CInt   -> Word64
+
+neg :: Goldilocks -> Goldilocks
+neg (MkGoldilocks k) = MkGoldilocks (c_goldilocks_neg k) 
+
+add :: Goldilocks -> Goldilocks -> Goldilocks
+add (MkGoldilocks a) (MkGoldilocks b) = MkGoldilocks (c_goldilocks_add a b) 
+
+sub :: Goldilocks -> Goldilocks -> Goldilocks
+sub (MkGoldilocks a) (MkGoldilocks b) = MkGoldilocks (c_goldilocks_sub a b) 
+
+sqr :: Goldilocks -> Goldilocks
+sqr (MkGoldilocks a)  = MkGoldilocks (c_goldilocks_sqr a) 
+
+mul :: Goldilocks -> Goldilocks -> Goldilocks
+mul (MkGoldilocks a) (MkGoldilocks b) = MkGoldilocks (c_goldilocks_mul a b) 
+
+inv :: Goldilocks -> Goldilocks
+inv (MkGoldilocks a)  = MkGoldilocks (c_goldilocks_inv a) 
+
+div :: Goldilocks -> Goldilocks -> Goldilocks
+div (MkGoldilocks a) (MkGoldilocks b) = MkGoldilocks (c_goldilocks_div a b) 
+
+--------------------------------------------------------------------------------
+
+pow_ :: Goldilocks -> Int -> Goldilocks
+pow_ (MkGoldilocks x) e = MkGoldilocks $ c_goldilocks_pow x (fromIntegral e :: CInt)
+
+pow :: Goldilocks -> Integer -> Goldilocks
+pow x e 
+  | e == 0    = 1
+  | e <  0    = pow (inv x) (negate e)
+  | otherwise = go 1 x e
+  where
+    go !acc _  0     = acc
+    go !acc !s !expo = case expo .&. 1 of
+      0 -> go acc     (sqr s) (shiftR expo 1)
+      _ -> go (acc*s) (sqr s) (shiftR expo 1)
+
+--------------------------------------------------------------------------------
+
diff --git a/reference/src/Field/Goldilocks/Slow.hs b/reference/src/Field/Goldilocks/Slow.hs
index 97dbb97..1692f6a 100644
--- a/reference/src/Field/Goldilocks/Slow.hs
+++ b/reference/src/Field/Goldilocks/Slow.hs
@@ -47,6 +47,15 @@ newtype Goldilocks
 instance Show Goldilocks where
   show (MkGoldilocks k) = printf "0x%016x" k
 
+zero, one, two :: Goldilocks
+zero = MkGoldilocks 0
+one  = MkGoldilocks 1
+two  = MkGoldilocks 2
+
+isZero, isOne :: Goldilocks -> Bool
+isZero (MkGoldilocks x) = x == 0
+isOne  (MkGoldilocks x) = x == 1
+
 --------------------------------------------------------------------------------
 
 instance Num Goldilocks where
diff --git a/reference/src/Field/Properties.hs b/reference/src/Field/Properties.hs
new file mode 100644
index 0000000..972b9e2
--- /dev/null
+++ b/reference/src/Field/Properties.hs
@@ -0,0 +1,321 @@
+
+
+-- | Property tests for rings and fields
+
+{-# LANGUAGE ScopedTypeVariables, Rank2Types, TypeApplications, FlexibleInstances, ConstraintKinds #-}
+module Field.Properties where
+
+--------------------------------------------------------------------------------
+
+import Data.Proxy
+import Data.IORef
+
+import Control.Monad
+import System.IO
+import System.Random
+
+import Field.Class
+
+--------------------------------------------------------------------------------
+-- compatibility hacks
+
+type Ring a = Field a
+
+--------------------------------------------------------------------------------
+
+runFieldTests :: forall a. Field a => IORef Bool -> Int -> Proxy a -> IO ()
+runFieldTests okflag n pxy = do
+  runRingTests      okflag n pxy
+  runFieldOnlyTests okflag n pxy 
+
+
+runRingTests :: forall a. Ring a => IORef Bool -> Int -> Proxy a -> IO ()
+runRingTests okflag n pxy = do
+
+  forM_ ringProps $ \prop -> case prop of
+  
+    RingProp1 test name -> doTests okflag n name $ do
+      x <- rndIO @a
+      return (test x) 
+
+    RingProp2 test name -> doTests okflag n name $ do
+      x <- rndIO @a
+      y <- rndIO @a
+      return (test x y) 
+
+    RingProp3 test name -> doTests okflag n name $ do
+      x <- rndIO @a
+      y <- rndIO @a
+      z <- rndIO @a
+      return (test x y z) 
+
+runFieldOnlyTests :: forall a. Field a => IORef Bool -> Int -> Proxy a -> IO ()
+runFieldOnlyTests okflag n pxy = do
+
+  forM_ fieldOnlyProps $ \prop -> case prop of
+  
+    FieldProp1 test name -> doTests okflag n name $ do
+      x <- rndIO @a
+      return (test x) 
+
+    FieldProp2 test name -> doTests okflag n name $ do
+      x <- rndIO @a
+      y <- rndIO @a
+      return (test x y) 
+
+    FieldProp3 test name -> doTests okflag n name $ do
+      x <- rndIO @a
+      y <- rndIO @a
+      z <- rndIO @a
+      return (test x y z) 
+
+    FieldPropE test name -> doTests okflag n name $ do
+      x <- rndIO @a
+      e <- randomRIO (-1000,1000::Int)
+      return (test x e) 
+
+--------------------------------------------------------------------------------
+
+doTests :: IORef Bool -> Int -> String -> IO Bool -> IO Bool
+doTests okflag n name testAction = 
+  do
+    let str = " - " ++ name ++ "... " 
+    putStr $ str ++ replicate (30 - length str) ' '
+    hFlush stdout
+    oks <- forM [1..n] $ \i -> testAction
+    let ok = and oks
+    case ok of
+      True  -> putStrLn $ "ok (passed " ++ show n ++ " tests)"
+      False -> do
+        writeIORef okflag False
+        putStrLn $ "FAILED!! (FAILED " ++ show (countFalses oks) ++ " tests!)"
+    return ok
+  where
+    countFalses :: [Bool] -> Int
+    countFalses = length . filter (==False)
+
+--------------------------------------------------------------------------------
+
+data RingProp
+  = RingProp1  (forall a. Ring a  => a -> Bool          ) String
+  | RingProp2  (forall a. Ring a  => a -> a -> Bool     ) String
+  | RingProp3  (forall a. Ring a  => a -> a -> a -> Bool) String
+
+data FieldProp
+  = FieldProp1 (forall a. Field a => a -> Bool          ) String
+  | FieldProp2 (forall a. Field a => a -> a -> Bool     ) String
+  | FieldProp3 (forall a. Field a => a -> a -> a -> Bool) String
+  | FieldPropE (forall a. Field a => a -> Int -> Bool   ) String
+
+--------------------------------------------------------------------------------
+
+ringProps :: [RingProp]
+ringProps = 
+  [ RingProp1 prop_add_left_unit               "add left unit"
+  , RingProp1 prop_add_right_unit              "add right unit"
+  , RingProp1 prop_add_left_inv                "add left inv"
+  , RingProp1 prop_add_right_inv               "add right inv"
+  , RingProp2 prop_add_commutative             "add comm"
+  , RingProp3 prop_add_associative             "add assoc"
+  , RingProp2 prop_sub_def                     "sub def"
+  , RingProp3 prop_add_sub_associative_1       "add-sub assoc /1"
+  , RingProp3 prop_add_sub_associative_2       "add-sub assoc /2"
+  , RingProp3 prop_add_sub_associative_3       "add-sub assoc /3"
+  , RingProp1 prop_is_zero                     "is zero"
+  , RingProp1 prop_is_one                      "is one"
+  , RingProp1 prop_is_equal                    "is equal"
+  , RingProp1 prop_mul_left_unit               "mul left unit"
+  , RingProp1 prop_mul_right_unit              "mul right unit"
+  , RingProp2 prop_mul_commutative             "mul comm"
+  , RingProp3 prop_mul_associative             "mul assoc"
+  , RingProp1 prop_square_def                  "square def"
+  , RingProp2 prop_square_distrib              "square distributive"
+  , RingProp3 prop_add_mul_left_distributive   "add+mul left distr"
+  , RingProp3 prop_add_mul_right_distributive  "add+mul right distr"
+  , RingProp3 prop_sub_mul_left_distributive   "sub+mul left distr"
+  , RingProp3 prop_sub_mul_right_distributive  "sub+mul right distr"
+  , RingProp1 prop_power_0                     "0-th power"
+  , RingProp1 prop_power_1                     "1-th power"
+  , RingProp1 prop_power_2                     "2-th power"
+  , RingProp1 prop_power_3                     "3-th power"
+  , RingProp1 prop_power_4                     "4-th power"
+  , RingProp1 prop_power_5                     "5-th power"
+  ]
+
+fieldOnlyProps :: [FieldProp]
+fieldOnlyProps = 
+  [ FieldProp1 prop_mul_left_inv               "mul left inf"
+  , FieldProp1 prop_mul_right_inv              "mul right inf"
+  , FieldProp2 prop_div_def                    "div def"
+  , FieldProp1 prop_inv_def                    "inv def"
+  , FieldProp2 prop_div_test                   "div defining prop."
+  , FieldProp1 prop_inv_fermat                 "inv == fermat"
+  , FieldProp1 prop_fermat_1                   "fermat/1"
+  , FieldProp1 prop_fermat_2                   "fermat/2"
+  , FieldPropE prop_power_vs_power_            "power vs. power_"
+  , FieldProp1 prop_power_neg                  "negative power"
+  , FieldProp3 prop_mul_div_associative_1      "mul-div assoc /1"
+  , FieldProp3 prop_mul_div_associative_2      "mul-div assoc /2"
+  , FieldProp3 prop_mul_div_associative_3      "mul-div assoc /3"
+--  , FieldProp3 prop_batch_inverse              "batch inverse"
+--  , FieldProp1 prop_frobenius                  "frobenius == frobeniusNaive"
+  ]
+
+--------------------------------------------------------------------------------
+-- * Ring properties
+
+prop_add_left_unit :: Ring a => a -> Bool
+prop_add_left_unit x = zero + x == x
+
+prop_add_right_unit :: Ring a => a -> Bool
+prop_add_right_unit x = x + zero == x
+
+prop_add_left_inv :: Ring a => a -> Bool
+prop_add_left_inv x = (negate x) + x == zero
+
+prop_add_right_inv :: Ring a => a -> Bool
+prop_add_right_inv x = x + (negate x) == zero
+
+prop_add_commutative :: Ring a => a -> a -> Bool
+prop_add_commutative x y = (x + y == y + x)
+
+prop_add_associative :: Ring a => a -> a -> a -> Bool
+prop_add_associative x y z = ((x + y) + z) == (x + (y + z))
+
+prop_sub_def :: Ring a => a -> a -> Bool
+prop_sub_def x y = (x + (negate y) == x - y)
+
+prop_add_sub_associative_1 :: Ring a => a -> a -> a -> Bool
+prop_add_sub_associative_1 x y z = ((x + y) - z) == (x + (y - z))
+
+prop_add_sub_associative_2 :: Ring a => a -> a -> a -> Bool
+prop_add_sub_associative_2 x y z = ((x - y) + z) == (x - (y - z))
+
+prop_add_sub_associative_3 :: Ring a => a -> a -> a -> Bool
+prop_add_sub_associative_3 x y z = ((x - y) - z) == (x - (y + z))
+
+----------------------------------------
+
+prop_is_zero :: forall a. Ring a => a -> Bool
+prop_is_zero x = isZero (zero @a) && isZero x == (x == 0)
+
+prop_is_one :: forall a. Ring a => a -> Bool
+prop_is_one x = isOne (one @a) && isOne x == (x == 1)
+
+prop_is_equal :: forall a. Ring a => a -> Bool
+prop_is_equal x = and
+  [ zero  == zero @a
+  , zero  /= one  @a
+  , one   /= zero @a
+  , one   == one  @a
+  , x     == x 
+  , (x+1) /= x    
+  , x     /= (x+1)
+  ]
+
+----------------------------------------
+
+prop_mul_left_unit :: Ring a => a -> Bool
+prop_mul_left_unit x = (one * x == x)
+
+prop_mul_right_unit :: Ring a => a -> Bool
+prop_mul_right_unit x = (x * one == x)
+
+prop_mul_commutative :: Ring a => a -> a -> Bool
+prop_mul_commutative x y = (x * y == y * x)
+
+prop_mul_associative :: Ring a => a -> a -> a -> Bool
+prop_mul_associative x y z = ((x * y) * z) == (x * (y * z))
+
+prop_square_def :: Ring a => a -> Bool
+prop_square_def x = (square x == x*x)
+
+prop_square_distrib :: Ring a => a -> a -> Bool
+prop_square_distrib x y =  (square (x+y) == square x + 2*x*y + square y)
+                        && (square (x-y) == square x - 2*x*y + square y)
+
+----------------------------------------
+
+prop_add_mul_left_distributive :: Ring a => a -> a -> a -> Bool
+prop_add_mul_left_distributive x y z = (x + y) * z ==  x*z + y*z
+
+prop_add_mul_right_distributive :: Ring a => a -> a -> a -> Bool
+prop_add_mul_right_distributive x y z = x * (y + z) ==  x*y + x*z
+
+prop_sub_mul_left_distributive :: Ring a => a -> a -> a -> Bool
+prop_sub_mul_left_distributive x y z = (x - y) * z ==  x*z - y*z
+
+prop_sub_mul_right_distributive :: Ring a => a -> a -> a -> Bool
+prop_sub_mul_right_distributive x y z = x * (y - z) ==  x*y - x*z
+
+--------------------------------------------------------------------------------
+
+prop_power_0 :: Ring a => a -> Bool
+prop_power_0 x = power x 0 == (if x == 0 then zero else one)
+
+prop_power_1 :: Ring a => a -> Bool
+prop_power_1 x = power x 1 == x
+
+prop_power_2 :: Ring a => a -> Bool
+prop_power_2 x = power x 2 == x *x
+
+prop_power_3 :: Ring a => a -> Bool
+prop_power_3 x = power x 3 == x*x*x
+
+prop_power_4 :: Ring a => a -> Bool
+prop_power_4 x = power x 4 == x*x*x*x
+
+prop_power_5 :: Ring a => a -> Bool
+prop_power_5 x = power x 5 == x*x*x*x*x
+
+--------------------------------------------------------------------------------
+-- * Field properties
+
+prop_mul_left_inv :: Field a => a -> Bool
+prop_mul_left_inv x = isZero x || (inverse x) * x == one
+
+prop_mul_right_inv :: Field a => a -> Bool
+prop_mul_right_inv x = isZero x || x * (inverse x) == one 
+
+prop_div_def :: Field a => a -> a -> Bool
+prop_div_def x y = (x * (inverse y) == x / y)
+
+prop_inv_def :: Field a => a -> Bool
+prop_inv_def x = (inverse x == 1 / x)
+
+prop_div_test :: Field a => a -> a -> Bool
+prop_div_test x y = isZero y || (x/y)*y == x
+
+prop_inv_fermat :: forall a. Field a => a -> Bool
+prop_inv_fermat x = (inverse x) == power x (p - 2) where p = fieldSize (Proxy @a)
+
+prop_fermat_1 :: forall a. Field a => a -> Bool
+prop_fermat_1 x = power x p == x where p = fieldSize (Proxy @a)
+
+prop_fermat_2 :: forall a. Field a => a -> Bool
+prop_fermat_2 x = power x (p - 1) == one where p = fieldSize (Proxy @a)
+
+prop_power_vs_power_ :: forall a. Field a => a -> Int -> Bool
+prop_power_vs_power_ x e = power x (fromIntegral e) == power_ x e
+
+prop_power_neg :: forall a. Field a => a -> Bool
+prop_power_neg x = power x (-1) == inverse x
+
+prop_mul_div_associative_1 :: Field a => a -> a -> a -> Bool
+prop_mul_div_associative_1 x y z = ((x * y) / z) == (x * (y / z))
+
+prop_mul_div_associative_2 :: Field a => a -> a -> a -> Bool
+prop_mul_div_associative_2 x y z = ((x / y) * z) == (x / (y / z))
+
+prop_mul_div_associative_3 :: Field a => a -> a -> a -> Bool
+prop_mul_div_associative_3 x y z = ((x / y) / z) == (x / (y * z))
+
+-- prop_batch_inverse :: Field a => a -> a -> a -> Bool
+-- prop_batch_inverse x y z = any (==0) as || (map recip as == bs) where
+--   as = [ x,y,z, x+y, y+z, z+x, x+y+z ]
+--   bs = batchInverse as
+
+-- prop_frobenius :: Field a => a -> Bool
+-- prop_frobenius x = (frobenius x == frobeniusNaive x)
+
+--------------------------------------------------------------------------------
diff --git a/reference/src/Field/Tests.hs b/reference/src/Field/Tests.hs
new file mode 100644
index 0000000..423ab8a
--- /dev/null
+++ b/reference/src/Field/Tests.hs
@@ -0,0 +1,47 @@
+
+module Field.Tests where
+
+--------------------------------------------------------------------------------
+
+import Control.Monad
+
+import Data.Proxy
+import Data.IORef
+
+import Field.Class
+import Field.Properties
+
+import Field.Goldilocks           ( F    )
+import Field.Goldilocks.Extension ( FExt )
+
+--------------------------------------------------------------------------------
+
+nn = 1000
+
+runMyFieldTests :: IO Bool
+runMyFieldTests = do 
+  ok1 <- runGoldilocksTests
+  ok2 <- runGoldilocksExtensionTests
+  return (ok1 && ok2)
+
+--------------------------------------------------------------------------------
+
+runGoldilocksTests :: IO Bool
+runGoldilocksTests = do
+  putStrLn "\nTests for the Goldilocks field:"
+  putStrLn   "==============================="
+  okflag <- newIORef True
+  runFieldTests okflag nn (Proxy @F)
+  readIORef okflag
+
+runGoldilocksExtensionTests :: IO Bool
+runGoldilocksExtensionTests = do
+  putStrLn "\nTests for the Goldilocks quadratic extension field:"
+  putStrLn   "==================================================="
+  okflag <- newIORef True
+  runFieldTests okflag nn (Proxy @FExt)
+  readIORef okflag
+
+--------------------------------------------------------------------------------
+
+
diff --git a/reference/src/Outsource/Types.hs b/reference/src/Outsource/Types.hs
new file mode 100644
index 0000000..b7cb5c5
--- /dev/null
+++ b/reference/src/Outsource/Types.hs
@@ -0,0 +1,44 @@
+
+{-# LANGUAGE StrictData, RecordWildCards #-}
+module Outsource.Types where
+
+--------------------------------------------------------------------------------
+
+import FRI.Types
+
+--------------------------------------------------------------------------------
+
+-- | The type parameter is only there, because in the proof, we don't want to
+-- repeat the FRI configuration (which is already included in the FRI proof). 
+--
+-- It's a bit ugly, but hey this is just a prototype anyway!
+--
+data OutsourceConfig' friconfig = MkOutsourceConfig
+  { outsrcFriConfig   :: friconfig     -- ^ the FRI protocol configuration
+  , outsrcKeepParity  :: Log2          -- ^ how much parity data to keep: Original data size times @2^(-k)@
+  }
+  deriving (Eq,Show)
+
+type OutsourceConfigFull = OutsourceConfig' FriConfig
+type OutsourceConfig_    = OutsourceConfig' ()
+
+-- | The size of the rows (= number of columns in the data matrix)
+outSrcNColumns :: OutsourceConfigFull -> Int
+outSrcNColumns = friNColumns . outsrcFriConfig
+
+--------------------------------------------------------------------------------
+
+-- | Proof that the outsourcing of Reed-Solomon is done correctly.
+--
+-- This is checked against the original data Merkle root and RS-encoded Merkle root
+data OutsourceProof = MkOutsourceProof
+  { outsrcConfig     :: OutsourceConfig' ()      -- ^ we don't want to repeat the FRI configuration...
+  , outsrcFriProof   :: FriProof                 -- ^ ...which is already included in the FRI proof
+  , outsrcConnection :: ConnectionProof          -- ^ connect the original data to the parity data
+  }
+  deriving (Eq,Show)
+
+data ConnectionProof = MkConnectionProof
+  -- TODO
+
+--------------------------------------------------------------------------------
diff --git a/reference/src/cbits/compile.sh b/reference/src/cbits/compile.sh
new file mode 100755
index 0000000..628824a
--- /dev/null
+++ b/reference/src/cbits/compile.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+gcc -c -O2 goldilocks.c
+gcc -c -O2 monolith.c
diff --git a/reference/src/cbits/goldilocks.c b/reference/src/cbits/goldilocks.c
new file mode 100644
index 0000000..c404fba
--- /dev/null
+++ b/reference/src/cbits/goldilocks.c
@@ -0,0 +1,250 @@
+
+#include <stdint.h>
+#include <stdio.h>      // for testing only
+#include <assert.h>
+
+#include "goldilocks.h"
+
+//------------------------------------------------------------------------------
+
+#define GOLDILOCKS_HALFPRIME_PLUS1 0x7fffffff80000001
+
+//------------------------------------------------------------------------------
+// *** Goldilocks field ***
+
+int goldilocks_isvalid(uint64_t x) {
+  return (x < GOLDILOCKS_PRIME);
+}
+
+uint64_t goldilocks_neg(uint64_t x) {
+  return (x==0) ? 0 : (GOLDILOCKS_PRIME - x);
+}
+
+uint64_t goldilocks_add(uint64_t x, uint64_t y) {
+  uint64_t z = x + y;
+  return ( (z >= GOLDILOCKS_PRIME) || (z<x) ) ? (z - GOLDILOCKS_PRIME) : z;
+}
+
+uint64_t goldilocks_add_to_uint64(uint64_t x, uint64_t y) {
+  uint64_t z = x + y;
+  return (z<x) ? (z - GOLDILOCKS_PRIME) : z;
+}
+
+uint64_t goldilocks_sub(uint64_t x, uint64_t y) {
+  uint64_t z = x - y;
+  return (z > x) ? (z + GOLDILOCKS_PRIME) : z;
+}
+
+uint64_t goldilocks_sub_safe(uint64_t x, uint64_t y) {
+  return goldilocks_add( x , goldilocks_neg(y) );
+}
+
+//--------------------------------------
+
+uint64_t goldilocks_rdc(__uint128_t x) {
+  // x = n0 + 2^64 * n1 + 2^96 * n2
+  uint64_t n0 = (uint64_t)x;
+  uint64_t n1 = (x >> 64) & 0xffffffff;
+  uint64_t n2 = (x >> 96);
+  
+  uint64_t mid = (n1 << 32) - n1;     // (2^32 - 1) * n1
+  uint64_t tmp = n0 + mid;
+  if (tmp < n0) { tmp -= GOLDILOCKS_PRIME; }
+
+  uint64_t res = tmp - n2;
+  if (res > tmp) { res += GOLDILOCKS_PRIME; }
+  return (res >= GOLDILOCKS_PRIME) ? (res - GOLDILOCKS_PRIME) : res;
+}
+
+// reduce to 64-bit, but it can be still bigger than `p`
+uint64_t goldilocks_rdc_to_uint64(__uint128_t x) {
+  // x = n0 + 2^64 * n1 + 2^96 * n2
+  uint64_t n0 = (uint64_t)x;
+  uint64_t n1 = (x >> 64) & 0xffffffff;
+  uint64_t n2 = (x >> 96);
+  
+  uint64_t mid = (n1 << 32) - n1;     // (2^32 - 1) * n1
+  uint64_t tmp = n0 + mid;
+  if (tmp < n0) { tmp -= GOLDILOCKS_PRIME; }
+
+  uint64_t res = tmp - n2;
+  if (res > tmp) { res += GOLDILOCKS_PRIME; }
+  return res;
+}
+
+// we assume x < 2^96
+uint64_t goldilocks_rdc_small(__uint128_t x) {
+  // x = n0 + 2^64 * n1
+  uint64_t n0 = (uint64_t)x;
+  uint64_t n1 = (x >> 64);
+
+  uint64_t mid = (n1 << 32) - n1;     // (2^32 - 1) * n1
+  uint64_t tmp = n0 + mid;
+  if (tmp < n0) { tmp -= GOLDILOCKS_PRIME; }
+
+  uint64_t res = tmp;
+  return (res >= GOLDILOCKS_PRIME) ? (res - GOLDILOCKS_PRIME) : res;
+}
+
+//--------------------------------------
+
+uint64_t goldilocks_mul(uint64_t x, uint64_t y) {
+  __uint128_t z = (__uint128_t)x * (__uint128_t)y;
+  return goldilocks_rdc(z); 
+}
+
+uint64_t goldilocks_mul_to_uint64(uint64_t x, uint64_t y) {
+  __uint128_t z = (__uint128_t)x * (__uint128_t)y;
+  return goldilocks_rdc_to_uint64(z); 
+}
+
+uint64_t goldilocks_mul_add128(uint64_t x, uint64_t y, __uint128_t z) {
+  __uint128_t w = (__uint128_t)x * (__uint128_t)y + z;
+  return goldilocks_rdc(w); 
+}
+
+uint64_t goldilocks_sqr(uint64_t x) {
+  __uint128_t z = (__uint128_t)x * (__uint128_t)x;
+  return goldilocks_rdc(z); 
+}
+
+uint64_t goldilocks_sqr_add(uint64_t x, uint64_t y) {
+  __uint128_t z = (__uint128_t)x * x + y;
+  return goldilocks_rdc(z); 
+}
+
+// only reduce to uint64, not to [0..p-1]
+uint64_t goldilocks_sqr_add_to_uint64(uint64_t x, uint64_t y) {
+  __uint128_t z = (__uint128_t)x * x + y;
+  return goldilocks_rdc_to_uint64(z); 
+}
+
+uint64_t goldilocks_mul_small(uint64_t x, uint32_t y) {
+  __uint128_t z = (__uint128_t)x * (__uint128_t)y;
+  return goldilocks_rdc_small(z); 
+}
+
+//------------------------------------------------------------------------------
+
+uint64_t goldilocks_euclid(uint64_t x0, uint64_t y0, uint64_t u0, uint64_t v0) {
+
+  uint64_t x = x0;
+  uint64_t y = y0;
+  uint64_t u = u0;
+  uint64_t v = v0;
+
+  while( ( (u!=1) && (v!=1) ) ) {
+
+    while (!(u & 1ull)) {
+      u = u >> 1;
+      int odd = x & 1ull;
+      x = x >> 1;
+      if (odd) { x += GOLDILOCKS_HALFPRIME_PLUS1; }
+    }
+
+    while (!(v & 1ull)) {
+      v = v >> 1;
+      int odd = y & 1ull;
+      y = y >> 1;
+      if (odd) { y += GOLDILOCKS_HALFPRIME_PLUS1; }
+    }
+
+    if (u < v) {
+      // u-v < 0, that is, u < v
+      v = v - u;
+      y = goldilocks_sub(y , x);
+    }
+    else {
+      // u-v >= 0, that is, u >= v
+      u = u - v;
+      x = goldilocks_sub(x , y);
+    }
+  
+  }
+
+  if (u == 1) { 
+    return x;
+  } 
+  else { 
+    return y;
+  }
+}
+
+uint64_t goldilocks_div(uint64_t a, uint64_t b) {
+  return goldilocks_euclid(a,0,b,GOLDILOCKS_PRIME);
+}
+
+uint64_t goldilocks_inv(uint64_t a) {
+  return goldilocks_div(1, a);
+}
+
+//------------------------------------------------------------------------------
+
+uint64_t goldilocks_pow(uint64_t base, int expo) {
+  if (expo == 0) { return 1; }
+  if (expo <  0) { return goldilocks_pow( goldilocks_inv(base) , -expo ); }
+
+  int      e   = expo;
+  uint64_t sq  = base;
+  uint64_t acc = 1;
+
+  while (e != 0) { 
+    if ((e & 1) != 0) {
+      acc = goldilocks_mul( acc, sq );
+    } 
+    if (e > 0) {
+      sq = goldilocks_mul( sq , sq );
+      e  = e >> 1;
+    }
+  }
+
+  return acc;
+}
+
+//==============================================================================
+// *** debugging ***
+
+void debug_print_state(const char *msg, int n, uint64_t *state) {
+  printf("-----------------\n");
+  printf("%s\n",msg);
+  for(int i=0;i<n;i++) {
+    printf(" - 0x%016llx = %llu\n",state[i],state[i]);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+#define MASK 0x3fffffffffffffffULL
+
+// NOTE: we assume a little-endian architecture
+void goldilocks_convert_31_bytes_to_4_field_elements(const uint8_t *ptr, uint64_t *felts) {
+  const uint64_t *q0  = (const uint64_t*)(ptr   );
+  const uint64_t *q7  = (const uint64_t*)(ptr+ 7);
+  const uint64_t *q15 = (const uint64_t*)(ptr+15);
+  const uint64_t *q23 = (const uint64_t*)(ptr+23);
+
+  felts[0] =  (q0 [0]) & MASK;
+  felts[1] = ((q7 [0]) >> 6) | ((uint64_t)(ptr[15] & 0x0f) << 58);
+  felts[2] = ((q15[0]) >> 4) | ((uint64_t)(ptr[23] & 0x03) << 60); 
+  felts[3] = ((q23[0]) >> 2);
+}
+
+void goldilocks_convert_bytes_to_field_elements(int rate, const uint8_t *ptr, uint64_t *felts) {
+  switch(rate) {
+
+    case 4:
+      goldilocks_convert_31_bytes_to_4_field_elements(ptr, felts);
+      break;
+
+    case 8:
+      goldilocks_convert_31_bytes_to_4_field_elements(ptr   , felts  ); 
+      goldilocks_convert_31_bytes_to_4_field_elements(ptr+31, felts+4);
+      break;
+
+    default:
+      assert( 0 );
+      break;
+  }
+}
+
+//------------------------------------------------------------------------------
diff --git a/reference/src/cbits/goldilocks.h b/reference/src/cbits/goldilocks.h
new file mode 100644
index 0000000..c53af1e
--- /dev/null
+++ b/reference/src/cbits/goldilocks.h
@@ -0,0 +1,39 @@
+
+#include <stdint.h>
+
+//------------------------------------------------------------------------------
+
+#define GOLDILOCKS_PRIME 0xffffffff00000001
+
+//------------------------------------------------------------------------------
+
+int goldilocks_isvalid(uint64_t x);
+
+uint64_t goldilocks_neg(uint64_t x);
+uint64_t goldilocks_add(uint64_t x, uint64_t y);
+uint64_t goldilocks_sub(uint64_t x, uint64_t y);
+uint64_t goldilocks_sqr(uint64_t x);
+uint64_t goldilocks_mul(uint64_t x, uint64_t y);
+uint64_t goldilocks_mul_small(uint64_t x, uint32_t y);
+uint64_t goldilocks_inv(uint64_t a);
+uint64_t goldilocks_div(uint64_t a, uint64_t b);
+uint64_t goldilocks_pow(uint64_t b, int e);
+
+//------------------------------------------------------------------------------
+
+uint64_t goldilocks_rdc          (__uint128_t x);
+uint64_t goldilocks_rdc_to_uint64(__uint128_t x);
+uint64_t goldilocks_rdc_small    (__uint128_t x);
+
+uint64_t goldilocks_mul_to_uint64    (uint64_t x, uint64_t y);
+uint64_t goldilocks_mul_add128       (uint64_t x, uint64_t y, __uint128_t z);
+uint64_t goldilocks_sqr_add          (uint64_t x, uint64_t y);
+uint64_t goldilocks_sqr_add_to_uint64(uint64_t x, uint64_t y);
+uint64_t goldilocks_mul_small        (uint64_t x, uint32_t y);
+
+//------------------------------------------------------------------------------
+
+void goldilocks_convert_31_bytes_to_4_field_elements (      const uint8_t *ptr, uint64_t *felts );
+void goldilocks_convert_bytes_to_field_elements ( int rate, const uint8_t *ptr, uint64_t *felts );
+
+//------------------------------------------------------------------------------
diff --git a/reference/src/cbits/goldilocks.o b/reference/src/cbits/goldilocks.o
new file mode 100644
index 0000000000000000000000000000000000000000..3f202973f7053814fdd35ff8772d3731add222a6
GIT binary patch
literal 5184
zcmb_gZ%kX)6+hPx$g_<dXqlSi!)&jqW<x-bMhTn3FszgEhi(<})O@J)>=+VAjDcWM
zs(>)tG-WbLV2QNsQ3*En1AmfiidJdbmoiP2l6o}R(DrE`*f$p^S(+skbfN5b-n%cs
zI9B$jSGw=qbAI=n`@8qtd*8EfeSY_gQX)Yp{3u4f!3{4e%68Ox12xpV$2K@~$;dvI
zz6Ca}3WF$`mS`VHm~B%JedkE%scyA?t(q;4XXB=uXe-MiO0uwurbYV_r#j=ur9snj
z<5_Vc62I)QJWXlD-e5w!?yjClG@<p!-|mdJX<Ex`8(`XImzp&pr#ZI)@Z0u`kh@`H
zySXebbVfy~K`leA$E3WF%!B(Kf&Mb&*>a-a!iezn1}YaGn(IrbUQ~+$n*VXh{4jjo
zLuz*E>yta?745IAtAf%VqEDBUvpU;UO1=;vn)3@awY+a~M|L=Ty-_%`*`H1BI3XOB
znxI}P|4Ck7@|iY{X(y3of2w<GQ-w%3Vw?=dv11I3J7*X6Cu4Xvjo~df`S>4)7XyQ+
zO}?~)xxueWI8vP$FN?7-PS!4*DSsW}!oCc?9^uG(M+*B_qTfOIFF)~jq`dIw`TH>z
zb2a#3JO?-)@NsN#-5h(6CIW-=6P0V>#j2t3VrNqz?Sv0FVQ%0AzfX)cc7UT@a&-RR
zah`IW$N1nNY;&qme&+xWCwO!;`3)Ybu>V&)RK&7>3G-1A!*1{r;N|%ec>QnjtaZvL
zqS8J@6Y9|XgahkyK&Z$s*5*0+)rxQ~iYPP;IR%*+j$HRs@&V`6CTQQXVw-J#+_dOK
zjbXogDEJ^OM)gDMTlXpkuFOV(RY|-NctqldNI{N7HimtXs!QoHvZbquve%$5)Sc5c
zQnEhpIlbBU13l^;)x*A|&V0gT@)3{ML#z{x82h#nZErD!aDM;H{FcGB5?uL=2ynTh
zh*=+axKOWQ|1;Mqab9l$SEqqHXHTvHM>TK}d@wEXUF1x=aBo=L;y52$1!eDp>(<>D
z^hR=KRo?}D#5ba!^qtqIz?JzJT#M;b=E`~#xMviaT~gFpK5HJ>BCrK0*;++Z!5D!*
zpUW)xSAjoccc9}u?z;;=75r*YZ{d73g5ynbL4O@{C^hiX*->C_%#m@P<GjROqBLCt
z&bKi4dv)ja3)ll?WY3n{9I4vfW4cF_X30CIPvgvvVlFpvKX|~M=P)Vfz&g)iNwFLJ
zMG5Wkm8|T`Vr`kb7hDe)(;nVeJcm2DFI<D_gnRJD#4Vi7+t_>V=9=_^aLpNO={u)i
z6<e{c4kPDm`v5!^M$YNOf==CgaZ;ZuPwK8A_k?@s#>B_Mm4a_d9a1N*A>OYL?`~6d
zdRnM+F8DHcRk*V*#GA%_#x|bU^vGGgYe>8}Tui-L#WmaUuk{eGKl5ez3Dz3x{D3**
z)mwpeZ_OyqS<LG&=L+|B@86-jR@XzEGg@Ca_8I4e`{D(M+#q*Y_qdT8dukZDNk1z$
z688YVVdaKO({Z70$PJ~-@IH8w8!E;A8Z-74=O*n->C7W1H>`iY9$H*qe`#G2fBNiA
zG4t?ywwbjv%FNmyX#Afzs}GT<N5&qbF(MjAy|ko!1$&MzO_osaVL_osN=0P_(by#$
zjd%PwxRQC9W|ou+qtE*#*sqxT+L>=7cJs_<Z#K<ftZ`mLm4d74qu@{dM5}5~aK*kq
z_>F*zUhr?fanWA{+kM#Hf~^>~a@bVv*R-OpH81*0X_aHFo%vv9O`&m58TBR=3VFQL
zJ5oX+zl?nVvHgf0*bw^wV*3$0up#yV#P%b$1F@?IY5coM8qXk4H&G8^tU1V!N|LRA
zEKauGDoVEEyP~24wu7jg&t)6VoJh9*1MRz5vwK*(t0K6174usVv|8QKy;9Q=tN$4D
zTiCvE@%4eKm8y=|%C3$R^`n?ysyu!1@IYWC&~aiV01gKS{3~l`em~7~<h8cuiMEjk
z0qH|N@J%xoh5Y4Lv8T|_TkWkKc%`;(Z!9+8ZScMli}gF`cz3L=Gu9nF*@v&S`04gj
z3GL`RiS|A%(XAcpY>%~R?XmW*_IRRiL)V%p)l&85f-!Omf0LnX{wy@U)fm9O;F5>$
zH0`v@lzF)I4Ohs&P5KwY2PD3~K>XiGJXipKDDm0?_)a8%OF{k;$NuAzH^0jguP=b#
zmbhGpJb$mW6pG&=@k0Lmt;{88JoEXjNW2igTKO@3rGWq6BrexkMl(C}bF9d0&zm9-
ze}jKep~!X0!!so&o`AqmGMZ`Q=Ue<E3gtCZ<oQoYJY(W{{%2Ja@E5?Z0QVAAzXu5_
zEmNj!co!1XTP~Zj;SwZ*w~U#x;YX0*TfHCS`yh8gf?SK=lnvV;zXp4ml%<f^3N5NB
z8<x-+dda+$Q&J|S3`-f5(kmrNxddm!U&<*dlTwDI3`*&hlB8V1dZ1Y{FXfb!Nh!lp
z2Bq{$31Rqcv7L2~y0_S--B<9ODu1thi|umxyLgV3U&3#=>)`3e09N@34L5A-ST`;`
zjH6n=Q-{s|MOMC|=)?GjvVTPOzc2M8QeTt$J5smeonRdavVONVgA2af$l7n?Qr~H?
z!9t)v<JNukbJ$t8_TN6KTl>u?b?bbbknybZge5Tit@AM?ahQ#A4^Lw}*tz^g;?{lf
z1#FC4=l71pt^1}@>SczvkrOkXJa6wy+`1ntq;B1J-;(pS?#J75Jl<zqej)o?_Yd2d
zk9FUCDDgJ4JAGg3)_uJMJ4{c?H=%6Q>o(z}ZFsr$bibXWcg9cW=xv?Qd;0O3+vxSY
zS7J=lv}678sHW9y80&dn&l=&?x~K2mJ)7UWF%5sr<PhuYg|2ClzCM0qpGw4Y(60X2
zW?k#+io|lQr`n>=u*TbuV<GT2P~sSAZS6<<kDD4OL=x>eVf*@zYJHJoxyJVXXsoj>
zr(b8^>BweVq%F4(CZuh)fnZJ?jF+Qhn(R}zyFSm1ua|yIVeg(C1fBW2<{5|_z5vQA
q_0%t>)gT=;dq_53rC6e?XSb!I0ktk?;OFd!nkc@)YU&KfTKYE~p~&d~

literal 0
HcmV?d00001

diff --git a/reference/src/cbits/monolith.c b/reference/src/cbits/monolith.c
new file mode 100644
index 0000000..808520e
--- /dev/null
+++ b/reference/src/cbits/monolith.c
@@ -0,0 +1,238 @@
+
+#include <assert.h>
+
+#include "goldilocks.h"
+#include "monolith.h"
+
+//==============================================================================
+// *** Monolith hash ***
+//
+// compatible with <https://extgit.iaik.tugraz.at/krypto/zkfriendlyhashzoo>
+//
+
+/* 
+monolith test vector (permutation of [0..11]) 
+---------------------------------------------
+from <https://extgit.iaik.tugraz.at/krypto/zkfriendlyhashzoo/-/blob/master/plain_impls/src/monolith_64/monolith_64.rs?ref_type=heads#L653>
+
+0x516dd661e959f541 = 5867581605548782913
+0x082c137169707901 = 588867029099903233
+0x53dff3fd9f0a5beb = 6043817495575026667
+0x0b2ebaa261590650 = 805786589926590032
+0x89aadb57e2969cb6 = 9919982299747097782
+0x5d3d6905970259bd = 6718641691835914685
+0x6e5ac1a4c0cfa0fe = 7951881005429661950
+0xd674b7736abfc5ce = 15453177927755089358
+0x0d8697e1cd9a235f = 974633365445157727
+0x85fc4017c247136e = 9654662171963364206
+0x572bafd76e511424 = 6281307445101925412
+0xbec1638e28eae57f = 13745376999934453119
+
+*/
+
+//--------------------------------------
+// ** sbox layer
+
+// based on the reference implementation from 
+// <https://extgit.iaik.tugraz.at/krypto/zkfriendlyhashzoo>
+uint64_t goldilocks_monolith_single_bar(uint64_t x) {
+
+  //  uint64_t y1 = ((x & 0x8080808080808080) >> 7) | ((x & 0x7F7F7F7F7F7F7F7F) << 1); 
+  //  uint64_t y2 = ((x & 0xC0C0C0C0C0C0C0C0) >> 6) | ((x & 0x3F3F3F3F3F3F3F3F) << 2); 
+  //  uint64_t y3 = ((x & 0xE0E0E0E0E0E0E0E0) >> 5) | ((x & 0x1F1F1F1F1F1F1F1F) << 3); 
+  //  uint64_t z  = x ^ ((~y1) & y2 & y3);
+  //  uint64_t r  = ((z  & 0x8080808080808080) >> 7) | ((z  & 0x7F7F7F7F7F7F7F7F) << 1);
+
+  const uint64_t mask80 = 0x8080808080808080;
+  const uint64_t mask7F = ~mask80;
+  uint64_t y1 = ((x  & mask80) >> 7) | ((x  & mask7F) << 1); 
+  uint64_t y2 = ((y1 & mask80) >> 7) | ((y1 & mask7F) << 1); 
+  uint64_t y3 = ((y2 & mask80) >> 7) | ((y2 & mask7F) << 1); 
+  uint64_t z  = x ^ ((~y1) & y2 & y3);
+  uint64_t r  = ((z  & mask80) >> 7) | ((z  & mask7F) << 1);
+  return r;
+}
+
+// the sbox-layer (note: it's only applied to the first 4 field elements!)
+void goldilocks_monolith_bars(uint64_t *state) {
+  for(int j=0; j<4; j++) { state[j] = goldilocks_monolith_single_bar(state[j]); }
+}
+
+//--------------------------------------
+// ** nonlinear layer
+
+// the nonlinear layer
+//
+// remark: since the next layer is always the linear diffusion, it's enough
+// to reduce to 64 bit, don't have to reduce to [0..p-1]. 
+// As in the linear layer we split into two 32 bit words anyway.
+void goldilocks_monolith_bricks(uint64_t *state) {
+  for(int i=11; i>0; i--) state[i] = goldilocks_sqr_add_to_uint64( state[i-1] , state[i] );
+}
+
+//--------------------------------------
+// ** fast diffusion layer
+
+#include "monolith_conv_uint64.inc"
+
+// we split the input to low and high 32 bit words
+// do circular convolution on them, which safe because there is no overflow in 64 bit words
+// but should be much faster as there are no modulo operations just 64-bit machine word ops
+// then reconstruct and reduce at the end
+void goldilocks_monolith_concrete(uint64_t *state) {
+  uint64_t lo[12];
+  uint64_t hi[12];
+ 
+  for(int i=0; i<12; i++) { 
+    uint64_t x = state[i];
+    lo[i] = x & 0xffffffff;
+    hi[i] = x >> 32;
+  }
+
+  uint64_circular_conv_12_with( lo , lo );
+  uint64_circular_conv_12_with( hi , hi );
+
+  for(int i=0; i<12; i++) {
+    __uint128_t x = (((__uint128_t)hi[i]) << 32) + lo[i];
+    state[i] = goldilocks_rdc_small(x);
+  }
+}
+
+void goldilocks_monolith_concrete_rc(uint64_t *state, const uint64_t *rc) {
+  uint64_t lo[12];
+  uint64_t hi[12];
+ 
+  for(int i=0; i<12; i++) { 
+    uint64_t x = state[i];
+    lo[i] = x & 0xffffffff;
+    hi[i] = x >> 32;
+  }
+
+  uint64_circular_conv_12_with( lo , lo );
+  uint64_circular_conv_12_with( hi , hi );
+
+  for(int i=0; i<12; i++) {
+    __uint128_t x = (((__uint128_t)hi[i]) << 32) + lo[i] + rc[i];
+    state[i] = goldilocks_rdc_small(x);
+  }
+}
+
+//--------------------------------------
+// ** rounds
+
+#include "monolith_constants.inc"
+
+void goldilocks_monolith_round(int round_idx, uint64_t *state) {
+  goldilocks_monolith_bars       (state);
+  goldilocks_monolith_bricks     (state);
+  goldilocks_monolith_concrete_rc(state , &(monolith_t12_round_constants[round_idx][0]) );
+}
+
+void goldilocks_monolith_permutation(uint64_t *state) {
+  // initial layer
+  goldilocks_monolith_concrete(state);
+  // five rounds with RC
+  for(int r=0; r<5; r++) {
+    goldilocks_monolith_round(r, state);
+  }
+  // last round, no RC
+  goldilocks_monolith_bars    (state);
+  goldilocks_monolith_bricks  (state);
+  goldilocks_monolith_concrete(state);
+}
+
+//------------------------------------------------------------------------------
+
+// compression function: input is two 4-element vector of field elements, 
+// and the output is a vector of 4 field elements
+void goldilocks_monolith_keyed_compress(const uint64_t *x, const uint64_t *y, uint64_t key, uint64_t *out) {
+  uint64_t state[12];
+  for(int i=0; i<4; i++) {
+    state[i  ] = x[i];
+    state[i+4] = y[i];
+    state[i+8] = 0;
+  }
+  state[8] = key;
+  goldilocks_monolith_permutation(state);
+  for(int i=0; i<4; i++) {
+    out[i] = state[i];
+  }
+}
+
+void goldilocks_monolith_compress(const uint64_t *x, const uint64_t *y, uint64_t *out) {
+  goldilocks_monolith_keyed_compress(x, y, 0, out);
+}
+
+//------------------------------------------------------------------------------
+
+// hash a sequence of field elements into a digest of 4 field elements
+void goldilocks_monolith_felts_digest(int rate, int N, const uint64_t *input, uint64_t *hash) {
+
+  assert( (rate >= 1) && (rate <= 8) );
+
+  uint64_t domsep = rate + 256*12 + 65536*63;
+  uint64_t state[12];
+  for(int i=0; i<12; i++) state[i] = 0;
+  state[8] = domsep;
+
+  int nchunks = (N + rate) / rate;       // 10* padding
+  const uint64_t *ptr = input;
+  for(int k=0; k<nchunks-1; k++) {
+    for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] , ptr[j] ); }
+    goldilocks_monolith_permutation( state );
+    ptr += rate;
+  }
+
+  int rem = nchunks*rate - N;       // 0 < rem <= rate
+  int ofs = rate - rem; 
+
+  // the last block, with padding
+  uint64_t last[8];
+  for(int i=0    ; i<ofs ; i++) last[i] = ptr[i];
+  for(int i=ofs+1; i<rate; i++) last[i] = 0;
+  last[ofs] = 0x01;
+  for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] , last[j] ); }
+  goldilocks_monolith_permutation( state );
+
+  for(int j=0; j<4; j++) { hash[j] = state[j]; }
+}
+
+//--------------------------------------
+
+void goldilocks_monolith_bytes_digest(int rate, int N, const uint8_t *input, uint64_t *hash) {
+
+  assert( (rate == 4) || (rate == 8) );
+
+  uint64_t domsep = rate + 256*12 + 65536*8;
+  uint64_t state[12];
+  for(int i=0; i<12; i++) state[i] = 0;
+  state[8] = domsep;
+
+  uint64_t felts[8];
+
+  int rate_in_bytes  = 31 * (rate>>2);                   // 31 or 62
+  int nchunks = (N + rate_in_bytes) / rate_in_bytes;     // 10* padding
+  const uint8_t *ptr = input;
+  for(int k=0; k<nchunks-1; k++) {
+    goldilocks_convert_bytes_to_field_elements(rate, ptr, felts);
+    for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] , felts[j] ); }
+    goldilocks_monolith_permutation( state );
+    ptr += rate_in_bytes;
+  }
+
+  int rem = nchunks*rate_in_bytes - N;       // 0 < rem <= rate_in_bytes 
+  int ofs = rate_in_bytes - rem; 
+  uint8_t last[62];
+
+  // last block, with padding
+  for(int i=0    ; i<ofs          ; i++) last[i] = ptr[i];
+  for(int i=ofs+1; i<rate_in_bytes; i++) last[i] = 0;
+  last[ofs] = 0x01;
+  goldilocks_convert_bytes_to_field_elements(rate, last, felts);
+  for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] ,felts[j] ); }
+  goldilocks_monolith_permutation( state );
+
+  for(int j=0; j<4; j++) { hash[j] = state[j]; }
+}
+
+//------------------------------------------------------------------------------
diff --git a/reference/src/cbits/monolith.h b/reference/src/cbits/monolith.h
new file mode 100644
index 0000000..d8d905e
--- /dev/null
+++ b/reference/src/cbits/monolith.h
@@ -0,0 +1,12 @@
+
+#include <stdint.h>
+
+//------------------------------------------------------------------------------
+
+void goldilocks_monolith_permutation   (uint64_t *state);
+void goldilocks_monolith_keyed_compress(const uint64_t *x, const uint64_t *y, uint64_t key, uint64_t *out);
+void goldilocks_monolith_compress      (const uint64_t *x, const uint64_t *y,               uint64_t *out);
+void goldilocks_monolith_bytes_digest  (int rate, int N, const uint8_t  *input, uint64_t *hash);
+void goldilocks_monolith_felts_digest  (int rate, int N, const uint64_t *input, uint64_t *hash);
+
+//------------------------------------------------------------------------------
diff --git a/reference/src/cbits/monolith.o b/reference/src/cbits/monolith.o
new file mode 100644
index 0000000000000000000000000000000000000000..9603d741f332fa786b649be480ff0a6b974e9c5d
GIT binary patch
literal 9648
zcmds7eOOf0x?g+mf!Q;B4e}u%2L>ILQ9(#LQ9%yiS3=^)WZhG?`BV`Efx#?5MkJ$#
zo-5|L*Dl^8m6k(t^Q4`!V}(aMT2qu^9y<AW$FxGROv4;JAosUsuR$2#`kd$fclX2E
zzxBSq_kGveYrlJ~wfFG(%^z=h6T%T3^yGmO!sDbx!4Zm5?_{0gh*R5mS(QkAaJXb8
z>a-}R5{D$2@;8{|x-1*#K5D!zyK39V$u$~}hIF6<byTGwW(uxDl5$Im%Wnsiq~Y;)
zusC+s_b&no5nRrrAmWvq$_k2CG0r{S2KdusvkYW^Fsm?Nmp`ba{ad>(JJ%#t6hB!|
zoF_>ca|TIT36iBY=9q|ar2PwtAcTb;7V<92p~#ox17j2%u_zjp6Y$##q)ycC(MUp~
zft^t(2<(OWVibDNIxVydS`-oGpFPCB`5odBvQ6cv>Ck_z+$M6W7FFN84n1`0-kOeu
z&^-v9x_@5BLg=PLR|wr2PTdmCML#ccY83K`s(-%IM+7f|*K%S@n;@Mg3PUTec&cR5
z1(K;SreuEfgEF%={Cu%O@pQ>N$j8F>6wgEbS=7HreJX6@P*+BrFOGx$D(Z0&dy223
zK5ltR=G=(V;@a?>MXBLa78$~|#WXg7jUoKiVm@LO^ph8*ghv%mggy%T6zGw2t+X#a
znIBmDD>-X_8fpARAT1=?(3>4==*5_A&x$3zSzG2<o%Ly`d$Rfzs!x;k6RDonBd9)A
z)`w9&s|QnkimV?;^{nno^#)n5rg~QQB)$EWDb}9MzwD)b6m<rxS~T>E7b`pTC<{@h
zbE2h?Q(Fo#witKYYEHCG(-P}yPGyT0N$*fUuj?B-W}rNTQXeGRM9#-n5TvqLsjZIs
zc-zDv^$5Q=h<TfLwO3ONN;=AOJ}OI#kJ_S-hL4YEac_s_!DZgO&8OPCDHf&OL$tK3
z&^MK6TMz{u`nOl5vWXt*5x(hYt9#(ewHdgs23*%=!P};f_iEB3=0e~?AZ=Gfo#vCP
z?Oz}H0OQ12BNe37OG%6(F@JecAk(HjRkLlnmNZ@#)HWZ){t5RMb5>x<hG>WAK^j|8
z+C7MWB&X^&co1WxNJ?u7F>2uNhY|)`e^#eD9p_`2jJ|y@c(oiL{$ATrDtz0*4z!aA
zzxVw(?BJKG3ClXm9mY<R<M^o>Ysmx3rmd+#O@B3z#&|V3xr_7fPPsK;^yB?3KMU#>
z3GFujW59Tl)1>P*taD68j6KIx6Xtg2JKjuEG-*8H=Sh~#<}}^hz<}|~fr0TonJLy^
zA7ME`483}kX!L0TA-$pkGKy1HKt{gkQJk_XAD9oGXWLL8r#!^RDKj6}_=<7J!Gttx
zI@<G1O0jC@%&~sU`CA?&UX~n=M^342GkHH-H+X;VRI7r_wLU}qEw6FjmQRR>We@SR
z#q$bV19I;L_#NXk7EdBrjuVBY4&1z+jU5M2e0g8nIznuO_qF5%^R}ab9+r<tfbDza
zZh&B3+EX3S^m~*yJ^gHdRe0MP)ZP{&?r#rr=WWi*@-|N_*M%Tk>%)Of#2DCA2s}m1
zOM|Kdo8I>Ix5aM^Y>M~wwY5WbJxf1yRd#rW1vD+=RF-mHX(_*1)3GN|VOzy3Z3Vo_
zR)>CFQE4m<9vWLKa!-SKkv?|@qOkEqY1vE!TOLta?C7_i6Kq$ID_6j`@;;XH9B;|(
zOYNY3@w~>OBtEtuAjhMP1YTv?ek-+O4X?HxAZp~jm!$!ACwxR(G4E|_?@#TRw3t|9
z!#Qg^(O5+AHt@xq!a_Ov94zEKY#KZ-8pMe;@YY4d&*`V<{B3&J#OegAl_M5fZ{Pwf
z4Zs;Xh4ne&<>VJ|m=B=SYd!EB-OjIpqr%CaOdrgRzSjQDf^%-fTC!E-Fb|>Z-5lX?
z&P&yXZDM~`$8_MOH$AO-)Vp5CbE-?Rs)un(#&O_M-8hkPJh(JBPQ^F@+#EMf%{T?P
zxlT^*hnJQ1N$!h>)&45icn*cm<Mv6U@f-@RcPS0MD%f4R$IxroB)$>FMZV{p>v_`K
z<;A%B-8jt6U4D$4?Z#ma?+RdCnj43?zH2<==DTrN6S^iaF5Qj8+R+usxJ)+=YfRT9
z#w~N>uoiVqW!xif+(dBG8TXhQHwD~G#z}76G;p&Rm+Qu1t=zl$35N<fqDOwvGMf{-
zU&fr;U%3dm^^g@g)#zORG4}5H)gy2%bp8GRoMQ`+V|wJ+^E~z!oO;ZB8?^IuvvbS6
zK|8gZom(Cb+WETKx#i@b-8eTpxBMKm3wE<}%hf@<FgH85ydAWQaI<sE;X%8JZgy_@
zJZLw?&CV^i2koZ0*}3KUpdH;84!v*xWX=!YYjQ0;$ag#P-Z?J`0Zx?jpERnx%}X~;
z)Hm6ockKmX7dSiy_N#PGW4<oL>&({eJZYrzIU#UVn@zQxlON506EBR-)hfp3c+W98
z_v;u)<FURdvA&4beu2mOB8+(U>3f1*Nm{i|<XQ)6Jgnc=D6Dotp1<u%Lu-~NX&rms
zS%7(Gz7=iVo_9vH%ipnGa)FfC1-|wF)~@1YUx}CX<|=QiL-N5s&KG_2!}V)${ffrn
z^IjO%R<a~=GKs(FPmFYIeiitZ>&O+FGh$n&zsSY;%l%TIUjho>+LcCnqw)F|;?ov(
z1AE|5B`N*UlNfJatM2eb?jYBk>)vTT%yKU{0?v;0;|QIP<h8MPmnSK0!#>rHwc`k#
zf2e*c>D{G<J=J5~I6~(es-F(MFDW(jLytA%2%TT3ekSzeNU6CGdaM^m=zK!;v!D+~
zeC&JeSaXli`Ge~3fj$iJsUGX?5jtN`{r%8KApTA0vDO};^8?k-CcU3wKP=V+PO$P~
zdLkF-6^ZL96o^rUz4|5ON8}_@I*BL7bS~Id!*N%Sa^T9Vw{?Rn+(a5bttE{=3aS>f
zM@?(3&tLXsdCYFj@u}U{8bz*VW4}F{3+c9F?V5hIy5rL|g^rAZ=?*>K8K^!Nyh=&@
zXLEstYQ&}O89vive-@N3s>l)%K3tTXqtV!V;(eqs`s+jo9oy-!$31Msy=-j8xNq8G
zaL{XN-a;HVJmt1s_sOnIW%>QP5$kFS;`CQ8Mh;~nmlh$X9zt$qSo=!Sk?)GZe5bJo
zj(Q;X{K1cdT>$I@;TMG53&!5gqmky8Gp~m2??N4s8d92tG1xVclx8Cj&nrpqMLbLO
zeClBSX*s@C%W<t*4|B48syR72iuBfR<d>-mCM>&`6VuNr$)RY0^9u9%I_$a-I8CBC
ziTe_cae2&(EcqpkYy1|)j(O^w5bT_*ZlDc^5Y%GF_++#3v15Eflx%#!)9X6Thg#}!
zA9e{73i|}H<z?jIf|}3?KPe4|K1SR3aiLzcJqzCN)X0mA>7-FYUgRopUs+xVs7nG@
znuF`4Hu&BWxNz0e+qz+UJ5SUj^8`6X^F-ppy3M#A$*cNWGw#Wcc(+@KJhNf%e;G0G
zzGG>{yHVCNhCcH%q!0P^$CsI38``w3S(7-uU|Qlr<Q9#!8Qg=;+zKtc1pX3Y7>>}~
z@^$7G2RnN0UC`?>hN_S6OFW0VYYZvp$}>8)Js2zVka09)9N%{385>9Y)VAa<)Hx3z
z_Qd#5h&lCNM&w+SGv}5gcOFI#8IeoNkW)*Mb4#$d^B9(M{goeLUUT-9e!6PpkXune
ze6HG>3(N^JFQt9GlU;47YzJHN`PyyY1y6qS@cQ);?S-#@wCIiHfBs<deDi;vf9!Wr
zpGa-7&0F5scfpi%&M)!U(KG$~&F!~p{`Md1t4>r!SNnhbgyWtU=4Nn@)xL256MtL$
zuJ>=6-+5ng{Jkab%nB%}JheIH>c+|~Z=FwDza%i~?6i|fmx5x97hin(YHr8;uI1r9
zCE=+*>w;gI@qqQm^D9@sy>n{Csrr9ReE!9Pobc2d&h$+CzVbugthsObA9otc&Q;Gb
z<nAgfd1>LzKPCSABad&}GVg%><3Ge!ykAh);q`}<Z#t*EAKlfS^4`7Q6sZ2yFZc9z
zpPB#l@I}L<*w|<9PJVc1^nri75H@i1#*<f`IGp#!m21~~>iEs%zPIMz+@jNc`AUfI
z{=b*Mo*1G@*5yAF+`r}Y{DixkLX2O&^}zcd?G4u#*Wdcn{tH(hI==DHy7T0<U(RMe
z(w!L-P!fLn`Qu&3FYn&@eadd{-g7x$g#{&L?YTG5qsla6am;reOSqTDU8=2}Tr*?Y
zS1YC_*m9omeCvzr#^*@=QocC(b7ex+PiMb+Pu#&P|6cyu@iU44-CY?I->`b=wO6(E
zCmnyjv0&-WQ?+R?ZMYHsdBd!C6UV2gHSAq=|E3*ji*FqYaGagF{Hd<StFOHE;;NOW
zxU&<_K3k~zI^n}<*Y>pcd~#F4ulV6)bo{E4qP&8llH9^_X>CbyNl}4mwX`z7$W$)n
z6|Bm~2Vs{sHkU+~Wt;M~4<u{j_1Y;@w6d6#tWDID(Zb|BWy&86BO50tYZLU^sw(CS
z5x&EYELj@%t(K~oo-~ZXUzYV`HxPNRVB#Qbg_0hLGK`3S;tss?Loj^f#_*405tGK?
zix_VhgWtz^x-bb^S8K8{J@3XtOpgQ^n6)g>GyNt_kFj`fHX;sO=V<X2*sRba3z0_~
zzdUS~=+QF<|0w+cj$;nWX#P7He_{+i&zpJ^vH@i@|1)>sjq>MvQicBEQ~ikh|An73
zJTZ;I&tboBU@u3H5&lJY;J1R;5)xAn#5d23IvJDAKm^OEk}-J$5JR0|k}-KTkosi-
z@mo>G<1!{c3Z#BU8Iu<Saf>rDWK1>yaVarLz$l<Ma3Uc^qKqpp`x6oXeJ4-{JPe!+
zYz9sO?#7}^`K>_8R{<&ChjF5D95Sx>6iDNr0CK<`jNi`qIv|a+4oKrH0Mht7ePy|h
zVG%<k!z6|}hTaT&a35)$Ge8=51CaK|B;$(3K-!-;rk}y|;XrENgFK=3Kg+nH6G-jf
zW%`3mzZ*#HtC)TR(`Ny32{DU-_={P96H+(E0K^c*BryF9rq?lj2-9nrUS#?{IHMZV
z1Eg^-<3!`wnZA?hzhL@fK>S{n(I#W^VIcKC2*h8^KAdR(b^vJ{GmzR>$(XzWNbT1F
z@fTyniC%vKkd8~7j4O1&DbV++Wjw`jAHzC^k26eQ=*{o~J~~nV!wk1GEMm9-7y>?t
zp&9cBZNEy!6<I*qF9XvjF?}46#tmfXz{5o4GYs1p?q-O+sFN>ZxR_xAL+oFje4nR`
z7l5>1oeaZ)v|k}IuBhS&nF@X#!#swMGR$CTV3@!#nqfFY4MPRPX5RVyc$yiqdDhdw
zWGzF&&<;QRdA2cZW@u)Z#n8Y|%aAa%6XwsbnW33s7DEF=Ekghe;i(V~YuYr&Gzwvd
z_fGF-Z-uZw_+aq6!3v>IWA?A}SMq(q=J86tZTzwE3gKYryP=0e6@nwAFT@Po7q&m_
zV3<PqIkYF#0l6p45!MHJd*qJDosqDM?29yO6+*qPLAO<><j?5rIG@sW;{2)Z3!LjF
z*H1<pIy25aI)|<ulb<g-<~pimS>c+u_b}NtPtS&-FLbWonJeK-<(Js+&gIk|WjB*w
zVD=v}IgjynCa<M3?v3kr@VSh4#aB>SfgtR6=(%Xm4<!pKdPq#ZEQ_QNNO{+~Brx8!
z&PAXt%DdL(r<v?phkwKTUF%*H^LMSI?=ac5?q)K7*E(Ixc-9^A9gR2S`qa1{+P-Vw
zLGzNzu6;y0<6Zlz8BBKVqdsHy)Q=tqne5spH8I(>FG6T%eAhl`0*mL`*WmXQnwNve
zPi}G&;tl0rapTvx@rMv;sC}oK{r7J6M5-t#HvKk1D$XugpD*PWl;u_wWtY+anpjVy
z(YC+CcF{$WB(1C{&XuItJKByKZP;e|4$+3$4vj`Tn_p&<WCK%)w6Y+-C{M~S%3qsb
zY$_+WUja<<cS&U>75G03=f8zyJK52CPX8;TEWaGSqp^kgPvz4P|C>!tcG(!-Ib{WS
zFCj%zEdG^b^ajfDA0|cl5@HZ3=jr^i68vL~lwDp<ud>WkJi61m{IazbrfgF|$#8?R
zyj-b#ZFW%+4N`8(&MlO3R~JevvkRbMw}04;moQ3C;*DwaW-xNOW%;K3;n7xAWS8ZQ
z?%*8oVN`bB7^be+Qd#cpuHo*M6d^fjm!#C+jz!!(7LhQl?efwx3GGWr6ZzV)30u2v
z)}S-x*5~KmR+Lxd3|G)}8`7a!&H#4}25_VAVmCS#6*Ls@661&U%Jn)r>?Jie7jLn#
RV;By78eE37fjH+}@;_8%pLPHM

literal 0
HcmV?d00001

diff --git a/reference/src/cbits/monolith_constants.inc b/reference/src/cbits/monolith_constants.inc
new file mode 100644
index 0000000..9c3e7c5
--- /dev/null
+++ b/reference/src/cbits/monolith_constants.inc
@@ -0,0 +1,71 @@
+
+#include <stdint.h>
+
+const uint64_t monolith_t12_round_constants[5][12] = 
+  { { 0xbcaf2516e5926dcf
+    , 0x4ec5a76bce1e7676
+    , 0x9d804725bebb56ab
+    , 0x2ec05fca215a5be3
+    , 0xe16274e4acab86a0
+    , 0x80b0fddcc3c4380f
+    , 0xc87c769ad77ffece
+    , 0x37f85ec9117d287c
+    , 0x3b8d825b014c458d
+    , 0xb7a01d0cb850d75e
+    , 0x1333b751bac704bd
+    , 0x7b7ef14183d47b6f
+    }
+  , { 0x2114517643e3b286
+    , 0x542d15ea3cd12ade
+    , 0xe847d363f17a93e9
+    , 0x24f0421c6ff41c56
+    , 0x66e3eda93e2ca216
+    , 0xfb88d475279cb568
+    , 0x7f421c6269938a22
+    , 0xdbb973acce857401
+    , 0xe172409cb1563a6a
+    , 0x996f729f6340447d
+    , 0x925c579738b6fa4a
+    , 0x752e9ec9e0b34686
+    }
+  , { 0xdb419e0bd38469bd
+    , 0xba41cee828bd26d8
+    , 0xd6630f8f0969db39
+    , 0x2340e955ae2f0d94
+    , 0x282f553d35872e2e
+    , 0x77f7c3ff1ae496b3
+    , 0xf5f2efab64bc5eef
+    , 0x47b23a00830284f4
+    , 0xe18a2d2242486fa
+    , 0x3d101838a773dab0
+    , 0x47d686fd16856524
+    , 0x3eb2d254189b3534
+    }
+  , { 0xfe886e291ca8c5bd
+    , 0xb97ec74df1e4b0b6
+    , 0x574fdef3a600e370
+    , 0x8ad61c6f132d4feb
+    , 0x41e69ca4ecc7e8c7
+    , 0x151ad562e1f90ca4
+    , 0x747c051439a5603c
+    , 0x990151d3e52d502c
+    , 0x532c7f258282ea12
+    , 0x65e62cb34275dd5
+    , 0x5288008954f5d0b2
+    , 0xee7c3407cf3d6e02
+    }
+  , { 0xda07029808bad5de
+    , 0x7bebdf38dcc7a673
+    , 0x20a3f252688c312d
+    , 0x9c5248f7bbf8d188
+    , 0xcf1cf778994382d4
+    , 0x8c434b1738b8338c
+    , 0xfe504398813b67a8
+    , 0xe879562fdef813b9
+    , 0xd4666793b2a2f191
+    , 0xd9096b87de22de01
+    , 0xcaf4cea5f22abf34
+    , 0x3128d1e75d0204fa
+    }
+  };
+
diff --git a/reference/src/cbits/monolith_conv_uint64.inc b/reference/src/cbits/monolith_conv_uint64.inc
new file mode 100644
index 0000000..2a688bd
--- /dev/null
+++ b/reference/src/cbits/monolith_conv_uint64.inc
@@ -0,0 +1,267 @@
+
+//
+// circular convolution with the vector [7,8,21,22,6,7,9,10,13,26,8,23] algorithms in uint64_t
+// the idea is that we can split field elements into (lo + 2^32*hi)
+// apply the convolution separately (it won't overflow)
+// then combine and reduce
+//
+// based on the book:
+//
+// Nussbaumer: "Fast Fourier Transform and Convolution Algorithms"
+//
+
+/*
+
+our coefficient vectors:
+
+  [7,8,21,22,6,7,9,10,13,26,8,23]
+
+in CRT rectangle format:
+
+  +----------+
+  |  7  6 13 |
+  |  26 8  7 | 
+  |  9  8 21 |
+  | 22 10 23 |
+  +----------+
+
+*/
+
+#include <stdint.h>
+
+//------------------------------------------------------------------------------
+
+// convolves with:  b2 = { 64 , 32 , 64 };
+//   tgt[0] = 64*x + 64*y + 32*z
+//   tgt[1] = 32*x + 64*y + 64*z
+//   tgt[2] = 64*x + 32*y + 64*z
+void uint64_convolve_with_B2(uint64_t *src, uint64_t *tgt) {
+  uint64_t x = src[0];
+  uint64_t y = src[1];
+  uint64_t z = src[2];
+
+  uint64_t x32 = x << 5;
+  uint64_t y32 = y << 5;
+  uint64_t z32 = z << 5;
+
+  uint64_t s64 = (x32 + y32 + z32) << 1;
+
+  tgt[0] = s64 - z32;
+  tgt[1] = s64 - x32;
+  tgt[2] = s64 - y32;
+}
+
+
+// convolves with:  b3 = { -32 , -4 ,   4 };
+//   tgt[0] = -32*x +  4*y -  4*z
+//   tgt[1] =  -4*x - 32*y + 64*z
+//   tgt[2] =   4*x -  4*y - 32*z
+void uint64_convolve_with_B3(uint64_t *src, uint64_t *tgt) {
+  uint64_t x = src[0];
+  uint64_t y = src[1];
+  uint64_t z = src[2];
+
+  uint64_t x4 = x << 2;
+  uint64_t y4 = y << 2;
+  uint64_t z4 = z << 2;
+
+  uint64_t x32 = x4 << 3;
+  uint64_t y32 = y4 << 3;
+  uint64_t z32 = z4 << 3;
+
+  tgt[0] = - x32 + y4  - z4;
+  tgt[1] = - x4  - y32 + z4; 
+  tgt[2] =   x4  - y4  - z32;
+}
+
+// convolves with:  b4 = { -6 , 0 , 8 };
+//   tgt[0] = - 6*x + 8*y 
+//   tgt[1] =       - 6*y + 8*z
+//   tgt[2] =   8*x       - 6*z
+void uint64_convolve_with_B4(uint64_t *src, uint64_t *tgt) {
+  uint64_t x = src[0];
+  uint64_t y = src[1];
+  uint64_t z = src[2];
+
+  uint64_t x8 = x << 3;
+  uint64_t y8 = y << 3;
+  uint64_t z8 = z << 3;
+
+  uint64_t x6 = x8 - (x + x);
+  uint64_t y6 = y8 - (y + y);
+  uint64_t z6 = z8 - (z + z);
+
+  tgt[0] = - x6 + y8;
+  tgt[1] = - y6 + z8;
+  tgt[2] = - z6 + x8;
+}
+
+// convolves with:  b5 = {   2 , -4 , -24 };
+//   tgt[0] =   2*x - 24*y -  4*z 
+//   tgt[1] =  -4*x +  2*y - 24*z
+//   tgt[2] = -24*x -  4*y +  2*z
+void uint64_convolve_with_B5(uint64_t *src, uint64_t *tgt) {
+  uint64_t x = src[0];
+  uint64_t y = src[1];
+  uint64_t z = src[2];
+
+  uint64_t x2 = x << 1;
+  uint64_t y2 = y << 1;
+  uint64_t z2 = z << 1;
+
+  uint64_t x4 = x2 << 1;
+  uint64_t y4 = y2 << 1;
+  uint64_t z4 = z2 << 1;
+
+  uint64_t x24 = x4*6; // (x4 + x4 + x4) << 1;
+  uint64_t y24 = y4*6; // (y4 + y4 + y4) << 1;
+  uint64_t z24 = z4*6; // (z4 + z4 + z4) << 1;
+
+  tgt[0] =   x2  - y24 - z4 ;
+  tgt[1] = - x4  + y2  - z24;
+  tgt[2] = - x24 - y4  + z2 ;
+}
+
+// convolves with:  b6 = {  -2 , -2 ,  -8 };
+//   tgt[0] = - ( 2*x + 8*y + 2*z ) 
+//   tgt[1] = - ( 2*x + 2*y + 8*z )
+//   tgt[2] = - ( 8*x + 2*y + 2*z )
+void uint64_convolve_with_B6(uint64_t *src, uint64_t *tgt) {
+  uint64_t x = src[0];
+  uint64_t y = src[1];
+  uint64_t z = src[2];
+
+  uint64_t x3 = (x << 2) - x ;
+  uint64_t y3 = (y << 2) - y ;
+  uint64_t z3 = (z << 2) - z ;
+
+  uint64_t s = x + y + z;
+
+  tgt[0] = - ( (s + y3) << 1 );
+  tgt[1] = - ( (s + z3) << 1 );
+  tgt[2] = - ( (s + x3) << 1 );
+}
+
+//------------------------------------------------------------------------------
+
+void uint64_naive_circular_conv( int n, uint64_t *input, uint64_t *coeffs, uint64_t *output ) {
+  for(int k=0; k<n; k++) {
+    uint64_t acc = 0;
+    for(int j=0; j<n; j++) {
+      acc += input[j] * coeffs[ (k+n-j)%n ];
+    }
+    output[k] = acc;
+  }  
+}
+
+//------------------------------------------------------------------------------
+
+void uint64_add_vec3(uint64_t *xs, uint64_t *ys, uint64_t *zs) {
+  for(int i=0; i<3; i++) zs[i] = xs[i] + ys[i];  
+}
+
+void uint64_sub_vec3(uint64_t *xs, uint64_t *ys, uint64_t *zs) {
+  for(int i=0; i<3; i++) zs[i] = xs[i] - ys[i];  
+}
+
+//------------------------------------------------------------------------------
+
+// cyclic convolution of 12 terms via the Agarwal-Cooley algorithm
+// with the fixed vector [7,8,21,22,6,7,9,10,13,26,8,23]
+//
+void uint64_circular_conv_12_with( uint64_t *input , uint64_t *output ) {
+
+  uint64_t input_rect[4][3];       // first index is the outer, second the inner
+
+  for(int k=0; k<12; k++) {
+    input_rect[k%4][k%3] = input [k];
+  }
+
+  uint64_t *input_ptr = (uint64_t*) input_rect;
+
+  uint64_t *x0 = input_ptr    ;
+  uint64_t *x1 = input_ptr + 3;
+  uint64_t *x2 = input_ptr + 6;
+  uint64_t *x3 = input_ptr + 9;
+
+  uint64_t a0[3], a1[3], a2[3], a3[3], a4[3], a5[3], a6[3];
+  for(int j=0; j<3; j++)  {
+    a0[j] = x0[j] + x2[j]; 
+    a1[j] = x1[j] + x3[j]; 
+    a2[j] = a0[j] + a1[j]; 
+    a3[j] = a0[j] - a1[j]; 
+    a4[j] = x0[j] - x2[j]; 
+    a5[j] = x1[j] - x3[j]; 
+    a6[j] = a4[j] + a5[j]; 
+  }
+
+  uint64_t m0[3], m1[3], m2[3], m3[3], m4[3];
+  uint64_convolve_with_B2( a2 , m0 );            // uint64_naive_circular_conv( 3 , a2 , b2 , m0 );
+  uint64_convolve_with_B3( a3 , m1 );            // uint64_naive_circular_conv( 3 , a3 , b3 , m1 );
+  uint64_convolve_with_B4( a4 , m2 );            // uint64_naive_circular_conv( 3 , a4 , b4 , m2 );
+  uint64_convolve_with_B5( a5 , m3 );            // uint64_naive_circular_conv( 3 , a5 , b5 , m3 );
+  uint64_convolve_with_B6( a6 , m4 );            // uint64_naive_circular_conv( 3 , a6 , b6 , m4 );
+
+  uint64_t u0[3], u1[3], u2[3], u3[3];
+  uint64_add_vec3( m0 , m1 , u0 );
+  uint64_sub_vec3( m0 , m1 , u1 );
+  uint64_sub_vec3( m4 , m3 , u2 );
+  uint64_sub_vec3( m4 , m2 , u3 );
+
+  for(int i=0; i<3; i++) {
+    x0[i] = ( u0[i] + 2*u2[i] ) >> 2;
+    x1[i] = ( u1[i] + 2*u3[i] ) >> 2;
+    x2[i] = ( u0[i] - 2*u2[i] ) >> 2;
+    x3[i] = ( u1[i] - 2*u3[i] ) >> 2;
+  }
+ 
+  for(int k=0; k<12; k++) {
+    output[k] = input_rect[k%4][k%3];
+  }
+}
+
+//------------------------------------------------------------------------------
+
+/*
+
+void uint64_test_short_conv_with() {
+  
+  printf("test short convolution algos for uint64\n");
+  
+  uint64_t input    [12];
+  uint64_t coeffs   [12] = {7,8,21,22,6,7,9,10,13,26,8,23};
+  uint64_t output   [12];
+  uint64_t reference[12];
+
+  // generate some "random-looking" numbers
+  uint64_t a=123459;
+  uint64_t b=789013;
+  for(int i=0;i<12;i++) {
+    uint64_t c = (a*b) ^ (a - 12345);
+    uint64_t d = (c*a) ^ (b + 67891);
+    input [i] = c & 0x0fffffff;             // WE WANT NO OVERFLOW!
+    a = b   + c       + 1;
+    b = 3*a - 5*c + d - 3;
+  }
+
+  for(int i=0; i<12; i++) {
+    printf("x[%d] = %016llx  ;  h[%d] = %016llx\n" , i, input[i], i, coeffs[i] );
+  }
+
+  // -----------[ length = 12 ]----------- 
+
+  printf("\n");
+  printf("length = 12\n");
+
+  uint64_naive_circular_conv   ( 12, input, coeffs, reference );
+  uint64_circular_conv_12_with (     input,         output    );
+
+  for(int i=0; i<12; i++) {
+    printf("out[%d] = %016llx  ;  ref[%d] = %016llx\n" , i, output[i], i, reference[i] );
+  }
+}
+
+*/
+
+//------------------------------------------------------------------------------
+