fast C implementation for Goldilocks field + tests for field implementations

This commit is contained in:
Balazs Komuves 2025-10-14 18:09:30 +02:00
parent bf75c153b2
commit 58756dd824
No known key found for this signature in database
GPG Key ID: F63B7AEF18435562
17 changed files with 1532 additions and 4 deletions

View File

@ -0,0 +1,55 @@
module Field.Class where
--------------------------------------------------------------------------------
import Data.Proxy
import System.Random
import qualified Field.Goldilocks as Goldi
import qualified Field.Goldilocks.Extension as GoldiExt
--------------------------------------------------------------------------------
-- | Common interface of the finite fields used in this code base.
--
-- The arithmetic operators come from the 'Num' and 'Fractional' superclasses;
-- the methods below add constants, predicates and a few helpers on top.
class (Show a, Eq a, Num a, Fractional a) => Field a where
  -- | Number of elements of the field
  fieldSize     :: Proxy a -> Integer
  -- | The additive and the multiplicative unit
  zero, one     :: a
  -- | Tests for the additive resp. the multiplicative unit
  isZero, isOne :: a -> Bool
  -- | Squaring
  square        :: a -> a
  -- | Exponentiation with an arbitrary-size exponent
  power         :: a -> Integer -> a
  -- | Exponentiation with a machine-size exponent
  power_        :: a -> Int -> a
  -- | Produces a field element in 'IO' (the instances use 'randomIO')
  rndIO         :: IO a
-- | Multiplicative inverse; simply 'recip' under a more descriptive name.
inverse :: Field a => a -> a
inverse x = recip x
--------------------------------------------------------------------------------
-- | The Goldilocks base field: every method forwards to the functions of
-- the "Field.Goldilocks" module.
instance Field Goldi.F where
  fieldSize _proxy = Goldi.goldilocksPrime
  zero             = Goldi.zero
  one              = Goldi.one
  isZero x         = Goldi.isZero x
  isOne  x         = Goldi.isOne  x
  square x         = Goldi.sqr x
  power  x e       = Goldi.pow  x e
  power_ x e       = Goldi.pow_ x e
  rndIO            = randomIO
--------------------------------------------------------------------------------
-- | The quadratic extension of the Goldilocks field: every method forwards
-- to the functions of the "Field.Goldilocks.Extension" module.
instance Field GoldiExt.FExt where
  -- the extension has @p^2@ elements
  fieldSize _proxy = Goldi.goldilocksPrime * Goldi.goldilocksPrime
  zero             = GoldiExt.zero
  one              = GoldiExt.one
  isZero x         = GoldiExt.isZero x
  isOne  x         = GoldiExt.isOne  x
  square x         = GoldiExt.sqr x
  power  x e       = GoldiExt.pow  x e
  power_ x e       = GoldiExt.pow_ x e
  rndIO            = randomIO
--------------------------------------------------------------------------------

View File

@ -1,8 +1,14 @@
module Field.Goldilocks
( module Field.Goldilocks.Slow
)
where
{-# LANGUAGE CPP #-}
#ifdef USE_NAIVE_HASKELL
module Field.Goldilocks ( module Field.Goldilocks.Slow ) where
import Field.Goldilocks.Slow
#else
module Field.Goldilocks ( module Field.Goldilocks.Fast ) where
import Field.Goldilocks.Fast
#endif

View File

@ -17,6 +17,7 @@ import System.Random
import Data.Binary
import Field.Goldilocks ( F )
import qualified Field.Goldilocks as Goldi
--------------------------------------------------------------------------------
@ -58,6 +59,17 @@ instance Random F2 where
--------------------------------------------------------------------------------
-- | The constants 0, 1 and 2 of the extension: the corresponding base field
-- constant in the first coordinate and zero in the second one.
zero, one, two :: F2
zero = F2 Goldi.zero Goldi.zero
one  = F2 Goldi.one  Goldi.zero
two  = F2 Goldi.two  Goldi.zero

-- | Coordinate-wise tests against 'zero' resp. 'one'.
isZero, isOne :: F2 -> Bool
isZero (F2 re im) = and [ Goldi.isZero re , Goldi.isZero im ]
isOne  (F2 re im) = and [ Goldi.isOne  re , Goldi.isZero im ]
--------------------------------------------------------------------------------
-- | Embeds a base field element into the extension, with zero as the second
-- coordinate.
inj :: F -> F2
inj base = F2 base (fromInteger 0)

View File

@ -0,0 +1,153 @@
-- | Bindings to a C implementation of the Goldilocks prime field
{-# LANGUAGE ForeignFunctionInterface, BangPatterns, NumericUnderscores #-}
module Field.Goldilocks.Fast where
--------------------------------------------------------------------------------
import Prelude hiding ( div )
import qualified Prelude
import Data.Bits
import Data.Word
import Data.Ratio
import Foreign.C
import System.Random
import Data.Binary
import Data.Binary.Get ( getWord64le )
import Data.Binary.Put ( putWord64le )
import Text.Printf
--------------------------------------------------------------------------------
-- | Short alias for the field element type
type F = Goldilocks

-- | Unwraps the underlying machine word
fromF :: F -> Word64
fromF f = case f of
  MkGoldilocks w -> w

-- | Wraps a machine word as it is, without any reduction modulo the prime
unsafeToF :: Word64 -> F
unsafeToF w = MkGoldilocks w

-- | Converts a machine word, reducing it modulo the prime
toF :: Word64 -> F
toF w = mkGoldilocks (fromIntegral w)

-- | Converts an 'Int', reducing it modulo the prime
intToF :: Int -> F
intToF i = mkGoldilocks (fromIntegral i)
-- | Serialized as a single little-endian 64-bit word. Decoding goes through
-- 'toF', so the decoded word is reduced modulo the prime.
instance Binary F where
  put = putWord64le . fromF
  get = fmap toF getWord64le
--------------------------------------------------------------------------------
-- | An element of the Goldilocks field, stored in a single 64-bit word
newtype Goldilocks
  = MkGoldilocks Word64
  deriving Eq

-- | Shown as a zero-padded, 16 digit hexadecimal number with a @0x@ prefix
instance Show Goldilocks where
  show (MkGoldilocks w) = "0x" ++ printf "%016x" w
-- | The constants 0, 1 and 2
zero, one, two :: Goldilocks
zero = MkGoldilocks 0
one  = MkGoldilocks 1
two  = MkGoldilocks 2

-- | Test whether the stored word is 0 resp. 1
isZero, isOne :: Goldilocks -> Bool
isZero g = case g of { MkGoldilocks w -> w == 0 }
isOne  g = case g of { MkGoldilocks w -> w == 1 }
--------------------------------------------------------------------------------
-- | Ring operations, forwarded to the C-backed functions of this module.
-- Integer literals are reduced modulo the prime by 'mkGoldilocks'.
instance Num Goldilocks where
  fromInteger n = mkGoldilocks n
  negate x      = neg x
  x + y         = add x y
  x - y         = sub x y
  x * y         = mul x y
  -- there is no meaningful sign in a prime field:
  -- 'abs' is the identity and 'signum' is constant one
  abs x         = x
  signum _      = MkGoldilocks 1
-- | Field division. A rational literal is interpreted as the quotient of its
-- numerator and denominator, both taken as field elements.
instance Fractional Goldilocks where
  fromRational q = div (fromInteger (numerator q)) (fromInteger (denominator q))
  recip x        = inv x
  x / y          = div x y
-- | Random field elements: 'random' draws the underlying word from the range
-- @[0..p-1]@. Ranges of field elements are not supported.
instance Random Goldilocks where
  -- random :: RandomGen g => g -> (a, g)
  random gen = (MkGoldilocks word, gen')
    where
      (word, gen') = randomR (0, goldilocksPrimeWord64 - 1) gen
  randomR = error "randomR/Goldilocks: doesn't make much sense"
--------------------------------------------------------------------------------
-- | The Goldilocks prime, @p = 2^64 - 2^32 + 1@
goldilocksPrime :: Integer
goldilocksPrime = 0x_ffff_ffff_0000_0001

-- | The same prime as a 64-bit machine word
goldilocksPrimeWord64 :: Word64
goldilocksPrimeWord64 = 0x_ffff_ffff_0000_0001

-- | Reduces an integer into the range @[0..p-1]@
modp :: Integer -> Integer
modp n = n `mod` goldilocksPrime

-- | Builds a field element from an arbitrary integer, reducing it modulo @p@
mkGoldilocks :: Integer -> Goldilocks
mkGoldilocks n = MkGoldilocks (fromInteger (modp n))

-- | A fixed generator of the multiplicative subgroup of the field
theMultiplicativeGenerator :: Goldilocks
theMultiplicativeGenerator = mkGoldilocks 7
--------------------------------------------------------------------------------
-- Raw bindings to the C functions of the same names. They are imported as
-- pure functions on machine words, using the @unsafe@ calling convention.
-- The wrappers below apply them to the word stored inside 'MkGoldilocks'.
--
-- NOTE: the exponent of @goldilocks_pow@ is a 'CInt', not a Haskell 'Int'.
foreign import ccall unsafe "goldilocks_neg" c_goldilocks_neg :: Word64 -> Word64
foreign import ccall unsafe "goldilocks_add" c_goldilocks_add :: Word64 -> Word64 -> Word64
foreign import ccall unsafe "goldilocks_sub" c_goldilocks_sub :: Word64 -> Word64 -> Word64
foreign import ccall unsafe "goldilocks_sqr" c_goldilocks_sqr :: Word64 -> Word64
foreign import ccall unsafe "goldilocks_mul" c_goldilocks_mul :: Word64 -> Word64 -> Word64
foreign import ccall unsafe "goldilocks_inv" c_goldilocks_inv :: Word64 -> Word64
foreign import ccall unsafe "goldilocks_div" c_goldilocks_div :: Word64 -> Word64 -> Word64
foreign import ccall unsafe "goldilocks_pow" c_goldilocks_pow :: Word64 -> CInt -> Word64
-- | Additive inverse
neg :: Goldilocks -> Goldilocks
neg x = MkGoldilocks (c_goldilocks_neg (fromF x))

-- | Addition
add :: Goldilocks -> Goldilocks -> Goldilocks
add x y = MkGoldilocks (c_goldilocks_add (fromF x) (fromF y))

-- | Subtraction
sub :: Goldilocks -> Goldilocks -> Goldilocks
sub x y = MkGoldilocks (c_goldilocks_sub (fromF x) (fromF y))

-- | Squaring
sqr :: Goldilocks -> Goldilocks
sqr x = MkGoldilocks (c_goldilocks_sqr (fromF x))

-- | Multiplication
mul :: Goldilocks -> Goldilocks -> Goldilocks
mul x y = MkGoldilocks (c_goldilocks_mul (fromF x) (fromF y))

-- | Multiplicative inverse
inv :: Goldilocks -> Goldilocks
inv x = MkGoldilocks (c_goldilocks_inv (fromF x))

-- | Division
div :: Goldilocks -> Goldilocks -> Goldilocks
div x y = MkGoldilocks (c_goldilocks_div (fromF x) (fromF y))
--------------------------------------------------------------------------------
-- | Exponentiation with a machine-size exponent.
--
-- The C function takes its exponent as a C @int@ ('CInt'), which is
-- typically narrower than a Haskell 'Int'. Converting an exponent that does
-- not fit with 'fromIntegral' would silently wrap it around and compute a
-- different power, so such exponents are routed through the pure Haskell
-- 'pow' instead.
pow_ :: Goldilocks -> Int -> Goldilocks
pow_ g@(MkGoldilocks x) e
  | fitsIntoCInt = MkGoldilocks $ c_goldilocks_pow x (fromIntegral e :: CInt)
  | otherwise    = pow g (toInteger e)
  where
    e' = toInteger e
    -- the lower bound is strict on purpose: the C code negates negative
    -- exponents, which is not possible for the smallest C @int@
    fitsIntoCInt =  e' >  toInteger (minBound :: CInt)
                 && e' <= toInteger (maxBound :: CInt)
-- | Exponentiation with an arbitrary-size exponent, by repeated squaring.
--
-- A negative exponent is handled by inverting the base first.
pow :: Goldilocks -> Integer -> Goldilocks
pow base expo
  | expo == 0 = 1
  | expo <  0 = pow (inv base) (negate expo)
  | otherwise = loop 1 base expo
  where
    -- @acc@ collects the product, @sq@ runs through the repeated squares of
    -- the base, and @k@ holds the remaining bits of the exponent
    loop !acc _   0  = acc
    loop !acc !sq !k
      | k .&. 1 == 0 = loop acc        (sqr sq) (k `shiftR` 1)
      | otherwise    = loop (acc * sq) (sqr sq) (k `shiftR` 1)
--------------------------------------------------------------------------------

View File

@ -47,6 +47,15 @@ newtype Goldilocks
instance Show Goldilocks where
show (MkGoldilocks k) = printf "0x%016x" k
-- | The constants 0, 1 and 2, given directly by their stored words
zero, one, two :: Goldilocks
zero = MkGoldilocks 0
one = MkGoldilocks 1
two = MkGoldilocks 2
-- | Test whether the stored word is 0 resp. 1
isZero, isOne :: Goldilocks -> Bool
isZero (MkGoldilocks x) = x == 0
isOne (MkGoldilocks x) = x == 1
--------------------------------------------------------------------------------
instance Num Goldilocks where

View File

@ -0,0 +1,321 @@
-- | Property tests for rings and fields
{-# LANGUAGE ScopedTypeVariables, Rank2Types, TypeApplications, FlexibleInstances, ConstraintKinds #-}
module Field.Properties where
--------------------------------------------------------------------------------
import Data.Proxy
import Data.IORef
import Control.Monad
import System.IO
import System.Random
import Field.Class
--------------------------------------------------------------------------------
-- compatibility hacks
--
-- | There is no separate ring class here: 'Ring' is only another name for the
-- 'Field' constraint, used to mark the properties which are ring laws.
type Ring a = Field a
--------------------------------------------------------------------------------
-- | Runs all properties for the field selected by the proxy: first the ring
-- properties, then the field-only ones. A failure clears the flag in the
-- given 'IORef'; @n@ is the number of samples per property.
runFieldTests :: forall a. Field a => IORef Bool -> Int -> Proxy a -> IO ()
runFieldTests okflag n pxy =
  runRingTests okflag n pxy >> runFieldOnlyTests okflag n pxy
-- | Runs every entry of 'ringProps' on @n@ samples, drawing each argument
-- of a property with 'rndIO'.
runRingTests :: forall a. Ring a => IORef Bool -> Int -> Proxy a -> IO ()
runRingTests okflag n pxy = mapM_ runOne ringProps
  where
    sample = rndIO @a

    runOne :: RingProp -> IO Bool
    runOne (RingProp1 test name) = doTests okflag n name (test <$> sample)
    runOne (RingProp2 test name) = doTests okflag n name (test <$> sample <*> sample)
    runOne (RingProp3 test name) = doTests okflag n name (test <$> sample <*> sample <*> sample)
-- | Runs every entry of 'fieldOnlyProps' on @n@ samples. Field elements are
-- drawn with 'rndIO', exponents uniformly from @[-1000..1000]@.
runFieldOnlyTests :: forall a. Field a => IORef Bool -> Int -> Proxy a -> IO ()
runFieldOnlyTests okflag n pxy = mapM_ runOne fieldOnlyProps
  where
    sample   = rndIO @a
    exponent = randomRIO (-1000,1000::Int)

    runOne :: FieldProp -> IO Bool
    runOne (FieldProp1 test name) = doTests okflag n name (test <$> sample)
    runOne (FieldProp2 test name) = doTests okflag n name (test <$> sample <*> sample)
    runOne (FieldProp3 test name) = doTests okflag n name (test <$> sample <*> sample <*> sample)
    runOne (FieldPropE test name) = doTests okflag n name (test <$> sample <*> exponent)
--------------------------------------------------------------------------------
-- | Runs the given test action @n@ times and prints a one-line report under
-- the given name.
--
-- Returns whether all runs succeeded; on any failure the flag in the 'IORef'
-- is also set to 'False' (it is never set back to 'True').
doTests :: IORef Bool -> Int -> String -> IO Bool -> IO Bool
doTests okflag n name testAction = do
  putStr (label ++ padding)
  hFlush stdout
  results <- replicateM n testAction
  let failures = length (filter not results)
      passed   = and results
  if passed
    then putStrLn $ "ok (passed " ++ show n ++ " tests)"
    else do
      writeIORef okflag False
      putStrLn $ "FAILED!! (FAILED " ++ show failures ++ " tests!)"
  return passed
  where
    label   = " - " ++ name ++ "... "
    -- pads the label to 30 columns (no padding for longer labels)
    padding = replicate (30 - length label) ' '
--------------------------------------------------------------------------------
-- | A named ring law. The constructor tells how many field elements the law
-- takes; the law itself is polymorphic, so a single list of properties can be
-- run against any instance.
data RingProp
= RingProp1 (forall a. Ring a => a -> Bool ) String
| RingProp2 (forall a. Ring a => a -> a -> Bool ) String
| RingProp3 (forall a. Ring a => a -> a -> a -> Bool) String
-- | A named field-only law. 'FieldPropE' is for laws which take a field
-- element and an 'Int' exponent.
data FieldProp
= FieldProp1 (forall a. Field a => a -> Bool ) String
| FieldProp2 (forall a. Field a => a -> a -> Bool ) String
| FieldProp3 (forall a. Field a => a -> a -> a -> Bool) String
| FieldPropE (forall a. Field a => a -> Int -> Bool ) String
--------------------------------------------------------------------------------
-- | The ring laws to check, each paired with the name printed in the report.
ringProps :: [RingProp]
ringProps =
-- additive group
[ RingProp1 prop_add_left_unit "add left unit"
, RingProp1 prop_add_right_unit "add right unit"
, RingProp1 prop_add_left_inv "add left inv"
, RingProp1 prop_add_right_inv "add right inv"
, RingProp2 prop_add_commutative "add comm"
, RingProp3 prop_add_associative "add assoc"
, RingProp2 prop_sub_def "sub def"
, RingProp3 prop_add_sub_associative_1 "add-sub assoc /1"
, RingProp3 prop_add_sub_associative_2 "add-sub assoc /2"
, RingProp3 prop_add_sub_associative_3 "add-sub assoc /3"
-- predicates and equality
, RingProp1 prop_is_zero "is zero"
, RingProp1 prop_is_one "is one"
, RingProp1 prop_is_equal "is equal"
-- multiplication
, RingProp1 prop_mul_left_unit "mul left unit"
, RingProp1 prop_mul_right_unit "mul right unit"
, RingProp2 prop_mul_commutative "mul comm"
, RingProp3 prop_mul_associative "mul assoc"
, RingProp1 prop_square_def "square def"
, RingProp2 prop_square_distrib "square distributive"
-- distributivity
, RingProp3 prop_add_mul_left_distributive "add+mul left distr"
, RingProp3 prop_add_mul_right_distributive "add+mul right distr"
, RingProp3 prop_sub_mul_left_distributive "sub+mul left distr"
, RingProp3 prop_sub_mul_right_distributive "sub+mul right distr"
-- small powers
, RingProp1 prop_power_0 "0-th power"
, RingProp1 prop_power_1 "1-th power"
, RingProp1 prop_power_2 "2-th power"
, RingProp1 prop_power_3 "3-th power"
, RingProp1 prop_power_4 "4-th power"
, RingProp1 prop_power_5 "5-th power"
]
-- | The laws which need division or inversion, each paired with the name
-- printed in the report.
fieldOnlyProps :: [FieldProp]
fieldOnlyProps =
  [ FieldProp1 prop_mul_left_inv           "mul left inv"      -- label used to read "inf" by mistake
  , FieldProp1 prop_mul_right_inv          "mul right inv"     -- label used to read "inf" by mistake
  , FieldProp2 prop_div_def                "div def"
  , FieldProp1 prop_inv_def                "inv def"
  , FieldProp2 prop_div_test               "div defining prop."
  , FieldProp1 prop_inv_fermat             "inv == fermat"
  , FieldProp1 prop_fermat_1               "fermat/1"
  , FieldProp1 prop_fermat_2               "fermat/2"
  , FieldPropE prop_power_vs_power_        "power vs. power_"
  , FieldProp1 prop_power_neg              "negative power"
  , FieldProp3 prop_mul_div_associative_1  "mul-div assoc /1"
  , FieldProp3 prop_mul_div_associative_2  "mul-div assoc /2"
  , FieldProp3 prop_mul_div_associative_3  "mul-div assoc /3"
  -- , FieldProp3 prop_batch_inverse       "batch inverse"
  -- , FieldProp1 prop_frobenius           "frobenius == frobeniusNaive"
  ]
--------------------------------------------------------------------------------
-- * Ring properties
-- Laws of the additive group: 'zero' is a two-sided unit, 'negate' gives a
-- two-sided inverse, addition is commutative and associative.
prop_add_left_unit :: Ring a => a -> Bool
prop_add_left_unit x = zero + x == x
prop_add_right_unit :: Ring a => a -> Bool
prop_add_right_unit x = x + zero == x
prop_add_left_inv :: Ring a => a -> Bool
prop_add_left_inv x = (negate x) + x == zero
prop_add_right_inv :: Ring a => a -> Bool
prop_add_right_inv x = x + (negate x) == zero
prop_add_commutative :: Ring a => a -> a -> Bool
prop_add_commutative x y = (x + y == y + x)
prop_add_associative :: Ring a => a -> a -> a -> Bool
prop_add_associative x y z = ((x + y) + z) == (x + (y + z))
-- Subtraction is addition of the negated element...
prop_sub_def :: Ring a => a -> a -> Bool
prop_sub_def x y = (x + (negate y) == x - y)
-- ...and these are the ways of re-bracketing mixed sums and differences
prop_add_sub_associative_1 :: Ring a => a -> a -> a -> Bool
prop_add_sub_associative_1 x y z = ((x + y) - z) == (x + (y - z))
prop_add_sub_associative_2 :: Ring a => a -> a -> a -> Bool
prop_add_sub_associative_2 x y z = ((x - y) + z) == (x - (y - z))
prop_add_sub_associative_3 :: Ring a => a -> a -> a -> Bool
prop_add_sub_associative_3 x y z = ((x - y) - z) == (x - (y + z))
----------------------------------------
-- The predicates agree with comparison against the literals 0 and 1.
-- Note the precedence: @==@ binds tighter than @&&@, so the right-hand sides
-- read as @isZero zero && (isZero x == (x == 0))@, and similarly for one.
prop_is_zero :: forall a. Ring a => a -> Bool
prop_is_zero x = isZero (zero @a) && isZero x == (x == 0)
prop_is_one :: forall a. Ring a => a -> Bool
prop_is_one x = isOne (one @a) && isOne x == (x == 1)
-- Sanity checks of the 'Eq' instance, both for '==' and for '/='
prop_is_equal :: forall a. Ring a => a -> Bool
prop_is_equal x = and
[ zero == zero @a
, zero /= one @a
, one /= zero @a
, one == one @a
, x == x
, (x+1) /= x
, x /= (x+1)
]
----------------------------------------
-- Laws of multiplication: 'one' is a two-sided unit, multiplication is
-- commutative and associative.
prop_mul_left_unit :: Ring a => a -> Bool
prop_mul_left_unit x = (one * x == x)
prop_mul_right_unit :: Ring a => a -> Bool
prop_mul_right_unit x = (x * one == x)
prop_mul_commutative :: Ring a => a -> a -> Bool
prop_mul_commutative x y = (x * y == y * x)
prop_mul_associative :: Ring a => a -> a -> a -> Bool
prop_mul_associative x y z = ((x * y) * z) == (x * (y * z))
-- The dedicated 'square' agrees with plain multiplication...
prop_square_def :: Ring a => a -> Bool
prop_square_def x = (square x == x*x)
-- ...and satisfies the binomial identities for @(x+y)^2@ and @(x-y)^2@
prop_square_distrib :: Ring a => a -> a -> Bool
prop_square_distrib x y = (square (x+y) == square x + 2*x*y + square y)
&& (square (x-y) == square x - 2*x*y + square y)
----------------------------------------
-- Multiplication distributes over addition and over subtraction, from both
-- sides.
prop_add_mul_left_distributive :: Ring a => a -> a -> a -> Bool
prop_add_mul_left_distributive x y z = (x + y) * z == x*z + y*z
prop_add_mul_right_distributive :: Ring a => a -> a -> a -> Bool
prop_add_mul_right_distributive x y z = x * (y + z) == x*y + x*z
prop_sub_mul_left_distributive :: Ring a => a -> a -> a -> Bool
prop_sub_mul_left_distributive x y z = (x - y) * z == x*z - y*z
prop_sub_mul_right_distributive :: Ring a => a -> a -> a -> Bool
prop_sub_mul_right_distributive x y z = x * (y - z) == x*y - x*z
--------------------------------------------------------------------------------
-- 'power' with the exponents 0..5 agrees with repeated multiplication.
--
-- NOTE(review): for @x == 0@ this property expects @0^0 == 0@. The @pow@ of
-- "Field.Goldilocks.Fast" returns 1 for a zero exponent whatever the base is,
-- so the property would fail there if the sampler ever produced 0. Confirm
-- which convention is the intended one.
prop_power_0 :: Ring a => a -> Bool
prop_power_0 x = power x 0 == (if x == 0 then zero else one)
prop_power_1 :: Ring a => a -> Bool
prop_power_1 x = power x 1 == x
prop_power_2 :: Ring a => a -> Bool
prop_power_2 x = power x 2 == x *x
prop_power_3 :: Ring a => a -> Bool
prop_power_3 x = power x 3 == x*x*x
prop_power_4 :: Ring a => a -> Bool
prop_power_4 x = power x 4 == x*x*x*x
prop_power_5 :: Ring a => a -> Bool
prop_power_5 x = power x 5 == x*x*x*x*x
--------------------------------------------------------------------------------
-- * Field properties
-- 'inverse' gives a two-sided multiplicative inverse of nonzero elements
prop_mul_left_inv :: Field a => a -> Bool
prop_mul_left_inv x = isZero x || (inverse x) * x == one
prop_mul_right_inv :: Field a => a -> Bool
prop_mul_right_inv x = isZero x || x * (inverse x) == one
-- Division, inversion and multiplication agree with each other.
--
-- NOTE(review): only the properties with an explicit 'isZero' guard skip a
-- zero divisor. The others in this group also invert or divide by their
-- arguments when those are zero, so they rely on the sampler not producing 0
-- (or on the implementation defining that case) - confirm.
prop_div_def :: Field a => a -> a -> Bool
prop_div_def x y = (x * (inverse y) == x / y)
prop_inv_def :: Field a => a -> Bool
prop_inv_def x = (inverse x == 1 / x)
prop_div_test :: Field a => a -> a -> Bool
prop_div_test x y = isZero y || (x/y)*y == x
-- Fermat's little theorem, with @p@ being the number of elements of the
-- field: @x^(p-2)@ is the inverse, @x^p == x@ and @x^(p-1) == 1@
prop_inv_fermat :: forall a. Field a => a -> Bool
prop_inv_fermat x = (inverse x) == power x (p - 2) where p = fieldSize (Proxy @a)
prop_fermat_1 :: forall a. Field a => a -> Bool
prop_fermat_1 x = power x p == x where p = fieldSize (Proxy @a)
prop_fermat_2 :: forall a. Field a => a -> Bool
prop_fermat_2 x = power x (p - 1) == one where p = fieldSize (Proxy @a)
-- The two exponentiation functions agree (also for negative exponents, as
-- the test runner draws the exponent from a range around zero)
prop_power_vs_power_ :: forall a. Field a => a -> Int -> Bool
prop_power_vs_power_ x e = power x (fromIntegral e) == power_ x e
prop_power_neg :: forall a. Field a => a -> Bool
prop_power_neg x = power x (-1) == inverse x
-- The ways of re-bracketing mixed products and quotients
prop_mul_div_associative_1 :: Field a => a -> a -> a -> Bool
prop_mul_div_associative_1 x y z = ((x * y) / z) == (x * (y / z))
prop_mul_div_associative_2 :: Field a => a -> a -> a -> Bool
prop_mul_div_associative_2 x y z = ((x / y) * z) == (x / (y / z))
prop_mul_div_associative_3 :: Field a => a -> a -> a -> Bool
prop_mul_div_associative_3 x y z = ((x / y) / z) == (x / (y * z))
-- prop_batch_inverse :: Field a => a -> a -> a -> Bool
-- prop_batch_inverse x y z = any (==0) as || (map recip as == bs) where
-- as = [ x,y,z, x+y, y+z, z+x, x+y+z ]
-- bs = batchInverse as
-- prop_frobenius :: Field a => a -> Bool
-- prop_frobenius x = (frobenius x == frobeniusNaive x)
--------------------------------------------------------------------------------

View File

@ -0,0 +1,47 @@
module Field.Tests where
--------------------------------------------------------------------------------
import Control.Monad
import Data.Proxy
import Data.IORef
import Field.Class
import Field.Properties
import Field.Goldilocks ( F )
import Field.Goldilocks.Extension ( FExt )
--------------------------------------------------------------------------------
-- | How many random samples are used for each property
nn :: Int
nn = 1000
-- | Runs the tests of both fields (the second one also when the first one
-- has failed) and tells whether everything has passed.
runMyFieldTests :: IO Bool
runMyFieldTests =
  (&&) <$> runGoldilocksTests <*> runGoldilocksExtensionTests
--------------------------------------------------------------------------------
-- | Runs all field properties for the Goldilocks base field, and returns
-- whether all of them have passed.
runGoldilocksTests :: IO Bool
runGoldilocksTests = do
  mapM_ putStrLn
    [ "\nTests for the Goldilocks field:"
    , "==============================="
    ]
  okflag <- newIORef True
  runFieldTests okflag nn (Proxy :: Proxy F)
  readIORef okflag
-- | Runs all field properties for the quadratic extension of the Goldilocks
-- field, and returns whether all of them have passed.
runGoldilocksExtensionTests :: IO Bool
runGoldilocksExtensionTests = do
  mapM_ putStrLn
    [ "\nTests for the Goldilocks quadratic extension field:"
    , "==================================================="
    ]
  okflag <- newIORef True
  runFieldTests okflag nn (Proxy :: Proxy FExt)
  readIORef okflag
--------------------------------------------------------------------------------

View File

@ -0,0 +1,44 @@
{-# LANGUAGE StrictData, RecordWildCards #-}
module Outsource.Types where
--------------------------------------------------------------------------------
import FRI.Types
--------------------------------------------------------------------------------
-- | Configuration of the outsourcing protocol.
--
-- The type parameter is only there because in the proof, we don't want to
-- repeat the FRI configuration (which is already included in the FRI proof):
-- there the parameter is instantiated with the unit type instead.
--
-- It's a bit ugly, but hey, this is just a prototype anyway!
--
data OutsourceConfig' friconfig = MkOutsourceConfig
{ outsrcFriConfig :: friconfig -- ^ the FRI protocol configuration
, outsrcKeepParity :: Log2 -- ^ how much parity data to keep: Original data size times @2^(-k)@
}
deriving (Eq,Show)
-- | Configuration which carries the complete FRI configuration
type OutsourceConfigFull = OutsourceConfig' FriConfig

-- | Configuration with the FRI part left out
type OutsourceConfig_    = OutsourceConfig' ()

-- | The size of the rows (= number of columns in the data matrix)
outSrcNColumns :: OutsourceConfigFull -> Int
outSrcNColumns cfg = friNColumns (outsrcFriConfig cfg)
--------------------------------------------------------------------------------
-- | Proof that the outsourcing of the Reed-Solomon encoding is done correctly.
--
-- This is checked against the original data Merkle root and the RS-encoded
-- Merkle root.
--
-- Note that the derived 'Eq' and 'Show' instances need the same instances
-- for the type of every field.
data OutsourceProof = MkOutsourceProof
{ outsrcConfig :: OutsourceConfig' () -- ^ we don't want to repeat the FRI configuration...
, outsrcFriProof :: FriProof -- ^ ...which is already included in the FRI proof
, outsrcConnection :: ConnectionProof -- ^ connect the original data to the parity data
}
deriving (Eq,Show)
-- | Placeholder for the proof which connects the original data to the parity
-- data; it has no content yet.
--
-- 'Eq' and 'Show' are derived because this type is a field of
-- 'OutsourceProof', whose own derived instances require them.
data ConnectionProof = MkConnectionProof
  -- TODO
  deriving (Eq,Show)
--------------------------------------------------------------------------------

4
reference/src/cbits/compile.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/bash
# Compiles the C sources into object files (run it from this directory).

# stop at the first failing command, so that a failed compilation of the
# first file is not hidden by a successful compilation of the second one
set -e

gcc -c -O2 goldilocks.c
gcc -c -O2 monolith.c

View File

@ -0,0 +1,250 @@
#include <stdint.h>
#include <stdio.h> // for testing only
#include <assert.h>
#include "goldilocks.h"
//------------------------------------------------------------------------------
#define GOLDILOCKS_HALFPRIME_PLUS1 0x7fffffff80000001
//------------------------------------------------------------------------------
// *** Goldilocks field ***
// Tells whether `x` is below the prime: returns 1 if so, 0 otherwise
int goldilocks_isvalid(uint64_t x) {
  if (x < GOLDILOCKS_PRIME) { return 1; }
  return 0;
}
// Negation: `p - x`, except that zero is mapped to zero (instead of `p`)
uint64_t goldilocks_neg(uint64_t x) {
  if (x == 0) { return 0; }
  return GOLDILOCKS_PRIME - x;
}
// Addition: the prime is subtracted once when the sum either wrapped around
// 2^64 or is not below the prime
uint64_t goldilocks_add(uint64_t x, uint64_t y) {
  uint64_t sum    = x + y;
  int     wrapped = (sum < x);
  if ( (sum >= GOLDILOCKS_PRIME) || wrapped ) {
    sum -= GOLDILOCKS_PRIME;
  }
  return sum;
}
// Addition which only makes the result fit into 64 bits: the prime is
// subtracted only when the sum wrapped around 2^64, so the result can still
// be at or above the prime
uint64_t goldilocks_add_to_uint64(uint64_t x, uint64_t y) {
  uint64_t sum = x + y;
  if (sum < x) { sum -= GOLDILOCKS_PRIME; }
  return sum;
}
// Subtraction: the prime is added back when the difference wrapped around
// (which shows as the result being bigger than `x`)
uint64_t goldilocks_sub(uint64_t x, uint64_t y) {
  uint64_t diff = x - y;
  if (diff > x) { diff += GOLDILOCKS_PRIME; }
  return diff;
}
// Subtraction expressed as the addition of the negated second argument
uint64_t goldilocks_sub_safe(uint64_t x, uint64_t y) {
  uint64_t minus_y = goldilocks_neg(y);
  return goldilocks_add(x, minus_y);
}
//--------------------------------------
// Reduces a 128-bit value modulo the prime p = 2^64 - 2^32 + 1; the final
// conditional subtraction brings the result below `p`.
//
// The input is split as in the comment below, and then we use the congruences
//   2^64 = 2^32 - 1 (mod p)   and   2^96 = -1 (mod p)
uint64_t goldilocks_rdc(__uint128_t x) {
// x = n0 + 2^64 * n1 + 2^96 * n2
uint64_t n0 = (uint64_t)x;
uint64_t n1 = (x >> 64) & 0xffffffff;
uint64_t n2 = (x >> 96);
uint64_t mid = (n1 << 32) - n1; // (2^32 - 1) * n1
uint64_t tmp = n0 + mid;
// the sum wrapped around 2^64: compensate by subtracting `p` (modulo 2^64
// this is the same as adding 2^32 - 1, the residue of the lost 2^64)
if (tmp < n0) { tmp -= GOLDILOCKS_PRIME; }
uint64_t res = tmp - n2;
// the difference wrapped around: add `p` back
if (res > tmp) { res += GOLDILOCKS_PRIME; }
return (res >= GOLDILOCKS_PRIME) ? (res - GOLDILOCKS_PRIME) : res;
}
// Reduces a 128-bit value to a 64-bit one which is congruent to it modulo
// `p`, but which can still be bigger than `p`: the steps are the same as in
// goldilocks_rdc(), only the final conditional subtraction is left out.
uint64_t goldilocks_rdc_to_uint64(__uint128_t x) {
// x = n0 + 2^64 * n1 + 2^96 * n2
uint64_t n0 = (uint64_t)x;
uint64_t n1 = (x >> 64) & 0xffffffff;
uint64_t n2 = (x >> 96);
uint64_t mid = (n1 << 32) - n1; // (2^32 - 1) * n1
uint64_t tmp = n0 + mid;
// wrapped around 2^64: compensate by subtracting `p`
if (tmp < n0) { tmp -= GOLDILOCKS_PRIME; }
uint64_t res = tmp - n2;
// wrapped around below zero: add `p` back
if (res > tmp) { res += GOLDILOCKS_PRIME; }
return res;
}
// Reduction modulo `p` for small inputs: we assume x < 2^96, so that there
// is no 2^96 term to subtract. The assumption is NOT checked: everything
// above the low 64 bits is treated as the 2^64 coefficient `n1`.
uint64_t goldilocks_rdc_small(__uint128_t x) {
// x = n0 + 2^64 * n1
uint64_t n0 = (uint64_t)x;
uint64_t n1 = (x >> 64);
uint64_t mid = (n1 << 32) - n1; // (2^32 - 1) * n1
uint64_t tmp = n0 + mid;
// wrapped around 2^64: compensate by subtracting `p`
if (tmp < n0) { tmp -= GOLDILOCKS_PRIME; }
uint64_t res = tmp;
return (res >= GOLDILOCKS_PRIME) ? (res - GOLDILOCKS_PRIME) : res;
}
//--------------------------------------
// All of these compute a 128-bit product (optionally plus a further term),
// and then hand it over to one of the reduction functions.

// x*y, fully reduced
uint64_t goldilocks_mul(uint64_t x, uint64_t y) {
  const __uint128_t wide = (__uint128_t)x * (__uint128_t)y;
  return goldilocks_rdc(wide);
}

// x*y, reduced only to 64 bits
uint64_t goldilocks_mul_to_uint64(uint64_t x, uint64_t y) {
  const __uint128_t wide = (__uint128_t)x * (__uint128_t)y;
  return goldilocks_rdc_to_uint64(wide);
}

// x*y + z with a 128-bit `z`, fully reduced
uint64_t goldilocks_mul_add128(uint64_t x, uint64_t y, __uint128_t z) {
  const __uint128_t wide = (__uint128_t)x * (__uint128_t)y + z;
  return goldilocks_rdc(wide);
}

// x^2, fully reduced
uint64_t goldilocks_sqr(uint64_t x) {
  const __uint128_t wide = (__uint128_t)x * (__uint128_t)x;
  return goldilocks_rdc(wide);
}

// x^2 + y, fully reduced
uint64_t goldilocks_sqr_add(uint64_t x, uint64_t y) {
  const __uint128_t wide = (__uint128_t)x * x + y;
  return goldilocks_rdc(wide);
}

// x^2 + y, but only reduce to uint64, not to [0..p-1]
uint64_t goldilocks_sqr_add_to_uint64(uint64_t x, uint64_t y) {
  const __uint128_t wide = (__uint128_t)x * x + y;
  return goldilocks_rdc_to_uint64(wide);
}

// x*y for a 32-bit `y`; the product is below 2^96, as required by the
// "small" reduction
uint64_t goldilocks_mul_small(uint64_t x, uint32_t y) {
  const __uint128_t wide = (__uint128_t)x * (__uint128_t)y;
  return goldilocks_rdc_small(wide);
}
//------------------------------------------------------------------------------
// Binary (shift-and-subtract) variant of the extended Euclidean algorithm.
//
// The pairs (u,x) and (v,y) are transformed in lockstep: whenever `u` is
// halved or decreased by `v`, the same operation is done on `x` modulo the
// prime (and the same for `v` and `y`). When called as in goldilocks_div(),
// that is, with (x0,y0,u0,v0) = (a,0,b,p), this keeps the congruences
//   x*b = a*u   and   y*b = a*v   (mod p)
// so when `u` (resp. `v`) reaches 1, `x` (resp. `y`) is the quotient a/b.
//
// WARNING: the loops do not terminate if `u0` is zero (halving zero never
// makes it odd); the caller must exclude that case.
uint64_t goldilocks_euclid(uint64_t x0, uint64_t y0, uint64_t u0, uint64_t v0) {
uint64_t x = x0;
uint64_t y = y0;
uint64_t u = u0;
uint64_t v = v0;
while( ( (u!=1) && (v!=1) ) ) {
// make `u` odd, halving `x` modulo p along the way
while (!(u & 1ull)) {
u = u >> 1;
int odd = x & 1ull;
x = x >> 1;
// for odd x, the half modulo p is (x+p)/2 = (x>>1) + (p+1)/2
if (odd) { x += GOLDILOCKS_HALFPRIME_PLUS1; }
}
// the same for the pair (v,y)
while (!(v & 1ull)) {
v = v >> 1;
int odd = y & 1ull;
y = y >> 1;
if (odd) { y += GOLDILOCKS_HALFPRIME_PLUS1; }
}
// subtract the smaller of u,v from the bigger one (and do the same, modulo
// p, on x,y)
if (u < v) {
// u-v < 0, that is, u < v
v = v - u;
y = goldilocks_sub(y , x);
}
else {
// u-v >= 0, that is, u >= v
u = u - v;
x = goldilocks_sub(x , y);
}
}
if (u == 1) {
return x;
}
else {
return y;
}
}
// Division in the field: returns a/b modulo the prime.
//
// A divisor congruent to zero (that is, 0 or the prime itself) has no
// inverse, and goldilocks_euclid() would never terminate on it: its halving
// loop keeps shifting a zero. We return 0 in that case instead of hanging;
// callers which need to tell this case apart have to check the divisor
// themselves.
uint64_t goldilocks_div(uint64_t a, uint64_t b) {
  if ( (b == 0) || (b == GOLDILOCKS_PRIME) ) {
    return 0;
  }
  return goldilocks_euclid(a,0,b,GOLDILOCKS_PRIME);
}
// Multiplicative inverse, computed as the quotient 1/a
uint64_t goldilocks_inv(uint64_t a) {
  const uint64_t one = 1;
  return goldilocks_div(one, a);
}
//------------------------------------------------------------------------------
// Exponentiation by repeated squaring.
//
// A negative exponent means a power of the inverse of the base. A zero
// exponent gives 1 for every base.
uint64_t goldilocks_pow(uint64_t base, int expo) {
  // The magnitude of the exponent is computed in unsigned arithmetic:
  // negating the signed value would overflow for the smallest `int`.
  unsigned int e = (expo < 0) ? (0u - (unsigned int)expo) : (unsigned int)expo;
  uint64_t sq  = (expo < 0) ? goldilocks_inv(base) : base;
  uint64_t acc = 1;
  while (e != 0) {
    if ((e & 1u) != 0) {
      acc = goldilocks_mul( acc, sq );
    }
    e = e >> 1;
    // the next square is only needed if there are bits left
    if (e != 0) {
      sq = goldilocks_mul( sq , sq );
    }
  }
  return acc;
}
//==============================================================================
// *** debugging ***

// Prints a separator line, the message, and then the first `n` words of
// `state`, each both in hexadecimal and in decimal.
void debug_print_state(const char *msg, int n, uint64_t *state) {
  printf("-----------------\n");
  printf("%s\n",msg);
  for(int i=0;i<n;i++) {
    // `uint64_t` is not necessarily `unsigned long long`, so we convert
    // explicitly to the type which the format specifiers expect
    unsigned long long word = (unsigned long long)state[i];
    printf(" - 0x%016llx = %llu\n",word,word);
  }
}
//------------------------------------------------------------------------------
#define MASK 0x3fffffffffffffffULL

// Assembles a 64-bit word from 8 bytes stored in little-endian order.
// Reading byte by byte makes no assumption about the alignment of `p` or
// about the byte order of the machine.
static inline uint64_t goldilocks_load_le64(const uint8_t *p) {
  uint64_t w = 0;
  for(int i=7; i>=0; i--) {
    w = (w << 8) | (uint64_t)p[i];
  }
  return w;
}

// Splits 31 bytes (= 248 bits, read as a little-endian bit string) into four
// consecutive 62-bit chunks, one per output word.
//
// Reads exactly ptr[0..30] and writes felts[0..3].
void goldilocks_convert_31_bytes_to_4_field_elements(const uint8_t *ptr, uint64_t *felts) {
  const uint64_t q0  = goldilocks_load_le64(ptr     );   // bytes  0.. 7
  const uint64_t q7  = goldilocks_load_le64(ptr +  7);   // bytes  7..14
  const uint64_t q15 = goldilocks_load_le64(ptr + 15);   // bytes 15..22
  const uint64_t q23 = goldilocks_load_le64(ptr + 23);   // bytes 23..30

  // bits   0.. 61
  felts[0] = q0 & MASK;
  // bits  62..123: 58 bits from q7, plus the low 4 bits of byte 15
  felts[1] = (q7  >> 6) | ((uint64_t)(ptr[15] & 0x0f) << 58);
  // bits 124..185: 60 bits from q15, plus the low 2 bits of byte 23
  felts[2] = (q15 >> 4) | ((uint64_t)(ptr[23] & 0x03) << 60);
  // bits 186..247
  felts[3] = (q23 >> 2);
}
// Converts one block of input bytes into `rate` words: 31 bytes into 4 words
// for rate 4, and 2x31 bytes into 2x4 words for rate 8. Any other rate trips
// an assertion.
void goldilocks_convert_bytes_to_field_elements(int rate, const uint8_t *ptr, uint64_t *felts) {
  if (rate == 4) {
    goldilocks_convert_31_bytes_to_4_field_elements(ptr, felts);
  }
  else if (rate == 8) {
    goldilocks_convert_31_bytes_to_4_field_elements(ptr   , felts  );
    goldilocks_convert_31_bytes_to_4_field_elements(ptr+31, felts+4);
  }
  else {
    assert( 0 );
  }
}
//------------------------------------------------------------------------------

View File

@ -0,0 +1,39 @@
// Interface of the C implementation of the Goldilocks field.
//
// Field elements are passed around as `uint64_t` words.

#ifndef GOLDILOCKS_H
#define GOLDILOCKS_H

#include <stdint.h>

//------------------------------------------------------------------------------

// the prime p = 2^64 - 2^32 + 1
#define GOLDILOCKS_PRIME 0xffffffff00000001

//------------------------------------------------------------------------------
// basic field operations

int goldilocks_isvalid(uint64_t x);
uint64_t goldilocks_neg(uint64_t x);
uint64_t goldilocks_add(uint64_t x, uint64_t y);
uint64_t goldilocks_sub(uint64_t x, uint64_t y);
uint64_t goldilocks_sqr(uint64_t x);
uint64_t goldilocks_mul(uint64_t x, uint64_t y);
uint64_t goldilocks_mul_small(uint64_t x, uint32_t y);
uint64_t goldilocks_inv(uint64_t a);
uint64_t goldilocks_div(uint64_t a, uint64_t b);
uint64_t goldilocks_pow(uint64_t b, int e);

//------------------------------------------------------------------------------
// reductions of 128-bit values, and operations built on them
// (the `_to_uint64` versions only reduce to 64 bits, not below the prime)

uint64_t goldilocks_rdc (__uint128_t x);
uint64_t goldilocks_rdc_to_uint64(__uint128_t x);
uint64_t goldilocks_rdc_small (__uint128_t x);
uint64_t goldilocks_mul_to_uint64 (uint64_t x, uint64_t y);
uint64_t goldilocks_mul_add128 (uint64_t x, uint64_t y, __uint128_t z);
uint64_t goldilocks_sqr_add (uint64_t x, uint64_t y);
uint64_t goldilocks_sqr_add_to_uint64(uint64_t x, uint64_t y);

//------------------------------------------------------------------------------
// conversion of bytes to field elements

void goldilocks_convert_31_bytes_to_4_field_elements ( const uint8_t *ptr, uint64_t *felts );
void goldilocks_convert_bytes_to_field_elements ( int rate, const uint8_t *ptr, uint64_t *felts );

//------------------------------------------------------------------------------

#endif // GOLDILOCKS_H
//------------------------------------------------------------------------------

Binary file not shown.

View File

@ -0,0 +1,238 @@
#include <assert.h>
#include "goldilocks.h"
#include "monolith.h"
//==============================================================================
// *** Monolith hash ***
//
// compatible with <https://extgit.iaik.tugraz.at/krypto/zkfriendlyhashzoo>
//
/*
monolith test vector (permutation of [0..11])
---------------------------------------------
from <https://extgit.iaik.tugraz.at/krypto/zkfriendlyhashzoo/-/blob/master/plain_impls/src/monolith_64/monolith_64.rs?ref_type=heads#L653>
0x516dd661e959f541 = 5867581605548782913
0x082c137169707901 = 588867029099903233
0x53dff3fd9f0a5beb = 6043817495575026667
0x0b2ebaa261590650 = 805786589926590032
0x89aadb57e2969cb6 = 9919982299747097782
0x5d3d6905970259bd = 6718641691835914685
0x6e5ac1a4c0cfa0fe = 7951881005429661950
0xd674b7736abfc5ce = 15453177927755089358
0x0d8697e1cd9a235f = 974633365445157727
0x85fc4017c247136e = 9654662171963364206
0x572bafd76e511424 = 6281307445101925412
0xbec1638e28eae57f = 13745376999934453119
*/
//--------------------------------------
// ** sbox layer
// based on the reference implementation from
// <https://extgit.iaik.tugraz.at/krypto/zkfriendlyhashzoo>

// rotates each of the 8 bytes of the word to the left by one bit
static inline uint64_t monolith_rotl1_in_bytes(uint64_t v) {
  const uint64_t top_bits = 0x8080808080808080;
  return ((v & top_bits) >> 7) | ((v & ~top_bits) << 1);
}

// Applies the S-box to each of the 8 bytes of the word in parallel:
//
//   z = x ^ ( ~rot1(x) & rot2(x) & rot3(x) )   and then   result = rot1(z)
//
// where rotK is the rotation of each byte to the left by K bits. The
// rotations by 2 and 3 are computed by repeating the rotation by 1.
uint64_t goldilocks_monolith_single_bar(uint64_t x) {
  const uint64_t rot1  = monolith_rotl1_in_bytes(x);
  const uint64_t rot2  = monolith_rotl1_in_bytes(rot1);
  const uint64_t rot3  = monolith_rotl1_in_bytes(rot2);
  const uint64_t mixed = x ^ ((~rot1) & rot2 & rot3);
  return monolith_rotl1_in_bytes(mixed);
}
// the sbox-layer (note: it's only applied to the first 4 field elements,
// the rest of the state is left untouched!)
void goldilocks_monolith_bars(uint64_t *state) {
  uint64_t *elem = state;
  uint64_t *end  = state + 4;
  while (elem != end) {
    *elem = goldilocks_monolith_single_bar(*elem);
    elem++;
  }
}
//--------------------------------------
// ** nonlinear layer
// the nonlinear layer: state[i] <- state[i-1]^2 + state[i] for i = 11..1,
// while state[0] is left as it is
//
// remark: since the next layer is always the linear diffusion, it's enough
// to reduce to 64 bits, we don't have to reduce to [0..p-1].
// As in the linear layer we split into two 32 bit words anyway.
void goldilocks_monolith_bricks(uint64_t *state) {
// going downwards, so that state[i-1] is still the old value when it is used
for(int i=11; i>0; i--) state[i] = goldilocks_sqr_add_to_uint64( state[i-1] , state[i] );
}
//--------------------------------------
// ** fast diffusion layer
#include "monolith_conv_uint64.inc"
// The linear (diffusion) layer, without round constants.
//
// We split the input words into their low and high 32-bit halves, and do the
// circular convolution separately on the two halves. This is safe, because
// there is no overflow in the 64-bit words, and it should be much faster, as
// there are no modulo operations, just 64-bit machine word operations.
// Then we reconstruct the results and reduce them at the end.
//
// NOTE(review): the final reduction is the "small" one, which assumes an
// input below 2^96; this presumably relies on the convolution results of the
// high halves staying small enough - confirm against the convolution code.
void goldilocks_monolith_concrete(uint64_t *state) {
uint64_t lo[12];
uint64_t hi[12];
for(int i=0; i<12; i++) {
uint64_t x = state[i];
lo[i] = x & 0xffffffff;
hi[i] = x >> 32;
}
// in-place convolution: the same array is passed twice
uint64_circular_conv_12_with( lo , lo );
uint64_circular_conv_12_with( hi , hi );
for(int i=0; i<12; i++) {
// recombine as hi * 2^32 + lo, in 128 bits
__uint128_t x = (((__uint128_t)hi[i]) << 32) + lo[i];
state[i] = goldilocks_rdc_small(x);
}
}
// The linear diffusion ("concrete") layer with round constants: same as
// goldilocks_monolith_concrete, but rc[i] is added to the i-th recombined
// 128 bit value just before the final reduction. `rc` must point to 12 words.
void goldilocks_monolith_concrete_rc(uint64_t *state, const uint64_t *rc) {
  const uint64_t low_mask = 0xffffffff;
  uint64_t lo[12];
  uint64_t hi[12];

  // split every word into 32 bit halves
  for (int k = 0; k < 12; k++) {
    lo[k] = state[k] & low_mask;
    hi[k] = state[k] >> 32;
  }

  // in-place convolution of both halves
  uint64_circular_conv_12_with( lo , lo );
  uint64_circular_conv_12_with( hi , hi );

  for (int k = 0; k < 12; k++) {
    // hi*2^32 + lo + rc, accumulated in 128 bits, then reduced
    __uint128_t wide = (__uint128_t)hi[k];
    wide <<= 32;
    wide += lo[k];
    wide += rc[k];
    state[k] = goldilocks_rdc_small(wide);
  }
}
//--------------------------------------
// ** rounds
#include "monolith_constants.inc"
// One round with round constants: bars, then bricks, then the diffusion
// layer which also adds row `round_idx` of monolith_t12_round_constants.
// No range check is done on `round_idx` here.
void goldilocks_monolith_round(int round_idx, uint64_t *state) {
  const uint64_t *rc = monolith_t12_round_constants[round_idx];
  goldilocks_monolith_bars  (state);
  goldilocks_monolith_bricks(state);
  goldilocks_monolith_concrete_rc(state, rc);
}
// The full permutation on a 12-word state (in place):
// one initial diffusion layer, five rounds using round constants, and a
// closing round (bars, bricks, diffusion) that uses no round constants.
void goldilocks_monolith_permutation(uint64_t *state) {
  // initial layer
  goldilocks_monolith_concrete(state);

  // rounds 0..4, each adding its own row of round constants
  int rnd = 0;
  while (rnd < 5) {
    goldilocks_monolith_round(rnd, state);
    rnd++;
  }

  // closing round, no RC
  goldilocks_monolith_bars    (state);
  goldilocks_monolith_bricks  (state);
  goldilocks_monolith_concrete(state);
}
//------------------------------------------------------------------------------
// Keyed compression function: the inputs are two 4-element vectors of field
// elements (x and y) plus a key, the output is a vector of 4 field elements.
//
// The state is set up as [ x[0..3] | y[0..3] | key, 0, 0, 0 ], permuted, and
// the first 4 words of the result are copied into `out`.
void goldilocks_monolith_keyed_compress(const uint64_t *x, const uint64_t *y, uint64_t key, uint64_t *out) {
  uint64_t state[12];

  for (int k = 0; k < 4; k++) state[k]     = x[k];
  for (int k = 0; k < 4; k++) state[4 + k] = y[k];
  state[8]  = key;
  state[9]  = 0;
  state[10] = 0;
  state[11] = 0;

  goldilocks_monolith_permutation(state);

  for (int k = 0; k < 4; k++) out[k] = state[k];
}
// Unkeyed compression of two 4-element vectors into one 4-element vector:
// simply the keyed compression with the key fixed to zero.
void goldilocks_monolith_compress(const uint64_t *x, const uint64_t *y, uint64_t *out) {
  const uint64_t zero_key = 0;
  goldilocks_monolith_keyed_compress(x, y, zero_key, out);
}
//------------------------------------------------------------------------------
// Hash a sequence of field elements into a digest of 4 field elements.
//
//   rate  : number of state words absorbed per permutation call, 1..8
//   N     : number of input field elements, must be >= 0
//   input : the N input elements (not accessed when N == 0)
//   hash  : receives the 4-element digest
//
// The state starts as all zeros except state[8], which holds the domain
// separator (rate + 256*12 + 65536*63). Input is absorbed `rate` elements at
// a time by adding them (goldilocks_add) to state[0..rate-1] and permuting.
// The input is always followed by "10*" padding: a single 1 element and then
// zeros up to the end of the block, so there is always a final padded block,
// even when N is a multiple of `rate`.
void goldilocks_monolith_felts_digest(int rate, int N, const uint64_t *input, uint64_t *hash) {
  assert( (rate >= 1) && (rate <= 8) );
  assert( N >= 0 );     // a negative length would read outside of `input`

  uint64_t domsep = rate + 256*12 + 65536*63;
  uint64_t state[12];
  for(int i=0; i<12; i++) state[i] = 0;
  state[8] = domsep;

  // N = nfull*rate + ofs with 0 <= ofs < rate. Computing it with / and %
  // (instead of (N+rate)/rate and nchunks*rate - N) gives the same values
  // for every valid N, but cannot overflow `int` when N is close to INT_MAX.
  int nfull = N / rate;     // number of full, unpadded blocks
  int ofs   = N % rate;     // number of input elements in the last block

  const uint64_t *ptr = input;
  for(int k=0; k<nfull; k++) {
    for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] , ptr[j] ); }
    goldilocks_monolith_permutation( state );
    ptr += rate;
  }

  // the last block: remaining input, then the 1, then zeros
  uint64_t last[8];
  for(int i=0    ; i<ofs ; i++) last[i] = ptr[i];
  last[ofs] = 0x01;
  for(int i=ofs+1; i<rate; i++) last[i] = 0;

  for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] , last[j] ); }
  goldilocks_monolith_permutation( state );

  for(int j=0; j<4; j++) { hash[j] = state[j]; }
}
//--------------------------------------
// Hash a sequence of bytes into a digest of 4 field elements.
//
//   rate  : number of state words absorbed per permutation call, 4 or 8
//   N     : number of input bytes, must be >= 0
//   input : the N input bytes (not accessed when N == 0)
//   hash  : receives the 4-element digest
//
// The state starts as all zeros except state[8], which holds the domain
// separator (rate + 256*12 + 65536*8). The input is consumed in blocks of
// 31 bytes (rate 4) or 62 bytes (rate 8); each block is converted into `rate`
// field elements by goldilocks_convert_bytes_to_field_elements, which are
// then added (goldilocks_add) to state[0..rate-1] before permuting.
// The input is always followed by "10*" padding at the byte level: a single
// 0x01 byte and then zero bytes up to the end of the block, so there is
// always a final padded block, even when N is a multiple of the block size.
void goldilocks_monolith_bytes_digest(int rate, int N, const uint8_t *input, uint64_t *hash) {
  assert( (rate == 4) || (rate == 8) );
  assert( N >= 0 );     // a negative length would read outside of `input`

  uint64_t domsep = rate + 256*12 + 65536*8;
  uint64_t state[12];
  for(int i=0; i<12; i++) state[i] = 0;
  state[8] = domsep;

  uint64_t felts[8];
  int rate_in_bytes = 31 * (rate>>2);       // 31 or 62

  // N = nfull*rate_in_bytes + ofs with 0 <= ofs < rate_in_bytes. Computing
  // it with / and % gives the same values as the (N+r)/r formulation for
  // every valid N, but cannot overflow `int` when N is close to INT_MAX.
  int nfull = N / rate_in_bytes;            // number of full, unpadded blocks
  int ofs   = N % rate_in_bytes;            // number of input bytes in the last block

  const uint8_t *ptr = input;
  for(int k=0; k<nfull; k++) {
    goldilocks_convert_bytes_to_field_elements(rate, ptr, felts);
    for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] , felts[j] ); }
    goldilocks_monolith_permutation( state );
    ptr += rate_in_bytes;
  }

  // the last block: remaining input, then the 0x01 byte, then zeros
  uint8_t last[62];
  for(int i=0    ; i<ofs          ; i++) last[i] = ptr[i];
  last[ofs] = 0x01;
  for(int i=ofs+1; i<rate_in_bytes; i++) last[i] = 0;

  goldilocks_convert_bytes_to_field_elements(rate, last, felts);
  for(int j=0; j<rate; j++) { state[j] = goldilocks_add( state[j] ,felts[j] ); }
  goldilocks_monolith_permutation( state );

  for(int j=0; j<4; j++) { hash[j] = state[j]; }
}
//------------------------------------------------------------------------------

View File

@ -0,0 +1,12 @@
#include <stdint.h>
//------------------------------------------------------------------------------
// applies the permutation in place; `state` points to 12 words
void goldilocks_monolith_permutation (uint64_t *state);
// compresses two 4-element vectors `x`, `y` and a `key` into 4 elements written to `out`
void goldilocks_monolith_keyed_compress(const uint64_t *x, const uint64_t *y, uint64_t key, uint64_t *out);
// same as the keyed version, with the key set to 0
void goldilocks_monolith_compress (const uint64_t *x, const uint64_t *y, uint64_t *out);
// hashes `N` bytes into a 4-element digest; `rate` must be 4 or 8
void goldilocks_monolith_bytes_digest (int rate, int N, const uint8_t *input, uint64_t *hash);
// hashes `N` field elements into a 4-element digest; `rate` must be in 1..8
void goldilocks_monolith_felts_digest (int rate, int N, const uint64_t *input, uint64_t *hash);
//------------------------------------------------------------------------------

Binary file not shown.

View File

@ -0,0 +1,71 @@
#include <stdint.h>
// Round constants for the 12-word state: 5 rows (one per round that uses
// round constants) of 12 words each. Row r is the `rc` vector handed to
// goldilocks_monolith_concrete_rc by goldilocks_monolith_round(r, ...).
// Note: a few entries are written with 15 hex digits only (the leading zero
// digit is omitted), for example 0xe18a2d2242486fa.
const uint64_t monolith_t12_round_constants[5][12] =
{ { 0xbcaf2516e5926dcf
, 0x4ec5a76bce1e7676
, 0x9d804725bebb56ab
, 0x2ec05fca215a5be3
, 0xe16274e4acab86a0
, 0x80b0fddcc3c4380f
, 0xc87c769ad77ffece
, 0x37f85ec9117d287c
, 0x3b8d825b014c458d
, 0xb7a01d0cb850d75e
, 0x1333b751bac704bd
, 0x7b7ef14183d47b6f
}
, { 0x2114517643e3b286
, 0x542d15ea3cd12ade
, 0xe847d363f17a93e9
, 0x24f0421c6ff41c56
, 0x66e3eda93e2ca216
, 0xfb88d475279cb568
, 0x7f421c6269938a22
, 0xdbb973acce857401
, 0xe172409cb1563a6a
, 0x996f729f6340447d
, 0x925c579738b6fa4a
, 0x752e9ec9e0b34686
}
, { 0xdb419e0bd38469bd
, 0xba41cee828bd26d8
, 0xd6630f8f0969db39
, 0x2340e955ae2f0d94
, 0x282f553d35872e2e
, 0x77f7c3ff1ae496b3
, 0xf5f2efab64bc5eef
, 0x47b23a00830284f4
, 0xe18a2d2242486fa
, 0x3d101838a773dab0
, 0x47d686fd16856524
, 0x3eb2d254189b3534
}
, { 0xfe886e291ca8c5bd
, 0xb97ec74df1e4b0b6
, 0x574fdef3a600e370
, 0x8ad61c6f132d4feb
, 0x41e69ca4ecc7e8c7
, 0x151ad562e1f90ca4
, 0x747c051439a5603c
, 0x990151d3e52d502c
, 0x532c7f258282ea12
, 0x65e62cb34275dd5
, 0x5288008954f5d0b2
, 0xee7c3407cf3d6e02
}
, { 0xda07029808bad5de
, 0x7bebdf38dcc7a673
, 0x20a3f252688c312d
, 0x9c5248f7bbf8d188
, 0xcf1cf778994382d4
, 0x8c434b1738b8338c
, 0xfe504398813b67a8
, 0xe879562fdef813b9
, 0xd4666793b2a2f191
, 0xd9096b87de22de01
, 0xcaf4cea5f22abf34
, 0x3128d1e75d0204fa
}
};

View File

@ -0,0 +1,267 @@
//
// algorithms for circular convolution with the vector [7,8,21,22,6,7,9,10,13,26,8,23], using uint64_t arithmetic
// the idea is that we can split field elements into (lo + 2^32*hi)
// apply the convolution separately (it won't overflow)
// then combine and reduce
//
// based on the book:
//
// Nussbaumer: "Fast Fourier Transform and Convolution Algorithms"
//
/*
our coefficient vectors:
[7,8,21,22,6,7,9,10,13,26,8,23]
in CRT rectangle format:
+----------+
| 7 6 13 |
| 26 8 7 |
| 9 8 21 |
| 22 10 23 |
+----------+
*/
#include <stdint.h>
//------------------------------------------------------------------------------
// length-3 circular convolution with: b2 = { 64 , 32 , 64 }
// writing (x,y,z) for src[0..2]:
//   tgt[0] = 64*x + 64*y + 32*z
//   tgt[1] = 32*x + 64*y + 64*z
//   tgt[2] = 64*x + 32*y + 64*z
// all arithmetic wraps around modulo 2^64
void uint64_convolve_with_B2(uint64_t *src, uint64_t *tgt) {
  // 32 times each input; everything is read before tgt is written
  const uint64_t a = src[0] << 5;
  const uint64_t b = src[1] << 5;
  const uint64_t c = src[2] << 5;

  // 64*(x+y+z); each output is this total minus a single 32* term
  const uint64_t total = (a + b + c) << 1;

  tgt[0] = total - c;
  tgt[1] = total - a;
  tgt[2] = total - b;
}
// length-3 circular convolution with: b3 = { -32 , -4 , 4 }
// writing (x,y,z) for src[0..2]:
//   tgt[0] = -32*x +  4*y -  4*z
//   tgt[1] =  -4*x - 32*y +  4*z
//   tgt[2] =   4*x -  4*y - 32*z
// all arithmetic wraps around modulo 2^64
void uint64_convolve_with_B3(uint64_t *src, uint64_t *tgt) {
  // everything is read before tgt is written
  const uint64_t x = src[0];
  const uint64_t y = src[1];
  const uint64_t z = src[2];

  const uint64_t x4 = x << 2,  x32 = x << 5;
  const uint64_t y4 = y << 2,  y32 = y << 5;
  const uint64_t z4 = z << 2,  z32 = z << 5;

  tgt[0] = y4 - x32 - z4;
  tgt[1] = z4 - x4  - y32;
  tgt[2] = x4 - y4  - z32;
}
// length-3 circular convolution with: b4 = { -6 , 0 , 8 }
// writing (x,y,z) for src[0..2]:
//   tgt[0] = - 6*x + 8*y
//   tgt[1] = - 6*y + 8*z
//   tgt[2] =   8*x - 6*z
// all arithmetic wraps around modulo 2^64
void uint64_convolve_with_B4(uint64_t *src, uint64_t *tgt) {
  // everything is read before tgt is written
  const uint64_t x = src[0];
  const uint64_t y = src[1];
  const uint64_t z = src[2];

  // 6*v is computed as 8*v - 2*v
  const uint64_t six_x = (x << 3) - (x << 1);
  const uint64_t six_y = (y << 3) - (y << 1);
  const uint64_t six_z = (z << 3) - (z << 1);

  tgt[0] = (y << 3) - six_x;
  tgt[1] = (z << 3) - six_y;
  tgt[2] = (x << 3) - six_z;
}
// length-3 circular convolution with: b5 = { 2 , -4 , -24 }
// writing (x,y,z) for src[0..2]:
//   tgt[0] =   2*x - 24*y -  4*z
//   tgt[1] =  -4*x +  2*y - 24*z
//   tgt[2] = -24*x -  4*y +  2*z
// all arithmetic wraps around modulo 2^64
void uint64_convolve_with_B5(uint64_t *src, uint64_t *tgt) {
  // everything is read before tgt is written
  const uint64_t x = src[0];
  const uint64_t y = src[1];
  const uint64_t z = src[2];

  tgt[0] = (x << 1) - 24*y - (z << 2);
  tgt[1] = (y << 1) - 24*z - (x << 2);
  tgt[2] = (z << 1) - 24*x - (y << 2);
}
// length-3 circular convolution with: b6 = { -2 , -2 , -8 }
// writing (x,y,z) for src[0..2]:
//   tgt[0] = - ( 2*x + 8*y + 2*z )
//   tgt[1] = - ( 2*x + 2*y + 8*z )
//   tgt[2] = - ( 8*x + 2*y + 2*z )
// all arithmetic wraps around modulo 2^64
void uint64_convolve_with_B6(uint64_t *src, uint64_t *tgt) {
  // everything is read before tgt is written
  const uint64_t x = src[0];
  const uint64_t y = src[1];
  const uint64_t z = src[2];

  // each output is -2*( (x+y+z) + 3*v ) for one of the inputs v
  const uint64_t sum = x + y + z;

  tgt[0] = 0 - ( (sum + 3*y) << 1 );
  tgt[1] = 0 - ( (sum + 3*z) << 1 );
  tgt[2] = 0 - ( (sum + 3*x) << 1 );
}
//------------------------------------------------------------------------------
// Reference (quadratic) circular convolution of length n:
//   output[k] = sum over j of input[j] * coeffs[ (k-j) mod n ]
// with all arithmetic wrapping around modulo 2^64.
void uint64_naive_circular_conv( int n, uint64_t *input, uint64_t *coeffs, uint64_t *output ) {
  for (int out_idx = 0; out_idx < n; out_idx++) {
    uint64_t sum = 0;
    // coefficient index (out_idx - in_idx) mod n: starts at out_idx and
    // steps down cyclically as in_idx goes up
    int c = out_idx;
    for (int in_idx = 0; in_idx < n; in_idx++) {
      sum += input[in_idx] * coeffs[c];
      c = (c == 0) ? (n - 1) : (c - 1);
    }
    output[out_idx] = sum;
  }
}
//------------------------------------------------------------------------------
// elementwise sum of two 3-element vectors: zs = xs + ys (modulo 2^64)
void uint64_add_vec3(uint64_t *xs, uint64_t *ys, uint64_t *zs) {
  zs[0] = xs[0] + ys[0];
  zs[1] = xs[1] + ys[1];
  zs[2] = xs[2] + ys[2];
}
// elementwise difference of two 3-element vectors: zs = xs - ys (modulo 2^64)
void uint64_sub_vec3(uint64_t *xs, uint64_t *ys, uint64_t *zs) {
  zs[0] = xs[0] - ys[0];
  zs[1] = xs[1] - ys[1];
  zs[2] = xs[2] - ys[2];
}
//------------------------------------------------------------------------------
// Cyclic convolution of 12 terms via the Agarwal-Cooley algorithm,
// with the fixed vector [7,8,21,22,6,7,9,10,13,26,8,23].
//
// The input is copied into a local 4x3 rectangle before anything is written
// to `output`, so `output` may be the same array as `input`.
// All arithmetic wraps around modulo 2^64.
void uint64_circular_conv_12_with( uint64_t *input , uint64_t *output ) {

  // CRT re-indexing: element k goes to row (k mod 4), column (k mod 3)
  uint64_t rect[4][3];
  for (int k = 0; k < 12; k++) {
    rect[k % 4][k % 3] = input[k];
  }

  // pre-additions along the length-4 direction, column by column
  uint64_t sum_all[3], sum_alt[3], dif_02[3], dif_13[3], dif_sum[3];
  for (int c = 0; c < 3; c++) {
    const uint64_t even = rect[0][c] + rect[2][c];
    const uint64_t odd  = rect[1][c] + rect[3][c];
    sum_all[c] = even + odd;
    sum_alt[c] = even - odd;
    dif_02 [c] = rect[0][c] - rect[2][c];
    dif_13 [c] = rect[1][c] - rect[3][c];
    dif_sum[c] = dif_02[c] + dif_13[c];
  }

  // the five short (length-3) convolutions with fixed coefficient vectors
  uint64_t m0[3], m1[3], m2[3], m3[3], m4[3];
  uint64_convolve_with_B2( sum_all , m0 );
  uint64_convolve_with_B3( sum_alt , m1 );
  uint64_convolve_with_B4( dif_02  , m2 );
  uint64_convolve_with_B5( dif_13  , m3 );
  uint64_convolve_with_B6( dif_sum , m4 );

  // post-additions, written back into the rectangle.
  // NOTE(review): the final >> 2 presumably removes a factor of 4 that is
  // built into the B2..B6 coefficients -- confirm against the derivation.
  for (int c = 0; c < 3; c++) {
    const uint64_t u0 = m0[c] + m1[c];
    const uint64_t u1 = m0[c] - m1[c];
    const uint64_t v2 = 2 * (m4[c] - m3[c]);
    const uint64_t v3 = 2 * (m4[c] - m2[c]);
    rect[0][c] = ( u0 + v2 ) >> 2;
    rect[1][c] = ( u1 + v3 ) >> 2;
    rect[2][c] = ( u0 - v2 ) >> 2;
    rect[3][c] = ( u1 - v3 ) >> 2;
  }

  // undo the CRT re-indexing
  for (int k = 0; k < 12; k++) {
    output[k] = rect[k % 4][k % 3];
  }
}
//------------------------------------------------------------------------------
/*
void uint64_test_short_conv_with() {
printf("test short convolution algos for uint64\n");
uint64_t input [12];
uint64_t coeffs [12] = {7,8,21,22,6,7,9,10,13,26,8,23};
uint64_t output [12];
uint64_t reference[12];
// generate some "random-looking" numbers
uint64_t a=123459;
uint64_t b=789013;
for(int i=0;i<12;i++) {
uint64_t c = (a*b) ^ (a - 12345);
uint64_t d = (c*a) ^ (b + 67891);
input [i] = c & 0x0fffffff; // WE WANT NO OVERFLOW!
a = b + c + 1;
b = 3*a - 5*c + d - 3;
}
for(int i=0; i<12; i++) {
printf("x[%d] = %016llx ; h[%d] = %016llx\n" , i, input[i], i, coeffs[i] );
}
// -----------[ length = 12 ]-----------
printf("\n");
printf("length = 12\n");
uint64_naive_circular_conv ( 12, input, coeffs, reference );
uint64_circular_conv_12_with ( input, output );
for(int i=0; i<12; i++) {
printf("out[%d] = %016llx ; ref[%d] = %016llx\n" , i, output[i], i, reference[i] );
}
}
*/
//------------------------------------------------------------------------------