diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..85e7aa2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.DS_Store
+.ghc.environment*
+*.hi
+*.o
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..32c6f31
--- /dev/null
+++ b/README.md
@@ -0,0 +1,85 @@
+
+Goldilocks field emulation in `circom`
+--------------------------------------
+
+Experimenting with Goldilocks field emulation (the native field being the BN254 scalar field).
+
+The "Goldilocks" prime is `P = 2^64 - 2^32 + 1`, and the corresponding
+prime field is used for example by the [Plonky2](https://github.com/0xPolygonZero/plonky2/)
+proof system, which is our primary motivation here.
+
+We want to do computations in the Goldilocks field in a `circom` circuit 
+instantiated over say the BN254 curve's scalar field (a 254 bit prime field).
+
+
+### Basic approach
+
+We are in a relatively simple situation, as the field we want to emulate is
+much smaller than the native field we are working inside. In particular, 
+even the product of 3 Goldilocks elements fit inside a BN254 element without
+any danger of overflowing.
+
+The critical part is thus that we need range checks for the range `[0..P-1]`.
+
+To start with, we can do a simple bit-decomposition based range check for the range `[0..2^64-1]`.
+
+Then for example we could do another such range check for `x + 2^32 - 1` instead of `x`,
+and that would be sufficient; however, this doubles the number of range checks.
+
+Instead, we can exploit the special form the prime `P`: Observe, that in binary
+decomposition, `P - 1` looks like this:
+
+        P-1  =  2^64 - 2^32  =  0x FFFF FFFF FFFF FFFF 0000 0000 0000 0000 
+
+This means, that if we already know that `x` fits into 64 bits, then it's enough
+to check that IF the top 32 bits are all 1, THEN the bottom 32 bits are all 0
+(if the top bits are anything else, then obviously we are already in the desired range).
+
+This is relatively easy to do in `circom`.
+
+
+### Type safety in `circom`
+
+In recent versions of `circom`, we can use so called "buses" and "tags" to
+enforce invariants. 
+
+For example, we can define a Goldilocks "bus" (read: type);
+a safe casting function from arbitray signals into Goldilocks signal, which checks
+that the input is in the range `[0..P-1]`; and then write all operations such
+that they expect their inputs have Goldilocks type (that is, already checked),
+and they themselves enforce that their output is also reduced modulo P (and
+has the right type to show it).
+
+This approach (which is very standard in normal statically typed programming languages)
+makes using these sub-circuits _much_ safer.
+
+
+### Soundness testing
+
+We have the [`r1cs-solver`](https://github.com/faulhornlabs/r1cs-solver) soundness 
+testing tool at our disposal, which can, for any given concrete inputs, try to solve 
+the equations specialized to those inputs.
+
+In general this has a good chance of succeeding for small circuits, so we
+can actually _prove_ that the circuit behaves as expected, at least for _particular inputs_.
+
+Unfortunately, this tool currently only supports very small prime fields (for
+example, `P' = 65537`). The reason for this is mainly performance: Both performance
+of "normal" operations like addition and multiplication, and probably more importantly,
+we also need efficient square roots (as the tool solves systems of quadratic equations).
+With such a small field, we can simply store a table of square roots in memory. 
+With a ~256 bit field, this would be impossible, and a square root algorithm
+would be most probably way too slow.
+
+What we can still do though, is to make the operations we want to test generic over 
+[Solinas primes](https://en.wikipedia.org/wiki/Solinas_prime)
+of the form `P = 2^a - 2^b + 1` with `a > b > 0`.
+
+For example `2^8 - 2^4 + 1 = 241` is a prime. We can probably do 
+_exhaustive_ testing over this small field, checking the _soundness_ over
+every single possible inputs for the basic operations, which would give us
+a very high confidence that there are no bugs in the implementation.
+
+Furthermore, only inversion and division needs an actual prime modulus; 
+the rest of the operations can be tested with any modulus.
+
diff --git a/circuit/.gitignore b/circuit/.gitignore
new file mode 100644
index 0000000..d163863
--- /dev/null
+++ b/circuit/.gitignore
@@ -0,0 +1 @@
+build/
\ No newline at end of file
diff --git a/circuit/binary_compare.circom b/circuit/binary_compare.circom
new file mode 100644
index 0000000..464a3cc
--- /dev/null
+++ b/circuit/binary_compare.circom
@@ -0,0 +1,98 @@
+
+pragma circom 2.2.0;
+
+//------------------------------------------------------------------------------
+
+//
+// given two numbers in `n`-bit binary decomposition
+// (least significant bit first), we compute
+//
+//             /  -1   if   A <  B
+//   out  :=  {    0   if   A == B
+//             \  +1   if   A >  B
+//
+// NOTE: we don't check that the digits are indeed binary;
+//       that's the responsibility of the caller!
+//
+// This version uses `(3*n-1)` nonlinear constraints.
+// Question: can we do better? (note that this has to work with n >= 254 digits too!)
+//
+
+template BinaryCompare(n) {
+  signal input  A[n];
+  signal input  B[n];
+  signal output out;
+
+  signal eq[n];
+  signal aux[n];
+
+  signal jump[n+1];
+  jump[n] <== 1;
+
+  var sum = 0;
+  for(var k=n-1; k>=0; k--) {
+    var y = A[k] - B[k];
+    eq[k]   <== 1 - y*y;                      // (A[k] == B[k]) ? 1 : 0
+    jump[k] <== eq[k] * jump[k+1];            // this jumps from 1 to 0 at the highest inequal digit
+    aux[k]  <== (jump[k+1] - jump[k]) * y;    // where we store whether A was greater or less
+    sum += aux[k];
+  }
+
+  out <== sum;
+}
+
+//------------------------------------------------------------------------------
+
+//
+// given two numbers in `n`-bit binary decomposition (little-endian), we compute
+//
+//   out  :=  (A <= B) ? 1 : 0
+//
+// NOTE: we don't check that the digits are indeed binary;
+//       that's the responsibility of the caller!
+//
+
+template BinaryLessOrEqual(n) {
+  signal input  A[n];
+  signal input  B[n];
+  signal output out;
+
+  var phalf = 1/2;       // +1/2 as field element
+  var mhalf = -phalf;    // -1/2 as field element
+
+  component cmp = BinaryCompare(n);
+  cmp.A <== A;
+  cmp.B <== B;
+
+  var x = cmp.out;
+  out <== mhalf * (x-1) * (x+2);
+}
+
+//------------------------------------------------------------------------------
+
+//
+// given two numbers in `n`-bit binary decomposition (little-endian), we compute
+//
+//   out  :=  (A >= B) ? 1 : 0
+//
+// NOTE: we don't check that the digits are indeed binary;
+//       that's the responsibility of the caller!
+//
+
+template BinaryGreaterOrEqual(n) {
+  signal input  A[n];
+  signal input  B[n];
+  signal output out;
+
+  var phalf = 1/2;       // +1/2 as field element
+  var mhalf = -phalf;    // -1/2 as field element
+
+  component cmp = BinaryCompare(n);
+  cmp.A <== A;
+  cmp.B <== B;
+
+  var x = cmp.out;
+  out <== mhalf * (x+1) * (x-2);
+}
+
+//------------------------------------------------------------------------------
diff --git a/circuit/field_params.circom b/circuit/field_params.circom
new file mode 100644
index 0000000..8eef699
--- /dev/null
+++ b/circuit/field_params.circom
@@ -0,0 +1,29 @@
+
+//
+// for soundness testing we use the `r1cs-solver` testing framework, which 
+// currently uses the ambient prime 65537 (instead of BN254); with realistic
+// primes it would be too slow (needs fast square root for example) 
+//
+// thus, as the second best option, we want to test the soundness of the field 
+// emulation by testing it on the "tiny-goldilocks" prime `P = 2^8 - 2^4 + 1`
+//
+// hence we try and make all this "parametric" over the Solinas primes `P(a,b) := 2^b - 2^a + 1`
+//
+// unfortunately, circom does not support global constants, so we need
+// to do some hacking to hack around this limitation.
+//
+
+
+pragma circom 2.2.0;
+
+//------------------------------------------------------------------------------
+
+// function SolinasExpoBig()   { return 64; }
+// function SolinasExpoSmall() { return 32; }
+
+function SolinasExpoBig()   { return 8; }
+function SolinasExpoSmall() { return 4; }
+
+function FieldPrime()       { return (2**SolinasExpoBig() - 2**SolinasExpoSmall() + 1); }
+
+//------------------------------------------------------------------------------
diff --git a/circuit/goldilocks.circom b/circuit/goldilocks.circom
new file mode 100644
index 0000000..b65f783
--- /dev/null
+++ b/circuit/goldilocks.circom
@@ -0,0 +1,270 @@
+
+pragma circom 2.2.0;
+
+include "field_params.circom";
+include "goldilocks_func.circom";
+include "misc.circom";
+
+//------------------------------------------------------------------------------
+
+// a type for bit-decomposed numbers
+// the invariant to respect is that the value is in the range [0..2^n-1]
+//
+bus Binary(n) {
+  signal val;
+  signal bits[n];
+}
+
+// the type of Goldilocks field elements.
+// the invariant to respect is that the value is in the range [0..P-1]
+//
+bus Goldilocks() {
+  signal val;
+}
+
+//------------------------------------------------------------------------------
+
+// n-bit binary decomposition
+template ToBinary(n) {
+  signal input  inp;
+  output Binary(n) out;
+
+  component tobits = ToBits(n);
+  tobits.inp <== inp;
+  tobits.out ==> out.bits;
+  inp        ==> out.val;
+}
+
+//------------------------------------------------------------------------------
+
+//
+// do a range check for [0,P-1] where `P = 2^64 - 2^32 + 1`
+//
+
+template ToGoldilocks() {
+  signal input  inp;
+  output Goldilocks() out;
+
+  // first we do a range check and bit decomposition to 64 bits
+
+  var nbits = SolinasExpoBig();
+  Binary(nbits) bin <== ToBinary(nbits)( inp );
+
+  // compute the low and high 32 bit words
+
+  var sum_lo = 0;
+  for(var i=0; i < SolinasExpoSmall(); i++) {
+    sum_lo += (2**i) * bin.bits[i];
+  }
+
+  var expo_jump = SolinasExpoBig() - SolinasExpoSmall();
+
+  var sum_hi = 0;
+  for(var i=0; i < expo_jump; i++) {
+    sum_hi += (2**i) * bin.bits[ SolinasExpoSmall() + i ];
+  }
+
+  // now, since we have p-1 = 2^64 - 2^32 = 0xff...ff00...00
+  //
+  // we need to check that IF hi == 2^32-1 THEN lo == 0
+
+  signal iseq <== IsEqual()( sum_hi , 2**expo_jump - 1 );
+
+  iseq * sum_lo === 0;           // either (hi < 2^32-1) OR (lo == 0)
+
+  out.val <== bin.val;
+}
+
+
+//------------------------------------------------------------------------------
+
+//
+// negation in the Goldilocks field
+//
+
+template Neg() {
+  input  Goldilocks() A;
+  output Goldilocks() C;
+
+  signal isz <== IsZero()( A.val );
+
+  C.val <== (1 - isz) * (FieldPrime() - A.val);
+}
+
+//--------------------------------------
+
+//
+// addition in the Goldilocks field
+//
+
+template Add() {
+  input  Goldilocks() A,B;
+  output Goldilocks() C;
+
+  var P = FieldPrime();
+
+  // A + B = C + P * bit
+
+  var overflow = (A.val + B.val >= P);
+
+  signal quot <-- (overflow ? 1 : 0);
+  signal rem  <-- A.val + B.val - quot * P;
+
+  quot*(1-quot) === 0;                   // `quot` must be either zero or one
+  A.val + B.val === rem + P * quot;      // check the addition
+  C <== ToGoldilocks()(rem);             // range check on C
+}
+
+//
+// subtraction in the Goldilocks field
+//
+
+template Sub() {
+  input  Goldilocks() A,B;
+  output Goldilocks() C;
+
+  var P = FieldPrime();
+
+  // A - B = C - P * bit
+
+  var overflow = (A.val - B.val < 0);
+
+  signal aquot <-- (overflow ? 1 : 0);         // absolute value of the quotient
+  signal rem   <-- A.val - B.val + aquot * P;
+
+  aquot*(1-aquot) === 0;                       // `aquot` must be either zero or one
+  A.val - B.val === rem - P * aquot;           // check the subtraction
+  C <== ToGoldilocks()(rem);                   // range check on C
+}
+
+//------------------------------------------------------------------------------
+
+//
+// summation in the Goldilocks field
+//
+
+template Sum(k) {
+  input  Goldilocks() A[k];
+  output Goldilocks() C;
+
+  var P = FieldPrime();
+
+  // sum A[i] = C + P * Q
+  // since all A[i] < 2^64, Q will have at most as many bits as `k` have
+  // so we can do a simple binary range-check on Q
+
+  var sum = 0;
+  for(var i=0; i<k; i++) { sum += A[k].val; }
+  signal quot <-- sum \ k;                       // integer division, not field division!
+  signal rem  <-- sum - quot*P;
+
+  // check that quot is in the expected range
+  component bits = ToBits( CeilLog2(k) );
+  bits.inp <== quot;                             
+
+  sum === rem + P * quot;           // check the sum equation
+  C <== ToGoldilocks()(rem);        // range check on C
+}
+
+//------------------------------------------------------------------------------
+
+//
+// multiplication in the Goldilocks field
+//
+
+template Mul() {
+  input  Goldilocks() A,B;
+  output Goldilocks() C;
+
+  var P = FieldPrime();
+
+  // A * B = C + P * Q
+
+  var product = A.val * B.val;
+  signal quot <-- product \ P;
+  signal rem  <-- product % P;
+
+  // check that `quot` is in a 64 bit range!
+  component bits = ToBits( SolinasExpoBig() );
+  bits.inp <== quot;                             
+
+  A.val * B.val === rem + P * quot;     // check the multiplication equation
+  C <== ToGoldilocks()(rem);            // range check on C
+}
+
+
+//
+// multiplication of 3 Goldilocks field elements
+//
+
+template Mul3() {
+  input  Goldilocks() A,B,C;
+  output Goldilocks() D;
+
+  var P = FieldPrime();
+
+  // A * B * C = D + P * Q
+
+  var product = A.val * B.val * C.val;
+  signal quot <-- product \ P;
+  signal rem  <-- product % P;
+
+  // check that `quot` is in a 128 bit range!
+  component bits = ToBits( 2 * SolinasExpoBig() );
+  bits.inp <== quot;                             
+
+  A.val * B.val * C.val === rem + P * quot;     // check the multiplication equation
+  D <== ToGoldilocks()(rem);                    // range check on D
+}
+
+//------------------------------------------------------------------------------
+
+//
+// inversion in the Goldilocks field
+// (maybe this could be optimized a bit?)
+// 
+
+template Inv() {
+  input  Goldilocks() A;
+  output Goldilocks() C;
+
+  // guess the result, and range-check it
+  signal candidate <-- goldilocks_inv(A.val);
+  C <== ToGoldilocks()(candidate);
+
+  // is A = 0?
+  signal isz <== IsZero()(A.val);
+
+  // multiply back, and check the equation (but only when A != 0)
+  Goldilocks() AC <== Mul()( A , C );
+  (1 - isz) * (AC.val - 1) === 0;
+
+  // if A=0, we have to enforce C=0 too
+  isz * C.val === 0;
+}
+
+//
+// division in the Goldilocks field
+// (maybe this could be optimized a bit?)
+//
+
+template Div() {
+  input  Goldilocks() A,B;
+  output Goldilocks() C;
+
+  // guess the result, and range-check it
+  signal candidate <-- goldilocks_div(A.val, B.val);
+  C <== ToGoldilocks()(candidate);
+
+  // is B = 0?
+  signal isz <== IsZero()(B.val);
+ 
+  // multiply back, and check the equation (but only when B != 0)
+  Goldilocks() BC <== Mul()( B , C );
+  (1 - isz) * (BC.val - A.val) === 0;
+
+  // if B=0, we have to enforce C=0 too
+  isz * C.val === 0;
+}
+
+//------------------------------------------------------------------------------
diff --git a/circuit/goldilocks_func.circom b/circuit/goldilocks_func.circom
new file mode 100644
index 0000000..cf17566
--- /dev/null
+++ b/circuit/goldilocks_func.circom
@@ -0,0 +1,50 @@
+
+//
+// compile time functions calculating in the Goldilocks field
+//
+// all functions assume that the inputs are in the range [0..P-1]
+//
+
+pragma circom 2.2.0;
+
+include "field_params.circom";
+
+//------------------------------------------------------------------------------
+
+function goldilocks_neg(x) {
+  return ( (x > 0) ? (FieldPrime() - x) : 0 );
+}
+
+function goldilocks_add(x,y) {
+  return (x+y) % FieldPrime();
+}
+
+function goldilocks_sub(x,y) {
+  return (x-y+FieldPrime()) % FieldPrime();      // note: % behaves the wrong way in most mainstream languages
+}
+
+function goldilocks_mul(x,y) {
+  return (x*y) % FieldPrime();
+}
+
+function goldilocks_pow(x0,expo) {
+  var acc = 1;
+  var s = x0;
+  var e = expo;
+  while( e>0 ) {
+    acc = ((e & 1) == 1) ? goldilocks_mul(acc, s) : acc;
+    e = e >> 1;
+    s = goldilocks_mul(s,s);
+  }
+  return acc;
+}
+
+function goldilocks_inv(x) {
+  return goldilocks_pow( x , FieldPrime() - 2 );
+}
+
+function goldilocks_div(x,y ) {
+  return goldilocks_mul( x , goldilocks_inv(y) );
+}
+
+//------------------------------------------------------------------------------
diff --git a/circuit/misc.circom b/circuit/misc.circom
new file mode 100644
index 0000000..cce13b7
--- /dev/null
+++ b/circuit/misc.circom
@@ -0,0 +1,62 @@
+
+pragma circom 2.2.0;
+
+//------------------------------------------------------------------------------
+
+function FloorLog2(n) {
+  return (n==0) ? -1 : (1 + FloorLog2(n>>1));
+}
+
+function CeilLog2(n) {
+  return (n==0) ? 0 : (1 + FloorLog2(n-1));
+}
+
+//------------------------------------------------------------------------------
+// decompose an n-bit number into bits (least significant bit first)
+
+template ToBits(n) {
+  signal input  inp;
+  signal output out[n];
+
+  var sum = 0;
+  for(var i=0; i<n; i++) {
+    out[i] <-- (inp >> i) & 1;
+    out[i] * (1-out[i]) === 0;
+    sum += (1<<i) * out[i];
+  }
+
+  inp === sum;
+}
+
+//------------------------------------------------------------------------------
+// check equality to zero; that is, compute `(inp==0) ? 1 : 0`
+
+template IsZero() {
+  signal input  inp;
+  signal output out;
+
+  // guess the inverse
+  signal inv;
+  inv <-- (inp != 0) ? (1/inp) : 0 ;
+
+  // if `inp==0`, then by definition `out==1`
+  // if `out==0`, then the inverse must must exist, so `inp!=0`
+  out <== 1 - inp * inv;
+
+  // enfore that either `inp` or `out` must be zero
+  inp*out === 0;
+}
+
+//------------------------------------------------------------------------------
+// check equality of two field elements; that is, computes `(A==B) ? 1 : 0`
+
+template IsEqual() {
+  signal input  A,B;
+  signal output out;
+
+  component isz = IsZero();
+  isz.inp <== A - B;
+  isz.out ==> out;
+}
+
+//------------------------------------------------------------------------------
diff --git a/circuit/test_wrapper.circom b/circuit/test_wrapper.circom
new file mode 100644
index 0000000..7f3bd70
--- /dev/null
+++ b/circuit/test_wrapper.circom
@@ -0,0 +1,97 @@
+
+// wrappers around the Goldilocks templates, so that input and output are classic
+// signals, not Bus-es. The testing framework does not handle Bus inputs/outputs yet
+
+pragma circom 2.2.0;
+
+include "goldilocks.circom";
+
+//------------------------------------------------------------------------------
+
+template ToGoldilocksWrapper() {
+  signal input  inp;
+  signal output out;
+
+  Goldilocks() goldi <== ToGoldilocks()(inp);
+  goldi.val ==> out;
+}
+
+//------------------------------------------------------------------------------
+
+template NegWrapper() {
+  signal input  A;
+  signal output C;
+
+  Goldilocks() A1; A1.val <== A;
+  Goldilocks() C1; 
+
+  C1 <== Neg()( A1 );
+  C1.val ==> C;
+}
+
+template AddWrapper() {
+  signal input  A,B;
+  signal output C;
+
+  Goldilocks() A1; A1.val <== A;
+  Goldilocks() B1; B1.val <== B;
+  Goldilocks() C1; 
+
+  C1 <== Add()( A1 , B1 );
+  C1.val ==> C;
+
+//  log("A = ",A, " | B = ",B, " | C = ",C, " | expected = ", goldilocks_add(A,B));
+}
+
+template SubWrapper() {
+  signal input  A,B;
+  signal output C;
+
+  Goldilocks() A1; A1.val <== A;
+  Goldilocks() B1; B1.val <== B;
+  Goldilocks() C1; 
+
+  C1 <== Sub()( A1 , B1 );
+  C1.val ==> C;
+}
+
+//------------------------------------------------------------------------------
+
+template MulWrapper() {
+  signal input  A,B;
+  signal output C;
+
+  Goldilocks() A1; A1.val <== A;
+  Goldilocks() B1; B1.val <== B;
+  Goldilocks() C1; 
+
+  C1 <== Mul()( A1 , B1 );
+  C1.val ==> C;
+}
+
+template InvWrapper() {
+  signal input  A;
+  signal output C;
+
+  Goldilocks() A1; A1.val <== A;
+  Goldilocks() C1; 
+
+  C1 <== Inv()( A1 );
+  C1.val ==> C;
+
+  log("A = ",A, " | C = ",C, " | expected = ", goldilocks_inv(A));
+}
+
+template DivWrapper() {
+  signal input  A,B;
+  signal output C;
+
+  Goldilocks() A1; A1.val <== A;
+  Goldilocks() B1; B1.val <== B;
+  Goldilocks() C1; 
+
+  C1 <== Div()( A1 , B1 );
+  C1.val ==> C;
+}
+
+//------------------------------------------------------------------------------
diff --git a/circuit/testmain.circom b/circuit/testmain.circom
new file mode 100644
index 0000000..12b494b
--- /dev/null
+++ b/circuit/testmain.circom
@@ -0,0 +1,8 @@
+
+pragma circom 2.2.0;
+
+include "test_wrapper.circom";
+
+//component main { public [A,B] } = AddWrapper();
+
+component main { public [A] } = InvWrapper();
diff --git a/tests/Common.hs b/tests/Common.hs
new file mode 100644
index 0000000..4c271de
--- /dev/null
+++ b/tests/Common.hs
@@ -0,0 +1,19 @@
+
+module Common
+  ( module R1CS
+  , module System.FilePath
+  , circuitSourceDir
+  )
+  where 
+
+--------------------------------------------------------------------------------
+
+import System.FilePath
+import R1CS
+
+--------------------------------------------------------------------------------
+
+circuitSourceDir :: FilePath
+circuitSourceDir = "../circuit/"
+
+--------------------------------------------------------------------------------
diff --git a/tests/Main b/tests/Main
new file mode 100755
index 0000000..9665eb4
Binary files /dev/null and b/tests/Main differ
diff --git a/tests/Main.hs b/tests/Main.hs
new file mode 100644
index 0000000..506aba0
--- /dev/null
+++ b/tests/Main.hs
@@ -0,0 +1,38 @@
+
+-- | Testing the soundness of the Goldilock field emulation templates
+--
+
+module Main where
+
+--------------------------------------------------------------------------------
+
+import R1CS
+
+import TestGoldilocks
+
+--------------------------------------------------------------------------------
+
+testGoldilocks :: IO ()
+testGoldilocks = testGoldilocks' Field24 Silent
+
+testGoldilocks' :: FieldChoice -> Verbosity -> IO ()
+testGoldilocks' fld verbosity = runWithField fld $ \pxy -> do
+
+  let runSpec     what = testSemantics     pxy what verbosity
+  let runSpecMany what = testSemanticsMany pxy what verbosity
+
+  runSpec   specIsZero
+  runSpec   specToGoldi
+  runSpec $ specUnary  Neg semantics_neg
+  runSpec $ specBinary Add semantics_add
+  runSpec $ specBinary Sub semantics_sub
+  runSpec $ specUnary  Inv semantics_inv
+
+  -- these are very slow so we don't do exhaustive testing
+  runSpec $ specBinarySmall Mul semantics_mul
+  runSpec $ specBinarySmall Div semantics_div
+
+--------------------------------------------------------------------------------
+
+main = do
+  testGoldilocks
\ No newline at end of file
diff --git a/tests/Semantics.hs b/tests/Semantics.hs
new file mode 100644
index 0000000..743cd16
--- /dev/null
+++ b/tests/Semantics.hs
@@ -0,0 +1,56 @@
+
+module Semantics where
+
+--------------------------------------------------------------------------------
+
+import Prelude hiding (div)
+
+--------------------------------------------------------------------------------
+
+type F = Int
+
+fieldPrime :: Int
+fieldPrime = 2^8 - 2^4 + 1
+
+modp :: Int -> F
+modp x = mod x fieldPrime
+
+--------------------------------------------------------------------------------
+
+neg :: F -> F
+neg 0 = 0
+neg x = fieldPrime - x
+
+add :: F -> F -> F
+add x y = modp (x + y)
+
+sub :: F -> F -> F
+sub x y = modp (x - y)
+
+mul :: F -> F -> F
+mul x y = modp (x * y)
+
+sqr :: F -> F 
+sqr x = mul x x
+
+mul3 :: F -> F -> F -> F
+mul3 x y z = mul (mul x y) z
+
+pow :: F -> Int -> F
+pow x0 expo
+  | expo < 0  = error "pow: negative exponent"
+  | expo == 0 = 1
+  | otherwise = go 1 x0 expo
+  where
+    go acc s 0 = acc
+    go acc s e = case divMod e 2 of
+      (e', 0) -> go      acc    (sqr s) e'
+      (e' ,1) -> go (mul acc s) (sqr s) e'
+
+inv :: F -> F
+inv x = pow x (fieldPrime - 2)      
+
+div :: F -> F -> F
+div x y = mul x (inv y)
+
+--------------------------------------------------------------------------------
diff --git a/tests/TestGoldilocks.hs b/tests/TestGoldilocks.hs
new file mode 100644
index 0000000..5fc95f5
--- /dev/null
+++ b/tests/TestGoldilocks.hs
@@ -0,0 +1,158 @@
+
+
+module TestGoldilocks where
+
+--------------------------------------------------------------------------------
+
+import Semantics
+
+import Common
+
+--------------------------------------------------------------------------------
+-- global parameters
+
+circomFile :: FilePath
+circomFile = circuitSourceDir </> "test_wrapper.circom"
+
+data Op
+  = Neg
+  | Add
+  | Sub
+  | Mul
+  | Inv
+  | Div
+  deriving (Eq,Show,Bounded,Enum)
+
+enumerateOps :: [Op]
+enumerateOps = enumFromTo minBound maxBound
+
+----------------------------------------
+
+mainComponent :: Op -> MainComponent
+mainComponent op = 
+  case op of
+    Neg -> unary  "Neg"
+    Add -> binary "Add"
+    Sub -> binary "Sub"
+    Mul -> binary "Mul"
+    Inv -> unary  "Inv"
+    Div -> binary "Div"
+  where
+
+    unary name = MainComponent 
+      { _templateName   = name ++ "Wrapper"
+      , _templateParams = []
+      , _publicInputs   = ["A"]
+      }
+
+    binary name = MainComponent 
+      { _templateName   = name ++ "Wrapper"
+      , _templateParams = []
+      , _publicInputs   = ["A","B"]
+      }
+
+--------------------------------------------------------------------------------
+-- test cases and expected semantics
+
+type TestCase1 = Int
+type TestCase2 = (Int,Int)
+
+type Output = Int
+
+testCasesUnary :: [TestCase1]
+testCasesUnary = [0..fieldPrime-1]
+
+testCasesBinary :: [TestCase2]
+testCasesBinary = [ (a,b) | a<-testCasesUnary , b<-testCasesUnary ]
+
+-- | Multiplication and division is too slow to test exhaustively
+testCasesBinarySmall :: [TestCase2]
+testCasesBinarySmall = [ (a,b) | a<-testset, b<-testset ] where
+  testset = [0..17] ++ [63,64,65] ++ [127,128,129] ++ [191,192,193] ++ [fieldPrime-18..fieldPrime-1]
+
+----------------------------------------
+
+semantics_neg :: F -> Expected F
+semantics_neg x = Expecting $ Semantics.neg x
+
+semantics_add :: (F,F) -> Expected F
+semantics_add (x,y) = Expecting $ Semantics.add x y
+
+semantics_sub :: (F,F) -> Expected F
+semantics_sub (x,y) = Expecting $ Semantics.sub x y
+
+semantics_mul :: (F,F) -> Expected F
+semantics_mul (x,y) = Expecting $ Semantics.mul x y
+
+semantics_inv :: F -> Expected F
+semantics_inv x = Expecting $ Semantics.inv x
+
+semantics_div :: (F,F) -> Expected F
+semantics_div (x,y) = Expecting $ Semantics.div x y
+
+--------------------------------------------------------------------------------
+-- inputs and outputs
+
+inputsA :: TestCase1 -> Inputs Name Integer
+inputsA a = Inputs $  toMapping "A" a
+                    
+inputsAB :: TestCase2 -> Inputs Name Integer
+inputsAB (a,b) = Inputs $  toMapping "A" a
+                        <> toMapping "B" b
+
+outputsC :: Output -> Outputs Name Integer
+outputsC y = Outputs $ toMapping "C" y
+
+--------------------------------------------------------------------------------
+
+specUnary :: Op -> (F -> Expected F) -> TestSpec TestCase1 Output
+specUnary op semantics = TestSpec circomFile (mainComponent op) inputsA outputsC semantics testCasesUnary
+
+specBinary :: Op -> ((F,F) -> Expected F) -> TestSpec TestCase2 Output
+specBinary op semantics = TestSpec circomFile (mainComponent op) inputsAB outputsC semantics testCasesBinary
+
+specBinarySmall :: Op -> ((F,F) -> Expected F) -> TestSpec TestCase2 Output
+specBinarySmall op semantics = TestSpec circomFile (mainComponent op) inputsAB outputsC semantics testCasesBinarySmall
+
+--------------------------------------------------------------------------------
+
+input :: TestCase1 -> Inputs Name Integer
+input x = Inputs $  toMapping "inp" x
+
+output :: Output -> Outputs Name Integer
+output y = Outputs $ toMapping "out" y
+
+
+semantics_toGoldi :: Int -> Expected Int
+semantics_toGoldi k
+  | k < 0            = ShouldFail
+  | k >= fieldPrime  = ShouldFail
+  | otherwise        = Expecting k
+
+main_toGoldi :: MainComponent
+main_toGoldi = MainComponent 
+  { _templateName   = "ToGoldilocksWrapper"
+  , _templateParams = []
+  , _publicInputs   = ["inp"]
+  }
+
+specToGoldi :: TestSpec TestCase1 Output
+specToGoldi = TestSpec circomFile main_toGoldi input output semantics_toGoldi [-10..300] 
+
+--------------------------------------------------------------------------------
+
+semantics_isZero :: Int -> Expected Int
+semantics_isZero k = Expecting (if k == 0 then 1 else 0)
+
+main_isZero :: MainComponent
+main_isZero = MainComponent 
+  { _templateName   = "IsZero"
+  , _templateParams = []
+  , _publicInputs   = ["inp"]
+  }
+
+specIsZero :: TestSpec Int Int
+specIsZero = TestSpec circomFile main_isZero input output semantics_isZero [-50..300] 
+
+--------------------------------------------------------------------------------
+