diff --git a/nim.cfg b/nim.cfg
new file mode 100644
index 0000000..45d538b
--- /dev/null
+++ b/nim.cfg
@@ -0,0 +1 @@
+nimcache = "build/nimcache/$projectName"
diff --git a/stint.nimble b/stint.nimble
index 1ed3a39..ce4954e 100644
--- a/stint.nimble
+++ b/stint.nimble
@@ -10,24 +10,42 @@ skipDirs      = @["tests", "benchmarks"]
 requires "nim >= 1.6.12",
          "stew"
 
-proc test(args, path: string) =
-  if not dirExists "build":
-    mkDir "build"
+let nimc = getEnv("NIMC", "nim") # Which nim compiler to use
+let lang = getEnv("NIMLANG", "c") # Which backend (c/cpp/js)
+let flags = getEnv("NIMFLAGS", "") # Extra flags for the compiler
+let verbose = getEnv("V", "") notin ["", "0"]
 
-  exec "nim " & getEnv("TEST_LANG", "c") & " " & getEnv("NIMFLAGS") & " " & args &
-    " --outdir:build -r --hints:off --warnings:off --skipParentCfg" &
-    " --styleCheck:usages --styleCheck:error " & path
+from os import quoteShell
+
+let cfg =
+  " --styleCheck:usages --styleCheck:error" &
+  (if verbose: "" else: " --verbosity:0 --hints:off") &
+  " --skipParentCfg --skipUserCfg --outdir:build " &
+  quoteShell("--nimcache:build/nimcache/$projectName")
+
+
+proc build(args, path: string) =
+  exec nimc & " " & lang & " " & cfg & " " & flags & " " & args & " " & path
+
+proc run(args, path: string) =
+  build args & " -r", path
   if (NimMajor, NimMinor) > (1, 6):
-    exec "nim " & getEnv("TEST_LANG", "c") & " " & getEnv("NIMFLAGS") & " " & args &
-      " --outdir:build -r --mm:refc --hints:off --warnings:off --skipParentCfg" &
-      " --styleCheck:usages --styleCheck:error " & path
+    build args & " --mm:refc -r", path
+
+proc test(path: string) =
+  for config in ["", "-d:stintNoIntrinsics"]:
+    for mode in ["-d:debug", "-d:release"]:
+      run(config & " " & mode, path)
 
 task test_internal, "Run tests for internal procs":
-  test "", "tests/internal"
+  test "tests/internal"
 
 task test_public_api, "Run all tests - prod implementation (StUint[64] = uint64":
-  test "", "tests/all_tests"
+  test "tests/all_tests"
 
 task test, "Run all tests":
-  exec "nimble test_internal"
-  exec "nimble test_public_api"
+  test "tests/internal"
+  test "tests/all_tests"
+
+  # Smoke test: check that the tests compile for wasm32 (-c: compile only)
+  build "--cpu:wasm32 -c", "tests/all_tests"
diff --git a/stint/config.nim b/stint/config.nim
new file mode 100644
index 0000000..5d08f0a
--- /dev/null
+++ b/stint/config.nim
@@ -0,0 +1,14 @@
+# Stint
+# Copyright 2023 Status Research & Development GmbH
+# Licensed under either of
+#
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
+#
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+const
+  stintNoIntrinsics* {.booldefine.} = false
+    ## Use only native Nim code without intrinsics, emit or asm - useful for
+    ## targets such as wasm, for compilers with no native int128 support, and
+    ## for the Nim compile-time VM.
diff --git a/stint/private/datatypes.nim b/stint/private/datatypes.nim
index fcbaa80..0cb0298 100644
--- a/stint/private/datatypes.nim
+++ b/stint/private/datatypes.nim
@@ -53,6 +53,7 @@ const X86* = defined(amd64) or defined(i386)
 when sizeof(int) == 8 and GCC_Compatible:
   type
     uint128*{.importc: "unsigned __int128".} = object
+      hi, lo: uint64
 
 # Accessors
 # --------------------------------------------------------
diff --git a/stint/private/primitives/addcarry_subborrow.nim b/stint/private/primitives/addcarry_subborrow.nim
index 40c48d3..0f9c807 100644
--- a/stint/private/primitives/addcarry_subborrow.nim
+++ b/stint/private/primitives/addcarry_subborrow.nim
@@ -7,7 +7,7 @@
 #
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import ../datatypes, ./compiletime_fallback
+import ../../config, ../datatypes, ./compiletime_fallback
 
 # ############################################################
 #
@@ -84,7 +84,11 @@ import ../datatypes, ./compiletime_fallback
 # Note: GCC before 2017 had incorrect codegen in some cases:
 # - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81300
 
-when X86:
+const
+  useIntrinsics = X86 and not stintNoIntrinsics
+  useInt128 = GCC_Compatible and sizeof(int) == 8 and not stintNoIntrinsics
+
+when useIntrinsics:
   when defined(windows):
     {.pragma: intrinsics, header:"<intrin.h>", nodecl.}
   else:
@@ -105,37 +109,35 @@ when X86:
 func addC*(cOut: var Carry, sum: var uint32, a, b: uint32, cIn: Carry) {.inline.} =
   ## Addition with carry
   ## (CarryOut, Sum) <- a + b + CarryIn
-  when nimvm:
+  template native =
     let dblPrec = uint64(cIn) + uint64(a) + uint64(b)
     sum = uint32(dblPrec and uint32.high)
     cOut = Carry(dblPrec shr 32)
+
+  when nimvm:
+    native
   else:
-    when X86:
+    when useIntrinsics:
       cOut = addcarry_u32(cIn, a, b, sum)
     else:
-      # on arch e.g. arm: nim will complaints "Error: redefinition of 'dblPrec'"
-      # so we use dlbPrec2 here
-      let dblPrec2 = uint64(cIn) + uint64(a) + uint64(b)
-      sum = uint32(dblPrec2)
-      cOut = Carry(dblPrec2 shr 32)
+      native
 
 func subB*(bOut: var Borrow, diff: var uint32, a, b: uint32, bIn: Borrow) {.inline.} =
   ## Substraction with borrow
   ## (BorrowOut, Diff) <- a - b - borrowIn
-  when nimvm:
+  template native =
     let dblPrec = uint64(a) - uint64(b) - uint64(bIn)
     diff = uint32(dblPrec and uint32.high)
     # On borrow the high word will be 0b1111...1111 and needs to be masked
     bOut = Borrow((dblPrec shr 32) and 1)
+
+  when nimvm:
+    native
   else:
-    when X86:
+    when useIntrinsics:
       bOut = subborrow_u32(bIn, a, b, diff)
     else:
-      # ditto
-      let dblPrec2 = uint64(a) - uint64(b) - uint64(bIn)
-      diff = uint32(dblPrec2)
-      # On borrow the high word will be 0b1111...1111 and needs to be masked
-      bOut = Borrow((dblPrec2 shr 32) and 1)
+      native
 
 func addC*(cOut: var Carry, sum: var uint64, a, b: uint64, cIn: Carry) {.inline.} =
   ## Addition with carry
@@ -143,24 +145,21 @@ func addC*(cOut: var Carry, sum: var uint64, a, b: uint64, cIn: Carry) {.inline.
   when nimvm:
     addC_nim(cOut, sum, a, b, cIn)
   else:
-    when X86:
+    when useIntrinsics:
       cOut = addcarry_u64(cIn, a, b, sum)
+    elif useInt128:
+      var dblPrec {.noInit.}: uint128
+      {.emit:[dblPrec, " = (unsigned __int128)", a," + (unsigned __int128)", b, " + (unsigned __int128)",cIn,";"].}
+
+      # Don't forget to dereference the var param in C mode
+      when defined(cpp):
+        {.emit:[cOut, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
+        {.emit:[sum, " = (NU64)", dblPrec,";"].}
+      else:
+        {.emit:["*",cOut, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
+        {.emit:["*",sum, " = (NU64)", dblPrec,";"].}
     else:
-      block:
-        static:
-          doAssert GCC_Compatible
-          doAssert sizeof(int) == 8
-
-        var dblPrec {.noInit.}: uint128
-        {.emit:[dblPrec, " = (unsigned __int128)", a," + (unsigned __int128)", b, " + (unsigned __int128)",cIn,";"].}
-
-        # Don't forget to dereference the var param in C mode
-        when defined(cpp):
-          {.emit:[cOut, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
-          {.emit:[sum, " = (NU64)", dblPrec,";"].}
-        else:
-          {.emit:["*",cOut, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
-          {.emit:["*",sum, " = (NU64)", dblPrec,";"].}
+      addC_nim(cOut, sum, a, b, cIn)
 
 func subB*(bOut: var Borrow, diff: var uint64, a, b: uint64, bIn: Borrow) {.inline.} =
   ## Substraction with borrow
@@ -168,22 +167,19 @@ func subB*(bOut: var Borrow, diff: var uint64, a, b: uint64, bIn: Borrow) {.inli
   when nimvm:
     subB_nim(bOut, diff, a, b, bIn)
   else:
-    when X86:
+    when useIntrinsics:
       bOut = subborrow_u64(bIn, a, b, diff)
+    elif useInt128:
+      var dblPrec {.noInit.}: uint128
+      {.emit:[dblPrec, " = (unsigned __int128)", a," - (unsigned __int128)", b, " - (unsigned __int128)",bIn,";"].}
+
+      # Don't forget to dereference the var param in C mode
+      # On borrow the high word will be 0b1111...1111 and needs to be masked
+      when defined(cpp):
+        {.emit:[bOut, " = (NU64)(", dblPrec," >> ", 64'u64, ") & 1;"].}
+        {.emit:[diff, " = (NU64)", dblPrec,";"].}
+      else:
+        {.emit:["*",bOut, " = (NU64)(", dblPrec," >> ", 64'u64, ") & 1;"].}
+        {.emit:["*",diff, " = (NU64)", dblPrec,";"].}
     else:
-      block:
-        static:
-          doAssert GCC_Compatible
-          doAssert sizeof(int) == 8
-
-        var dblPrec {.noInit.}: uint128
-        {.emit:[dblPrec, " = (unsigned __int128)", a," - (unsigned __int128)", b, " - (unsigned __int128)",bIn,";"].}
-
-        # Don't forget to dereference the var param in C mode
-        # On borrow the high word will be 0b1111...1111 and needs to be masked
-        when defined(cpp):
-          {.emit:[bOut, " = (NU64)(", dblPrec," >> ", 64'u64, ") & 1;"].}
-          {.emit:[diff, " = (NU64)", dblPrec,";"].}
-        else:
-          {.emit:["*",bOut, " = (NU64)(", dblPrec," >> ", 64'u64, ") & 1;"].}
-          {.emit:["*",diff, " = (NU64)", dblPrec,";"].}
+      subB_nim(bOut, diff, a, b, bIn)
diff --git a/stint/private/primitives/extended_precision.nim b/stint/private/primitives/extended_precision.nim
index 4f58e65..1ef30c3 100644
--- a/stint/private/primitives/extended_precision.nim
+++ b/stint/private/primitives/extended_precision.nim
@@ -82,16 +82,19 @@ func muladd2*(hi, lo: var uint32, a, b, c1, c2: uint32) {.inline.}=
 # ############################################################
 
 when sizeof(int) == 8 and not defined(Stint32):
+  import   ../../config
+
   from ./compiletime_fallback import div2n1n_nim, mul_nim, muladd1_nim, muladd2_nim
 
-  when defined(vcc):
-    from ./extended_precision_x86_64_msvc import div2n1n_128, mul_128, muladd1_128, muladd2_128
-  elif GCC_Compatible:
-    when X86:
-      from ./extended_precision_x86_64_gcc import div2n1n_128
-      from ./extended_precision_64bit_uint128 import mul_128, muladd1_128, muladd2_128
+  when not stintNoIntrinsics:
+    when defined(vcc):
+      from ./extended_precision_x86_64_msvc import div2n1n_128, mul_128, muladd1_128, muladd2_128
     else:
-      from ./extended_precision_64bit_uint128 import div2n1n_128, mul_128, muladd1_128, muladd2_128
+      when defined(amd64):
+        from ./extended_precision_x86_64_gcc import div2n1n_128
+        from ./extended_precision_64bit_uint128 import mul_128, muladd1_128, muladd2_128
+      else:
+        from ./extended_precision_64bit_uint128 import div2n1n_128, mul_128, muladd1_128, muladd2_128
 
   func mul*(hi, lo: var uint64, u, v: uint64) {.inline.}=
     ## Extended precision multiplication
@@ -99,7 +102,10 @@ when sizeof(int) == 8 and not defined(Stint32):
     when nimvm:
       mul_nim(hi, lo, u, v)
     else:
-      mul_128(hi, lo, u, v)
+      when stintNoIntrinsics:
+        mul_nim(hi, lo, u, v)
+      else:
+        mul_128(hi, lo, u, v)
 
   func muladd1*(hi, lo: var uint64, a, b, c: uint64) {.inline.}=
     ## Extended precision multiplication + addition
@@ -110,7 +116,10 @@ when sizeof(int) == 8 and not defined(Stint32):
     when nimvm:
       muladd1_nim(hi, lo, a, b, c)
     else:
-      muladd1_128(hi, lo, a, b, c)
+      when stintNoIntrinsics:
+        muladd1_nim(hi, lo, a, b, c)
+      else:
+        muladd1_128(hi, lo, a, b, c)
 
   func muladd2*(hi, lo: var uint64, a, b, c1, c2: uint64) {.inline.}=
     ## Extended precision multiplication + addition + addition
@@ -122,7 +131,10 @@ when sizeof(int) == 8 and not defined(Stint32):
     when nimvm:
       muladd2_nim(hi, lo, a, b, c1, c2)
     else:
-      muladd2_128(hi, lo, a, b, c1, c2)
+      when stintNoIntrinsics:
+        muladd2_nim(hi, lo, a, b, c1, c2)
+      else:
+        muladd2_128(hi, lo, a, b, c1, c2)
 
   func div2n1n*(q, r: var uint64, n_hi, n_lo, d: uint64) {.inline.}=
     ## Division uint128 by uint64
@@ -132,7 +144,10 @@ when sizeof(int) == 8 and not defined(Stint32):
     when nimvm:
       div2n1n_nim(q, r, n_hi, n_lo, d)
     else:
-      div2n1n_128(q, r, n_hi, n_lo, d)
+      when stintNoIntrinsics:
+        div2n1n_nim(q, r, n_hi, n_lo, d)
+      else:
+        div2n1n_128(q, r, n_hi, n_lo, d)
 
 # ############################################################
 #
diff --git a/tests/internal_uint_div.nim b/tests/internal_uint_div.nim
index b9b405d..5e61e67 100644
--- a/tests/internal_uint_div.nim
+++ b/tests/internal_uint_div.nim
@@ -9,6 +9,8 @@
 
 # Test implementation of internal proc:
 
+{.used.}
+
 include ../stint/private/uint_div
 import unittest