diff --git a/src/private/bithacks.nim b/src/private/bithacks.nim
index 51aff33..6fbed1b 100644
--- a/src/private/bithacks.nim
+++ b/src/private/bithacks.nim
@@ -7,43 +7,11 @@
 #
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import  ./uint_type
+import  ./uint_type, bitops
 
-# Compiler defined const: https://github.com/nim-lang/Nim/wiki/Consts-defined-by-the-compiler
-const withBuiltins = defined(gcc) or defined(clang)
-
-when withBuiltins:
-  proc builtin_clz(n: cuint): cint {.importc: "__builtin_clz", nodecl.}
-  proc builtin_clz(n: culong): cint {.importc: "__builtin_clzl", nodecl.}
-  proc builtin_clz(n: culonglong): cint {.importc: "__builtin_clzll", nodecl.}
-  type TbuiltinSupported = cuint or culong or culonglong
-  # Warning ⚠: if n = 0, clz is undefined
-
-template bit_length_impl[T: SomeUnsignedInt or Natural or int](n: T, result: int) =
-  # For some reason using "SomeUnsignedInt or Natural" directly makes Nim compiler
-  # throw a type mismatch in a proc, we use a template as a workaround
-  # Plus the template doesn't match natural with int :/
-  when withBuiltins and T is TbuiltinSupported:
-    result = if n == T(0): 0                    # Removing this branch would make divmod 4x faster :/
-             else: T.sizeof * 8 - builtin_clz(n)
-
-  else:
-    # The biggest optimization for the naive implementation
-    # is making sure this is inline
-    # This is the difference between returning in 2ms or 10+ second
-    # on a 1000000000 times test loop.
-    var x = n
-    while x != T(0):
-      x = x shr 1
-      inc(result)
-
-proc bit_length*(n: SomeUnsignedInt): int {.noSideEffect, inline.}=
-  ## Calculates how many bits are necessary to represent the number
-  bit_length_impl(n, result)
-
-proc bit_length*(n: Natural): int {.noSideEffect, inline.}=
-  ## Calculates how many bits are necessary to represent the number
-  bit_length_impl(n, result)
+proc bit_length*(x: SomeInteger): int {.inline, noSideEffect.}=
+  ## Number of bits needed to represent `x` (0 for 0). `fastLog2` is floor(log2(x)), hence the `+ 1`.
+  if x == 0: 0 else: fastLog2(x) + 1
 
 proc bit_length*(n: MpUintImpl): int {.noSideEffect.}=
   ## Calculates how many bits are necessary to represent the number
diff --git a/src/private/conversion.nim b/src/private/conversion.nim
index ad4f517..3430853 100644
--- a/src/private/conversion.nim
+++ b/src/private/conversion.nim
@@ -10,15 +10,13 @@
 import  ./uint_type,
         macros
 
-template convBool(typ: typedesc): untyped =
-  # needed for carry conversion
-  converter boolMpUint*(b: bool): MpUintImpl[typ] {.noSideEffect, inline.}=
-    result.lo = b.typ
 
-convBool(uint8)
-convBool(uint16)
-convBool(uint32)
-convBool(uint64)
+proc toSubtype*[T: SomeInteger](b: bool, typ: typedesc[T]): T {.noSideEffect, inline.}=
+  T(b)  # bool -> integer conversion (false -> 0, true -> 1); `typ` only selects T
+
+proc toSubtype*[T: MpUintImpl](b: bool, typ: typedesc[T]): T {.noSideEffect, inline.}=
+  # Delegates to the toSubtype overload matching the type of `lo`; only `lo` is assigned here
+  result.lo = toSubtype(b, type(result.lo))
 
 proc zero*(typ: typedesc[BaseUint]): typ {.compileTime.} =
   typ()
diff --git a/src/private/uint_binary_ops.nim b/src/private/uint_binary_ops.nim
index fb471d3..f041280 100644
--- a/src/private/uint_binary_ops.nim
+++ b/src/private/uint_binary_ops.nim
@@ -21,7 +21,7 @@ proc `+=`*(x: var MpUintImpl, y: MpUintImpl) {.noSideEffect, inline.}=
   let tmp = x.lo
 
   x.lo += y.lo
-  x.hi += SubTy(x.lo < tmp) + y.hi
+  x.hi += (x.lo < tmp).toSubtype(SubTy) + y.hi
 
 proc `+`*(x, y: MpUintImpl): MpUintImpl {.noSideEffect, noInit, inline.}=
   # Addition for multi-precision unsigned int
@@ -37,14 +37,33 @@ proc `-=`*(x: var MpUintImpl, y: MpUintImpl) {.noSideEffect, inline.}=
   let tmp = x.lo
 
   x.lo -= y.lo
-  x.hi -= SubTy(x.lo > tmp) + y.hi
+  x.hi -= (x.lo > tmp).toSubtype(SubTy) + y.hi
 
 proc `-`*(x, y: MpUintImpl): MpUintImpl {.noSideEffect, noInit, inline.}=
   # Substraction for multi-precision unsigned int
   result = x
   result -= y
 
-template naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] =
+
+proc naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}
+  # Forward declaration: naiveMul (defined next) calls naiveMulImpl, whose body comes after it
+
+proc naiveMul[T: BaseUint](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}=
+  ## Extended-precision multiplication: the product of two `T` is returned as an `MpUintImpl[T]`.
+
+  when T.sizeof in {1, 2, 4}:
+    # Multiply in the twice-bigger type (via `asDoubleUint`), then reinterpret the bits as the result type
+    cast[type result](x.asDoubleUint * y.asDoubleUint)
+
+  elif T.sizeof == 8: # uint64 or MpUint[uint32]
+    # uint64 cannot be doubled to uint128: convert both operands with `toMpUintImpl` and use naiveMulImpl
+    naiveMulImpl(x.toMpUintImpl, y.toMpUintImpl)
+  else:
+    # Case: at least uint128 * uint128 --> uint256; operands are passed to naiveMulImpl unchanged
+    naiveMulImpl(x, y)
+
+
+proc naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}=
   # See details at
   # https://en.wikipedia.org/wiki/Karatsuba_algorithm
   # https://locklessinc.com/articles/256bit_arithmetic/
@@ -66,27 +85,12 @@ template naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] =
 
   var z1 = tmp
   z1 += naiveMul(x.hi, y.lo)
-  let z2 = (z1 < tmp).T + naiveMul(x.hi, y.hi)
+  let z2 = (z1 < tmp).toSubtype(T) + naiveMul(x.hi, y.hi)
 
   let tmp2  = z1.lo shl halfSize
   result.lo = tmp2
   result.lo += z0
-  result.hi = (result.lo < tmp2).T + z2 + z1.hi
-
-proc naiveMul[T: BaseUint](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}=
-  ## Naive multiplication algorithm with extended precision
-
-  when T.sizeof in {1, 2, 4}:
-    # Use types twice bigger to do the multiplication
-    cast[type result](x.asDoubleUint * y.asDoubleUint)
-
-  elif T.sizeof == 8: # uint64 or MpUint[uint32]
-    # We cannot double uint64 to uint128
-    naiveMulImpl(x.toMpUint, y.toMpUint)
-  else:
-    # Case: at least uint128 * uint128 --> uint256
-    naiveMulImpl(x, y)
-
+  result.hi = (result.lo < tmp2).toSubtype(T) + z2 + z1.hi
 
 proc `*`*(x, y: MpUintImpl): MpUintImpl {.noSideEffect, noInit.}=
   ## Multiplication for multi-precision unsigned uint
diff --git a/src/private/uint_comparison.nim b/src/private/uint_comparison.nim
index 36ae35e..919a62b 100644
--- a/src/private/uint_comparison.nim
+++ b/src/private/uint_comparison.nim
@@ -17,8 +17,8 @@ proc `<=`*(x, y: MpUintImpl): bool {.noSideEffect, noInit, inline.}=
   result = if x == y: true
            else: x < y
 
-proc isZero[T: SomeUnsignedInt](n: T): bool {.noSideEffect,inline.} =
-  n == 0.T
+proc isZero*(n: SomeUnsignedInt): bool {.noSideEffect,inline.} =
+  n == 0  # unsigned-integer case; the MpUintImpl overload applies isZero to its `lo` and `hi` fields
 
 proc isZero*(n: MpUintImpl): bool {.noSideEffect,inline.} =
   n.lo.isZero and n.hi.isZero
diff --git a/src/uint_init.nim b/src/uint_init.nim
index b8861cb..9aa8986 100644
--- a/src/uint_init.nim
+++ b/src/uint_init.nim
@@ -14,15 +14,15 @@ import  ./private/bithacks, ./private/conversion,
 
 import typetraits
 
-proc initMpUint*(n: SomeUnsignedInt, bits: static[int]): MpUint[bits] {.noSideEffect.} =
-
+proc initMpUint*(n: SomeInteger, bits: static[int]): MpUint[bits] {.noSideEffect.} =
+  assert n >= 0
   when result.data is MpuintImpl:
     type SubTy = type result.data.lo
 
     let len = n.bit_length
     if len > bits:
-      # Todo print n
-      raise newException(ValueError, "Input cannot be stored in a multi-precision " & $bits & "-bit integer." &
+      raise newException(ValueError, "Input " & $n & " cannot be stored in a multi-precision " &
+                                    $bits & "-bit integer." &
                                     "\nIt requires at least " & $len & " bits of precision")
     elif len < bits div 2:
       result.data.lo = SubTy(n)
@@ -35,12 +35,6 @@ proc initMpUint*(n: SomeUnsignedInt, bits: static[int]): MpUint[bits] {.noSideEf
       elif bits == 64:
         result.data = toMpUintImpl n.uint64
       else:
-        {.fatal, "unreachable".}
+        raise newException(ValueError, "Fatal")
   else:
     result.data = (type result.data)(n)
-
-proc u128*(n: SomeUnsignedInt): MpUint[128] {.noSideEffect, inline, noInit.}=
-  initMpUint[128](n)
-
-proc u256*(n: SomeUnsignedInt): MpUint[256] {.noSideEffect, inline, noInit.}=
-  initMpUint[256](n)
diff --git a/src/uint_public.nim b/src/uint_public.nim
index 51e18e5..83adac5 100644
--- a/src/uint_public.nim
+++ b/src/uint_public.nim
@@ -14,6 +14,13 @@ type
   UInt128* = MpUint[128]
   UInt256* = MpUint[256]
 
+template make_conv(conv_name: untyped, size: int): untyped =
+  ## Declares an exported proc named `conv_name` that passes an integer to `initMpUint` with `size` bits.
+  proc `conv_name`*(n: SomeInteger): MpUint[size] {.noSideEffect, inline, noInit.} = n.initMpUint(size)
+
+make_conv(u128, 128)
+make_conv(u256, 256)
+
 template make_unary(op, ResultTy): untyped =
   proc `op`*(x: MpUint): ResultTy {.noInit, inline, noSideEffect.} =
     when resultTy is MpUint: