endians2: use copyMem instead of loop outside of vm (#95)

Looking at generated assembly, it turns out the optimizer is not smart enough to get rid of the loop - use `copyMem` instead. At least the compiler is smart enough to constant-propagate runtime endian direction, resolving the review comment. Also clarify why a minimum length is enfored - it could perhaps be revisited, but that would leave a slightly odd API. the `array` overloads are actually unnecessary with an optimizing compiler - as long as it can prove the length, any extra checks will go away on their own also add `initCopyFrom` * document optimizations
2022-01-03 14:53:01 +01:00 · 2022-01-03 14:53:01 +01:00 · 4e223b95a7
parent 48666d9c65
commit 4e223b95a7
3 changed files with 47 additions and 41 deletions
--- a/stew/arrayops.nim
+++ b/stew/arrayops.nim
@ -46,7 +46,7 @@ func mnot*[N: static int; T](x: var array[N, T], y: array[N, T]) =
  eachElement(x, x, `not`)

 func copyFrom*[T](
-    v: var openArray[T], src: openArray[T]): Natural {.raises: [Defect].} =
+    v: var openArray[T], src: openArray[T]): Natural =
  ## Copy `src` contents into `v` - this is a permissive assignment where `src`
  ## may contain both fewer and more elements than `v`. Returns the number of
  ## elements copied which may be less than N when `src` is shorter than v
@ -54,6 +54,13 @@ func copyFrom*[T](
  assign(v.toOpenArray(0, elems - 1), src.toOpenArray(0, elems - 1))
  elems

+func initCopyFrom*[N: static[int], T](
+    A: type array[N, T], src: openArray[T]): A =
+  ## Copy `src` contents into an array - this is a permissive copy where `src`
+  ## may contain both fewer and more elements than `N`.
+  let elems = min(N, src.len)
+  assign(result.toOpenArray(0, elems - 1), src.toOpenArray(0, elems - 1))
+
 func initArrayWith*[N: static[int], T](value: T): array[N, T] {.noinit, inline.}=
  result.fill(value)

--- a/stew/endians2.nim
+++ b/stew/endians2.nim
@ -98,9 +98,11 @@ func toBytes*(x: SomeEndianInt, endian: Endianness = system.cpuEndian):
    if endian == system.cpuEndian: x
    else: swapBytes(x)

-  # Loop since vm can't copymem - let's hope optimizer is smart here :)
-  for i in 0..<sizeof(result):
-    result[i] = byte((v shr (i * 8)) and 0xff)
+  when nimvm: # No copyMem in vm
+    for i in 0..<sizeof(result):
+      result[i] = byte((v shr (i * 8)) and 0xff)
+  else:
+    copyMem(addr result, unsafeAddr v, sizeof(result))

 func toBytesLE*(x: SomeEndianInt):
    array[sizeof(x), byte] {.inline.} =
@ -112,43 +114,38 @@ func toBytesBE*(x: SomeEndianInt):
  ## Convert a native endian integer to a native endian byte sequence
  toBytes(x, bigEndian)

-func fromBytes*(
-    T: typedesc[SomeEndianInt],
-    x: array[sizeof(T), byte],
-    endian: Endianness = system.cpuEndian): T {.inline.} =
-  ## Convert a byte sequence to a native endian integer. By default, native
-  ## endianness is used which is not portable!
-  for i in 0..<sizeof(result): # No copymem in vm
-    result = result or T(x[i]) shl (i * 8)
-
-  if endian != system.cpuEndian:
-    result = swapBytes(result)
-
 func fromBytes*(
    T: typedesc[SomeEndianInt],
    x: openArray[byte],
    endian: Endianness = system.cpuEndian): T {.inline.} =
-  ## Read bytes and convert to an integer according to the given endianness. At
-  ## runtime, v must contain at least sizeof(T) bytes. By default, native
-  ## endianness is used which is not portable!
+  ## Read bytes and convert to an integer according to the given endianness.
  ##
-  ## REVIEW COMMENT (zah)
-  ## This API is very strange. Why can't I pass an open array of 3 bytes
-  ## to be interpreted as a LE number? Also, why is `endian` left as a
-  ## run-time parameter (with such short functions, it could easily be static).
+  ## Note: The default value of `system.cpuEndian` is not portable across
+  ## machines.
+  ##
+  ## Panics when `x.len < sizeof(T)` - for shorter buffers, copy the data to
+  ## an `array` first using `arrayops.initCopyFrom`, taking care to zero-fill
+  ## at the right end - usually the beginning for big endian and the end for
+  ## little endian, but this depends on the serialization of the bytes.

-  const ts = sizeof(T) # Nim bug: can't use sizeof directly
-  var tmp: array[ts, byte]
-  for i in 0..<tmp.len: # Loop since vm can't copymem
-    tmp[i] = x[i]
-  fromBytes(T, tmp, endian)
+  # This check gets optimized away when the compiler can prove that the length
+  # is large enough - passing in an `array` or using a construct like
+  # ` toOpenArray(pos, pos + sizeof(T) - 1)` are two ways that this happens
+  doAssert x.len >= sizeof(T), "Not enough bytes for endian conversion"

-func fromBytesBE*(
-    T: typedesc[SomeEndianInt],
-    x: array[sizeof(T), byte]): T {.inline.} =
-  ## Read big endian bytes and convert to an integer. By default, native
-  ## endianness is used which is not portable!
-  fromBytes(T, x, bigEndian)
+  when nimvm: # No copyMem in vm
+    for i in 0..<sizeof(result):
+      result = result or (T(x[i]) shl (i * 8))
+  else:
+    # `copyMem` helps compilers optimize the copy into a single instruction, when
+    # alignment etc permits
+    copyMem(addr result, unsafeAddr x[0], sizeof(result))
+
+  if endian != system.cpuEndian:
+    # The swap is turned into a CPU-specific instruction and/or combined with
+    # the copy above, again when conditions permit it - for example, on X86
+    # fromBytesBE gets compiled into a single `MOVBE` instruction
+    result = swapBytes(result)

 func fromBytesBE*(
    T: typedesc[SomeEndianInt],
@ -169,13 +166,6 @@ func fromBE*[T: SomeEndianInt](x: T): T {.inline.} =
  # there's no difference between this and toBE, except when reading the code
  toBE(x)

-func fromBytesLE*(
-    T: typedesc[SomeEndianInt],
-    x: array[sizeof(T), byte]): T {.inline.} =
-  ## Read little endian bytes and convert to an integer. By default, native
-  ## endianness is used which is not portable!
-  fromBytes(T, x, littleEndian)
-
 func fromBytesLE*(
    T: typedesc[SomeEndianInt],
    x: openArray[byte]): T {.inline.} =
--- a/tests/test_arrayops.nim
+++ b/tests/test_arrayops.nim
@ -24,3 +24,12 @@ suite "arrayops":
      (a or b) == [a[0] or b[0], a[1] or b[1]]
      (a xor b) == [a[0] xor b[0], a[1] xor b[1]]
      (not a) == [not a[0], not a[1]]
+
+  test "copyFrom":
+    let
+      a = [byte 4, 5]
+
+    check:
+      array[4, byte].initCopyFrom(a) == [byte 4, 5, 0, 0]
+      array[1, byte].initCopyFrom(a) == [byte 4]
+      array[2, byte].initCopyFrom(a) == [byte 4, 5]