endians2: use copyMem instead of loop outside of vm (#95)

Looking at generated assembly, it turns out the optimizer is not smart
enough to get rid of the loop - use `copyMem` instead.

At least the compiler is smart enough to constant-propagate runtime
endian direction, resolving the review comment.

Also clarify why a minimum length is enfored - it could perhaps be
revisited, but that would leave a slightly odd API.

the `array` overloads are actually unnecessary with an optimizing
compiler - as long as it can prove the length, any extra checks will go
away on their own

also add `initCopyFrom`

* document optimizations
This commit is contained in:
Jacek Sieka 2022-01-03 14:53:01 +01:00 committed by GitHub
parent 48666d9c65
commit 4e223b95a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 41 deletions

View File

@ -46,7 +46,7 @@ func mnot*[N: static int; T](x: var array[N, T], y: array[N, T]) =
eachElement(x, x, `not`)
func copyFrom*[T](
v: var openArray[T], src: openArray[T]): Natural {.raises: [Defect].} =
v: var openArray[T], src: openArray[T]): Natural =
## Copy `src` contents into `v` - this is a permissive assignment where `src`
## may contain both fewer and more elements than `v`. Returns the number of
## elements copied which may be less than N when `src` is shorter than v
@ -54,6 +54,13 @@ func copyFrom*[T](
assign(v.toOpenArray(0, elems - 1), src.toOpenArray(0, elems - 1))
elems
func initCopyFrom*[N: static[int], T](
A: type array[N, T], src: openArray[T]): A =
## Copy `src` contents into an array - this is a permissive copy where `src`
## may contain both fewer and more elements than `N`.
let elems = min(N, src.len)
assign(result.toOpenArray(0, elems - 1), src.toOpenArray(0, elems - 1))
func initArrayWith*[N: static[int], T](value: T): array[N, T] {.noinit, inline.}=
result.fill(value)

View File

@ -98,9 +98,11 @@ func toBytes*(x: SomeEndianInt, endian: Endianness = system.cpuEndian):
if endian == system.cpuEndian: x
else: swapBytes(x)
# Loop since vm can't copymem - let's hope optimizer is smart here :)
for i in 0..<sizeof(result):
result[i] = byte((v shr (i * 8)) and 0xff)
when nimvm: # No copyMem in vm
for i in 0..<sizeof(result):
result[i] = byte((v shr (i * 8)) and 0xff)
else:
copyMem(addr result, unsafeAddr v, sizeof(result))
func toBytesLE*(x: SomeEndianInt):
array[sizeof(x), byte] {.inline.} =
@ -112,43 +114,38 @@ func toBytesBE*(x: SomeEndianInt):
## Convert a native endian integer to a native endian byte sequence
toBytes(x, bigEndian)
func fromBytes*(
T: typedesc[SomeEndianInt],
x: array[sizeof(T), byte],
endian: Endianness = system.cpuEndian): T {.inline.} =
## Convert a byte sequence to a native endian integer. By default, native
## endianness is used which is not portable!
for i in 0..<sizeof(result): # No copymem in vm
result = result or T(x[i]) shl (i * 8)
if endian != system.cpuEndian:
result = swapBytes(result)
func fromBytes*(
T: typedesc[SomeEndianInt],
x: openArray[byte],
endian: Endianness = system.cpuEndian): T {.inline.} =
## Read bytes and convert to an integer according to the given endianness. At
## runtime, v must contain at least sizeof(T) bytes. By default, native
## endianness is used which is not portable!
## Read bytes and convert to an integer according to the given endianness.
##
## REVIEW COMMENT (zah)
## This API is very strange. Why can't I pass an open array of 3 bytes
## to be interpreted as a LE number? Also, why is `endian` left as a
## run-time parameter (with such short functions, it could easily be static).
## Note: The default value of `system.cpuEndian` is not portable across
## machines.
##
## Panics when `x.len < sizeof(T)` - for shorter buffers, copy the data to
## an `array` first using `arrayops.initCopyFrom`, taking care to zero-fill
## at the right end - usually the beginning for big endian and the end for
## little endian, but this depends on the serialization of the bytes.
const ts = sizeof(T) # Nim bug: can't use sizeof directly
var tmp: array[ts, byte]
for i in 0..<tmp.len: # Loop since vm can't copymem
tmp[i] = x[i]
fromBytes(T, tmp, endian)
# This check gets optimized away when the compiler can prove that the length
# is large enough - passing in an `array` or using a construct like
# ` toOpenArray(pos, pos + sizeof(T) - 1)` are two ways that this happens
doAssert x.len >= sizeof(T), "Not enough bytes for endian conversion"
func fromBytesBE*(
T: typedesc[SomeEndianInt],
x: array[sizeof(T), byte]): T {.inline.} =
## Read big endian bytes and convert to an integer. By default, native
## endianness is used which is not portable!
fromBytes(T, x, bigEndian)
when nimvm: # No copyMem in vm
for i in 0..<sizeof(result):
result = result or (T(x[i]) shl (i * 8))
else:
# `copyMem` helps compilers optimize the copy into a single instruction, when
# alignment etc permits
copyMem(addr result, unsafeAddr x[0], sizeof(result))
if endian != system.cpuEndian:
# The swap is turned into a CPU-specific instruction and/or combined with
# the copy above, again when conditions permit it - for example, on X86
# fromBytesBE gets compiled into a single `MOVBE` instruction
result = swapBytes(result)
func fromBytesBE*(
T: typedesc[SomeEndianInt],
@ -169,13 +166,6 @@ func fromBE*[T: SomeEndianInt](x: T): T {.inline.} =
# there's no difference between this and toBE, except when reading the code
toBE(x)
func fromBytesLE*(
T: typedesc[SomeEndianInt],
x: array[sizeof(T), byte]): T {.inline.} =
## Read little endian bytes and convert to an integer. By default, native
## endianness is used which is not portable!
fromBytes(T, x, littleEndian)
func fromBytesLE*(
T: typedesc[SomeEndianInt],
x: openArray[byte]): T {.inline.} =

View File

@ -24,3 +24,12 @@ suite "arrayops":
(a or b) == [a[0] or b[0], a[1] or b[1]]
(a xor b) == [a[0] xor b[0], a[1] xor b[1]]
(not a) == [not a[0], not a[1]]
test "copyFrom":
let
a = [byte 4, 5]
check:
array[4, byte].initCopyFrom(a) == [byte 4, 5, 0, 0]
array[1, byte].initCopyFrom(a) == [byte 4]
array[2, byte].initCopyFrom(a) == [byte 4, 5]