endians2: use copyMem instead of loop outside of vm (#95)
Looking at generated assembly, it turns out the optimizer is not smart enough to get rid of the loop - use `copyMem` instead. At least the compiler is smart enough to constant-propagate runtime endian direction, resolving the review comment. Also clarify why a minimum length is enfored - it could perhaps be revisited, but that would leave a slightly odd API. the `array` overloads are actually unnecessary with an optimizing compiler - as long as it can prove the length, any extra checks will go away on their own also add `initCopyFrom` * document optimizations
This commit is contained in:
parent
48666d9c65
commit
4e223b95a7
|
@ -46,7 +46,7 @@ func mnot*[N: static int; T](x: var array[N, T], y: array[N, T]) =
|
|||
eachElement(x, x, `not`)
|
||||
|
||||
func copyFrom*[T](
|
||||
v: var openArray[T], src: openArray[T]): Natural {.raises: [Defect].} =
|
||||
v: var openArray[T], src: openArray[T]): Natural =
|
||||
## Copy `src` contents into `v` - this is a permissive assignment where `src`
|
||||
## may contain both fewer and more elements than `v`. Returns the number of
|
||||
## elements copied which may be less than N when `src` is shorter than v
|
||||
|
@ -54,6 +54,13 @@ func copyFrom*[T](
|
|||
assign(v.toOpenArray(0, elems - 1), src.toOpenArray(0, elems - 1))
|
||||
elems
|
||||
|
||||
func initCopyFrom*[N: static[int], T](
|
||||
A: type array[N, T], src: openArray[T]): A =
|
||||
## Copy `src` contents into an array - this is a permissive copy where `src`
|
||||
## may contain both fewer and more elements than `N`.
|
||||
let elems = min(N, src.len)
|
||||
assign(result.toOpenArray(0, elems - 1), src.toOpenArray(0, elems - 1))
|
||||
|
||||
func initArrayWith*[N: static[int], T](value: T): array[N, T] {.noinit, inline.}=
|
||||
result.fill(value)
|
||||
|
||||
|
|
|
@ -98,9 +98,11 @@ func toBytes*(x: SomeEndianInt, endian: Endianness = system.cpuEndian):
|
|||
if endian == system.cpuEndian: x
|
||||
else: swapBytes(x)
|
||||
|
||||
# Loop since vm can't copymem - let's hope optimizer is smart here :)
|
||||
for i in 0..<sizeof(result):
|
||||
result[i] = byte((v shr (i * 8)) and 0xff)
|
||||
when nimvm: # No copyMem in vm
|
||||
for i in 0..<sizeof(result):
|
||||
result[i] = byte((v shr (i * 8)) and 0xff)
|
||||
else:
|
||||
copyMem(addr result, unsafeAddr v, sizeof(result))
|
||||
|
||||
func toBytesLE*(x: SomeEndianInt):
|
||||
array[sizeof(x), byte] {.inline.} =
|
||||
|
@ -112,43 +114,38 @@ func toBytesBE*(x: SomeEndianInt):
|
|||
## Convert a native endian integer to a native endian byte sequence
|
||||
toBytes(x, bigEndian)
|
||||
|
||||
func fromBytes*(
|
||||
T: typedesc[SomeEndianInt],
|
||||
x: array[sizeof(T), byte],
|
||||
endian: Endianness = system.cpuEndian): T {.inline.} =
|
||||
## Convert a byte sequence to a native endian integer. By default, native
|
||||
## endianness is used which is not portable!
|
||||
for i in 0..<sizeof(result): # No copymem in vm
|
||||
result = result or T(x[i]) shl (i * 8)
|
||||
|
||||
if endian != system.cpuEndian:
|
||||
result = swapBytes(result)
|
||||
|
||||
func fromBytes*(
|
||||
T: typedesc[SomeEndianInt],
|
||||
x: openArray[byte],
|
||||
endian: Endianness = system.cpuEndian): T {.inline.} =
|
||||
## Read bytes and convert to an integer according to the given endianness. At
|
||||
## runtime, v must contain at least sizeof(T) bytes. By default, native
|
||||
## endianness is used which is not portable!
|
||||
## Read bytes and convert to an integer according to the given endianness.
|
||||
##
|
||||
## REVIEW COMMENT (zah)
|
||||
## This API is very strange. Why can't I pass an open array of 3 bytes
|
||||
## to be interpreted as a LE number? Also, why is `endian` left as a
|
||||
## run-time parameter (with such short functions, it could easily be static).
|
||||
## Note: The default value of `system.cpuEndian` is not portable across
|
||||
## machines.
|
||||
##
|
||||
## Panics when `x.len < sizeof(T)` - for shorter buffers, copy the data to
|
||||
## an `array` first using `arrayops.initCopyFrom`, taking care to zero-fill
|
||||
## at the right end - usually the beginning for big endian and the end for
|
||||
## little endian, but this depends on the serialization of the bytes.
|
||||
|
||||
const ts = sizeof(T) # Nim bug: can't use sizeof directly
|
||||
var tmp: array[ts, byte]
|
||||
for i in 0..<tmp.len: # Loop since vm can't copymem
|
||||
tmp[i] = x[i]
|
||||
fromBytes(T, tmp, endian)
|
||||
# This check gets optimized away when the compiler can prove that the length
|
||||
# is large enough - passing in an `array` or using a construct like
|
||||
# ` toOpenArray(pos, pos + sizeof(T) - 1)` are two ways that this happens
|
||||
doAssert x.len >= sizeof(T), "Not enough bytes for endian conversion"
|
||||
|
||||
func fromBytesBE*(
|
||||
T: typedesc[SomeEndianInt],
|
||||
x: array[sizeof(T), byte]): T {.inline.} =
|
||||
## Read big endian bytes and convert to an integer. By default, native
|
||||
## endianness is used which is not portable!
|
||||
fromBytes(T, x, bigEndian)
|
||||
when nimvm: # No copyMem in vm
|
||||
for i in 0..<sizeof(result):
|
||||
result = result or (T(x[i]) shl (i * 8))
|
||||
else:
|
||||
# `copyMem` helps compilers optimize the copy into a single instruction, when
|
||||
# alignment etc permits
|
||||
copyMem(addr result, unsafeAddr x[0], sizeof(result))
|
||||
|
||||
if endian != system.cpuEndian:
|
||||
# The swap is turned into a CPU-specific instruction and/or combined with
|
||||
# the copy above, again when conditions permit it - for example, on X86
|
||||
# fromBytesBE gets compiled into a single `MOVBE` instruction
|
||||
result = swapBytes(result)
|
||||
|
||||
func fromBytesBE*(
|
||||
T: typedesc[SomeEndianInt],
|
||||
|
@ -169,13 +166,6 @@ func fromBE*[T: SomeEndianInt](x: T): T {.inline.} =
|
|||
# there's no difference between this and toBE, except when reading the code
|
||||
toBE(x)
|
||||
|
||||
func fromBytesLE*(
|
||||
T: typedesc[SomeEndianInt],
|
||||
x: array[sizeof(T), byte]): T {.inline.} =
|
||||
## Read little endian bytes and convert to an integer. By default, native
|
||||
## endianness is used which is not portable!
|
||||
fromBytes(T, x, littleEndian)
|
||||
|
||||
func fromBytesLE*(
|
||||
T: typedesc[SomeEndianInt],
|
||||
x: openArray[byte]): T {.inline.} =
|
||||
|
|
|
@ -24,3 +24,12 @@ suite "arrayops":
|
|||
(a or b) == [a[0] or b[0], a[1] or b[1]]
|
||||
(a xor b) == [a[0] xor b[0], a[1] xor b[1]]
|
||||
(not a) == [not a[0], not a[1]]
|
||||
|
||||
test "copyFrom":
|
||||
let
|
||||
a = [byte 4, 5]
|
||||
|
||||
check:
|
||||
array[4, byte].initCopyFrom(a) == [byte 4, 5, 0, 0]
|
||||
array[1, byte].initCopyFrom(a) == [byte 4]
|
||||
array[2, byte].initCopyFrom(a) == [byte 4, 5]
|
||||
|
|
Loading…
Reference in New Issue