From 777a84e9f5825ed57d4496a5caebf08181d081cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mamy=20Andr=C3=A9-Ratsimbazafy?=
Date: Sat, 13 Jun 2020 16:44:13 +0200
Subject: [PATCH] Implement toHex/fromHex and fix `shl`

---
 stint/endians2.nim           | 136 ++++++++++++++++++++++++++++++-----
 stint/io.nim                 |  62 ++++++----------
 stint/private/uint_mul.nim   |   3 +-
 stint/private/uint_shift.nim |  93 ++++++++++++++++++++++++
 stint/uintops.nim            |  92 ++++++++++++------------
 5 files changed, 285 insertions(+), 101 deletions(-)
 create mode 100644 stint/private/uint_shift.nim

diff --git a/stint/endians2.nim b/stint/endians2.nim
index 6232a94..20d78bc 100644
--- a/stint/endians2.nim
+++ b/stint/endians2.nim
@@ -9,27 +9,131 @@
 
 import private/datatypes
 
-import stew/endians2
-export endians2
-
 {.push raises: [IndexError], noInit, gcsafe.}
 
-func toBytes*[bits: static int](x: StUint[bits], endian: Endianness = system.cpuEndian):
-    array[bits div 8, byte] {.inline.} =
-  when endian == system.cpuEndian:
-    for i in 0 ..< x.limbs.len:
-      result[i * sizeof(Word)] = x.limbs[i].toBytes()
+# Serialization
+# ------------------------------------------------------------------------------------------
+
+template toByte(x: SomeUnsignedInt): byte =
+  ## At compile-time, conversion to bytes checks the range
+  ## we want to ensure this is done at the register level
+  ## at runtime in a single "mov byte" instruction
+  when nimvm:
+    byte(x and 0xFF)
   else:
-    for i in 0 ..< x.limbs.len:
-      result[i * sizeof(Word)] = x.limbs[^i].toBytes()
+    byte(x)
 
-func toBytesLE*[bits: static int](x: StUint[bits]):
-    array[bits div 8, byte] {.inline.} =
-  toBytes(x, littleEndian)
+template blobFrom(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int, endian: static Endianness) =
+  ## Write an integer into a raw binary blob
+  ## Bytes are extracted by shifting, so only the requested `endian` matters
+  when endian == littleEndian:
+    for i in 0 ..< sizeof(src):
+      dst[startIdx+i] = toByte((src shr (i * 8)))
+  else:
+    for i in 0 ..< sizeof(src):
+      dst[startIdx+sizeof(src)-1-i] = toByte((src shr (i * 8)))
 
-func toBytesBE*[bits: static int](x: StUint[bits]):
-    array[bits div 8, byte] {.inline.} =
-  toBytes(x, bigEndian)
+func toBytesLE*[bits: static int](src: StUint[bits]): array[bits div 8, byte] =
+  var
+    src_idx, dst_idx = 0
+    acc: Word = 0
+    acc_len = 0
+
+  when cpuEndian == bigEndian:
+    srcIdx = src.limbs.len - 1
+
+  var tail = result.len
+  while tail > 0:
+    when cpuEndian == littleEndian:
+      let w = if src_idx < src.limbs.len: src.limbs[src_idx]
+              else: 0
+      inc src_idx
+    else:
+      let w = if src_idx >= 0: src.limbs[src_idx]
+              else: 0
+      dec src_idx
+
+    if acc_len == 0:
+      # We need to refill the buffer to output 64-bit
+      acc = w
+      acc_len = WordBitWidth
+    else:
+      let lo = acc
+      acc = w
+
+      if tail >= sizeof(Word):
+        # Unrolled copy
+        result.blobFrom(src = lo, dst_idx, littleEndian)
+        dst_idx += sizeof(Word)
+        tail -= sizeof(Word)
+      else:
+        # Process the tail and exit
+        when cpuEndian == littleEndian:
+          # When requesting little-endian on little-endian platform
+          # we can just copy each byte
+          # tail is inclusive
+          for i in 0 ..< tail:
+            result[dst_idx+i] = toByte(lo shr (i*8))
+        else: # TODO check this
+          # We need to copy from the end
+          for i in 0 ..< tail:
+            result[dst_idx+i] = toByte(lo shr ((tail-i)*8))
+        return
+
+func toBytesBE*[bits: static int](src: StUint[bits]): array[bits div 8, byte] {.inline.} =
+  var
+    src_idx = 0
+    acc: Word = 0
+    acc_len = 0
+
+  when cpuEndian == bigEndian:
+    srcIdx = src.limbs.len - 1
+
+  var tail = result.len
+  while tail > 0:
+    when cpuEndian == littleEndian:
+      let w = if src_idx < src.limbs.len: src.limbs[src_idx]
+              else: 0
+      inc src_idx
+    else:
+      let w = if src_idx >= 0: src.limbs[src_idx]
+              else: 0
+      dec src_idx
+
+    if acc_len == 0:
+      # We need to refill the buffer to output 64-bit
+      acc = w
+      acc_len = WordBitWidth
+    else:
+      let lo = acc
+      acc = w
+
+      if tail >= sizeof(Word):
+        # Unrolled copy
+        tail -= sizeof(Word)
+        result.blobFrom(src = lo, tail, bigEndian)
+      else:
+        # Process the tail and exit
+        when cpuEndian == littleEndian:
+          # When requesting big-endian on little-endian platform
+          # we copy each byte in reverse order
+          # tail is inclusive
+          for i in 0 ..< tail:
+            result[tail-1-i] = toByte(lo shr (i*8))
+        else:
+          # We need to copy from the end
+          for i in 0 ..< tail:
+            result[tail-1-i] = toByte(lo shr ((tail-i)*8))
+        return
+
+func toBytes*[bits: static int](x: StUint[bits], endian: Endianness = system.cpuEndian): array[bits div 8, byte] {.inline.} =
+  if endian == littleEndian:
+    result = x.toBytesLE()
+  else:
+    result = x.toBytesBE()
+
+# Deserialization
+# ------------------------------------------------------------------------------------------
 
 func fromBytesBE*[bits: static int](
     T: typedesc[StUint[bits]],
diff --git a/stint/io.nim b/stint/io.nim
index 26bca80..8483a15 100644
--- a/stint/io.nim
+++ b/stint/io.nim
@@ -8,12 +8,18 @@
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
 import
+  # Standard library
+  typetraits, algorithm, hashes,
+  # Status libraries
+  # stew/byteutils,
+  # Internal
   ./private/datatypes,
   # ./private/int_negabs,
   # ./private/compiletime_helpers,
   # ./intops,
-  ./uintops, ./endians2,
-  typetraits, algorithm, hashes
+  ./uintops, ./endians2
+
+from stew/byteutils import toHex # Why are we exporting readHexChar in byteutils?
 
 template static_check_size(T: typedesc[SomeInteger], bits: static[int]) =
   # To avoid a costly runtime check, we refuse storing into StUint types smaller
@@ -356,44 +362,20 @@ func hexToUint*[bits: static[int]](hexString: string): StUint[bits] {.inline.} =
 #   ## Leading zeros are stripped. Use dumpHex instead if you need the in-memory representation
 #   toString(num, 16)
 
-# func dumpHex*(x: Stint or StUint, order: static[Endianness] = bigEndian): string =
-#   ## Stringify an int to hex.
-#   ## Note. Leading zeros are not removed. Use toString(n, base = 16)/toHex instead.
-#   ##
-#   ## You can specify bigEndian or littleEndian order.
-#   ## i.e. in bigEndian:
-#   ##   - 1.uint64 will be 00000001
-#   ##   - (2.uint128)^64 + 1 will be 0000000100000001
-#   ##
-#   ## in littleEndian:
-#   ##   - 1.uint64 will be 01000000
-#   ##   - (2.uint128)^64 + 1 will be 0100000001000000
-
-#   const
-#     hexChars = "0123456789abcdef"
-#     size = bitsof(x.data) div 8
-
-#   result = newString(2*size)
-
-#   when nimvm:
-#     for i in 0 ..< size:
-#       when order == system.cpuEndian:
-#         let byte = x.data.getByte(i)
-#       else:
-#         let byte = x.data.getByte(size - 1 - i)
-#       result[2*i] = hexChars[int byte shr 4 and 0xF]
-#       result[2*i+1] = hexChars[int byte and 0xF]
-#   else:
-#     {.pragma: restrict, codegenDecl: "$# __restrict $#".}
-#     let bytes {.restrict.}= cast[ptr array[size, byte]](x.unsafeaddr)
-
-#     for i in 0 ..< size:
-#       when order == system.cpuEndian:
-#         result[2*i] = hexChars[int bytes[i] shr 4 and 0xF]
-#         result[2*i+1] = hexChars[int bytes[i] and 0xF]
-#       else:
-#         result[2*i] = hexChars[int bytes[bytes[].high - i] shr 4 and 0xF]
-#         result[2*i+1] = hexChars[int bytes[bytes[].high - i] and 0xF]
+func dumpHex*(a: Stint or StUint, order: static[Endianness] = bigEndian): string =
+  ## Stringify an int to hex.
+  ## Note. Leading zeros are not removed. Use toString(n, base = 16)/toHex instead.
+  ##
+  ## You can specify bigEndian or littleEndian order.
+  ## i.e. in bigEndian:
+  ##   - 1.uint64 will be 00000001
+  ##   - (2.uint128)^64 + 1 will be 0000000100000001
+  ##
+  ## in littleEndian:
+  ##   - 1.uint64 will be 01000000
+  ##   - (2.uint128)^64 + 1 will be 0100000001000000
+  let bytes = a.toBytes(order)
+  result = bytes.toHex()
 
 proc initFromBytesBE*[bits: static[int]](val: var Stuint[bits],
                                          ba: openarray[byte],
diff --git a/stint/private/uint_mul.nim b/stint/private/uint_mul.nim
index 2b574f8..1155344 100644
--- a/stint/private/uint_mul.nim
+++ b/stint/private/uint_mul.nim
@@ -11,7 +11,8 @@ import
   ./datatypes,
   ./primitives/extended_precision
 
-# ################### Multiplication ################### #
+# Multiplication
+# --------------------------------------------------------
 {.push raises: [], gcsafe.}
 
 func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) =
diff --git a/stint/private/uint_shift.nim b/stint/private/uint_shift.nim
new file mode 100644
index 0000000..12eb944
--- /dev/null
+++ b/stint/private/uint_shift.nim
@@ -0,0 +1,93 @@
+# Stint
+# Copyright 2018-Present Status Research & Development GmbH
+# Licensed under either of
+#
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
+#
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import
+  ./datatypes
+
+# Shifts
+# --------------------------------------------------------
+{.push raises: [], gcsafe.}
+
+func shrSmall*(r: var Limbs, a: Limbs, k: SomeInteger) =
+  ## Shift right by k bits.
+  ##
+  ## k MUST be less than the word bit width (32 or 64)
+  # Note: for speed, loading a[i] and a[i+1]
+  #       instead of a[i-1] and a[i]
+  #       is probably easier to parallelize for the compiler
+  #       (antidependence WAR vs loop-carried dependence RAW)
+  when cpuEndian == littleEndian:
+    for i in 0 ..< a.len-1:
+      r[i] = (a[i] shr k) or (a[i+1] shl (WordBitWidth - k))
+    r[^1] = a[^1] shr k
+  else:
+    for i in countdown(a.len-1, 1):
+      r[i] = (a[i] shr k) or (a[i-1] shl (WordBitWidth - k))
+    r[0] = a[0] shr k
+
+func shrLarge*(r: var Limbs, a: Limbs, w, shift: SomeInteger) =
+  ## Shift right by `w` words + `shift` bits
+  ## Assumes `r` is 0 initialized
+  if w >= Limbs.len:
+    return
+
+  when cpuEndian == littleEndian:
+    for i in w ..< a.len-1:
+      r[i-w] = (a[i] shr shift) or (a[i+1] shl (WordBitWidth - shift))
+    r[^(1+w)] = a[^1] shr shift
+  else:
+    for i in countdown(a.len-1, 1+w):
+      r[i-w] = (a[i] shr shift) or (a[i-1] shl (WordBitWidth - shift))
+    r[0] = a[w] shr shift
+
+func shrWords*(r: var Limbs, a: Limbs, w: SomeInteger) =
+  ## Shift right by `w` words
+  when cpuEndian == littleEndian:
+    for i in 0 ..< Limbs.len-w:
+      r[i] = a[i+w]
+  else:
+    for i in countdown(Limbs.len-w, 0):
+      r[i] = a[i+w]
+
+func shlSmall*(r: var Limbs, a: Limbs, k: SomeInteger) =
+  ## Shift left by k bits.
+  ##
+  ## k MUST be less than the word bit width (32 or 64)
+  when cpuEndian == littleEndian:
+    r[0] = a[0] shl k
+    for i in 1 ..< a.len:
+      r[i] = (a[i] shl k) or (a[i-1] shr (WordBitWidth - k))
+  else:
+    r[^1] = a[^1] shl k
+    for i in countdown(a.len-2, 0):
+      r[i] = (a[i] shl k) or (a[i+1] shr (WordBitWidth - k))
+
+func shlLarge*(r: var Limbs, a: Limbs, w, shift: SomeInteger) =
+  ## Shift left by `w` words + `shift` bits
+  ## Assumes `r` is 0 initialized
+  if w >= Limbs.len:
+    return
+
+  when cpuEndian == littleEndian:
+    r[w] = a[0] shl shift
+    for i in 1+w ..< r.len:
+      r[i] = (a[i-w] shl shift) or (a[i-w-1] shr (WordBitWidth - shift))
+  else:
+    r[^1] = a[^w] shl shift
+    for i in countdown(a.len-2-w, 0):
+      r[i+w] = (a[i] shl shift) or (a[i+1] shr (WordBitWidth - shift))
+
+func shlWords*(r: var Limbs, a: Limbs, w: SomeInteger) =
+  ## Shift left by `w` words
+  when cpuEndian == littleEndian:
+    for i in 0 ..< Limbs.len-w:
+      r[i+w] = a[i]
+  else:
+    for i in countdown(Limbs.len-1, 0):
+      r[i] = a[i-w]
diff --git a/stint/uintops.nim b/stint/uintops.nim
index 94ee52d..a227613 100644
--- a/stint/uintops.nim
+++ b/stint/uintops.nim
@@ -12,6 +12,7 @@ import
   stew/bitops2,
   # Internal
   ./private/datatypes,
+  ./private/uint_shift,
   ./private/primitives/addcarry_subborrow
 
 export StUint
@@ -127,69 +128,72 @@ func `xor`*(a, b: Stuint): Stuint =
     wr = wa xor wb
   result.clearExtraBits()
 
-func `shr`*(a: Stuint, k: SomeInteger): Stuint =
-  ## Shift right by k.
-  ##
-  ## k MUST be less than the base word size (2^32 or 2^64)
-  # Note: for speed, loading a[i] and a[i+1]
-  #       instead of a[i-1] and a[i]
-  #       is probably easier to parallelize for the compiler
-  #       (antidependence WAR vs loop-carried dependence RAW)
-  when cpuEndian == littleEndian:
-    for i in 0 ..< a.limbs.len-1:
-      result.limbs[i] = (a.limbs[i] shr k) or (a.limbs[i+1] shl (WordBitWidth - k))
-    result.limbs[^1] = a.limbs[^1] shr k
-  else:
-    for i in countdown(a.limbs.len-1, 1):
-      result.limbs[i] = (a.limbs[i] shr k) or (a.limbs[i-1] shl (WordBitWidth - k))
-    result.limbs[0] = a.limbs[0] shr k
-
-func `shl`*(a: Stuint, k: SomeInteger): Stuint =
-  ## Compute the `shift left` operation of x and k
-  when cpuEndian == littleEndian:
-    result.limbs[0] = a.limbs[0] shl k
-    for i in 1 ..< a.limbs.len:
-      result.limbs[i] = (a.limbs[i] shl k) or (a.limbs[i-1] shr (WordBitWidth - k))
-  else:
-    result.limbs[^1] = a.limbs[^1] shl k
-    for i in countdown(a.limbs.len-2, 0):
-      result.limbs[i] = (a.limbs[i] shl k) or (a.limbs[i+1] shr (WordBitWidth - k))
-  result.clearExtraBits()
-
-func countOnes*(x: Stuint): int {.inline.} =
+func countOnes*(a: Stuint): int {.inline.} =
   result = 0
-  for wx in leastToMostSig(x):
-    result += countOnes(wx)
+  for wa in leastToMostSig(a):
+    result += countOnes(wa)
 
-func parity*(x: Stuint): int {.inline.} =
-  result = parity(x.limbs[0])
-  for i in 1 ..< x.limbs.len:
-    result = result xor parity(x.limbs[i])
+func parity*(a: Stuint): int {.inline.} =
+  result = parity(a.limbs[0])
+  for i in 1 ..< a.limbs.len:
+    result = result xor parity(a.limbs[i])
 
-func leadingZeros*(x: Stuint): int {.inline.} =
+func leadingZeros*(a: Stuint): int {.inline.} =
   result = 0
-  for word in mostToLeastSig(x):
+  for word in mostToLeastSig(a):
     let zeroCount = word.leadingZeros()
     result += zeroCount
     if zeroCount != WordBitWidth:
       return
 
-func trailingZeros*(x: Stuint): int {.inline.} =
+func trailingZeros*(a: Stuint): int {.inline.} =
   result = 0
-  for word in leastToMostSig(x):
+  for word in leastToMostSig(a):
     let zeroCount = word.leadingZeros()
     result += zeroCount
     if zeroCount != WordBitWidth:
       return
 
-func firstOne*(x: Stuint): int {.inline.} =
-  result = trailingZeros(x)
-  if result == x.limbs.len * WordBitWidth:
+func firstOne*(a: Stuint): int {.inline.} =
+  result = trailingZeros(a)
+  if result == a.limbs.len * WordBitWidth:
     result = 0
   else:
     result += 1
 
-{.pop.}
+func `shr`*(a: Stuint, k: SomeInteger): Stuint {.inline.} =
+  ## Shift right by k bits
+  if k < WordBitWidth:
+    result.limbs.shrSmall(a.limbs, k)
+    return
+  # w = k div WordBitWidth, shift = k mod WordBitWidth
+  let w = k shr static(log2trunc(uint32(WordBitWidth)))
+  let shift = k and (WordBitWidth - 1)
+
+  if shift == 0:
+    result.limbs.shrWords(a.limbs, w)
+  else:
+    result.limbs.shrLarge(a.limbs, w, shift)
+
+func `shl`*(a: Stuint, k: SomeInteger): Stuint {.inline.} =
+  ## Shift left by k bits
+  if k < WordBitWidth:
+    result.limbs.shlSmall(a.limbs, k)
+    result.clearExtraBits()
+    return
+  # w = k div WordBitWidth, shift = k mod WordBitWidth
+  let w = k shr static(log2trunc(uint32(WordBitWidth)))
+  let shift = k and (WordBitWidth - 1)
+
+  if shift == 0:
+    result.limbs.shlWords(a.limbs, w)
+  else:
+    result.limbs.shlLarge(a.limbs, w, shift)
+
+  result.clearExtraBits()
+
+{.pop.} # End inline
+
 # Addsub
 # --------------------------------------------------------
 {.push raises: [], inline, noInit, gcsafe.}