From b83b47e541d2757d29da21189c8af7b32577e7e6 Mon Sep 17 00:00:00 2001
From: Jordan Hrycaj <jordan@status.im>
Date: Tue, 25 May 2021 18:38:39 +0100
Subject: [PATCH] LRU cache tests makeover

why:
  source-local unit tests would hardly be triggered by github CI as rightly
  criticised with the last patch.

details:
  source-local unit tests have been moved to tests folder.

  this version also contains rlp serialisation code so rlp encode/decode
  will apply transparently. this is not needed in p2p/validate but will be
  useful with the clique protocol.
---
 nimbus/p2p/validate.nim    |   2 +
 nimbus/utils/lru_cache.nim | 237 ++++++++++++++++---------------------
 tests/all_tests.nim        |   3 +-
 tests/test_lru_cache.nim   | 173 +++++++++++++++++++++++++++
 4 files changed, 282 insertions(+), 133 deletions(-)
 create mode 100644 tests/test_lru_cache.nim

diff --git a/nimbus/p2p/validate.nim b/nimbus/p2p/validate.nim
index 79d0315e1..a11e8a436 100644
--- a/nimbus/p2p/validate.nim
+++ b/nimbus/p2p/validate.nim
@@ -52,6 +52,8 @@ type
 
   Hash512 = MDigest[512]
 
+{.push raises: [Defect,CatchableError].}
+
 # ------------------------------------------------------------------------------
 # Private Helpers
 # ------------------------------------------------------------------------------
diff --git a/nimbus/utils/lru_cache.nim b/nimbus/utils/lru_cache.nim
index f13323319..1bdb517c9 100644
--- a/nimbus/utils/lru_cache.nim
+++ b/nimbus/utils/lru_cache.nim
@@ -11,19 +11,17 @@
 ## Hash as hash can: LRU cache
 ## ===========================
 ##
-## Provide last-recently-used cache data structure. The implementation works
-## with the same complexity as the worst case of a nim hash tables operation
-## which is assumed ~O(1) in most cases (so long as the table does not degrade
-## into one-bucket linear mode, or some adjustment algorithm.)
+## This module provides a generic least-recently-used cache data structure.
+##
+## The implementation works with the same complexity as the worst case of a
+## nim hash tables operation. This is assumed to be O(1) in most cases
+## (so long as the table does not degrade into one-bucket linear mode, or
+## some bucket-adjustment algorithm takes over.)
 ##
-
-const
-   # debugging, enable with: nim c -r -d:noisy:3 ...
-   noisy {.intdefine.}: int = 0
-   isMainOk {.used.} = noisy > 2
 
 import
   math,
+  eth/rlp,
   stew/results,
   tables
 
@@ -31,181 +29,156 @@ export
   results
 
 type
-  LruKey*[T,K] =               ## derive an LRU key from function argument
+  LruKey*[T,K] =                  ## User provided handler function, derives an
+                                  ## LRU `key` from function argument `arg`. The
+                                  ## `key` is used to index the cache data.
     proc(arg: T): K {.gcsafe, raises: [Defect,CatchableError].}
 
-  LruValue*[T,V,E] =           ## derive an LRU value from function argument
+  LruValue*[T,V,E] =              ## User provided handler function, derives an
+                                  ## LRU `value` from function argument `arg`.
     proc(arg: T): Result[V,E] {.gcsafe, raises: [Defect,CatchableError].}
 
-  LruItem[K,V] = tuple
-    prv, nxt: K                ## doubly linked items
-    value: V
+  LruItem*[K,V] =                 ## Doubly linked hash-tab item encapsulating
+                                  ## the `value` (which is the result from
+                                  ## the `LruValue` handler function.)
+    tuple[prv, nxt: K, value: V]
+
+  # There could be an {.rlpCustomSerialization.} annotation for the tab field.
+  # As there was a problem with the automatic Rlp serialisation for generic
+  # types, the easier solution was an all manual read()/append() for the whole
+  # generic LruData[K,V] type.
+  LruData[K,V] = object
+    maxItems: int                 ## Max number of entries
+    first, last: K                ## Doubly linked item list queue
+    tab: TableRef[K,LruItem[K,V]] ## (`key`,encapsulated(`value`)) data table
 
   LruCache*[T,K,V,E] = object
-    maxItems: int              ## max number of entries
-    tab: Table[K,LruItem[K,V]] ## cache data table
-    first, last: K             ## doubly linked item list queue
-    toKey: LruKey[T,K]
-    toValue: LruValue[T,V,E]
+    data*: LruData[K,V]           ## Cache data, can be serialised
+    toKey: LruKey[T,K]            ## Handler function, derives `key`
+    toValue: LruValue[T,V,E]      ## Handler function, derives `value`
 
 {.push raises: [Defect,CatchableError].}
 
+# ------------------------------------------------------------------------------
+# Private functions
+# ------------------------------------------------------------------------------
+
+proc `==`[K,V](a, b: var LruData[K,V]): bool =
+  a.maxItems == b.maxItems and
+    a.first == b.first and
+    a.last == b.last and
+    a.tab == b.tab
+
 # ------------------------------------------------------------------------------
 # Public functions
 # ------------------------------------------------------------------------------
 
+proc clearLruCache*[T,K,V,E](cache: var LruCache[T,K,V,E])
+                                          {.gcsafe, raises: [Defect].} =
+  ## Reset/clear an initialised LRU cache.
+  cache.data.first.reset
+  cache.data.last.reset
+  cache.data.tab = newTable[K,LruItem[K,V]](cache.data.maxItems.nextPowerOfTwo)
+
+
 proc initLruCache*[T,K,V,E](cache: var LruCache[T,K,V,E];
                             toKey: LruKey[T,K], toValue: LruValue[T,V,E];
-                            cacheMaxItems = 10) =
-  ## Initialise new LRU cache
-  cache.maxItems = cacheMaxItems
+                            cacheMaxItems = 10) {.gcsafe, raises: [Defect].} =
+  ## Initialise LRU cache. The handlers `toKey()` and `toValue()` are
+  ## explained at the data type definition.
+  cache.data.maxItems = cacheMaxItems
   cache.toKey = toKey
   cache.toValue = toValue
-  cache.tab = initTable[K,LruItem[K,V]](cacheMaxItems.nextPowerOfTwo)
+  cache.clearLruCache
 
 
-proc getLruItem*[T,K,V,E](cache: var LruCache[T,K,V,E]; arg: T): Result[V,E] =
-  ## Return `toValue(arg)`, preferably from result cached earlier
-  let key = cache.toKey(arg)
+proc getLruItem*[T,K,V,E](lru: var LruCache[T,K,V,E];
+                             arg: T): Result[V,E] {.gcsafe.} =
+  ## Returns `lru.toValue(arg)`, preferably from result cached earlier.
+  let key = lru.toKey(arg)
 
   # Relink item if already in the cache => move to last position
-  if cache.tab.hasKey(key):
-    let lruItem = cache.tab[key]
+  if lru.data.tab.hasKey(key):
+    let lruItem = lru.data.tab[key]
 
-    if key == cache.last:
+    if key == lru.data.last:
       # Nothing to do
       return ok(lruItem.value)
 
     # Unlink key Item
-    if key == cache.first:
-      cache.first = lruItem.nxt
+    if key == lru.data.first:
+      lru.data.first = lruItem.nxt
     else:
-      cache.tab[lruItem.prv].nxt = lruItem.nxt
-      cache.tab[lruItem.nxt].prv = lruItem.prv
+      lru.data.tab[lruItem.prv].nxt = lruItem.nxt
+      lru.data.tab[lruItem.nxt].prv = lruItem.prv
 
     # Append key item
-    cache.tab[cache.last].nxt = key
-    cache.tab[key].prv = cache.last
-    cache.last = key
+    lru.data.tab[lru.data.last].nxt = key
+    lru.data.tab[key].prv = lru.data.last
+    lru.data.last = key
     return ok(lruItem.value)
 
   # Calculate value, pass through error unless OK
-  let rcValue = ? cache.toValue(arg)
+  let rcValue = ? lru.toValue(arg)
 
   # Limit number of cached items
-  if cache.maxItems <= cache.tab.len:
+  if lru.data.maxItems <= lru.data.tab.len:
     # Delete oldest/first entry
-    var nextKey = cache.tab[cache.first].nxt
-    cache.tab.del(cache.first)
-    cache.first = nextKey
+    var nextKey = lru.data.tab[lru.data.first].nxt
+    lru.data.tab.del(lru.data.first)
+    lru.data.first = nextKey
 
   # Add cache entry
   var tabItem: LruItem[K,V]
 
   # Initialise empty queue
-  if cache.tab.len == 0:
-    cache.first = key
-    cache.last = key
+  if lru.data.tab.len == 0:
+    lru.data.first = key
+    lru.data.last = key
   else:
     # Append queue item
-    cache.tab[cache.last].nxt = key
-    tabItem.prv = cache.last
-    cache.last = key
+    lru.data.tab[lru.data.last].nxt = key
+    tabItem.prv = lru.data.last
+    lru.data.last = key
 
   tabItem.value = rcValue
-  cache.tab[key] = tabItem
+  lru.data.tab[key] = tabItem
   result = ok(rcValue)
 
-# ------------------------------------------------------------------------------
-# Debugging/testing
-# ------------------------------------------------------------------------------
 
-when isMainModule and isMainOK:
+proc `==`*[T,K,V,E](a, b: var LruCache[T,K,V,E]): bool =
+  ## Returns `true` if both argument LRU caches contain the same data
+  ## regardless of `toKey()`/`toValue()` handler functions.
+  a.data == b.data
 
-  import
-    strformat
 
-  const
-    cacheLimit = 10
-    keyList = [
-      185, 208,  53,  54, 196, 189, 187, 117,  94,  29,   6, 173, 207,  45,  31,
-      208, 127, 106, 117,  49,  40, 171,   6,  94,  84,  60, 125,  87, 168, 183,
-      200, 155,  34,  27,  67, 107, 108, 223, 249,   4, 113,   9, 205, 100,  77,
-      224,  19, 196,  14,  83, 145, 154,  95,  56, 236,  97, 115, 140, 134,  97,
-      153, 167,  23,  17, 182, 116, 253,  32, 108, 148, 135, 169, 178, 124, 147,
-      231, 236, 174, 211, 247,  22, 118, 144, 224,  68, 124, 200,  92,  63, 183,
-      56,  107,  45, 180, 113, 233,  59, 246,  29, 212, 172, 161, 183, 207, 189,
-      56,  198, 130,  62,  28,  53, 122]
+proc append*[K,V](rw: var RlpWriter; data: LruData[K,V]) {.inline.} =
+  ## Generic support for `rlp.encode(lru.data)` for serialising the data
+  ## part of an LRU cache.
+  rw.append(data.maxItems)
+  rw.append(data.first)
+  rw.append(data.last)
+  rw.startList(data.tab.len)
+  for key,value in data.tab.pairs:
+    rw.append((key, value))
 
-  var
-    getKey: LruKey[int,int] =
-      proc(x: int): int = x
+proc read*[K,V](rlp: var Rlp; Q: type LruData[K,V]): Q {.inline.} =
+  ## Generic support for `rlp.decode(bytes)` for loading the data part
+  ## of an LRU cache from a serialised data stream.
+  result.maxItems = rlp.read(int)
+  result.first = rlp.read(K)
+  result.last = rlp.read(K)
+  result.tab = newTable[K,LruItem[K,V]](result.maxItems.nextPowerOfTwo)
+  for w in rlp.items:
+    let (key,value) = w.read((K,LruItem[K,V]))
+    result.tab[key] = value
 
-    getValue: LruValue[int,string,int] =
-      proc(x: int): Result[string,int] = ok($x)
 
-    cache: LruCache[int,int,string,int]
-
-  cache.initLruCache(getKey, getValue, cacheLimit)
-
-  proc verifyLinks[T,K,V,E](cache: var LruCache[T,K,V,E]) =
-    var key = cache.first
-    if cache.tab.len == 1:
-      doAssert cache.tab.hasKey(key)
-      doAssert key == cache.last
-    elif 1 < cache.tab.len:
-      # forward links
-      for n in 1 ..< cache.tab.len:
-        var curKey = key
-        key = cache.tab[curKey].nxt
-        if cache.tab[key].prv != curKey:
-          echo &">>> ({n}): " &
-            &"cache.tab[{key}].prv == {cache.tab[key].prv} exp {curKey}"
-          doAssert cache.tab[key].prv == curKey
-      doAssert key == cache.last
-      # backward links
-      for n in 1 ..< cache.tab.len:
-        var curKey = key
-        key = cache.tab[curKey].prv
-        if cache.tab[key].nxt != curKey:
-          echo &">>> ({n}): " &
-            &"cache.tab[{key}].nxt == {cache.tab[key].nxt} exp {curKey}"
-          doAssert cache.tab[key].nxt == curKey
-      doAssert key == cache.first
-
-  proc toKeyList[T,K,V,E](cache: var LruCache[T,K,V,E]): seq[K] =
-    cache.verifyLinks
-    if 0 < cache.tab.len:
-      var key = cache.first
-      while key != cache.last:
-        result.add key
-        key = cache.tab[key].nxt
-      result.add cache.last
-
-  proc toValueList[T,K,V,E](cache: var LruCache[T,K,V,E]): seq[V] =
-    cache.verifyLinks
-    if 0 < cache.tab.len:
-      var key = cache.first
-      while key != cache.last:
-        result.add cache.tab[key].value
-        key = cache.tab[key].nxt
-      result.add cache.tab[cache.last].value
-
-  var lastQ: seq[int]
-  for w in keyList:
-    var
-      key = w mod 13
-      reSched = cache.tab.hasKey(key)
-      value = cache.getLruItem(key)
-      queue = cache.toKeyList
-      values = cache.toValueList
-    # verfy key/value pairs
-    for n in 0 ..< queue.len:
-      doAssert $queue[n] == $values[n]
-    if reSched:
-      echo &"+++ rotate {value} => {queue}"
-    else:
-      echo &"*** append {value} => {queue}"
+proc specs*[T,K,V,E](cache: var LruCache[T,K,V,E]):
+                                  (int, K, K, TableRef[K,LruItem[K,V]]) =
+  ## Returns cache data & specs `(maxItems,firstKey,lastKey,tableRef)` for
+  ## debugging and testing.
+  (cache.data.maxItems, cache.data.first, cache.data.last, cache.data.tab)
 
 # ------------------------------------------------------------------------------
 # End
diff --git a/tests/all_tests.nim b/tests/all_tests.nim
index 60e38d324..2d73a2c18 100644
--- a/tests/all_tests.nim
+++ b/tests/all_tests.nim
@@ -115,4 +115,5 @@ cliBuilder:
           ../stateless/test_block_witness,
           ../stateless/test_witness_json,
           ./test_misc,
-          ./test_graphql
+          ./test_graphql,
+          ./test_lru_cache
diff --git a/tests/test_lru_cache.nim b/tests/test_lru_cache.nim
new file mode 100644
index 000000000..87f222387
--- /dev/null
+++ b/tests/test_lru_cache.nim
@@ -0,0 +1,173 @@
+# Nimbus
+# Copyright (c) 2018-2019 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or distributed except
+# according to those terms.
+
+import
+  ../nimbus/utils/lru_cache,
+  eth/rlp,
+  strformat,
+  tables,
+  unittest2
+
+const
+  cacheLimit = 10
+  keyList = [
+    185, 208,  53,  54, 196, 189, 187, 117,  94,  29,   6, 173, 207,  45,  31,
+    208, 127, 106, 117,  49,  40, 171,   6,  94,  84,  60, 125,  87, 168, 183,
+    200, 155,  34,  27,  67, 107, 108, 223, 249,   4, 113,   9, 205, 100,  77,
+    224,  19, 196,  14,  83, 145, 154,  95,  56, 236,  97, 115, 140, 134,  97,
+    153, 167,  23,  17, 182, 116, 253,  32, 108, 148, 135, 169, 178, 124, 147,
+    231, 236, 174, 211, 247,  22, 118, 144, 224,  68, 124, 200,  92,  63, 183,
+    56,  107,  45, 180, 113, 233,  59, 246,  29, 212, 172, 161, 183, 207, 189,
+    56,  198, 130,  62,  28,  53, 122]
+
+# Debugging output: if `noisy`, print `pfx` and all `args` as one single line
+proc say(noisy = false; pfx = "***"; args: varargs[string, `$`]) =
+  if noisy:
+    var outText = pfx & " "
+    for a in args.items:
+      outText &= a
+    echo outText
+
+
+# Privy access to LRU internals
+proc maxItems[T,K,V,E](cache: var LruCache[T,K,V,E]): int =
+  cache.specs[0]
+
+proc first[T,K,V,E](cache: var LruCache[T,K,V,E]): K =
+  cache.specs[1]
+
+proc last[T,K,V,E](cache: var LruCache[T,K,V,E]): K =
+  cache.specs[2]
+
+proc tab[T,K,V,E](cache: var LruCache[T,K,V,E]): TableRef[K,LruItem[K,V]] =
+  cache.specs[3]
+
+
+proc verifyLinks[T,K,V,E](lru: var LruCache[T,K,V,E]) =
+  var key = lru.first
+  if lru.tab.len == 1:
+    doAssert lru.tab.hasKey(key)
+    doAssert key == lru.last
+  elif 1 < lru.tab.len:
+    # forward links
+    for n in 1 ..< lru.tab.len:
+      var curKey = key
+      key = lru.tab[curKey].nxt
+      if lru.tab[key].prv != curKey:
+        echo &"({n}): lru.tab[{key}].prv == {lru.tab[key].prv} exp {curKey}"
+        doAssert lru.tab[key].prv == curKey
+    doAssert key == lru.last
+    # backward links
+    for n in 1 ..< lru.tab.len:
+      var curKey = key
+      key = lru.tab[curKey].prv
+      if lru.tab[key].nxt != curKey:
+        echo &"({n}): lru.tab[{key}].nxt == {lru.tab[key].nxt} exp {curKey}"
+        doAssert lru.tab[key].nxt == curKey
+    doAssert key == lru.first
+
+proc toKeyList[T,K,V,E](lru: var LruCache[T,K,V,E]): seq[K] =
+    lru.verifyLinks
+    if 0 < lru.tab.len:
+      var key = lru.first
+      while key != lru.last:
+        result.add key
+        key = lru.tab[key].nxt
+      result.add lru.last
+
+proc toValueList[T,K,V,E](lru: var LruCache[T,K,V,E]): seq[V] =
+  lru.verifyLinks
+  if 0 < lru.tab.len:
+    var key = lru.first
+    while key != lru.last:
+      result.add lru.tab[key].value
+      key = lru.tab[key].nxt
+    result.add lru.tab[lru.last].value
+
+
+proc createTestCache: LruCache[int,int,string,int] =
+  var
+    getKey: LruKey[int,int] =
+      proc(x: int): int = x
+
+    getValue: LruValue[int,string,int] =
+      proc(x: int): Result[string,int] = ok($x)
+
+    cache: LruCache[int,int,string,int]
+
+  # Create LRU cache
+  cache.initLruCache(getKey, getValue, cacheLimit)
+
+  result = cache
+
+
+proc filledTestCache(noisy: bool): LruCache[int,int,string,int] =
+  ## Returns a test cache after looking up every `keyList` entry (mod 13),
+  ## verifying queue links and key/value pairs after each lookup.
+  var cache = createTestCache()
+
+  for w in keyList:
+    let
+      key = w mod 13
+      reSched = cache.tab.hasKey(key)   # already cached => gets rotated
+      value = cache.getLruItem(key)
+      queue = cache.toKeyList
+      values = cache.toValueList
+    # verify key/value pairs
+    for n in 0 ..< queue.len:
+      doAssert $queue[n] == $values[n]
+    if reSched:
+      noisy.say ">>>", &"rotate {value} => {queue}"
+    else:
+      noisy.say "+++", &"append {value} => {queue}"
+
+  result = cache
+
+# ---
+
+proc doFillUpTest(noisy: bool) =
+  discard filledTestCache(noisy)
+
+proc doSerialiserTest(noisy: bool) =
+
+  proc say(a: varargs[string]) =
+    say(noisy = noisy, args = a)
+
+  var
+    c1 = filledTestCache(false)
+    s1 = rlp.encode(c1.data)
+    c2 = createTestCache()
+
+  say &"serialised[{s1.len}]: {s1}"
+
+  c2.clearLruCache
+  doAssert c1 != c2
+
+  c2.data = s1.decode(type c2.data)
+  doAssert c1 == c2
+
+  say &"c2Specs: {c2.maxItems} {c2.first} {c2.last} ..."
+
+  doAssert s1 == rlp.encode(c2.data)
+
+
+proc lruCacheMain*(noisy = defined(debug)) =
+  suite "LRU Cache":
+
+    test "Fill Up":
+      doFillUpTest(noisy)
+
+    test "Rlp Serialise & Load":
+      doSerialiserTest(noisy)
+
+when isMainModule:
+  lruCacheMain()
+
+# End