# nimbus-eth1
# Copyright (c) 2023 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
## Aristo DB -- Patricia Trie Merkleisation
## ========================================
##
## For the current state of the `Patricia Trie`, keys (equivalent to hashes)
## are associated with the vertex IDs. Existing key associations are checked
## (i.e. recalculated and compared) unless the ID is locked. In the latter
## case, the key is assumed to be correct without checking.
##
## The following properties are required from the top layer cache.
##
## * All recently (i.e. not saved to backend) added entries must have an
## `lTab[]` entry with `(root-vertex,path,leaf-vertex-ID)`.
##
## * All recently (i.e. not saved to backend) deleted entries must have an
## `lTab[]` entry with `(root-vertex,path,VertexID(0))`.
##
## * All vertices where the key (aka Merkle hash) has changed must have a
## top layer cache `kMap[]` entry `(vertex-ID,VOID_HASH_LABEL)` indicating
## that there is no key available for this vertex. This also applies for
##   backend vertices where the key has changed while the structural logic
## did not change.
##
## The association algorithm is an optimised version of:
##
## * For all leaf vertices which have all child links on the top layer cache
##   where the node keys (aka hashes) can be compiled, proceed with the
##   parent vertex. Note that a top layer cache vertex can only have a key
##   on the top layer cache (whereas a backend vertex might have its key on
##   either the top layer cache or the backend.)
##
##   Apparently, keys (aka hashes) can always be compiled for leaf vertices.
##   The same holds for follow-up vertices where the child keys are already
##   available. This process stops when a vertex has children on the backend
##   or when the children lead to a chain that is not yet sorted.
##
## * For the remaining vertex chains (where the process stopped) up to the root
## vertex, set up a width-first schedule starting at the vertex where the
## previous chain broke off and follow up to the root vertex.
##
## * Follow the width-first schedule for labelling all vertices with a hash
##   key.
##
## Note that there are some tweaks for `proof` nodes with incomplete tries and
## handling of possible stray vertices on the top layer cache left over from
## deletion processes.
##
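## A minimal usage sketch (assuming an `AristoDbRef` instance `db` with
## pending updates on its top layer):
## ::
##   let rootKeys = db.hashify().valueOr:
##     raiseAssert "hashify() failed for vertex " & $error[0]
##   echo "merkleised root vertices: ", rootKeys
##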
{.push raises: [].}
import
std/[algorithm, sequtils, sets, strutils, tables],
chronicles,
eth/common,
results,
stew/byteutils,
"."/[aristo_desc, aristo_get, aristo_hike, aristo_serialise, aristo_utils,
aristo_vid]

type
  FollowUpVid = object
    ## Link item: VertexID -> VertexID
    root: VertexID                   ## Root vertex, might be void unless known
    toVid: VertexID                  ## Valid next/follow up vertex

  BackVidTab =
    Table[VertexID,FollowUpVid]

  WidthFirstForest = object
    ## Collected width first search trees
    root: HashSet[VertexID]          ## Top level, root targets
    pool: BackVidTab                 ## Upper links pool
    base: BackVidTab                 ## Width-first leaf level links

  DfReport = object
    ## Depth first traversal report tracing back a hike with
    ## `leafToRootCrawler()`
    legInx: int                      ## First leg that failed to resolve
    unresolved: seq[VertexID]        ## List of unresolved links
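
# Illustration with hypothetical vertex IDs (as set up by `updateWFF()`
# below): after crawling a hike `VertexID(1) -> VertexID(2) -> VertexID(4)`
# where the branch `VertexID(2)` still misses the key of another child
# `VertexID(3)`, the forest would hold
#
#   base[VertexID(3)] = FollowUpVid(root: VertexID(1), toVid: VertexID(2))
#   pool[VertexID(2)] = FollowUpVid(root: VertexID(1), toVid: VertexID(1))
#
# so the width-first traversal resolves `VertexID(3)` first and then follows
# the `toVid` links towards the root.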

const
  SubTreeSearchDepthMax = 64

logScope:
  topics = "aristo-hashify"

# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
template logTxt(info: static[string]): static[string] =
"Hashify " & info
func getOrVoid(tab: BackVidTab; vid: VertexID): FollowUpVid =
  tab.getOrDefault(vid, FollowUpVid())

func isValid(w: FollowUpVid): bool =
  w.toVid.isValid

func contains(wff: WidthFirstForest; vid: VertexID): bool =
  vid in wff.base or vid in wff.pool or vid in wff.root

# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------
proc updateHashKey(
    db: AristoDbRef;                 # Database, top layer
    root: VertexID;                  # Root ID
    vid: VertexID;                   # Vertex ID to check for
    expected: HashKey;               # Hash key for vertex addressed by `vid`
    backend: bool;                   # Set `true` if vertex is on the backend
      ): Result[void,AristoError] =
  ## Update the argument hash key `expected` for the vertex addressed by `vid`.
  ##
  # If the Merkle hash has already been cached locally, it must match.
block:
let key = db.top.kMap.getOrVoid(vid).key
if key.isValid:
if key != expected:
let error = HashifyExistingHashMismatch
debug logTxt "hash update failed", vid, key, expected, error
return err(error)
return ok()
  # If the vertex is cached locally, there is no locally cached Merkle hash
  # key. One will be created at the bottom end of the function.
  #
  # So there remains the case when the vertex is available on the backend
  # only. The Merkle hash is not cached locally. It might be overloaded (and
  # eventually overwritten.)
if backend:
# Ok, vertex is on the backend.
let rc = db.getKeyBE vid
if rc.isOk:
if rc.value == expected:
return ok()
# Changes on the upper layers overload the lower layers. Some hash keys
# on the backend will have become obsolete which is corrected here.
#
# Proceed `vidAttach()`, below
elif rc.error != GetKeyNotFound:
debug logTxt "backend key fetch failed", vid, expected, error=rc.error
return err(rc.error)
else:
discard
# Proceed `vidAttach()`, below
  # Otherwise there is no Merkle hash, so create one with the `expected` key
# and write it to the top level `pAmk[]` and `kMap[]` tables.
db.vidAttach(HashLabel(root: root, key: expected), vid)
ok()

proc leafToRootCrawler(
    db: AristoDbRef;                 # Database, top layer
    hike: Hike;                      # Hike for labelling leaf..root
      ): Result[DfReport,(VertexID,AristoError)] =
  ## Returns the index of the first leg that could not be hashed from top
  ## layer cache vertices alone.
##
for n in (hike.legs.len-1).countDown(0):
let
wp = hike.legs[n].wp
bg = hike.legs[n].backend
node = wp.vtx.toNode(db, stopEarly=false, beKeyOk=false).valueOr:
return ok DfReport(legInx: n, unresolved: error)
# Vertices marked proof nodes need not be checked
if wp.vid notin db.top.pPrf:
# Check against existing key, or store new key
let key = node.digestTo(HashKey)
db.updateHashKey(hike.root, wp.vid, key, bg).isOkOr:
return err((wp.vid,error))
ok DfReport(legInx: -1) # all could be hashed
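
# Example (hypothetical legs): for a hike `[root, branch, leaf]` where the
# branch leg cannot be resolved because one of its child links still lacks a
# key, the crawler above would return
# `DfReport(legInx: 1, unresolved: @[<missing child VertexID>])`, whereas a
# fully hashed hike yields `DfReport(legInx: -1)`.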

proc cloudConnect(
    cloud: HashSet[VertexID];        # Vertex IDs to start connecting from
    db: AristoDbRef;                 # Database, top layer
    target: BackVidTab;              # Vertices to arrive at
      ): tuple[paths: WidthFirstForest, unresolved: HashSet[VertexID]] =
## For each vertex ID from argument `cloud` find a chain of `FollowUpVid`
## type links reaching into argument `target`. The `paths` entry from the
## `result` tuple contains the connections to the `target` argument and the
## `unresolved` entries the IDs left over from `cloud`.
if 0 < cloud.len:
result.unresolved = cloud
var hold = target
while 0 < hold.len:
      # Greedily trace back the `hold[]` entries for finding parents of
      # unresolved vertices from `cloud`
var redo: BackVidTab
for (vid,val) in hold.pairs:
let vtx = db.getVtx vid
if vtx.isValid:
result.paths.pool[vid] = val
# Grab child links
for sub in vtx.subVids:
let w = FollowUpVid(
root: val.root,
toVid: vid)
if sub notin cloud:
redo[sub] = w
else:
result.paths.base[sub] = w # ok, use this
result.unresolved.excl sub
if result.unresolved.len == 0:
return
redo.swap hold
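
# Example (hypothetical IDs): with `cloud = {VertexID(9)}` and a `target`
# table holding `VertexID(7) -> VertexID(5)`, `cloudConnect()` inspects the
# children of `VertexID(7)`; if `VertexID(9)` happens to be such a child, it
# is entered into `paths.base[]` linked towards `VertexID(7)` and removed
# from the `unresolved` result set.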

proc updateWFF(
    wff: var WidthFirstForest;       # Search tree to update
    hike: Hike;                      # Chain of vertices
    ltr: DfReport;                   # Index and extra vertex IDs for `hike`
      ) =
  ## Use vertices from the `hike` argument and link them leaf-to-root in a
  ## way that they can be traversed later in a width-first search.
##
## The `ltr` argument augments the `hike` path in that it defines a set of
## extra vertices where the width-first search is supposed to start.
##
  ##        ..unresolved hash keys..     |   ..all set here..
  ##                                     |
  ##   hike.legs: (leg[0], leg[1], ..leg[legInx], ..)
  ##                 |       |            |
  ##                 | <---- |  <-------  |
  ##                 |                    |
  ##                 |     wff.pool[]     |
  ##
  ## and the set `unresolved{} × leg[legInx]` will be registered in `base[]`.
##
# Root target to reach via width-first search
wff.root.incl hike.root
# Add unresolved nodes for top level links
for u in 1 .. ltr.legInx:
let vid = hike.legs[u].wp.vid
    # Make sure that `base[]` and `pool[]` are disjoint, possibly moving
    # `base[]` entries to the `pool[]`.
wff.base.del vid
wff.pool[vid] = FollowUpVid(
root: hike.root,
toVid: hike.legs[u-1].wp.vid)
# These ones have been resolved, already
for u in ltr.legInx+1 ..< hike.legs.len:
let vid = hike.legs[u].wp.vid
wff.pool.del vid
wff.base.del vid
assert 0 < ltr.unresolved.len # debugging, only
let vid = hike.legs[ltr.legInx].wp.vid
for sub in ltr.unresolved:
# Update request for unresolved sub-links by adding a new tail
# entry (unless registered, already.)
if sub notin wff:
wff.base[sub] = FollowUpVid(
root: hike.root,
toVid: vid)
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc hashify*(
    db: AristoDbRef;                 # Database, top layer
      ): Result[HashSet[VertexID],(VertexID,AristoError)] =
## Add keys to the `Patricia Trie` so that it becomes a `Merkle Patricia
## Tree`. If successful, the function returns the keys (aka Merkle hash) of
## the root vertices.
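  ##
  ## Sketch of the flow implemented below: leaf paths recorded in `lTab[]`
  ## are crawled leaf-to-root via `leafToRootCrawler()`; whatever cannot be
  ## resolved from the top layer cache is merged into a `WidthFirstForest`
  ## via `updateWFF()` (with `cloudConnect()` gluing in stray vertices left
  ## over from deletions) which is finally traversed width-first until all
  ## root vertices are labelled.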
var
deleted = false # Need extra check for orphaned vertices
completed: HashSet[VertexID] # Root targets reached, already
wff: WidthFirstForest # Leaf-to-root traversal structure
if not db.top.dirty:
return ok completed
for (lky,lfVid) in db.top.lTab.pairs:
let
rc = lky.hikeUp db
hike = rc.to(Hike)
    if not lfVid.isValid:
      # Remember that there are leftovers from a delete procedure which have
      # to be eventually found before starting width-first processing.
      deleted = true
if hike.legs.len == 0:
      # Ignore leftover path from deleted entry.
if not lfVid.isValid:
# FIXME: Is there a case for adding child-to-root links to the `wff`
# schedule?
continue
if rc.isErr:
return err((lfVid,rc.error[1]))
return err((hike.root,HashifyEmptyHike))
    # Hash as much as possible of the `hike`, starting at the downmost `leg`
let ltr = ? db.leafToRootCrawler hike
if ltr.legInx < 0:
completed.incl hike.root
else:
# Not all could be hashed, merge the rest into `wff` width-first schedule
wff.updateWFF(hike, ltr)
  # Update unresolved keys left over after delete operations when overlay
  # vertices have been added and there was no `hike` path to capture them.
  #
  # Considering a list of updated paths to these vertices after deleting a
  # `Leaf` vertex is deemed too expensive and more error prone. So the task
  # is to search for unresolved node keys and add glue paths for them to
  # the width-first schedule.
if deleted:
var unresolved: HashSet[VertexID]
for (vid,lbl) in db.top.kMap.pairs:
if not lbl.isValid and
vid notin wff and
(vid notin db.top.sTab or db.top.sTab.getOrVoid(vid).isValid):
unresolved.incl vid
let glue = unresolved.cloudConnect(db, wff.base)
if 0 < glue.unresolved.len:
return err((glue.unresolved.toSeq[0],HashifyNodeUnresolved))
# Add glue items to `wff.base[]` and `wff.pool[]` tables
for (vid,val) in glue.paths.base.pairs:
# Add vid to `wff.base[]` list
wff.base[vid] = val
# Move tail of VertexID chain to `wff.pool[]`
var toVid = val.toVid
while true:
let w = glue.paths.pool.getOrVoid toVid
if not w.isValid:
break
wff.base.del toVid
wff.pool[toVid] = w
toVid = w.toVid
# Traverse width-first schedule and update remaining hashes.
while 0 < wff.base.len:
var redo: BackVidTab
for (vid,val) in wff.base.pairs:
block thisVtx:
let vtx = db.getVtx vid
        # Try to convert the vertex to a node. This works only if all link
        # references already have Merkle hash keys.
if not vtx.isValid:
# This might happen when proof nodes (see `snap` protocol) are on
# an incomplete trie where this `vid` has a key but no vertex yet.
# Also, the key (as part of the proof data) must be on the backend
# by the way `leafToRootCrawler()` works. So it is enough to verify
# the key there.
discard db.getKeyBE(vid).valueOr:
return err((vid,HashifyNodeUnresolved))
break thisVtx
# Try to resolve the current vertex as node
let node = vtx.toNode(db).valueOr:
# Cannot complete with this vertex unless updated, so do it later.
redo[vid] = val
break thisVtx
# End block `thisVtx`
# Could resolve => update Merkle hash
let key = node.digestTo(HashKey)
db.vidAttach(HashLabel(root: val.root, key: key), vid)
# Proceed with back link
let nextVal = wff.pool.getOrVoid val.toVid
if nextVal.isValid:
        # Make sure that we keep a strict hierarchical order
if nextVal.toVid in redo:
# Push back from `redo[]` to be considered later
wff.pool[nextVal.toVid] = redo.getOrVoid nextVal.toVid
redo.del nextVal.toVid
# And move the next one to `redo[]`
wff.pool.del val.toVid
redo[val.toVid] = nextVal
elif val.toVid notin redo.values.toSeq.mapIt(it.toVid):
wff.pool.del val.toVid
redo[val.toVid] = nextVal
# Restart `wff.base[]`
wff.base.swap redo
# Update root nodes
for vid in wff.root - db.top.pPrf:
# Convert root vertex to a node.
let node = db.getVtx(vid).toNode(db,stopEarly=false).valueOr:
return err((vid,HashifyRootNodeUnresolved))
db.vidAttach(HashLabel(root: vid, key: node.digestTo(HashKey)), vid)
completed.incl vid
db.top.dirty = false
ok completed
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------