nim-stew/stew/sorted_set.nim

322 lines
12 KiB
Nim

# Nimbus
# Copyright (c) 2018-2022 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except
# according to those terms.
## Generic Sorted List Based on Red-black Trees
## ============================================
##
## Due to the sort order fetch operations ge, le, etc., this API differs
## considerably from the `table` API.
##
## Note that the list descriptor wraps a reference to the underlying tree. So
## assigning a `SortedSet` descriptor variable does *not* duplicate the list
## but rather adds another link to the same tree.
##
## Example:
## ::
##  # create new list with integer keys, and integer values
##  var sl = SortedSet[int,int].init()
##
##  # add some entries
##  for key in [208, 127, 106, 117, 49, 40, 171]:
##    let rc = sl.insert(key)
##    if rc.isOk:
##      # unique key, store some value
##      rc.value.data = -key
##
##  # print entries with keys greater than or equal to 100 in natural key order
##  block:
##    var rc = sl.ge(100)
##    while rc.isOk:
##      echo "*** item ", rc.value.key, " ", rc.value.data
##      rc = sl.gt(rc.value.key)
##
##  # print all key/value entries in natural key order
##  block:
##    var
##      walk = SortedSetWalkRef[int,int].init(sl)
##      rc = walk.first
##    while rc.isOk:
##      echo "*** item ", rc.value.key, " ", rc.value.data
##      rc = walk.next
##    # optional clean up, see comments on the destroy() directive
##    walk.destroy
##
import
std/[tables],
./sorted_set/[rbtree_delete,
rbtree_desc,
rbtree_find,
rbtree_flush,
rbtree_insert,
rbtree_reset,
rbtree_verify,
rbtree_walk],
./results
export
RbInfo,
RbResult,
`isRed=`, # no need to export all of `rbtree_desc`
`linkLeft=`,
`linkRight=`,
results
type
  SortedSetItemRef*[K,V] = ref object
    ## Data value container as stored in the list/database
    key: K    ## Sorter key, read-only
    data*: V  ## Some data value, to be modified freely

  SortedSet*[K,V] = object of RootObj
    ## Sorted list descriptor
    tree: RbTreeRef[SortedSetItemRef[K,V],K]

  SortedSetWalkRef*[K,V] = RbWalkRef[SortedSetItemRef[K,V],K]
    ## Traversal/walk descriptor for sorted list

  SortedSetResult*[K,V] = RbResult[SortedSetItemRef[K,V]]
    ## Data value container or error code, typically used as value
    ## returned from functions.
# Enable strict exception tracking for everything below. Compilers older
# than 1.4 need `Defect` listed explicitly, newer ones take an empty list.
when (NimMajor, NimMinor) >= (1, 4):
  {.push raises: [].}
else:
  {.push raises: [Defect].}
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc slstCmp[K,V](casket: SortedSetItemRef[K,V]; key: K): int =
  ## Three-way comparison of the key stored in `casket` against `key`,
  ## delegating to `cmp()` for the key type `K`.
  result = cmp(casket.key, key)
proc slstMkc[K,V](key: K): SortedSetItemRef[K,V] =
  ## Create a new data container item for `key`; the `data` field is left
  ## at its default value.
  result = SortedSetItemRef[K,V](key: key)
proc slstClup[K,V](c: var SortedSetItemRef[K,V]) =
  ## Clean-up hook for a data container item: drops the reference held in
  ## `c` (GC hint, if any). A place for smarter clean-up logic, todo?
  c = nil
proc slstLt[K,V](a, b: SortedSetItemRef[K,V]): bool =
  ## Debugging only: `true` if the key of `a` sorts strictly before the
  ## key of `b`.
  cmp(a.key, b.key) < 0
proc slstPr(code: RbInfo; ctxInfo: string) =
  ## Debugging only: print the error `code` together with the context
  ## message `ctxInfo` on standard output.
  echo("*** sLst Error(", code, "): ", ctxInfo)
# ------------------------------------------------------------------------------
# Public functions, constructor
# ------------------------------------------------------------------------------
proc init*[K,V](sl: var SortedSet[K,V]) =
  ## Constructor for sorted list with key type `K` and data type `V`.
  ##
  ## Installs a new tree on `sl`, wired up with the private key comparison
  ## and item creation helpers of this module.
  sl.tree = newRbTreeRef[SortedSetItemRef[K,V],K](
    cmp = proc(item: SortedSetItemRef[K,V]; k: K): int = slstCmp(item, k),
    mkc = proc(newKey: K): SortedSetItemRef[K,V] = slstMkc[K,V](newKey))
proc init*[K,V](T: type SortedSet[K,V]): T =
  ## Variant of `init()` which returns the initialised descriptor instead
  ## of setting it up in place.
  init(result)
proc move*[K,V](sl: var SortedSet[K,V]): SortedSet[K,V] =
  ## Return a shallow copy of the argument list `sl` (the returned
  ## descriptor takes over the tree of `sl`), then re-initialise `sl`.
  result.tree = sl.tree
  init(sl)
proc clear*[K,V](sl: var SortedSet[K,V]) =
  ## Reset list descriptor to its initial value. This function also
  ## de-registers and flushes all traversal descriptors of type
  ## `SortedSetWalkRef`.
  rbTreeReset(sl.tree,
    clup = proc(item: var SortedSetItemRef[K,V]) = slstClup(item))
# ------------------------------------------------------------------------------
# Public functions, getter, converter
# ------------------------------------------------------------------------------
proc key*[K,V](data: SortedSetItemRef[K,V]): K =
  ## Getter, gives read-only access to the key of the data container item.
  result = data.key
proc len*[K,V](sl: var SortedSet[K,V]): int =
  ## Number of list elements, as reported by the underlying tree.
  result = sl.tree.size
proc toSortedSetResult*[K,V](key: K; data: V): SortedSetResult[K,V] =
  ## Helper, creates an `ok()` result wrapping a new data container item
  ## made up from `key` and `data`.
  let item = SortedSetItemRef[K,V](key: key, data: data)
  ok(item)
# ------------------------------------------------------------------------------
# Public functions, list operations
# ------------------------------------------------------------------------------
proc insert*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Insert `key` and return the data container item holding the `key`.
  ## The function fails if `key` exists in the list already.
  result = rbTreeInsert(sl.tree, key)
proc findOrInsert*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Insert or find `key`, returns data container item with the `key`. This
  ## function always succeeds (unless there is a problem with the list.)
  let inserted = rbTreeInsert(sl.tree, key)
  if inserted.isOk:
    inserted
  else:
    # Insertion failed, so fall back to looking up the `key`.
    rbTreeFindEq(sl.tree, key)
proc delete*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Delete `key` from the list. If it was deleted, the data container
  ## item that was holding the `key` is returned.
  result = rbTreeDelete(sl.tree, key)
proc flush*[K,V](sl: var SortedSet[K,V]) =
  ## Flush the sorted list, i.e. delete all entries. This function is
  ## more efficient than running a `delete()` loop.
  rbTreeFlush(sl.tree,
    clup = proc(item: var SortedSetItemRef[K,V]) = slstClup(item))
# ------------------------------------------------------------------------------
# Public functions, query functions
# ------------------------------------------------------------------------------
proc eq*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Look up `key` in the list and return the data container item holding
  ## the `key` if it exists.
  result = rbTreeFindEq(sl.tree, key)
proc le*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Find data container item with *largest* key *less or equal* the
  ## argument `key` in list and return it if found.
  result = rbTreeFindLe(sl.tree, key)
proc lt*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Find data container item with *largest* key *less than* the argument
  ## `key` in list and return it if found.
  result = rbTreeFindLt(sl.tree, key)
proc ge*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Find data container item with *smallest* key *greater or equal* the
  ## argument `key` in list and return it if found.
  result = rbTreeFindGe(sl.tree, key)
proc gt*[K,V](sl: var SortedSet[K,V]; key: K): SortedSetResult[K,V] =
  ## Find data container item with *smallest* key *greater than* the
  ## argument `key` in list and return it if found.
  result = rbTreeFindGt(sl.tree, key)
# ------------------------------------------------------------------------------
# Public functions, walk/traversal functions
# ------------------------------------------------------------------------------
proc init*[K,V](T: type SortedSetWalkRef[K,V]; sl: var SortedSet[K,V]): T =
  ## Open traversal descriptor on list and register it on the `SortedSet`
  ## descriptor.
  result = newRbWalk(sl.tree)
proc destroy*[K,V](w: SortedSetWalkRef[K,V]) =
  ## De-register and close the traversal descriptor. This function renders
  ## the descriptor unusable, so it must be disposed of.
  ##
  ## This destructor function is crucial when insert/delete operations are
  ## needed to run while traversals are open and not rewound. These
  ## insert/delete operations modify the list so that `w.this`, `w.prev`,
  ## etc. operations might fail. All traversal descriptors must then be
  ## rewound or destroyed.
  rbWalkDestroy(w)
proc first*[K,V](w: SortedSetWalkRef[K,V]): SortedSetResult[K,V] =
  ## Rewind the traversal descriptor to the *least* list key and return
  ## the corresponding data container item.
  ##
  ## When all open traversals are rewound, blockers due to insert/delete
  ## list operations are reset.
  result = rbWalkFirst(w)
proc last*[K,V](w: SortedSetWalkRef[K,V]): SortedSetResult[K,V] =
  ## Rewind the traversal descriptor to the *greatest* list key and return
  ## the corresponding data container item.
  ##
  ## When all open traversals are rewound, blockers due to insert/delete
  ## list operations are reset.
  result = rbWalkLast(w)
proc this*[K,V](w: SortedSetWalkRef[K,V]): SortedSetResult[K,V] =
  ## Retrieve the *current* data container item. This is the same one
  ## retrieved last with any of the traversal functions returning the data
  ## container item.
  ##
  ## Note that the current node becomes unavailable if it was recently
  ## deleted.
  result = rbWalkCurrent(w)
proc next*[K,V](w: SortedSetWalkRef[K,V]): SortedSetResult[K,V] =
  ## Move the traversal descriptor to the next *greater* key and return the
  ## corresponding data container item. If this is the first call after
  ## `SortedSetWalkRef.init()`, then `w.first` is called implicitly.
  ##
  ## If there were tree insert/delete operations, blockers might be active
  ## causing this function to fail so that a rewind is needed.
  result = rbWalkNext(w)
proc prev*[K,V](w: SortedSetWalkRef[K,V]): SortedSetResult[K,V] =
  ## Move the traversal descriptor to the next *smaller* key and return the
  ## corresponding data container item. If this is the first call after
  ## `SortedSetWalkRef.init()`, then `w.last` is called implicitly.
  ##
  ## If there were tree insert/delete operations, blockers might be active
  ## causing this function to fail so that a rewind is needed.
  result = rbWalkPrev(w)
# ------------------------------------------------------------------------------
# Public helpers, debugging
# ------------------------------------------------------------------------------
proc `$`*[K,V](casket: SortedSetItemRef[K,V]): string =
  ## Pretty printer, renders the item as `(key,data)` or as `nil` for a
  ## nil reference.
  ##
  ## :CAVEAT:
  ##   This function needs a working definition for the `data` item:
  ##   ::
  ##    proc `$`*[V](value: V): string {.gcsafe,raises:[Defect,CatchableError].}
  ##
  if not casket.isNil:
    result = "(" & $casket.key & "," & $casket.data & ")"
  else:
    result = "nil"
proc `$`*[K,V](rc: SortedSetResult[K,V]): string =
  ## Pretty printer, renders the error code for an error result and the
  ## data container item otherwise.
  ##
  ## :CAVEAT:
  ##   This function needs a working definition for the `data` item:
  ##   ::
  ##    proc `$`*[V](data: V): string {.gcsafe,raises:[Defect,CatchableError].}
  ##
  if rc.isOk:
    $rc.value
  else:
    $rc.error
proc verify*[K,V](sl: var SortedSet[K,V]):
    Result[void,(SortedSetItemRef[K,V],RbInfo)]
    {.gcsafe, raises: [Defect,CatchableError].} =
  ## Checks for consistency, may print an error message. Returns an `ok()`
  ## result if the argument list `sl` is consistent; otherwise the error
  ## part holds a data container item together with an `RbInfo` code.
  ## This function traverses all the internal data nodes which might be
  ## time consuming. So it would not be used in production code.
  ##
  ## :CAVEAT:
  ##   This function needs a working definition for the `data` item:
  ##   ::
  ##    proc `$`*[V](data: V): string {.gcsafe,raises:[Defect,CatchableError].}
  ##
  rbTreeVerify(sl.tree,
    lt = proc(lhs, rhs: SortedSetItemRef[K,V]): bool = slstLt(lhs, rhs),
    pr = proc(code: RbInfo; msg: string) = slstPr(code, msg))
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------