Add SortedDag, which maintains topological ordering

This commit is contained in:
Mark Spanbroek 2021-07-27 11:22:25 +02:00
parent f60d6189eb
commit 957767c730
5 changed files with 398 additions and 0 deletions

24
abc/dag/merge.license Normal file
View File

@ -0,0 +1,24 @@
=====================================================
Nim -- a Compiler for Nim. https://nim-lang.org/
Copyright (C) 2006-2021 Andreas Rumpf. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
[ MIT license: http://www.opensource.org/licenses/mit-license.php ]

95
abc/dag/merge.nim Normal file
View File

@ -0,0 +1,95 @@
# Copied from Nim standard library, development version:
# https://github.com/nim-lang/Nim/blob/493721c16c06b5681dc270679bdcbb41011614b2/lib/pure/algorithm.nim#L545
# See merge.license file for copyright info.
proc merge*[T](
    result: var seq[T],
    x, y: openArray[T], cmp: proc(x, y: T): int {.closure.}
) =
  ## Merges two sorted `openArray`. `x` and `y` are assumed to be sorted.
  ## If you do not wish to provide your own `cmp`,
  ## you may use `system.cmp` or instead call the overloaded
  ## version of `merge`, which uses `system.cmp`.
  ##
  ## The merge is stable: when `cmp` reports two elements as equal, the
  ## element taken from `x` is placed before the one taken from `y`.
  ##
  ## .. note:: The original data of `result` is not cleared,
  ##    new data is appended to `result`.
  ##
  ## **See also:**
  ## * `merge proc<#merge,seq[T],openArray[T],openArray[T]>`_
  runnableExamples:
    # `isSorted` lives in std/algorithm, which this module does not export.
    # It is imported selectively so that a `merge` provided by newer
    # versions of std/algorithm cannot clash with the one shown here.
    from std/algorithm import isSorted

    let x = @[1, 3, 6]
    let y = @[2, 3, 4]

    block:
      var merged = @[7] # new data is appended to merged sequence
      merged.merge(x, y, system.cmp[int])
      assert merged == @[7, 1, 2, 3, 3, 4, 6]

    block:
      var merged = @[7] # if you only want new data, clear merged sequence first
      merged.setLen(0)
      merged.merge(x, y, system.cmp[int])
      assert merged.isSorted
      assert merged == @[1, 2, 3, 3, 4, 6]

    import std/sugar

    var res: seq[(int, int)]
    res.merge([(1, 1)], [(1, 2)], (a, b) => a[0] - b[0])
    assert res == @[(1, 1), (1, 2)]

    assert seq[int].default.dup(merge([1, 3], [2, 4])) == @[1, 2, 3, 4]

  let
    sizeX = x.len
    sizeY = y.len
    oldLen = result.len

  # Reserve room for every incoming element up front; slots are then filled
  # by index, starting right after the data `result` already holds.
  result.setLen(oldLen + sizeX + sizeY)

  var
    ix = 0
    iy = 0
    i = oldLen

  # Interleave for as long as both inputs still have elements left.
  while ix < sizeX and iy < sizeY:
    if cmp(x[ix], y[iy]) > 0: # strictly greater only, to have a stable sort
      result[i] = y[iy]
      inc iy
    else:
      result[i] = x[ix]
      inc ix
    inc i

  # At most one of the inputs has a tail left; copy it over unchanged.
  while ix < sizeX:
    result[i] = x[ix]
    inc i
    inc ix
  while iy < sizeY:
    result[i] = y[iy]
    inc i
    inc iy
proc merge*[T](result: var seq[T], x, y: openArray[T]) {.inline.} =
  ## Shortcut version of `merge` that uses `system.cmp[T]` as the comparison function.
  ##
  ## As with the four-argument version, the merged elements are appended to
  ## `result`; whatever `result` already contains is left in place.
  ##
  ## **See also:**
  ## * `merge proc<#merge,seq[T],openArray[T],openArray[T],proc(T,T)>`_
  runnableExamples:
    # `sorted` and `isSorted` live in std/algorithm, which this module does
    # not export. They are imported selectively so that a `merge` provided by
    # newer versions of std/algorithm cannot clash with the one shown here.
    from std/algorithm import sorted, isSorted

    let x = [5, 10, 15, 20, 25]
    let y = [50, 40, 30, 20, 10].sorted

    var merged: seq[int]
    merged.merge(x, y)
    assert merged.isSorted
    assert merged == @[5, 10, 10, 15, 20, 20, 25, 30, 40, 50]
  merge(result, x, y, system.cmp)

119
abc/dag/sorteddag.nim Normal file
View File

@ -0,0 +1,119 @@
import std/tables
import std/sets
import std/algorithm
import std/heapqueue
import std/hashes
import ./edgeset
import ./merge
## Implements a directed acyclic graph (DAG). Visiting vertices in topological
## order is fast. It is optimized for DAGs that grow by adding new vertices that
## point to existing vertices in the DAG, such as a blockchain transaction DAG.
##
## Uses the dynamic topological sort algorithm by
## [Pearce and Kelly](https://www.doc.ic.ac.uk/~phjk/Publications/DynamicTopoSortAlg-JEA-07.pdf).
type
  SortedDag*[Vertex] = ref object
    ## A DAG whose vertices are kept in topological order
    edges: EdgeSet[Vertex] # edge store; queried through `outgoing`/`incoming`
    order: Table[Vertex, int] # index assigned to each vertex that was added
  SortedVertex[Vertex] = object
    ## A vertex paired with the index it had in `order` at lookup time.
    ## Comparison and hashing of sorted vertices look at `index`.
    vertex: Vertex
    index: int
func new*[V](_: type SortedDag[V]): SortedDag[V] =
  ## Creates a new `SortedDag` whose fields are default-initialized.
  result = SortedDag[V]()
func lookup[V](dag: SortedDag[V], vertex: V): SortedVertex[V] =
  ## Pairs `vertex` with the index currently stored for it in `dag.order`.
  ## The table is indexed with `[]`, so the vertex must have been added.
  let index = dag.order[vertex]
  SortedVertex[V](vertex: vertex, index: index)
func `<`*[V](a, b: SortedVertex[V]): bool =
  ## Orders sorted vertices by their `index`; the wrapped vertex itself is
  ## not compared.
  result = a.index < b.index
func hash*[V](vertex: SortedVertex[V]): Hash =
  ## Hash of a sorted vertex, derived from its `index` alone.
  hash(vertex.index)
func searchForward[V](dag: SortedDag[V],
                      start: SortedVertex[V],
                      upperbound: SortedVertex[V]): seq[SortedVertex[V]] =
  ## Depth-first walk along outgoing edges, beginning at `start`.
  ##
  ## Returns `start` together with every vertex reached whose index is below
  ## the index of `upperbound`. Fails an assertion ("cycle detected") when an
  ## outgoing edge leads to a vertex that has the same index as `upperbound`.
  var pending = @[start]
  var visited = [start].toHashSet
  while pending.len != 0:
    let current = pending.pop()
    result.add(current)
    for successor in dag.edges.outgoing(current.vertex):
      let vertex = dag.lookup(successor)
      # Checked for every outgoing edge, including already visited targets.
      doAssert vertex.index != upperbound.index, "cycle detected"
      # `containsOrIncl` marks the vertex as visited and tells us whether it
      # was new; vertices at or beyond the bound are never marked.
      if vertex < upperbound and not visited.containsOrIncl(vertex):
        pending.add(vertex)
func searchBackward[V](dag: SortedDag[V],
                       start: SortedVertex[V],
                       lowerbound: SortedVertex[V]): seq[SortedVertex[V]] =
  ## Depth-first walk along incoming edges, beginning at `start`.
  ##
  ## Returns `start` together with every vertex reached whose index is above
  ## the index of `lowerbound`.
  var pending = @[start]
  var visited = [start].toHashSet
  while pending.len != 0:
    let current = pending.pop()
    result.add(current)
    for predecessor in dag.edges.incoming(current.vertex):
      let candidate = dag.lookup(predecessor)
      # `containsOrIncl` marks the vertex as visited and tells us whether it
      # was new; vertices at or below the bound are never marked.
      if lowerbound < candidate and not visited.containsOrIncl(candidate):
        pending.add(candidate)
func reorder[V](dag: SortedDag[V], forward, backward: seq[SortedVertex[V]]) =
  ## Redistributes the indices currently held by the `forward` and `backward`
  ## vertices among those same vertices: the pooled indices are handed out in
  ## ascending order, first to the `backward` vertices and then to the
  ## `forward` vertices, each group keeping its existing relative order.
  let
    sortedBackward = backward.sorted
    sortedForward = forward.sorted

  var vertices: seq[V]
  var backwardIndices, forwardIndices: seq[int]
  for entry in sortedBackward:
    vertices.add(entry.vertex)
    backwardIndices.add(entry.index)
  for entry in sortedForward:
    vertices.add(entry.vertex)
    forwardIndices.add(entry.index)

  # Both index lists are sorted, so merging them yields all affected indices
  # in ascending order.
  var indices: seq[int]
  indices.merge(backwardIndices, forwardIndices)

  for position, index in indices:
    dag.order[vertices[position]] = index
func update[V](dag: SortedDag[V], lowerbound, upperbound: SortedVertex[V]) =
  ## Restores the ordering when `lowerbound` currently sorts before
  ## `upperbound`; does nothing otherwise.
  if not (lowerbound < upperbound):
    return
  # The forward search runs first; it is the one that asserts on cycles.
  let forward = dag.searchForward(lowerbound, upperbound)
  let backward = dag.searchBackward(upperbound, lowerbound)
  dag.reorder(forward, backward)
func add*[V](dag: SortedDag[V], vertex: V) =
  ## Adds a vertex to the DAG.
  ##
  ## A new vertex receives an index lower than that of every vertex added
  ## before it. Adding a vertex that is already part of the DAG is a no-op:
  ## overwriting its index would move it in front of vertices that already
  ## have edges pointing at it and thereby break the topological order.
  if vertex notin dag.order:
    dag.order[vertex] = -(dag.order.len)
func add*[V](dag: SortedDag[V], edge: tuple[x, y: V]) =
  ## Adds an edge x -> y to the DAG.
  ##
  ## Both endpoints must have been added as vertices beforehand. An edge that
  ## would create a cycle fails an assertion ("cycle detected"); this includes
  ## an edge from a vertex to itself.
  doAssert edge.x in dag
  doAssert edge.y in dag
  # A self-loop has identical lower and upper bounds, so `update` would skip
  # its search and never notice the cycle; reject it before storing the edge.
  doAssert edge.x != edge.y, "cycle detected"
  dag.edges.incl(edge)
  # The target vertex `y` acts as lower bound, the source vertex `x` as upper
  # bound for the reordering.
  dag.update(dag.lookup(edge.y), dag.lookup(edge.x))
func contains*[V](dag: SortedDag[V], vertex: V): bool =
  ## Reports whether `vertex` has been added to the DAG. Enables the
  ## `vertex in dag` and `vertex notin dag` syntax.
  dag.order.hasKey(vertex)
func contains*[V](dag: SortedDag[V], edge: Edge[V]): bool =
  ## Reports whether `edge` is present in the edge set of the DAG. Enables
  ## the `edge in dag` and `edge notin dag` syntax.
  result = dag.edges.contains(edge)
iterator visit*[V](dag: SortedDag[V], start: V): V =
  ## Visits all vertices that are reachable from the starting vertex. Vertices
  ## are visited in topological order, meaning that vertices close to the
  ## starting vertex are visited first. The starting vertex itself is not
  ## yielded.
  var queue = initHeapQueue[SortedVertex[V]]()
  var seen: HashSet[SortedVertex[V]]
  # Seed the queue with the direct successors of the starting vertex.
  for successor in dag.edges.outgoing(start):
    let entry = dag.lookup(successor)
    queue.push(entry)
    seen.incl(entry)
  # The heap hands out the pending vertex with the lowest index first.
  while queue.len != 0:
    let nearest = queue.pop()
    yield nearest.vertex
    for successor in dag.edges.outgoing(nearest.vertex):
      let entry = dag.lookup(successor)
      # `containsOrIncl` marks the vertex as seen and tells us whether it was
      # new, so every vertex is queued at most once in this loop.
      if not seen.containsOrIncl(entry):
        queue.push(entry)

159
tests/abc/testSortedDag.nim Normal file
View File

@ -0,0 +1,159 @@
import std/sequtils
import std/algorithm
import std/random
import abc/dag/sorteddag
import ./basics
suite "Sorted DAG":
  # `SortedDag` is a `ref object`, so the handles below are declared with
  # `let`: the DAG is mutated through the reference, never reassigned.

  test "contains vertices":
    let dag = SortedDag[int].new
    dag.add(1)
    check 1 in dag
    check 42 notin dag
    dag.add(42)
    check 42 in dag

  test "contains edges":
    let dag = SortedDag[int].new
    dag.add(1)
    dag.add(2)
    dag.add(3)
    dag.add( (1, 2) )
    check (1, 2) in dag
    check (2, 3) notin dag
    dag.add( (2, 3) )
    check (2, 3) in dag

  test "raises when adding edge for unknown vertex":
    let dag = SortedDag[int].new
    dag.add(1)
    # vertex 2 was never added, so it is unknown as target ...
    expect Defect:
      dag.add( (1, 2) )
    # ... and as source
    expect Defect:
      dag.add( (2, 1) )

  test "visits reachable vertices, nearest first":
    # ⓪ → ①
    #  ↘ ↙
    #   ②
    let dag = SortedDag[int].new
    for vertex in 0..2:
      dag.add(vertex)
    for edge in [ (0, 1), (1, 2), (0, 2) ]:
      dag.add(edge)
    check toSeq(dag.visit(0)) == @[1, 2]
    check toSeq(dag.visit(1)) == @[2]
    check toSeq(dag.visit(2)).len == 0

  test "visits vertices in topological order":
    #      ⑤     ④
    #    ↙   ↘ ↙   ↘
    #   ②     ⓪     ①
    #    ↘         ↗
    #       ③
    let dag = SortedDag[int].new
    for vertex in 0..5:
      dag.add(vertex)
    for edge in [ (5, 2), (5, 0), (4, 0), (4, 1), (2, 3), (3, 1) ]:
      dag.add(edge)
    let reachableFrom5 = toSeq(dag.visit(5))
    let reachableFrom4 = toSeq(dag.visit(4))
    check reachableFrom5.sorted == @[0, 1, 2, 3]
    check reachableFrom4.sorted == @[0, 1]
    # ② → ③ → ① is a path, so that relative order must be respected
    check reachableFrom5.find(2) < reachableFrom5.find(3)
    check reachableFrom5.find(3) < reachableFrom5.find(1)

  test "handles spending transactions before gaining transactions":
    #       acks
    #      ↙    ↘
    #   ack1    ack2
    #    ↓        ↓
    #   gain  ←  spend
    let dag = SortedDag[string].new
    for vertex in ["spend", "gain", "ack1", "ack2", "acks"]:
      dag.add(vertex)
    for edge in [("acks", "ack1"),
                 ("acks", "ack2"),
                 ("ack1", "gain"),
                 ("ack2", "spend"),
                 ("spend", "gain")]:
      dag.add(edge)
    let walk = toSeq dag.visit("acks")
    check walk.find("spend") < walk.find("gain")

  test "handles cross-referencing branches":
    #      ⓪
    #    ↙   ↘
    #   ①  →  ⑥
    #   ↓     ↓
    #   ②  ←  ⑦
    #   ↓     ↓
    #   ③  →  ⑧
    #   ↓     ↓
    #   ④  ←  ⑨
    #   ↓     ↓
    #   ⑤  →  ⑩
    let dag = SortedDag[int].new
    for vertex in 0..10:
      dag.add(vertex)
    for vertex in [1,6]:
      dag.add((0, vertex))
    for vertex in 1..<5:
      dag.add((vertex, vertex + 1))
    for vertex in 6..<10:
      dag.add((vertex, vertex + 1))
    for vertex in [1, 3, 5]:
      dag.add((vertex, vertex + 5))
    for vertex in [2, 4]:
      dag.add((vertex+5, vertex))
    check toSeq(dag.visit(0)) == @[1, 6, 7, 2, 3, 8, 9, 4, 5, 10]

  test "handles DAGs with many edges":
    let dag = SortedDag[int].new
    var vertices: seq[int]
    for vertex in 0..100:
      vertices.add(vertex)
    vertices.shuffle()
    for vertex in vertices:
      dag.add(vertex)
    for _ in 0..10_000:
      let x, y = rand(100)
      if x != y:
        # always point from the smaller to the larger number, so that the
        # random edges can never form a cycle
        dag.add((min(x,y), max(x,y)))
    var latest = -1
    for vertex in dag.visit(0):
      latest = vertex
    check latest != -1

  test "handles large DAGs that grow by adding new vertices":
    # ⓪ ← ① ← ② ← ...
    let dag = SortedDag[int].new
    dag.add(0)
    for i in 1..10_000:
      dag.add(i)
      dag.add((i, i-1))
    var latest = 10_000
    for vertex in dag.visit(10_000):
      check vertex < latest
      latest = vertex
    check latest == 0

View File

@ -2,6 +2,7 @@ import abc/testAcks
import abc/testEdgeSet import abc/testEdgeSet
import abc/testHistory import abc/testHistory
import abc/testKeys import abc/testKeys
import abc/testSortedDag
import abc/testTransactions import abc/testTransactions
import abc/testTxStore import abc/testTxStore
import abc/testWallet import abc/testWallet