From a2d36d51922252ad40d9f6e70a494af1d4bf8bb1 Mon Sep 17 00:00:00 2001 From: thatben Date: Fri, 7 Feb 2025 15:35:40 +0100 Subject: [PATCH] basic crawling --- codexcrawler/crawler.nim | 19 +++++++++++++++++-- codexcrawler/list.nim | 24 +++++++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/codexcrawler/crawler.nim b/codexcrawler/crawler.nim index 680bf6e..c3b8124 100644 --- a/codexcrawler/crawler.nim +++ b/codexcrawler/crawler.nim @@ -7,6 +7,8 @@ import ./dht import ./list import ./nodeentry +import std/sequtils + logScope: topics = "crawler" @@ -16,6 +18,11 @@ type Crawler* = ref object okNodes: List nokNodes: List +# This is not going to stay this way. +proc isNew(c: Crawler, node: Node): bool = + not c.todoNodes.contains(node.id) and not c.okNodes.contains(node.id) and + not c.nokNodes.contains(node.id) + proc handleNodeNotOk(c: Crawler, target: NodeEntry) {.async.} = if err =? (await c.nokNodes.add(target)).errorOption: error "Failed to add not-OK-node to list", err = err.msg @@ -34,16 +41,24 @@ proc addNewTodoNodes(c: Crawler, newNodes: seq[Node]) {.async.} = error "Failed to add todo-node to list", err = err.msg proc step(c: Crawler) {.async.} = + logScope: + todo = $c.todoNodes.len + ok = $c.okNodes.len + nok = $c.nokNodes.len + without target =? (await c.todoNodes.pop()), err: error "Failed to get todo node", err = err.msg # todo: update target timestamp - without newNodes =? (await c.dht.getNeighbors(target.id)), err: - trace "getNeighbors call failed", node = $target.id, err = err.msg + without receivedNodes =? (await c.dht.getNeighbors(target.id)), err: + trace "Call failed", node = $target.id, err = err.msg await c.handleNodeNotOk(target) return + let newNodes = receivedNodes.filterIt(isNew(c, it)) + + trace "Received nodes", receivedNodes = receivedNodes.len, newNodes = newNodes.len await c.handleNodeOk(target) await c.addNewTodoNodes(newNodes) diff --git a/codexcrawler/list.nim b/codexcrawler/list.nim index 82e4719..e083e1a 100644 --- a/codexcrawler/list.nim +++ b/codexcrawler/list.nim @@ -12,6 +12,7 @@ import pkg/codexdht import std/sets import std/strutils +import std/sequtils import std/os import ./nodeentry @@ -25,7 +26,7 @@ type List* = ref object name: string store: TypedDatastore - items: HashSet[NodeEntry] + items: seq[NodeEntry] onMetric: OnUpdateMetric proc encode(s: NodeEntry): seq[byte] = @@ -40,7 +41,8 @@ proc decode(T: type NodeEntry, bytes: seq[byte]): ?!T = if tokens.len != 2: return failure("expected 2 tokens") - success(NodeEntry(id: NodeId(tokens[0].u256), value: tokens[1])) + let id = UInt256.fromHex(tokens[0]) + success(NodeEntry(id: id, value: tokens[1])) proc saveItem(this: List, item: NodeEntry): Future[?!void] {.async.} = without itemKey =? Key.init(this.name / $item.id), err: @@ -66,7 +68,7 @@ proc load*(this: List): Future[?!void] {.async.} = without value =? item.value, err: return failure(err) if value.value.len > 0: - this.items.incl(value) + this.items.add(value) this.onMetric(this.items.len.int64) info "Loaded list", name = this.name, items = this.items.len @@ -77,8 +79,17 @@ proc new*( ): List = List(name: name, store: store, onMetric: onMetric) +proc contains*(this: List, nodeId: NodeId): bool = + this.items.anyIt(it.id == nodeId) + +proc contains*(this: List, item: NodeEntry): bool = + this.contains(item.id) + proc add*(this: List, item: NodeEntry): Future[?!void] {.async.} = - this.items.incl(item) + if this.contains(item): + return success() + + this.items.add(item) this.onMetric(this.items.len.int64) if err =? (await this.saveItem(item)).errorOption: @@ -89,7 +100,10 @@ proc pop*(this: List): Future[?!NodeEntry] {.async.} = if this.items.len < 1: return failure(this.name & "List is empty.") - let item = this.items.pop() + let item = this.items[0] + this.items.delete(0) + this.onMetric(this.items.len.int64) + if err =? (await this.removeItem(item)).errorOption: return failure(err) return success(item)