From f7b1aab098747dbb197e4f870915cb69ff269638 Mon Sep 17 00:00:00 2001 From: thatben Date: Fri, 7 Feb 2025 13:44:35 +0100 Subject: [PATCH] hacky crawl is working --- codexcrawler/application.nim | 30 +++++++++++++++++++----------- codexcrawler/config.nim | 5 ++++- codexcrawler/dht.nim | 22 ++-------------------- config.nims | 4 ++-- 4 files changed, 27 insertions(+), 34 deletions(-) diff --git a/codexcrawler/application.nim b/codexcrawler/application.nim index 238aa21..fe1ebee 100644 --- a/codexcrawler/application.nim +++ b/codexcrawler/application.nim @@ -80,21 +80,28 @@ proc initializeDht(app: Application): Future[?!void] {.async.} = without privateKey =? setupKey(keyPath), err: return failure(err) - var announceAddresses = newSeq[MultiAddress]() - let aaa = MultiAddress.init("/ip4/172.21.64.1/udp/8090").expect("Should init multiaddress") - # /ip4/45.82.185.194/udp/8090 - # /ip4/172.21.64.1/udp/8090 - announceAddresses.add(aaa) + var listenAddresses = newSeq[MultiAddress]() + # TODO: when p2p connections are supported: + # let aaa = MultiAddress.init("/ip4/" & app.config.publicIp & "/tcp/53678").expect("Should init multiaddress") + # listenAddresses.add(aaa) + + var discAddresses = newSeq[MultiAddress]() + let bbb = MultiAddress.init("/ip4/" & app.config.publicIp & "/udp/" & $app.config.discPort).expect("Should init multiaddress") + discAddresses.add(bbb) app.dht = Dht.new( privateKey, bindPort = app.config.discPort, - announceAddrs = announceAddresses, + announceAddrs = listenAddresses, bootstrapNodes = app.config.bootNodes, store = dhtStore, ) + app.dht.updateAnnounceRecord(listenAddresses) + app.dht.updateDhtRecord(discAddresses) + await app.dht.start() + return success() proc initializeApp(app: Application): Future[?!void] {.async.} = @@ -112,21 +119,22 @@ proc hackyCrawl(app: Application) {.async.} = info "starting hacky crawl..." await sleepAsync(3000) - var nodeIds = await app.dht.getRoutingTableNodeIds() + var nodeIds = app.dht.getRoutingTableNodeIds() trace "starting with routing table nodes", nodes = nodeIds.len - while app.status == ApplicationStatus.Running: + while app.status == ApplicationStatus.Running and nodeIds.len > 0: let nodeId = nodeIds[0] nodeIds.delete(0) without newNodes =? (await app.dht.getNeighbors(nodeId)), err: error "getneighbors failed", err = err.msg - trace "adding new nodes", len = newNodes.len - for id in newNodes.mapIt(it.id): - nodeIds.add(id) + for node in newNodes: + nodeIds.add(node.id) + trace "adding new node", id = $node.id, addrs = $node.address await sleepAsync(1000) + info "hacky crawl stopped!" proc stop*(app: Application) = app.status = ApplicationStatus.Stopping diff --git a/codexcrawler/config.nim b/codexcrawler/config.nim index 1298051..75ab41f 100644 --- a/codexcrawler/config.nim +++ b/codexcrawler/config.nim @@ -10,10 +10,11 @@ let doc = Codex Network Crawler. Generates network metrics. Usage: - codexcrawler [--logLevel=] [--metricsAddress=] [--metricsPort=

] [--dataDir=

] [--discoveryPort=

] [--bootNodes=] + codexcrawler [--logLevel=] [--publicIp=] [--metricsAddress=] [--metricsPort=

] [--dataDir=

] [--discoveryPort=

] [--bootNodes=] Options: --logLevel= Sets log level [default: TRACE] + --publicIp= Public IP address where this instance is reachable. [default: 62.45.154.249] --metricsAddress= Listen address of the metrics server [default: 0.0.0.0] --metricsPort=

Listen HTTP port of the metrics server [default: 8008] --dataDir=

Directory for storing data [default: crawler_data] @@ -26,6 +27,7 @@ import docopt type CrawlerConfig* = ref object logLevel*: string + publicIp*: string metricsAddress*: IpAddress metricsPort*: Port dataDir*: string @@ -82,6 +84,7 @@ proc parseConfig*(): CrawlerConfig = return CrawlerConfig( logLevel: get("--logLevel"), + publicIp: get("--publicIp"), metricsAddress: parseIpAddress(get("--metricsAddress")), metricsPort: Port(parseInt(get("--metricsPort"))), dataDir: get("--dataDir"), diff --git a/codexcrawler/dht.nim b/codexcrawler/dht.nim index 27a2753..9da00b9 100644 --- a/codexcrawler/dht.nim +++ b/codexcrawler/dht.nim @@ -40,38 +40,20 @@ proc getNode*(d: Dht, nodeId: NodeId): ?!Node = return success(node.get()) return failure("Node not found for id: " & $nodeId) -proc hacky*(d: Dht, nodeId: NodeId) {.async.} = - await sleepAsync(1) - let node = d.protocol.getNode(nodeId) - if node.isSome(): - let n = node.get() - info "that worked", node = $n.id, seen = $n.seen - else: - info "that didn't work", node = $nodeId - -proc getRoutingTableNodeIds*(d: Dht): Future[seq[NodeId]] {.async.} = +proc getRoutingTableNodeIds*(d: Dht): seq[NodeId] = var ids = newSeq[NodeId]() info "routing table", len = $d.protocol.routingTable.len for bucket in d.protocol.routingTable.buckets: for node in bucket.nodes: warn "node seen", node = $node.id, seen = $node.seen ids.add(node.id) - - await d.hacky(node.id) - # await sleepAsync(1) return ids -proc getDistances(): seq[uint16] = - var d = newSeq[uint16]() - for i in 0..10: - d.add(i.uint16) - return d - proc getNeighbors*(d: Dht, target: NodeId): Future[?!seq[Node]] {.async.} = without node =? d.getNode(target), err: return failure(err) - let distances = getDistances() + let distances = @[256.uint16] let response = await d.protocol.findNode(node, distances) if response.isOk(): diff --git a/config.nims b/config.nims index 86f53f2..3e5bfb2 100644 --- a/config.nims +++ b/config.nims @@ -5,8 +5,8 @@ switch("define", "libp2p_pki_schemes=secp256k1") # switch("define", "chronicles_runtime_filtering=true") # Sets TRACE logging for everything except DHT -switch("define", "chronicles_log_level=INFO") -# switch("define", "chronicles_disabled_topics:discv5") +switch("define", "chronicles_log_level=TRACE") +switch("define", "chronicles_disabled_topics:discv5") when (NimMajor, NimMinor) >= (2, 0): --mm: