From 454fbd3474b1d7d2bded00eb2344a6ef3c0b44c4 Mon Sep 17 00:00:00 2001 From: thatben Date: Fri, 7 Feb 2025 13:57:57 +0100 Subject: [PATCH] setup crawler module --- codexcrawler/application.nim | 58 ++++++++++++++++++++++-------------- codexcrawler/config.nim | 8 ++--- codexcrawler/crawler.nim | 25 ++++++++++++++++ codexcrawler/keyutils.nim | 3 +- 4 files changed, 67 insertions(+), 27 deletions(-) create mode 100644 codexcrawler/crawler.nim diff --git a/codexcrawler/application.nim b/codexcrawler/application.nim index fe1ebee..1e73577 100644 --- a/codexcrawler/application.nim +++ b/codexcrawler/application.nim @@ -15,6 +15,7 @@ import ./metrics import ./list import ./dht import ./keyutils +import ./crawler declareGauge(todoNodesGauge, "DHT nodes to be visited") declareGauge(okNodesGauge, "DHT nodes successfully contacted") @@ -29,10 +30,11 @@ type Application* = ref object status: ApplicationStatus config*: CrawlerConfig - todoList*: List + todoNodes*: List okNodes*: List nokNodes*: List dht*: Dht + crawler*: Crawler proc createDatastore(app: Application, path: string): ?!Datastore = without store =? LevelDbDatastore.new(path), err: @@ -60,11 +62,11 @@ proc initializeLists(app: Application): Future[?!void] {.async.} = proc onNokMetric(value: int64) = nokNodesGauge.set(value) - app.todoList = List.new("todo", store, onTodoMetric) + app.todoNodes = List.new("todo", store, onTodoMetric) app.okNodes = List.new("ok", store, onOkMetric) app.nokNodes = List.new("nok", store, onNokMetric) - if err =? (await app.todoList.load()).errorOption: + if err =? (await app.todoNodes.load()).errorOption: return failure(err) if err =? (await app.okNodes.load()).errorOption: return failure(err) @@ -86,7 +88,9 @@ proc initializeDht(app: Application): Future[?!void] {.async.} = # listenAddresses.add(aaa) var discAddresses = newSeq[MultiAddress]() - let bbb = MultiAddress.init("/ip4/" & app.config.publicIp & "/udp/" & $app.config.discPort).expect("Should init multiaddress") + let bbb = MultiAddress + .init("/ip4/" & app.config.publicIp & "/udp/" & $app.config.discPort) + .expect("Should init multiaddress") discAddresses.add(bbb) app.dht = Dht.new( @@ -104,6 +108,16 @@ proc initializeDht(app: Application): Future[?!void] {.async.} = return success() +proc initializeCrawler(app: Application) = + app.crawler = Crawler.new( + app.dht, + app.todoNodes, + app.okNodes, + app.nokNodes + ) + + app.crawler.start() + proc initializeApp(app: Application): Future[?!void] {.async.} = if err =? (await app.initializeLists()).errorOption: error "Failed to initialize lists", err = err.msg @@ -113,28 +127,30 @@ proc initializeApp(app: Application): Future[?!void] {.async.} = error "Failed to initialize DHT", err = err.msg return failure(err) + app.initializeCrawler() + return success() -proc hackyCrawl(app: Application) {.async.} = - info "starting hacky crawl..." - await sleepAsync(3000) +# proc hackyCrawl(app: Application) {.async.} = +# info "starting hacky crawl..." +# await sleepAsync(3000) - var nodeIds = app.dht.getRoutingTableNodeIds() - trace "starting with routing table nodes", nodes = nodeIds.len +# var nodeIds = app.dht.getRoutingTableNodeIds() +# trace "starting with routing table nodes", nodes = nodeIds.len - while app.status == ApplicationStatus.Running and nodeIds.len > 0: - let nodeId = nodeIds[0] - nodeIds.delete(0) +# while app.status == ApplicationStatus.Running and nodeIds.len > 0: +# let nodeId = nodeIds[0] +# nodeIds.delete(0) - without newNodes =? (await app.dht.getNeighbors(nodeId)), err: - error "getneighbors failed", err = err.msg - - for node in newNodes: - nodeIds.add(node.id) - trace "adding new node", id = $node.id, addrs = $node.address - await sleepAsync(1000) +# without newNodes =? (await app.dht.getNeighbors(nodeId)), err: +# error "getneighbors failed", err = err.msg - info "hacky crawl stopped!" +# for node in newNodes: +# nodeIds.add(node.id) +# trace "adding new node", id = $node.id, addrs = $node.address +# await sleepAsync(1000) + +# info "hacky crawl stopped!" proc stop*(app: Application) = app.status = ApplicationStatus.Stopping @@ -161,8 +177,6 @@ proc run*(app: Application) = error "Failed to start application", err = err.msg return - asyncSpawn app.hackyCrawl() - while app.status == ApplicationStatus.Running: try: chronos.poll() diff --git a/codexcrawler/config.nim b/codexcrawler/config.nim index 75ab41f..20b6554 100644 --- a/codexcrawler/config.nim +++ b/codexcrawler/config.nim @@ -14,7 +14,7 @@ Usage: Options: --logLevel= Sets log level [default: TRACE] - --publicIp= Public IP address where this instance is reachable. [default: 62.45.154.249] + --publicIp= Public IP address where this instance is reachable. --metricsAddress= Listen address of the metrics server [default: 0.0.0.0] --metricsPort=

Listen HTTP port of the metrics server [default: 8008] --dataDir=

Directory for storing data [default: crawler_data] @@ -35,9 +35,9 @@ type CrawlerConfig* = ref object bootNodes*: seq[SignedPeerRecord] proc `$`*(config: CrawlerConfig): string = - "CrawlerConfig:" & " logLevel=" & config.logLevel & " metricsAddress=" & - $config.metricsAddress & " metricsPort=" & $config.metricsPort & " dataDir=" & - config.dataDir & " discPort=" & $config.discPort & " bootNodes=" & + "CrawlerConfig:" & " logLevel=" & config.logLevel & " publicIp=" & config.publicIp & + " metricsAddress=" & $config.metricsAddress & " metricsPort=" & $config.metricsPort & + " dataDir=" & config.dataDir & " discPort=" & $config.discPort & " bootNodes=" & config.bootNodes.mapIt($it).join(";") proc getDefaultTestnetBootNodes(): seq[string] = diff --git a/codexcrawler/crawler.nim b/codexcrawler/crawler.nim new file mode 100644 index 0000000..5911c2a --- /dev/null +++ b/codexcrawler/crawler.nim @@ -0,0 +1,25 @@ +import pkg/chronicles +import pkg/chronos + +import ./dht +import ./list + +logScope: + topics = "crawler" + +type Crawler* = ref object + dht: Dht + todoNodes: List + okNodes: List + nokNodes: List + +proc start*(c: Crawler) = + info "Starting crawler..." + +proc new*(T: type Crawler, dht: Dht, todoNodes: List, okNodes: List, nokNodes: List): Crawler = + Crawler( + dht: dht, + todoNodes: todoNodes, + okNodes: okNodes, + nokNodes: nokNodes + ) diff --git a/codexcrawler/keyutils.nim b/codexcrawler/keyutils.nim index 458b3f2..eb18b27 100644 --- a/codexcrawler/keyutils.nim +++ b/codexcrawler/keyutils.nim @@ -50,7 +50,8 @@ proc setupKey*(path: string): ?!PrivateKey = if not path.fileAccessible({AccessFlags.Find}): info "Creating a private key and saving it" let - res = ?PrivateKey.random(PKScheme.Secp256k1, Rng.instance()[]).mapFailure(KeyError) + res = + ?PrivateKey.random(PKScheme.Secp256k1, Rng.instance()[]).mapFailure(KeyError) bytes = ?res.getBytes().mapFailure(KeyError) ?path.secureWriteFile(bytes).mapFailure(KeyError)