setup crawler module

This commit is contained in:
thatben 2025-02-07 13:57:57 +01:00
parent f7b1aab098
commit 454fbd3474
No known key found for this signature in database
GPG Key ID: 62C543548433D43E
4 changed files with 67 additions and 27 deletions

View File

@ -15,6 +15,7 @@ import ./metrics
import ./list
import ./dht
import ./keyutils
import ./crawler
declareGauge(todoNodesGauge, "DHT nodes to be visited")
declareGauge(okNodesGauge, "DHT nodes successfully contacted")
@ -29,10 +30,11 @@ type
Application* = ref object
status: ApplicationStatus
config*: CrawlerConfig
todoList*: List
todoNodes*: List
okNodes*: List
nokNodes*: List
dht*: Dht
crawler*: Crawler
proc createDatastore(app: Application, path: string): ?!Datastore =
without store =? LevelDbDatastore.new(path), err:
@ -60,11 +62,11 @@ proc initializeLists(app: Application): Future[?!void] {.async.} =
proc onNokMetric(value: int64) =
nokNodesGauge.set(value)
app.todoList = List.new("todo", store, onTodoMetric)
app.todoNodes = List.new("todo", store, onTodoMetric)
app.okNodes = List.new("ok", store, onOkMetric)
app.nokNodes = List.new("nok", store, onNokMetric)
if err =? (await app.todoList.load()).errorOption:
if err =? (await app.todoNodes.load()).errorOption:
return failure(err)
if err =? (await app.okNodes.load()).errorOption:
return failure(err)
@ -86,7 +88,9 @@ proc initializeDht(app: Application): Future[?!void] {.async.} =
# listenAddresses.add(aaa)
var discAddresses = newSeq[MultiAddress]()
let bbb = MultiAddress.init("/ip4/" & app.config.publicIp & "/udp/" & $app.config.discPort).expect("Should init multiaddress")
let bbb = MultiAddress
.init("/ip4/" & app.config.publicIp & "/udp/" & $app.config.discPort)
.expect("Should init multiaddress")
discAddresses.add(bbb)
app.dht = Dht.new(
@ -104,6 +108,16 @@ proc initializeDht(app: Application): Future[?!void] {.async.} =
return success()
proc initializeCrawler(app: Application) =
  ## Wires the crawler up to the DHT handle and the three node lists
  ## (todo/ok/nok) held by the application, then starts it.
  let crawler = Crawler.new(app.dht, app.todoNodes, app.okNodes, app.nokNodes)
  app.crawler = crawler
  app.crawler.start()
proc initializeApp(app: Application): Future[?!void] {.async.} =
if err =? (await app.initializeLists()).errorOption:
error "Failed to initialize lists", err = err.msg
@ -113,28 +127,30 @@ proc initializeApp(app: Application): Future[?!void] {.async.} =
error "Failed to initialize DHT", err = err.msg
return failure(err)
app.initializeCrawler()
return success()
proc hackyCrawl(app: Application) {.async.} =
info "starting hacky crawl..."
await sleepAsync(3000)
# proc hackyCrawl(app: Application) {.async.} =
# info "starting hacky crawl..."
# await sleepAsync(3000)
var nodeIds = app.dht.getRoutingTableNodeIds()
trace "starting with routing table nodes", nodes = nodeIds.len
# var nodeIds = app.dht.getRoutingTableNodeIds()
# trace "starting with routing table nodes", nodes = nodeIds.len
while app.status == ApplicationStatus.Running and nodeIds.len > 0:
let nodeId = nodeIds[0]
nodeIds.delete(0)
# while app.status == ApplicationStatus.Running and nodeIds.len > 0:
# let nodeId = nodeIds[0]
# nodeIds.delete(0)
without newNodes =? (await app.dht.getNeighbors(nodeId)), err:
error "getneighbors failed", err = err.msg
for node in newNodes:
nodeIds.add(node.id)
trace "adding new node", id = $node.id, addrs = $node.address
await sleepAsync(1000)
# without newNodes =? (await app.dht.getNeighbors(nodeId)), err:
# error "getneighbors failed", err = err.msg
info "hacky crawl stopped!"
# for node in newNodes:
# nodeIds.add(node.id)
# trace "adding new node", id = $node.id, addrs = $node.address
# await sleepAsync(1000)
# info "hacky crawl stopped!"
proc stop*(app: Application) =
app.status = ApplicationStatus.Stopping
@ -161,8 +177,6 @@ proc run*(app: Application) =
error "Failed to start application", err = err.msg
return
asyncSpawn app.hackyCrawl()
while app.status == ApplicationStatus.Running:
try:
chronos.poll()

View File

@ -14,7 +14,7 @@ Usage:
Options:
--logLevel=<l> Sets log level [default: TRACE]
--publicIp=<a> Public IP address where this instance is reachable. [default: 62.45.154.249]
--publicIp=<a> Public IP address where this instance is reachable.
--metricsAddress=<ip> Listen address of the metrics server [default: 0.0.0.0]
--metricsPort=<p> Listen HTTP port of the metrics server [default: 8008]
--dataDir=<dir> Directory for storing data [default: crawler_data]
@ -35,9 +35,9 @@ type CrawlerConfig* = ref object
bootNodes*: seq[SignedPeerRecord]
proc `$`*(config: CrawlerConfig): string =
"CrawlerConfig:" & " logLevel=" & config.logLevel & " metricsAddress=" &
$config.metricsAddress & " metricsPort=" & $config.metricsPort & " dataDir=" &
config.dataDir & " discPort=" & $config.discPort & " bootNodes=" &
"CrawlerConfig:" & " logLevel=" & config.logLevel & " publicIp=" & config.publicIp &
" metricsAddress=" & $config.metricsAddress & " metricsPort=" & $config.metricsPort &
" dataDir=" & config.dataDir & " discPort=" & $config.discPort & " bootNodes=" &
config.bootNodes.mapIt($it).join(";")
proc getDefaultTestnetBootNodes(): seq[string] =

25
codexcrawler/crawler.nim Normal file
View File

@ -0,0 +1,25 @@
import pkg/chronicles
import pkg/chronos
import ./dht
import ./list
logScope:
topics = "crawler"
type Crawler* = ref object
  ## Walks the DHT, tracking nodes in three lists. List semantics follow the
  ## app's gauges: todo = "DHT nodes to be visited", ok = "DHT nodes
  ## successfully contacted"; nok is presumably failed contacts -- TODO confirm.
  dht: Dht          # handle used to contact DHT nodes (see ./dht)
  todoNodes: List   # nodes still to be visited
  okNodes: List     # nodes successfully contacted
  nokNodes: List    # nodes that could not be contacted (assumed) -- verify
proc start*(c: Crawler) =
  ## Starts the crawler. Currently only logs startup; the actual crawl loop
  ## is not implemented yet in this module.
  info "Starting crawler..."
proc new*(T: type Crawler, dht: Dht, todoNodes: List, okNodes: List, nokNodes: List): Crawler =
  ## Constructs a Crawler over the given DHT handle and node lists.
  ## Does not start it; call `start` separately.
  result = Crawler(
    dht: dht,
    nokNodes: nokNodes,
    okNodes: okNodes,
    todoNodes: todoNodes,
  )

View File

@ -50,7 +50,8 @@ proc setupKey*(path: string): ?!PrivateKey =
if not path.fileAccessible({AccessFlags.Find}):
info "Creating a private key and saving it"
let
res = ?PrivateKey.random(PKScheme.Secp256k1, Rng.instance()[]).mapFailure(KeyError)
res =
?PrivateKey.random(PKScheme.Secp256k1, Rng.instance()[]).mapFailure(KeyError)
bytes = ?res.getBytes().mapFailure(KeyError)
?path.secureWriteFile(bytes).mapFailure(KeyError)