2023-12-01 16:20:52 +00:00
|
|
|
# Fluffy
|
2024-02-28 17:31:45 +00:00
|
|
|
# Copyright (c) 2022-2024 Status Research & Development GmbH
|
2022-07-26 11:14:56 +00:00
|
|
|
# Licensed and distributed under either of
|
|
|
|
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
|
|
|
|
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
|
|
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
|
2023-01-31 12:38:08 +00:00
|
|
|
{.push raises: [].}
|
2022-07-26 11:14:56 +00:00
|
|
|
|
|
|
|
import
|
2022-08-09 12:32:41 +00:00
|
|
|
std/math,
|
2022-07-26 11:14:56 +00:00
|
|
|
chronos,
|
|
|
|
eth/p2p/discoveryv5/[node, random2],
|
|
|
|
./wire/portal_protocol,
|
2022-08-09 12:32:41 +00:00
|
|
|
./history/[history_content, history_network],
|
2023-12-01 16:20:52 +00:00
|
|
|
../database/seed_db
|
2022-07-26 11:14:56 +00:00
|
|
|
|
|
|
|
# Experimental module which implements different content seeding strategies.
|
|
|
|
# Module is oblivious to content stored in seed database as all content related
|
|
|
|
# parameters should be available in the seed db, i.e. (contentId, contentKey, content).
|
|
|
|
# One thing which might need to be parameterized per network basis in the future is
|
|
|
|
# the distance function.
|
|
|
|
# TODO: At this point all calls are one shot calls but we can also experiment with
|
|
|
|
# approaches which start some process which continuously seeds data.
|
|
|
|
# This would require creation of separate object which would manage started task
|
|
|
|
# like:
|
|
|
|
# type NetworkSeedingManager = ref object
|
|
|
|
# seedTask: Future[void]
|
|
|
|
# and creating a few procs which would start/stop a given seedTask or even a few
|
|
|
|
# seed tasks
|
|
|
|
|
2022-08-09 12:32:41 +00:00
|
|
|
const
  # TODO: the history network values are used here for now, but this limit
  # should be calculated on a per-network basis.
  maxItemsPerOfferBySize = getMaxOfferedContentKeys(
    history_network.historyProtocolId.len.uint32,
    history_content.maxContentKeySize.uint32,
  )

  # A single offer is restricted to at most 64 items.
  maxItemPerOfferByLen = 64

  # Effective per-offer limit: the smaller of the two caps above.
  maxItemsPerOffer = min(maxItemsPerOfferBySize, maxItemPerOfferByLen)
|
|
|
|
|
2022-07-26 11:14:56 +00:00
|
|
|
proc depthContentPropagate*(
    p: PortalProtocol, seedDbPath: string, maxClosestNodes: uint32
): Future[Result[void, string]] {.async.} =
  ## Selects up to `maxClosestNodes` of the closest known nodes with a known
  ## radius and offers each of them as much seed db content in its range as
  ## possible. Peers are served concurrently, with at most one offer in flight
  ## per peer at a time. Seed db content in range of the local node is stored
  ## through `p.storeContent` as well.
  ##
  ## Returns an error when `seedDbPath` is not a valid sqlite database path.

  const batchSize = maxItemsPerOffer

  var gossipWorkers: seq[Future[void]]

  # TODO improve the peer selection strategy so that more of the network is
  # covered. It does not need to be perfect though, as nodes which receive
  # content will still propagate it further by neighbour gossip.
  let closestWithRadius =
    p.getNClosestNodesWithRadius(p.localNode.id, int(maxClosestNodes), seenOnly = true)

  proc worker(
      p: PortalProtocol, db: SeedDb, node: Node, radius: UInt256
  ): Future[void] {.async.} =
    # Pages through the seed db content in range of `node`, offering one batch
    # at a time, until the db is exhausted or an offer fails.
    var offset = 0
    while true:
      let batch = db.getContentInRange(node.id, radius, batchSize, offset)
      if batch.len == 0:
        break

      var contentKV = newSeqOfCap[ContentKV](batch.len)
      for item in batch:
        contentKV.add(
          ContentKV(contentKey: ByteList.init(item.contentKey), content: item.content)
        )

      let offerResult = await p.offer(node, contentKV)

      # Stop when the peer failed or when a short batch signals the end of the
      # database.
      if offerResult.isErr() or batch.len < batchSize:
        break

      offset += batchSize

  proc saveDataToLocalDb(p: PortalProtocol, db: SeedDb) =
    # Stores every seed db entry in range of the local node's radius, reading
    # the seed db in pages of `localBatchSize` entries.
    const localBatchSize = 10000

    var offset = 0
    while true:
      let page =
        db.getContentInRange(p.localNode.id, p.dataRadius, localBatchSize, offset)
      if page.len == 0:
        break

      for item in page:
        p.storeContent(
          ByteList.init(item.contentKey),
          UInt256.fromBytesBE(item.contentId),
          item.content,
        )

      if page.len < localBatchSize:
        # A short page means the end of the db was reached.
        break

      offset += localBatchSize

  let maybePathAndDbName = getDbBasePathAndName(seedDbPath)
  if maybePathAndDbName.isNone():
    return err("Provided path is not valid sqlite database path")

  let
    (dbPath, dbName) = maybePathAndDbName.unsafeGet()
    db = SeedDb.new(path = dbPath, name = dbName)

  # One worker per selected peer; each entry is a (node, radius) pair.
  for entry in closestWithRadius:
    let (peer, peerRadius) = entry
    gossipWorkers.add(p.worker(db, peer, peerRadius))

  p.saveDataToLocalDb(db)

  # The workers read from `db`, so it is closed only once all of them finished.
  await allFutures(gossipWorkers)
  db.close()

  return ok()
|
|
|
|
|
2023-08-24 16:19:29 +00:00
|
|
|
func contentDataToKeys(
    contentData: seq[ContentDataDist]
): (Opt[NodeId], ContentKeysList, seq[seq[byte]]) =
  ## Splits seed db entries into a list of content keys and a parallel list of
  ## content payloads, both in the order of `contentData`. The first tuple
  ## element is always `Opt.none(NodeId)`.
  var
    keys = newSeqOfCap[ByteList](contentData.len)
    payloads = newSeqOfCap[seq[byte]](contentData.len)

  for entry in contentData:
    keys.add(ByteList.init(entry.contentKey))
    payloads.add(entry.content)

  return (Opt.none(NodeId), ContentKeysList(keys), payloads)
|
2022-07-26 11:14:56 +00:00
|
|
|
|
|
|
|
proc breadthContentPropagate*(
    p: PortalProtocol, seedDbPath: string
): Future[Result[void, string]] {.async.} =
  ## Iterates over the whole seed database and gossips batches of content to
  ## different sets of nodes. Every batch read from the seed db is also stored
  ## through `p.storeContent`.
  ##
  ## Returns an error when `seedDbPath` is not a valid sqlite database path.
  ## On success the call returns only after every queued batch was passed to
  ## `neighborhoodGossip` and all gossip workers have terminated.

  const concurrentGossips = 20

  const gossipsPerBatch = 5

  # Validate the path before any worker is started, so that the error path
  # does not leave workers parked on the queue forever.
  let maybePathAndDbName = getDbBasePathAndName(seedDbPath)

  if maybePathAndDbName.isNone():
    return err("Provided path is not valid sqlite database path")

  var gossipQueue =
    newAsyncQueue[(Opt[NodeId], ContentKeysList, seq[seq[byte]])](concurrentGossips)

  var gossipWorkers: seq[Future[void]]

  proc gossipWorker(p: PortalProtocol) {.async.} =
    while true:
      let (srcNodeId, keys, content) = await gossipQueue.popFirst()

      # An item without content is the shutdown marker. Real batches are never
      # empty, as the read loop below stops on an empty db result.
      if len(content) == 0:
        break

      discard await p.neighborhoodGossip(srcNodeId, keys, content)

  for i in 0 ..< concurrentGossips:
    gossipWorkers.add(gossipWorker(p))

  let
    (dbPath, dbName) = maybePathAndDbName.unsafeGet()
    batchSize = maxItemsPerOffer
    db = SeedDb.new(path = dbPath, name = dbName)
    target = p.localNode.id

  var offset = 0

  while true:
    # Setting radius to `UInt256.high` and using batchSize and offset, means
    # we will iterate over whole database in batches of `maxItemsPerOffer` items
    var contentData = db.getContentInRange(target, UInt256.high, batchSize, offset)

    if len(contentData) == 0:
      break

    for cd in contentData:
      p.storeContent(
        ByteList.init(cd.contentKey), UInt256.fromBytesBE(cd.contentId), cd.content
      )

    # TODO this is a bit of a hacky way to make sure we engage more valid peers
    # for each batch of data. This may be removed after improving
    # neighborhoodGossip to better choose peers based on propagated content.
    for i in 0 ..< gossipsPerBatch:
      p.baseProtocol.rng[].shuffle(contentData)
      let keysWithContent = contentDataToKeys(contentData)
      # `put` suspends while the queue is full, which throttles reading from
      # the seed db to the pace of the gossip workers.
      await gossipQueue.put(keysWithContent)

    if len(contentData) < batchSize:
      break

    offset = offset + batchSize

  # The workers never touch the seed db, so it can be closed as soon as
  # reading is done.
  db.close()

  # Queue one shutdown marker per worker. The queue is FIFO, so all real
  # batches are consumed before the markers, and each worker exits on the
  # first marker it pops.
  for i in 0 ..< concurrentGossips:
    await gossipQueue.put(
      (Opt.none(NodeId), default(ContentKeysList), newSeq[seq[byte]]())
    )

  await allFutures(gossipWorkers)

  return ok()
|
|
|
|
|
|
|
|
proc offerContentInNodeRange*(
    p: PortalProtocol, seedDbPath: string, nodeId: NodeId, max: uint32, starting: uint32
): Future[PortalResult[int]] {.async.} =
  ## Offers up to `max` closest seed db elements, starting from index
  ## `starting`, to the peer with the given `nodeId`.
  ##
  ## A single offer is limited to `maxItemsPerOffer` items (at most 64), so
  ## larger values of `max` are clamped. The seed db is read only, so offered
  ## content is not removed from it: when there is more content in the peer's
  ## range than fits in one offer, set `starting` to 0 to offer the closest 64
  ## elements, to 64 to offer the next 64 elements, etc.
  ##
  ## Returns the number of items offered to the remote peer, which is 0 when
  ## the seed db holds no content for the requested window (no offer is sent
  ## in that case). Returns an error when `seedDbPath` is not a valid sqlite
  ## database path, when the node cannot be resolved, or when the offer fails.

  let numberToOffer = min(int(max), maxItemsPerOffer)

  let maybePathAndDbName = getDbBasePathAndName(seedDbPath)

  if maybePathAndDbName.isNone():
    return err("Provided path is not valid sqlite database path")

  let (dbPath, dbName) = maybePathAndDbName.unsafeGet()

  let maybeNodeAndRadius = await p.resolveWithRadius(nodeId)

  if maybeNodeAndRadius.isNone():
    return err("Could not find node with provided nodeId")

  let
    db = SeedDb.new(path = dbPath, name = dbName)
    (node, radius) = maybeNodeAndRadius.unsafeGet()
    content =
      db.getContentInRange(node.id, radius, int64(numberToOffer), int64(starting))

  # We got all we wanted from seed_db, it can be closed now.
  db.close()

  # Nothing in range for this window: skip the network round trip instead of
  # sending an empty offer to the peer.
  if len(content) == 0:
    return ok(0)

  var ci = newSeqOfCap[ContentKV](len(content))

  for cont in content:
    ci.add(ContentKV(contentKey: ByteList.init(cont.contentKey), content: cont.content))

  # Wait for the offer result; by the end of this call the remote node should
  # have received the offered content.
  let offerResult = await p.offer(node, ci)

  if offerResult.isOk():
    return ok(len(content))
  else:
    return err(offerResult.error)
|
|
|
|
|
2022-07-26 11:14:56 +00:00
|
|
|
proc storeContentInNodeRange*(
    p: PortalProtocol, seedDbPath: string, max: uint32, starting: uint32
): PortalResult[void] =
  ## Reads up to `max` seed db entries in range of the local node's radius,
  ## starting from index `starting`, and stores each of them through
  ## `p.storeContent`.
  ##
  ## Returns an error when `seedDbPath` is not a valid sqlite database path.
  let pathAndName = getDbBasePathAndName(seedDbPath)
  if pathAndName.isNone():
    return err("Provided path is not valid sqlite database path")

  let
    (dbPath, dbName) = pathAndName.unsafeGet()
    radius = p.dataRadius
    seedDb = SeedDb.new(path = dbPath, name = dbName)
    entries =
      seedDb.getContentInRange(p.localNode.id, radius, int64(max), int64(starting))

  # Everything needed was read into memory, so the seed db can be closed
  # before storing.
  seedDb.close()

  for entry in entries:
    p.storeContent(
      ByteList.init(entry.contentKey),
      UInt256.fromBytesBE(entry.contentId),
      entry.content,
    )

  return ok()
|