mirror of
https://github.com/logos-storage/logos-storage-nim.git
synced 2026-05-12 14:29:39 +00:00
335 lines
9.4 KiB
Nim
335 lines
9.4 KiB
Nim
## Logos Storage
## Copyright (c) 2026 Status Research & Development GmbH
## Licensed under either of
##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
|
|
|
|
import std/[tables, sets, options, random]
|
|
|
|
import pkg/chronos
|
|
import pkg/libp2p/peerid
|
|
|
|
import ../peers/peerctxstore
|
|
import ../peers/peerstats
|
|
import ../types
|
|
import ../../logutils
|
|
import ./peertracker
|
|
|
|
export peerctxstore, types
|
|
|
|
# Seed the global `std/random` generator when this module is loaded; the
# `rand` calls in `selectByBDP` draw from that global generator.
randomize()

# Log topic attached to every log statement emitted from this module.
logScope:
  topics = "logos-storage swarm"
|
|
|
|
# Swarm sizing defaults (see `SwarmConfig.defaultConfig`).
const
  DefaultDeltaMin* = 2
    ## Default `SwarmConfig.deltaMin`: with fewer active peers than this,
    ## `peersNeeded` reports `shBelowMin`.
  DefaultDeltaMax* = 16
    ## Default `SwarmConfig.deltaMax`: `addPeer` refuses new peers once the
    ## swarm holds this many.
  DefaultDeltaTarget* = 8
    ## Default `SwarmConfig.deltaTarget`: with fewer active peers than this,
    ## `peersNeeded` reports `shBelowTarget`.

# Peer health and selection tuning.
const
  PeerStaleTimeout* = 30.seconds
    ## A peer whose `lastSeen` is older than this is considered stale.
  PeerDefaultMaxFailures*: uint32 = 2
    ## Default `SwarmConfig.maxPeerFailures`.
  PeerDefaultMaxTimeouts*: uint32 = 5
    ## Default `SwarmConfig.maxPeerTimeouts`.
  ExplorationProbability* = 0.2
    ## Default `explorationProb` of `selectByBDP`.
  TimeoutPenaltyWeight* = 3.0
    ## Multiplied by a peer's `timeoutCount` to form its selection penalty in
    ## `selectPeerForBatch`.
|
|
|
|
type
  SwarmPeer* = ref object ## Per-peer bookkeeping held by a `Swarm`.
    availability*: BlockAvailability
      ## Availability known for the peer; merged on every update.
    lastSeen*: Moment
      ## Set on creation and by `touch`; drives `isStale`.
    availabilityUpdated*: Moment
      ## Set on creation and whenever `updateAvailability` runs.
    failureCount*: uint32
      ## Incremented by `recordFailure`, cleared by `resetFailures`.
    timeoutCount*: uint32
      ## Incremented by `recordTimeout`, cleared by `resetFailures`.

  SwarmConfig* = object ## Tunables of a `Swarm`.
    deltaMin*: int
      ## Active-peer count below which the swarm is `shBelowMin`.
    deltaMax*: int
      ## Peer count at which `addPeer` starts refusing peers.
    deltaTarget*: int
      ## Active-peer count below which the swarm is `shBelowTarget`.
    maxPeerFailures*: uint32
      ## `recordPeerFailure` returns true once a peer reaches this count.
    maxPeerTimeouts*: uint32
      ## `recordPeerTimeout` returns true once a peer reaches this count.

  SwarmHealth* = enum ## Result of `peersNeeded`.
    shHealthy ## Active peers >= `deltaTarget` (and >= `deltaMin`).
    shBelowTarget ## Active peers >= `deltaMin` but < `deltaTarget`.
    shBelowMin ## Active peers < `deltaMin`.

  PeerSelectionKind* = enum ## Outcome tag of `selectPeerForBatch`.
    pskFound ## A peer was chosen.
    pskAtCapacity ## Candidates exist but none was usable for the batch.
    pskNoPeers ## No candidate peer for the requested range.

  PeerSelection* = object ## Outcome of `selectPeerForBatch`.
    case kind*: PeerSelectionKind
    of pskFound:
      peer*: PeerContext ## The chosen peer.
    of pskAtCapacity, pskNoPeers:
      discard

  Swarm* = ref object ## Set of peers tracked for a download.
    config*: SwarmConfig
    peers: Table[PeerId, SwarmPeer] ## Currently tracked peers.
    removedPeers: HashSet[PeerId]
      ## Ids passed to `banPeer`; `addPeer` rejects them.
|
|
|
|
proc new*(T: type SwarmPeer, availability: BlockAvailability): SwarmPeer =
  ## Creates a peer record with the given availability. `lastSeen` and
  ## `availabilityUpdated` share one timestamp taken at creation; the failure
  ## and timeout counters start at zero (their default value).
  let createdAt = Moment.now()
  result = SwarmPeer(
    availability: availability, lastSeen: createdAt, availabilityUpdated: createdAt
  )
|
|
|
|
proc isStale*(peer: SwarmPeer): bool =
  ## True when more than `PeerStaleTimeout` has elapsed since `peer.lastSeen`.
  let idleFor = Moment.now() - peer.lastSeen
  idleFor > PeerStaleTimeout
|
|
|
|
proc touch*(peer: SwarmPeer) =
  ## Marks the peer as seen at the current moment.
  let seenAt = Moment.now()
  peer.lastSeen = seenAt
|
|
|
|
proc updateAvailability*(peer: SwarmPeer, availability: BlockAvailability) =
  ## Merges `availability` into what is already known for the peer, stamps
  ## `availabilityUpdated`, and refreshes `lastSeen`.
  let merged = peer.availability.merge(availability)
  peer.availability = merged
  peer.availabilityUpdated = Moment.now()
  touch(peer)
|
|
|
|
proc recordFailure*(peer: SwarmPeer) =
  ## Adds one to the peer's failure counter.
  inc peer.failureCount
|
|
|
|
proc recordTimeout*(peer: SwarmPeer) =
  ## Adds one to the peer's timeout counter.
  inc peer.timeoutCount
|
|
|
|
proc resetFailures*(peer: SwarmPeer) =
  ## Clears both the failure and the timeout counter.
  peer.timeoutCount = 0'u32
  peer.failureCount = 0'u32
|
|
|
|
proc defaultConfig*(_: type SwarmConfig): SwarmConfig =
  ## Configuration populated from the module-level `Default*` /
  ## `PeerDefault*` constants.
  result.deltaMin = DefaultDeltaMin
  result.deltaMax = DefaultDeltaMax
  result.deltaTarget = DefaultDeltaTarget
  result.maxPeerFailures = PeerDefaultMaxFailures
  result.maxPeerTimeouts = PeerDefaultMaxTimeouts
|
|
|
|
proc new*(T: type Swarm, config: SwarmConfig = SwarmConfig.defaultConfig()): Swarm =
  ## Creates an empty swarm (no tracked peers, no banned peers) using
  ## `config`, or the default configuration when none is given.
  result = Swarm(config: config)
  result.peers = initTable[PeerId, SwarmPeer]()
  result.removedPeers = initHashSet[PeerId]()
|
|
|
|
proc addPeer*(swarm: Swarm, peerId: PeerId, availability: BlockAvailability): bool =
  ## Starts tracking `peerId` with a fresh `SwarmPeer` record.
  ##
  ## Returns false, leaving the swarm untouched, when the peer was banned via
  ## `banPeer` or the swarm already holds `config.deltaMax` peers; returns
  ## true once the record is stored.
  ##
  ## NOTE(review): an id that is already tracked gets its record replaced
  ## (counters and timestamps start over), and is refused when the swarm is at
  ## capacity - confirm callers never re-add a tracked peer.
  let
    banned = peerId in swarm.removedPeers
    full = swarm.peers.len >= swarm.config.deltaMax
  if banned or full:
    return false

  swarm.peers[peerId] = SwarmPeer.new(availability)
  true
|
|
|
|
proc removePeer*(swarm: Swarm, peerId: PeerId): Option[SwarmPeer] =
  ## Stops tracking `peerId`. Returns the removed record, or `none` when the
  ## peer was not tracked. The peer is not banned and may be added again.
  var removed: SwarmPeer
  if swarm.peers.pop(peerId, removed):
    some(removed)
  else:
    none(SwarmPeer)
|
|
|
|
proc banPeer*(swarm: Swarm, peerId: PeerId) =
  ## Drops `peerId` from the tracked peers (if present) and remembers it in
  ## the removed set so that later `addPeer` calls for it are refused.
  del(swarm.peers, peerId)
  incl(swarm.removedPeers, peerId)
|
|
|
|
proc getPeer*(swarm: Swarm, peerId: PeerId): Option[SwarmPeer] =
  ## Looks up the record tracked for `peerId`; `none` when it is not tracked.
  if peerId in swarm.peers:
    some(swarm.peers.getOrDefault(peerId))
  else:
    none(SwarmPeer)
|
|
|
|
proc updatePeerAvailability*(
    swarm: Swarm, peerId: PeerId, availability: BlockAvailability
) =
  ## Merges `availability` into the record of `peerId`. Does nothing when the
  ## peer is not tracked.
  if peerId in swarm.peers:
    let tracked = swarm.peers.getOrDefault(peerId)
    tracked.updateAvailability(availability)
|
|
|
|
proc recordPeerFailure*(swarm: Swarm, peerId: PeerId): bool =
  ## Counts one failure against `peerId`.
  ##
  ## Returns true if the peer should be removed, i.e. its failure count has
  ## reached `config.maxPeerFailures`. Returns false for untracked peers.
  if peerId notin swarm.peers:
    return false

  let tracked = swarm.peers.getOrDefault(peerId)
  tracked.recordFailure()
  tracked.failureCount >= swarm.config.maxPeerFailures
|
|
|
|
proc recordPeerTimeout*(swarm: Swarm, peerId: PeerId): bool =
  ## Counts one timeout against `peerId`.
  ##
  ## Returns true if the peer should be removed, i.e. its timeout count has
  ## reached `config.maxPeerTimeouts`. Returns false for untracked peers.
  if peerId notin swarm.peers:
    return false

  let tracked = swarm.peers.getOrDefault(peerId)
  tracked.recordTimeout()
  tracked.timeoutCount >= swarm.config.maxPeerTimeouts
|
|
|
|
proc recordBatchSuccess*(
|
|
swarm: Swarm, peer: PeerContext, rttMicros: uint64, totalBytes: uint64
|
|
) =
## Records a successful batch from `peer`.
##
## When the swarm tracks `peer.id`, that record's failure and timeout
## counters are cleared and its last-seen time is refreshed. The round-trip
## time and byte count are handed to `peer.stats.recordRequest`.
##
## NOTE(review): indentation was lost in this copy of the file, so it is not
## visible whether `recordRequest` belongs to the `withValue` body (tracked
## peers only) or follows it (every call) - confirm against upstream before
## re-indenting.
|
|
swarm.peers.withValue(peer.id, swarmPeer):
|
|
swarmPeer[].resetFailures()
|
|
swarmPeer[].touch()
|
|
peer.stats.recordRequest(rttMicros, totalBytes)
|
|
|
|
proc activePeerCount*(swarm: Swarm): int =
  ## Number of tracked peers that are not stale.
  for entry in swarm.peers.values:
    if entry.isStale:
      continue
    inc result
|
|
|
|
proc peerCount*(swarm: Swarm): int =
  ## Number of tracked peers, stale ones included.
  len(swarm.peers)
|
|
|
|
proc peersNeeded*(swarm: Swarm): SwarmHealth =
  ## Classifies the swarm by its number of active (non-stale) peers:
  ## below `deltaMin` -> `shBelowMin`, otherwise below `deltaTarget` ->
  ## `shBelowTarget`, otherwise `shHealthy`.
  let
    active = swarm.activePeerCount()
    cfg = swarm.config

  # The minimum is checked first so it wins even if it exceeds the target.
  if active < cfg.deltaMin:
    return shBelowMin
  if active < cfg.deltaTarget:
    return shBelowTarget
  shHealthy
|
|
|
|
proc connectedPeers*(swarm: Swarm): seq[PeerId] =
  ## Ids of all tracked peers, stale ones included, in table iteration order.
  result = newSeqOfCap[PeerId](swarm.peers.len)
  for id in swarm.peers.keys:
    result.add id
|
|
|
|
proc peersWithRange*(swarm: Swarm, start: uint64, count: uint64): seq[PeerId] =
  ## Ids of non-stale peers for which `availability.hasRange(start, count)`
  ## holds.
  for id, entry in swarm.peers:
    if entry.isStale:
      continue
    if entry.availability.hasRange(start, count):
      result.add id
|
|
|
|
proc peersWithAnyInRange*(swarm: Swarm, start: uint64, count: uint64): seq[PeerId] =
  ## Ids of non-stale peers for which
  ## `availability.hasAnyInRange(start, count)` holds.
  for id, entry in swarm.peers:
    if entry.isStale:
      continue
    if entry.availability.hasAnyInRange(start, count):
      result.add id
|
|
|
|
proc staleUnknownPeers*(swarm: Swarm): seq[PeerId] =
  ## Ids of peers that are stale and whose availability kind is still
  ## `bakUnknown`.
  for id, entry in swarm.peers:
    if not entry.isStale:
      continue
    if entry.availability.kind == bakUnknown:
      result.add id
|
|
|
|
proc selectByBDP*(
    peers: seq[PeerContext],
    batchBytes: uint64,
    tracker: PeerInFlightTracker,
    penalties: var Table[PeerId, float],
    explorationProb: float = ExplorationProbability,
): Option[PeerContext] {.gcsafe, raises: [].} =
  ## Picks one peer out of `peers` for a batch of `batchBytes` bytes.
  ##
  ## * no peers -> `none`; exactly one peer -> that peer, unconditionally;
  ## * peers without a throughput measurement that are below their optimal
  ##   pipeline depth are preferred, taking the one with the fewest in-flight
  ##   requests (first wins on ties);
  ## * otherwise, with probability `explorationProb`, a uniformly random peer
  ##   among those below their optimal pipeline depth is returned (if any);
  ## * otherwise the peer with the lowest `evalBDPScore` wins, ties being
  ##   broken at random. `penalties` supplies a per-peer penalty (0.0 when a
  ##   peer has no entry) and is only read.
  case peers.len
  of 0:
    return none(PeerContext)
  of 1:
    return some(peers[0])
  else:
    discard

  # Phase 1: unmeasured peers that still have pipeline capacity.
  var untried: seq[PeerContext]
  for candidate in peers:
    if candidate.stats.throughputBps().isSome:
      continue
    let depth = candidate.optimalPipelineDepth(batchBytes)
    if tracker.count(candidate.id) < depth:
      untried.add(candidate)

  if untried.len > 0:
    var
      leastLoaded = untried[0]
      lowestLoad = tracker.count(leastLoaded.id)
    for idx in 1 .. untried.high:
      let load = tracker.count(untried[idx].id)
      if load < lowestLoad:
        lowestLoad = load
        leastLoaded = untried[idx]
    return some(leastLoaded)

  # Phase 2: occasional random exploration among peers with capacity.
  if rand(1.0) < explorationProb:
    var withCapacity: seq[PeerContext]
    for candidate in peers:
      let depth = candidate.optimalPipelineDepth(batchBytes)
      if tracker.count(candidate.id) < depth:
        withCapacity.add(candidate)

    if withCapacity.len > 0:
      return some(withCapacity[rand(withCapacity.high)])

  # Phase 3: lowest score wins; equal scores are collected and drawn from.
  template scoreOf(p: PeerContext): untyped =
    p.evalBDPScore(batchBytes, tracker.count(p.id), penalties.getOrDefault(p.id, 0.0))

  let first = peers[0]
  var
    bestScore = scoreOf(first)
    tied: seq[PeerContext] = @[first]

  for idx in 1 .. peers.high:
    let
      candidate = peers[idx]
      score = scoreOf(candidate)
    if score < bestScore:
      bestScore = score
      tied = @[candidate]
    elif score == bestScore:
      tied.add(candidate)

  # Only consult the RNG when there is an actual tie to break.
  if tied.len == 1:
    some(tied[0])
  else:
    some(tied[rand(tied.high)])
|
|
|
|
proc resolveWithCapacity(
    swarm: Swarm,
    peers: PeerContextStore,
    peerIds: seq[PeerId],
    batchBytes: uint64,
    tracker: PeerInFlightTracker,
): seq[PeerContext] =
  ## Maps swarm peer ids to their contexts in `peers`, keeping only those
  ## whose in-flight count is below their optimal pipeline depth. Ids with no
  ## context in the store are removed from the swarm.
  for peerId in peerIds:
    let peer = peers.get(peerId)
    if peer.isNil:
      # peer disconnected - remove from swarm immediately
      discard swarm.removePeer(peerId)
      continue
    if tracker.count(peerId) < peer.optimalPipelineDepth(batchBytes):
      result.add(peer)

proc selectPeerForBatch*(
    swarm: Swarm,
    peers: PeerContextStore,
    start: uint64,
    count: uint64,
    batchBytes: uint64,
    tracker: PeerInFlightTracker,
): PeerSelection =
  ## Chooses the peer that should serve the batch `[start, start + count)`
  ## of `batchBytes` bytes.
  ##
  ## Candidates are the non-stale peers reported by `peersWithRange`; when
  ## there are none, the peers reported by `peersWithAnyInRange` are used
  ## instead. Candidates missing from `peers` are dropped from the swarm.
  ##
  ## Returns
  ## * `pskNoPeers` when neither lookup yields a candidate (or `selectByBDP`
  ##   yields nothing),
  ## * `pskAtCapacity` when no candidate remains after dropping missing peers
  ##   and peers already at their optimal pipeline depth,
  ## * `pskFound` with the peer picked by `selectByBDP` otherwise.
  var candidates = swarm.peersWithRange(start, count)

  if candidates.len == 0:
    candidates = swarm.peersWithAnyInRange(start, count)
    trace "No full range peers, checking partial",
      start = start, count = count, partialPeers = candidates.len
    if candidates.len == 0:
      return PeerSelection(kind: pskNoPeers)

  let peerCtxs = swarm.resolveWithCapacity(peers, candidates, batchBytes, tracker)
  if peerCtxs.len == 0:
    return PeerSelection(kind: pskAtCapacity)

  # Built only once a selection will actually happen: each timeout recorded
  # against a peer adds `TimeoutPenaltyWeight` to its penalty.
  var penalties: Table[PeerId, float]
  for peerId, swarmPeer in swarm.peers:
    if swarmPeer.timeoutCount > 0:
      penalties[peerId] = swarmPeer.timeoutCount.float * TimeoutPenaltyWeight

  let selected = selectByBDP(peerCtxs, batchBytes, tracker, penalties)
  if selected.isSome:
    return PeerSelection(kind: pskFound, peer: selected.get())
  PeerSelection(kind: pskNoPeers)
|