Chrysostomos Nanakos 8c9aec6ca2
fix: validate WantBlocks request ranges to prevent DoS
Signed-off-by: Chrysostomos Nanakos <chris@include.gr>
2026-05-06 18:20:52 +03:00

1195 lines
38 KiB
Nim

## Logos Storage
## Copyright (c) 2021 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
import std/[sequtils, sets, options, algorithm, sugar, tables, random]
import pkg/chronos
import pkg/libp2p/[cid, switch, multihash, multicodec]
import pkg/metrics
import pkg/questionable
import pkg/questionable/results
import pkg/stew/shims/sets
import ../../stores/blockstore
import ../../errors
import ../../blocktype
import ../../utils
import ../../utils/trackedfutures
import ../../merkletree
import ../../manifest
import ../../logutils
import ../protocol/message
import ../protocol/presence
import ../protocol/constants
import ../network
import ../peers
import ../utils as bexutils
import ./discovery
import ./advertiser
import ./downloadmanager
import ./peertracker
import ./swarm
import ./scheduler
export peers, downloadmanager, discovery, swarm, scheduler
logScope:
  topics = "storage blockexcengine"

# Prometheus-style counters tracking block-exchange traffic and failures.
declareCounter(
  storage_block_exchange_want_have_lists_received,
  "storage blockexchange wantHave lists received",
)
declareCounter(storage_block_exchange_blocks_sent, "storage blockexchange blocks sent")
declareCounter(
  storage_block_exchange_blocks_received, "storage blockexchange blocks received"
)
declareCounter(
  storage_block_exchange_discovery_requests_total,
  "Total number of peer discovery requests sent",
)
declareCounter(
  storage_block_exchange_peer_timeouts_total, "Total number of peer activity timeouts"
)
declareCounter(
  storage_block_exchange_requests_failed_total,
  "Total number of block requests that failed after exhausting retries",
)

const
  # Don't do more than one discovery request per `DiscoveryRateLimit` seconds.
  DiscoveryRateLimit = 3.seconds
  # Interval between periodic sweeps of the peer tracker (see peerTrackerSweepLoop).
  PeerTrackerSweepInterval = 15.seconds
type
  BlockExcEngine* = ref object of RootObj
    localStore*: BlockStore # Local block store for this instance
    network*: BlockExcNetwork # Network layer used to reach remote peers
    peers*: PeerContextStore # Peers we're currently actively exchanging with
    trackedFutures: TrackedFutures # Tracks futures of blockexc tasks
    blockexcRunning: bool # Indicates if the blockexc task is running
    downloadManager*: DownloadManager # Owns per-tree download state and batching
    discovery*: DiscoveryEngine # Queues find-block requests for wanted CIDs
    advertiser*: Advertiser # Advertising sub-engine, started/stopped with the engine
    lastDiscRequest: Moment # Time of last discovery request
    selectionPolicy*: SelectionPolicy # Block selection policy for block scheduling
    activeDownloads*: HashSet[uint64] # Track running download workers by download ID

  # Handle returned to callers that start a tree download; `T` selects whether
  # the iterator yields block payloads (Block) or only completion signals (void).
  DownloadHandleGeneric*[T] = object
    treeCid*: Cid # Tree being downloaded
    downloadId*: uint64 # Identifier of the underlying active download
    iter*: SafeAsyncIter[T] # Async iterator over the downloaded blocks
    completionFuture*: Future[?!void].Raising([CancelledError])

  DownloadHandle* = DownloadHandleGeneric[Block]
  DownloadHandleOpaque* = DownloadHandleGeneric[void]
proc waitForComplete*[T](
    h: DownloadHandleGeneric[T]
): Future[?!void] {.async: (raises: [CancelledError]).} =
  ## Wait until the download behind `h` finishes (successfully or not) and
  ## return its overall result.
  return await h.completionFuture
# Forward declarations — these procs reference each other below.
proc requestWantBlocks*(
  self: BlockExcEngine, peer: PeerId, blockRange: BlockRange
): Future[WantBlocksResult[seq[BlockDeliveryView]]] {.
  async: (raises: [CancelledError])
.}

proc downloadWorker(
  self: BlockExcEngine, download: ActiveDownload
) {.async: (raises: []).}

proc ensureDownloadWorker(
  self: BlockExcEngine, download: ActiveDownload
) {.gcsafe, raises: [].}

proc startDownload(
  self: BlockExcEngine, desc: DownloadDesc
): ActiveDownload {.gcsafe, raises: [].}

proc startDownload(
  self: BlockExcEngine, desc: DownloadDesc, missingBlocks: seq[uint64]
): ActiveDownload {.gcsafe, raises: [].}
proc peerTrackerSweepLoop(self: BlockExcEngine) {.async: (raises: []).} =
  ## Periodically sweep the download manager's peer tracker while the
  ## engine is running. Exits quietly on cancellation; any other error
  ## terminates the loop with a warning.
  try:
    while self.blockexcRunning:
      await sleepAsync(PeerTrackerSweepInterval)
      await self.downloadManager.peerTracker.sweep()
  except CancelledError:
    discard
  except CatchableError as exc:
    warn "Peer tracker sweep loop failed", err = exc.msg
proc start*(self: BlockExcEngine) {.async: (raises: []).} =
  ## Start the blockexc task
  ##
  ## Idempotent: a second call while running only logs a warning.
  # Check the running flag BEFORE starting the sub-engines; the original
  # started discovery/advertiser first, so a repeated start() would
  # re-start them even though the engine was already running.
  if self.blockexcRunning:
    warn "Starting blockexc twice"
    return
  await self.discovery.start()
  await self.advertiser.start()
  self.blockexcRunning = true
  # Background sweep of the peer tracker, cancelled via trackedFutures on stop.
  self.trackedFutures.track(self.peerTrackerSweepLoop())
proc stop*(self: BlockExcEngine) {.async: (raises: []).} =
  ## Stop the blockexc
  ##
  ## Idempotent: stopping an engine that was never started only warns.
  # Guard BEFORE tearing anything down; the original cancelled tracked
  # futures and stopped network/discovery/advertiser even when the engine
  # had never been started.
  if not self.blockexcRunning:
    warn "Stopping blockexc without starting it"
    return
  # Clear the flag first so background loops observing it can wind down.
  self.blockexcRunning = false
  trace "NetworkStore stop"
  await self.trackedFutures.cancelTracked()
  await self.network.stop()
  await self.discovery.stop()
  await self.advertiser.stop()
  trace "NetworkStore stopped"
proc searchForNewPeers(self: BlockExcEngine, cid: Cid) =
  ## Queue a discovery request for `cid`, rate-limited so that at most one
  ## request is issued per `DiscoveryRateLimit` window.
  # Guard clause: bail out while the rate-limit window is still open.
  if Moment.now() <= self.lastDiscRequest + DiscoveryRateLimit:
    trace "Not searching for new peers, rate limit not expired", cid = cid
    return
  trace "Searching for new peers for", cid = cid
  storage_block_exchange_discovery_requests_total.inc()
  # always refresh before calling await
  self.lastDiscRequest = Moment.now()
  self.discovery.queueFindBlocksReq(@[cid])
proc banAndDropPeer(
    self: BlockExcEngine, download: ActiveDownload, peerId: PeerId
) {.async: (raises: [CancelledError]).} =
  ## Ban a misbehaving peer: mark it banned in the download's swarm,
  ## reassign whatever work it held, then disconnect it at the network level.
  download.ctx.swarm.banPeer(peerId)
  download.handlePeerFailure(peerId)
  await self.network.dropPeer(peerId)
proc evictPeer(self: BlockExcEngine, peer: PeerId) =
  ## Cleanup disconnected peer
  ##
  ## Removes the peer from the active peer store and from the
  ## download manager's peer tracker.
  trace "Evicting disconnected/departed peer", peer
  self.peers.remove(peer)
  self.downloadManager.peerTracker.clearPeer(peer)
proc validateBlockDeliveryView(self: BlockExcEngine, view: BlockDeliveryView): ?!void =
  ## Validate a delivered block before it is stored:
  ## - a merkle proof must be present and its index must match the address,
  ## - the block data must hash (with the CID's codec) to the claimed CID,
  ## - the proof must verify against the tree root from the address.
  ## Returns failure with a descriptive message on the first violated check.
  without proof =? view.proof:
    return failure("Missing proof")

  if proof.index != view.address.index:
    return failure(
      "Proof index " & $proof.index & " doesn't match leaf index " & $view.address.index
    )

  without expectedMhash =? view.cid.mhash.mapFailure, err:
    return failure("Unable to get mhash from cid for block, nested err: " & err.msg)

  # Re-hash the payload slice in the shared receive buffer with the same
  # multihash codec the CID claims, and compare.
  without computedMhash =?
    MultiHash.digest(
      $expectedMhash.mcodec,
      view.sharedBuf.data.toOpenArray(
        view.dataOffset, view.dataOffset + view.dataLen - 1
      ),
    ).mapFailure, err:
    return failure("Unable to compute hash of block data, nested err: " & err.msg)
  if computedMhash != expectedMhash:
    return failure("Block data hash doesn't match claimed CID")

  without treeRoot =? view.address.treeCid.mhash.mapFailure, err:
    return failure("Unable to get mhash from treeCid for block, nested err: " & err.msg)
  if err =? proof.verify(computedMhash, treeRoot).errorOption:
    return failure("Unable to verify proof for block, nested err: " & err.msg)
  return success()
proc coalesceIndices(indices: seq[uint64]): seq[tuple[start: uint64, count: uint64]] =
  ## Collapse an ascending, duplicate-free sequence of block indices into
  ## maximal contiguous (start, count) ranges. Returns @[] for empty input.
  ## Extracted because the identical coalescing loop appeared twice in
  ## sendWantBlocksRequest (initial request + partial-delivery requeue).
  if indices.len == 0:
    return @[]
  var
    rangeStart = indices[0]
    rangeCount: uint64 = 1
  for i in 1 ..< indices.len:
    if indices[i] == rangeStart + rangeCount:
      rangeCount += 1
    else:
      result.add((rangeStart, rangeCount))
      rangeStart = indices[i]
      rangeCount = 1
  result.add((rangeStart, rangeCount))

proc sendWantBlocksRequest(
    self: BlockExcEngine,
    download: ActiveDownload,
    start: uint64,
    count: uint64,
    missingIndices: seq[uint64],
    peer: PeerContext,
): Future[void] {.async: (raises: [CancelledError]).} =
  ## Request the `missingIndices` of batch (`start`, `count`) from `peer`,
  ## validate and persist every delivered block, and complete / partially
  ## complete / requeue the batch depending on what the peer returned.
  ## Bans the peer on out-of-bounds indices or corrupted block data.
  if download.cancelled:
    return

  let treeCid = download.treeCid
  # missingIndices must be sorted ascending with no duplicates for correct coalescing
  let ranges = coalesceIndices(missingIndices)

  trace "Requesting missing blocks",
    treeCid = treeCid,
    originalRange = $(start, count),
    missing = missingIndices.len,
    ranges = ranges.len,
    peer = peer.id

  let
    requestStartTime = Moment.now()
    requestResult = await self.requestWantBlocks(
      peer.id, BlockRange(treeCid: treeCid, ranges: ranges)
    )
    rttMicros = (Moment.now() - requestStartTime).microseconds.uint64

  if download.cancelled:
    return

  # request might have timed-out and have been requeued to another peer
  # if yes, then discard response, it's already handled.
  download.pendingBatches.withValue(start, pending):
    if pending[].peerId != peer.id:
      # discard it, was reassigned
      return
  do:
    # either completed or requeued
    return

  if requestResult.isErr:
    warn "Batch request failed", peer = peer.id, error = requestResult.error.msg
    let swarm = download.ctx.swarm
    if swarm.recordPeerFailure(peer.id):
      warn "Peer exceeded max failures, removing from swarm", peer = peer.id
      if swarm.removePeer(peer.id).isNone:
        trace "Peer was not in swarm", peer = peer.id
      download.handlePeerFailure(peer.id)
    else:
      # we can requeue immediately (cancels timeout), no benefit waiting for timeout.
      download.requeueBatch(start, count, front = true)
    return

  let allBlockViews = requestResult.get
  if allBlockViews.len == 0:
    trace "Peer responded with zero blocks", peer = peer.id, treeCid = treeCid
    download.requeueBatch(start, count, front = false)
    return

  trace "Received batch response",
    treeCid = treeCid,
    originalRange = $(start, count),
    received = allBlockViews.len,
    requested = missingIndices.len,
    peer = peer.id

  var
    totalBytes: uint64 = 0
    validCount: int = 0
    receivedIndices: HashSet[uint64]

  for view in allBlockViews:
    # Silently skip blocks we never asked for; they cost us nothing stored.
    if not bexutils.isIndexInRanges(
      view.address.index.uint64, ranges, sortedRanges = true
    ):
      warn "Received unrequested block", index = view.address.index, ranges = ranges.len
      continue

    # Indices beyond the tree size are hostile input: ban the sender.
    if view.address.index.uint64 >= download.ctx.totalBlocks:
      warn "Received block with out-of-bounds index - banning peer",
        index = view.address.index,
        totalBlocks = download.ctx.totalBlocks,
        peer = peer.id
      await self.banAndDropPeer(download, peer.id)
      return

    # Hash/proof verification; corrupted data is also a ban.
    if err =? self.validateBlockDeliveryView(view).errorOption:
      error "Block validation failed - corrupted data from peer",
        address = view.address, msg = err.msg, peer = peer.id
      warn "Banning peer for sending corrupted block data", peer = peer.id
      await self.banAndDropPeer(download, peer.id)
      return

    let
      bd = view.toBlockDelivery()
      putResult = await self.localStore.putBlock(bd.blk)
    if putResult.isErr:
      warn "Failed to store block", address = bd.address, error = putResult.error.msg
      continue

    let proofResult = await self.localStore.putCidAndProof(
      bd.address.treeCid, bd.address.index, bd.blk.cid, bd.proof.get
    )
    if proofResult.isErr:
      warn "Failed to store proof", address = bd.address
      # Roll back the block so store state stays consistent.
      discard await self.localStore.delBlock(bd.blk.cid)
      continue

    totalBytes += bd.blk.data[].len.uint64
    validCount += 1
    receivedIndices.incl(bd.address.index.uint64)
    if bd.address in download.blocks:
      discard download.completeWantHandle(bd.address, some(bd.blk))

  storage_block_exchange_blocks_received.inc(validCount.int64)
  download.ctx.swarm.recordBatchSuccess(peer, rttMicros, totalBytes)

  if validCount < missingIndices.len:
    # Partial delivery: figure out what is still missing, penalize the peer
    # for blocks it advertised but didn't deliver, and requeue the rest.
    trace "Peer delivered partial batch, computing missing ranges",
      peer = peer.id, requested = missingIndices.len, received = validCount
    var stillMissing: seq[uint64]
    for idx in missingIndices:
      if idx notin receivedIndices:
        stillMissing.add(idx)

    if stillMissing.len > 0:
      var penaltyAddresses: seq[BlockAddress]
      let peerAvail = download.ctx.swarm.getPeer(peer.id)
      for idx in stillMissing:
        # Only penalize blocks this peer claimed to have.
        if peerAvail.isSome and peerAvail.get().availability.hasRange(idx, 1):
          penaltyAddresses.add(download.makeBlockAddress(idx))
      let exhausted = download.decrementBlockRetries(penaltyAddresses)
      if exhausted.len > 0:
        warn "Blocks exhausted retries after partial delivery",
          treeCid = treeCid, exhaustedCount = exhausted.len
        download.failExhaustedBlocks(exhausted)
        let exhaustedIndices = exhausted.mapIt(it.index.uint64).toHashSet
        stillMissing = stillMissing.filterIt(it notin exhaustedIndices)

    var missingRanges: seq[tuple[start: uint64, count: uint64]] = @[]
    if stillMissing.len > 0:
      stillMissing.sort()
      missingRanges = coalesceIndices(stillMissing)

    trace "Partial batch completion - requeuing missing ranges",
      treeCid = treeCid,
      originalStart = start,
      originalCount = count,
      received = validCount,
      missingRanges = missingRanges.len
    download.partialCompleteBatch(
      start, count, validCount.uint64, missingRanges, totalBytes
    )
  else:
    download.completeBatch(start, validCount.uint64, totalBytes)
proc ensureDownloadWorker(
    self: BlockExcEngine, download: ActiveDownload
) {.gcsafe, raises: [].} =
  ## Guarantee that exactly one worker task is running for this download.
  ## The download ID is tracked in `activeDownloads` for the lifetime of
  ## the worker and removed when the worker finishes, however it exits.
  let id = download.id
  if id in self.activeDownloads:
    return
  self.activeDownloads.incl(id)

  proc wrappedDownloadWorker() {.async: (raises: []).} =
    try:
      await self.downloadWorker(download)
    finally:
      # Always clear the marker so a later restart can spawn a new worker.
      self.activeDownloads.excl(id)

  self.trackedFutures.track(wrappedDownloadWorker())
proc startDownload(
    self: BlockExcEngine, desc: DownloadDesc
): ActiveDownload {.gcsafe, raises: [].} =
  ## Register `desc` with the download manager and make sure a worker
  ## task is servicing the resulting download.
  let active = self.downloadManager.startDownload(desc)
  self.ensureDownloadWorker(active)
  active
proc startDownload(
    self: BlockExcEngine, desc: DownloadDesc, missingBlocks: seq[uint64]
): ActiveDownload {.gcsafe, raises: [].} =
  ## Variant of `startDownload` restricted to an explicit set of missing
  ## block indices; also ensures a worker is running for it.
  let active = self.downloadManager.startDownload(desc, missingBlocks)
  self.ensureDownloadWorker(active)
  active
proc broadcastWantHave(
    self: BlockExcEngine,
    download: ActiveDownload,
    start: uint64,
    count: uint64,
    peers: seq[PeerContext],
) {.async: (raises: [CancelledError]).} =
  ## Send a want-have (presence) query for the range [start, start+count)
  ## of the download's tree to each peer in `peers`. Peers already tracked
  ## with Complete availability are skipped. Per-peer send failures and
  ## timeouts are logged but do not abort the broadcast.
  let rangeAddress = BlockAddress.init(download.treeCid, start.int)
  for peerCtx in peers:
    if not download.addPeerIfAbsent(peerCtx.id, BlockAvailability.unknown()):
      # skip presence request for peer with Complete availability
      continue
    try:
      await self.network.request
        .sendWantList(
          peerCtx.id,
          @[rangeAddress],
          priority = 0,
          cancel = false,
          wantType = WantType.WantHave,
          full = false,
          sendDontHave = false,
          rangeCount = count,
          downloadId = download.id,
        )
        .wait(DefaultWantHaveSendTimeout)
    except AsyncTimeoutError:
      warn "Want-have send timed out", peer = peerCtx.id
    except CatchableError as err:
      warn "Want-have send failed", peer = peerCtx.id, error = err.msg
proc downloadWorker(
    self: BlockExcEngine, download: ActiveDownload
) {.async: (raises: []).} =
  ## Continuously schedules batches to peers until download completes.
  ## Supports concurrent batch requests per peer based on BDP pipeline depth.
  ##
  ## One iteration of the main loop: advance/broadcast the presence window,
  ## broadcast our own availability, pick the next batch, satisfy what we
  ## can from the local store, then select a peer and dispatch the rest as
  ## a want-blocks request with a per-batch timeout handler.
  let
    treeCid = download.treeCid
    retryInterval = self.downloadManager.retryInterval

  logScope:
    treeCid = treeCid

  try:
    # Initial presence broadcast to a bounded, randomly chosen subset of
    # connected peers (at most deltaMax of them).
    let
      (windowStart, windowCount) = download.ctx.currentPresenceWindow()
      maxSwarmPeers = download.ctx.swarm.config.deltaMax
    var connectedPeers = self.peers.toSeq()
    if connectedPeers.len > maxSwarmPeers:
      shuffle(connectedPeers)
      connectedPeers.setLen(maxSwarmPeers)
    if connectedPeers.len > 0:
      trace "Initial presence window broadcast",
        treeCid = treeCid,
        windowStart = windowStart,
        windowCount = windowCount,
        totalBlocks = download.ctx.totalBlocks,
        peerCount = connectedPeers.len
      await self.broadcastWantHave(download, windowStart, windowCount, connectedPeers)
      trace "Initial broadcast sent, proceeding to batch loop"
    else:
      trace "No connected peers for initial broadcast, triggering discovery"
      self.searchForNewPeers(download.manifestCid)

    while not download.cancelled and not download.isDownloadComplete():
      let ctx = download.ctx

      # Slide the presence window forward when the scheduler has consumed
      # enough of the current one (skipped entirely for local-only fetches).
      if not download.fetchLocal and ctx.needsNextPresenceWindow():
        let (newStart, newCount) = ctx.advancePresenceWindow()
        ctx.trimPresenceBeforeWatermark()
        # Broadcast want-have for the new window to swarm peers only
        var swarmPeers: seq[PeerContext] = @[]
        for peerId in ctx.swarm.connectedPeers():
          let peerCtx = self.peers.get(peerId)
          if not peerCtx.isNil:
            swarmPeers.add(peerCtx)
        trace "Advancing presence window",
          treeCid = treeCid,
          newWindowStart = newStart,
          newWindowCount = newCount,
          watermark = ctx.scheduler.completedWatermark(),
          swarmPeers = swarmPeers.len
        await self.broadcastWantHave(download, newStart, newCount, swarmPeers)

      # Broadcast availability to peers
      if not download.fetchLocal and ctx.shouldBroadcastAvailability():
        let broadcastRanges = ctx.getAvailabilityBroadcast()
        if broadcastRanges.len > 0:
          trace "Broadcasting availability to swarm",
            treeCid = treeCid,
            rangeCount = broadcastRanges.len,
            swarmPeers = ctx.swarm.peerCount()
          let presence = BlockPresence(
            address: BlockAddress(treeCid: treeCid, index: broadcastRanges[0].start.int),
            kind: BlockPresenceType.HaveRange,
            ranges: broadcastRanges,
          )
          for peerId in ctx.swarm.connectedPeers():
            # Peers that already have everything don't need our availability.
            let peerOpt = ctx.swarm.getPeer(peerId)
            if peerOpt.isSome and peerOpt.get().availability.kind == bakComplete:
              continue
            try:
              await self.network.request.sendPresence(peerId, @[presence]).wait(
                DefaultWantHaveSendTimeout
              )
            except AsyncTimeoutError:
              trace "Availability broadcast send timed out", peer = peerId
            except CatchableError as err:
              trace "Failed to broadcast availability", peer = peerId, error = err.msg
        ctx.markAvailabilityBroadcasted()

      # Pull the next batch; when none is available either the download is
      # done (no pending batches) or we idle briefly and retry.
      let batchOpt = self.downloadManager.getNextBatch(download)
      if batchOpt.isNone:
        let pendingBatchCount = download.pendingBatchCount()
        if pendingBatchCount == 0 and download.isDownloadComplete():
          break
        await sleepAsync(100.milliseconds)
        continue

      let (start, count) = batchOpt.get()
      logScope:
        batchStart = start
        batchCount = count

      var
        missingIndices: seq[uint64] = @[]
        localBlockCount: uint64 = 0
        bailFetchLocal = false

      # First pass: serve as many blocks of the batch as possible from the
      # local store, yielding to the event loop every `runtimeQuota`.
      block localScan:
        var lastIdle = Moment.now()
        let runtimeQuota = 100.milliseconds
        for i in start ..< start + count:
          if (Moment.now() - lastIdle) >= runtimeQuota:
            await idleAsync()
            lastIdle = Moment.now()
          let address = download.makeBlockAddress(i)
          if download.isBlockExhausted(address):
            continue
          let exists = await address in self.localStore
          var missing = not exists
          if exists:
            let blkResult = await self.localStore.getBlock(address)
            if blkResult.isOk:
              localBlockCount += 1
              if address in download.blocks:
                discard download.completeWantHandle(address, some(blkResult.get))
            else:
              # Store claims the block exists but can't read it: treat as missing.
              missing = true
          if missing:
            if download.fetchLocal:
              # Local-only downloads fail fast on the first missing block.
              download.failLocalMissing(address)
              bailFetchLocal = true
              break localScan
            missingIndices.add(i)

      if bailFetchLocal:
        continue

      if missingIndices.len == 0:
        # Whole batch satisfied locally — no network round trip needed.
        download.completeBatchLocal(start, localBlockCount)
        continue

      if download.cancelled or download.fetchLocal:
        continue

      let swarm = download.ctx.swarm
      var shouldBroadcast = false
      if swarm.peersNeeded() != shHealthy:
        self.searchForNewPeers(download.manifestCid)
      if swarm.peersWithRange(start, count).len == 0:
        shouldBroadcast = true

      # No swarm peer advertises this range yet: ask everyone we know.
      if shouldBroadcast:
        let connectedPeers = self.peers.toSeq()
        if connectedPeers.len > 0:
          trace "Broadcasting want-have for batch range",
            treeCid = treeCid,
            start = start,
            count = count,
            peerCount = connectedPeers.len
          await self.broadcastWantHave(download, start, count, connectedPeers)
          # Give peers a short time to respond with presence
          await sleepAsync(50.milliseconds)
        else:
          await download.handleBatchRetry(start, count, retryInterval)
          continue

      if self.peers.len == 0:
        await download.handleBatchRetry(start, count, DiscoveryRateLimit)
        continue

      # Re-query peers whose availability has been unknown for too long.
      let staleUnknown = swarm.staleUnknownPeers()
      if staleUnknown.len > 0:
        let rangeAddress = download.makeBlockAddress(start)
        trace "Re-querying stale unknown peers",
          treeCid = treeCid,
          staleUnknownCount = staleUnknown.len,
          batchStart = start,
          batchCount = count
        for peerId in staleUnknown:
          try:
            await self.network.request
              .sendWantList(
                peerId,
                @[rangeAddress],
                priority = 0,
                cancel = false,
                wantType = WantType.WantHave,
                full = false,
                sendDontHave = false,
                rangeCount = count,
                downloadId = download.id,
              )
              .wait(DefaultWantHaveSendTimeout)
          except AsyncTimeoutError:
            trace "Re-query stale unknown peer send timed out", peer = peerId
          except CatchableError as err:
            trace "Failed to re-query stale unknown peer",
              peer = peerId, error = err.msg
        await sleepAsync(50.milliseconds)

      # Pick the best peer for this batch (policy lives in the swarm).
      let
        batchBytes = download.ctx.batchBytes
        selection = swarm.selectPeerForBatch(
          self.peers, start, count, batchBytes, self.downloadManager.peerTracker
        )
      if selection.kind == pskNoPeers:
        trace "No peer with range, searching for new peers"
        let
          hasActivePeers = swarm.activePeerCount() > 0
          waitTime = if hasActivePeers: retryInterval else: DiscoveryRateLimit
        await download.handleBatchRetry(start, count, waitTime)
        continue
      if selection.kind == pskAtCapacity:
        # All candidate peers are saturated: requeue and back off briefly.
        download.requeueBatch(start, count, front = false)
        await sleepAsync(10.milliseconds)
        continue

      let peer = selection.peer
      download.markBatchInFlight(start, count, localBlockCount, peer.id)
      let batchFuture =
        self.sendWantBlocksRequest(download, start, count, missingIndices, peer)
      self.downloadManager.peerTracker.track(peer.id, batchFuture)
      download.setBatchRequestFuture(start, batchFuture)

      let timeout = download.ctx.batchTimeout(peer, count)

      # Watchdog: if the batch is still pending with this peer after
      # `timeout`, penalize the peer, decrement retries, requeue the batch
      # and cancel the outstanding request.
      proc batchTimeoutHandler(dl: ActiveDownload) {.async: (raises: []).} =
        try:
          await sleepAsync(timeout)
        except CancelledError:
          return
        if dl.cancelled:
          return
        dl.pendingBatches.withValue(start, pending):
          # Only act if the batch is still assigned to the same peer.
          if pending[].peerId == peer.id:
            trace "Batch timed out", peer = peer.id, start = start, count = count
            storage_block_exchange_peer_timeouts_total.inc()
            let swarm = dl.ctx.swarm
            if swarm.recordPeerTimeout(peer.id):
              warn "Peer exceeded max timeouts, removing from swarm", peer = peer.id
              discard swarm.removePeer(peer.id)
            let
              addresses = dl.getBlockAddressesForRange(start, count)
              exhausted = dl.decrementBlockRetries(addresses)
            if exhausted.len > 0:
              warn "Blocks exhausted retries after timeout",
                treeCid = treeCid, exhaustedCount = exhausted.len
              dl.failExhaustedBlocks(exhausted)
            let reqFuture = pending[].requestFuture
            dl.requeueBatch(start, count, front = true)
            if not reqFuture.isNil and not reqFuture.finished:
              reqFuture.cancelSoon()

      let timeoutFut = batchTimeoutHandler(download)
      self.trackedFutures.track(timeoutFut)
      download.setBatchTimeoutFuture(start, timeoutFut)

      await sleepAsync(10.milliseconds)
  except CancelledError:
    trace "Batch download loop cancelled"
  except CatchableError as exc:
    error "Error in batch download loop", err = exc.msg
proc toDownloadDesc*(
    md: ManifestDescriptor,
    selectionPolicy: SelectionPolicy = spSequential,
    isBackground: bool = false,
    fetchLocal: bool = false,
): DownloadDesc =
  ## Build a download descriptor that covers the whole tree described by
  ## `md`: every block from index 0 up to the manifest's block count.
  result = DownloadDesc(
    md: md,
    selectionPolicy: selectionPolicy,
    isBackground: isBackground,
    fetchLocal: fetchLocal,
    # full-tree coverage: first block through the manifest's count
    startIndex: 0,
    count: md.manifest.blocksCount.uint64,
  )
proc startTreeDownloadGeneric[T: Block | void](
    self: BlockExcEngine,
    md: ManifestDescriptor,
    selectionPolicy: SelectionPolicy = spSequential,
    isBackground: bool = false,
    fetchLocal: bool = false,
): ?!DownloadHandleGeneric[T] =
  ## - T = Block: Returns actual block data (for streaming)
  ## - T = void: Returns success/failure only (for prefetching)
  ##
  ## Starts (or joins) the download and wraps it in an async iterator that
  ## hands out blocks strictly in index order, short-circuiting through the
  ## local store where possible.
  let
    desc = toDownloadDesc(
      md,
      selectionPolicy = selectionPolicy,
      isBackground = isBackground,
      fetchLocal = fetchLocal,
    )
    activeDownload = self.startDownload(desc)
    treeCid = md.manifest.treeCid
    totalBlocks = md.manifest.blocksCount.uint64

  when T is Block:
    trace "Started tree block download", treeCid = treeCid, totalBlocks = totalBlocks

  when T is void:
    type HandleT = BlockHandleOpaque
  else:
    type HandleT = BlockHandle

  # Iterator state captured by the closures below: at most one outstanding
  # want-handle, plus the next index to request.
  var
    pendingHandle: Option[HandleT] = none(HandleT)
    nextBlockToRequest: uint64 = 0

  proc isFinished(): bool =
    nextBlockToRequest >= totalBlocks and pendingHandle.isNone

  proc genNext(): Future[?!T] {.async: (raises: [CancelledError]).} =
    # Acquire a want-handle for the next block, completing it immediately
    # from the local store when the block is already present.
    while pendingHandle.isNone and nextBlockToRequest < totalBlocks:
      let address = BlockAddress(treeCid: treeCid, index: nextBlockToRequest.int)
      nextBlockToRequest += 1
      let handle =
        when T is void:
          activeDownload.getWantHandleOpaque(address)
        else:
          activeDownload.getWantHandle(address)
      when T is void:
        # Opaque mode only needs an existence check, not the block data.
        let exists =
          try:
            await address in self.localStore
          except CatchableError:
            false
        if exists:
          discard activeDownload.completeWantHandle(address)
        elif fetchLocal:
          handle.cancelSoon()
          return failure(
            newException(BlockNotFoundError, "Block not found locally: " & $address)
          )
      else:
        let blkResult = await self.localStore.getBlock(address)
        if blkResult.isOk:
          discard activeDownload.completeWantHandle(address, some(blkResult.get))
        elif not (blkResult.error of BlockNotFoundError) or fetchLocal:
          # Anything other than a plain miss (or any miss in local-only
          # mode) is surfaced to the caller.
          handle.cancelSoon()
          return failure(blkResult.error)
      pendingHandle = some(handle)

    if pendingHandle.isNone:
      return failure("No more blocks")

    let handle = pendingHandle.get()
    pendingHandle = none(HandleT)
    let blkResult = await handle
    if blkResult.isOk:
      activeDownload.markBlockReturned()
    return blkResult

  success DownloadHandleGeneric[T](
    treeCid: treeCid,
    downloadId: activeDownload.id,
    iter: SafeAsyncIter[T].new(genNext, isFinished),
    completionFuture: activeDownload.completionFuture,
  )
proc startTreeDownload*(
    self: BlockExcEngine, md: ManifestDescriptor, fetchLocal: bool = false
): ?!DownloadHandle =
  ## Start a streaming download of the tree in `md`; the returned handle's
  ## iterator yields the actual block payloads in order.
  startTreeDownloadGeneric[Block](self, md, fetchLocal = fetchLocal)
proc startTreeDownloadOpaque*(
    self: BlockExcEngine,
    md: ManifestDescriptor,
    selectionPolicy: SelectionPolicy = spSequential,
    isBackground: bool = false,
    fetchLocal: bool = false,
): ?!DownloadHandleOpaque =
  ## Start a prefetch-style download of the tree in `md`; the returned
  ## handle's iterator yields only per-block success/failure, no payloads.
  startTreeDownloadGeneric[void](
    self,
    md,
    selectionPolicy = selectionPolicy,
    isBackground = isBackground,
    fetchLocal = fetchLocal,
  )
proc releaseDownload*[T](self: BlockExcEngine, handle: DownloadHandleGeneric[T]) =
  ## Release the caller's interest in the download behind `handle`
  ## (delegates to the download manager).
  self.downloadManager.releaseDownload(handle.downloadId, handle.treeCid)
proc cancelDownload*(self: BlockExcEngine, treeCid: Cid) =
  ## Cancel any download in progress for `treeCid` (delegates to the manager).
  self.downloadManager.cancelDownload(treeCid)
proc cancelBackgroundDownload*(
    self: BlockExcEngine, downloadId: uint64, treeCid: Cid
): bool =
  ## Cancel a specific background download; returns the manager's result
  ## (presumably whether a matching download was found — confirm in
  ## downloadmanager).
  self.downloadManager.cancelBackgroundDownload(downloadId, treeCid)
proc getDownloadProgress*(
    self: BlockExcEngine, downloadId: uint64, treeCid: Cid
): Option[DownloadProgress] =
  ## Fetch progress for a specific download, or `none` when it is unknown
  ## (delegates to the download manager).
  self.downloadManager.getDownloadProgress(downloadId, treeCid)
proc blockPresenceHandler*(
    self: BlockExcEngine, peer: PeerId, blocks: seq[BlockPresence]
) {.async: (raises: []).} =
  ## Handle a presence (have / have-range / don't-have) message from `peer`:
  ## translate each entry into a BlockAvailability and update the matching
  ## download — plus any other downloads of the same tree CID.
  trace "Received block presence from peer", peer, len = blocks.len
  let peerCtx = self.peers.get(peer)
  if peerCtx.isNil:
    # Unknown peer: ignore.
    return
  for blk in blocks:
    if presence =? Presence.init(blk):
      if presence.have:
        let
          treeCid = presence.address.treeCid
          downloadOpt = self.downloadManager.getDownload(blk.downloadId, treeCid)
        if downloadOpt.isSome:
          let availability =
            case presence.presenceType
            of BlockPresenceType.Complete:
              BlockAvailability.complete()
            of BlockPresenceType.HaveRange:
              if presence.ranges.len > 0:
                BlockAvailability.fromRanges(presence.ranges)
              else:
                # HaveRange with no ranges carries no information.
                BlockAvailability.unknown()
            of BlockPresenceType.DontHave:
              BlockAvailability.unknown()
          downloadOpt.get().updatePeerAvailability(peer, availability)
          # try to propagate peer availability to other downloads for the same tree CID
          self.downloadManager.downloads.withValue(treeCid, innerTable):
            for otherId, otherDownload in innerTable[]:
              if otherId != blk.downloadId:
                otherDownload.updatePeerAvailability(peer, availability)
proc wantListHandler*(
    self: BlockExcEngine, peer: PeerId, wantList: WantList
) {.async: (raises: []).} =
  ## Answer a want-have query from `peer`: for each entry, scan the local
  ## store (range entries are scanned block-by-block with periodic yields)
  ## and respond with HaveRange / DontHave presence records.
  trace "Received want list from peer", peer, entries = wantList.entries.len
  let peerCtx = self.peers.get(peer)
  if peerCtx.isNil:
    return

  var presence: seq[BlockPresence]
  try:
    for e in wantList.entries:
      storage_block_exchange_want_have_lists_received.inc()
      if e.rangeCount > 0:
        # Range query: check every index in [startIdx, startIdx + count).
        let
          startIdx = e.address.index.uint64
          count = e.rangeCount
          treeCid = e.address.treeCid
        if count > MaxPresenceWindowBlocks:
          # Cap the amount of store work a single query can trigger.
          warn "Rejecting oversized range query",
            peer = peer, treeCid = treeCid, count = count, max = MaxPresenceWindowBlocks
          continue
        trace "Processing range query",
          treeCid = treeCid, start = startIdx, count = count
        let runtimeQuota = 100.milliseconds
        var
          ranges: seq[tuple[start: uint64, count: uint64]] = @[]
          rangeStart: uint64 = 0
          inRange = false
          lastIdle = Moment.now()
        for i in 0'u64 ..< count:
          # Yield to the event loop periodically during long scans.
          if (Moment.now() - lastIdle) >= runtimeQuota:
            await idleAsync()
            lastIdle = Moment.now()
          # NOTE(review): `(startIdx + i).int` raises a range defect if the
          # sum exceeds int.high; count is capped above but startIdx is not
          # re-validated here — confirm upstream guarantees on the index.
          let address = BlockAddress(treeCid: treeCid, index: (startIdx + i).int)
          let have =
            try:
              await address in self.localStore
            except CatchableError:
              false
          # Run-length encode consecutive "have" indices into ranges.
          if have:
            if not inRange:
              rangeStart = startIdx + i
              inRange = true
          else:
            if inRange:
              ranges.add((rangeStart, (startIdx + i) - rangeStart))
              inRange = false
        if inRange:
          # Close the trailing open range.
          ranges.add((rangeStart, (startIdx + count) - rangeStart))
        if ranges.len > 0:
          trace "Have blocks in range", treeCid = treeCid, ranges = ranges
          presence.add(
            BlockPresence(
              address: e.address,
              kind: BlockPresenceType.HaveRange,
              ranges: ranges,
              downloadId: e.downloadId,
            )
          )
        else:
          trace "Don't have range", treeCid = treeCid, start = startIdx, count = count
          if e.sendDontHave:
            presence.add(
              BlockPresence(
                address: e.address,
                kind: BlockPresenceType.DontHave,
                downloadId: e.downloadId,
              )
            )
      else:
        # Single-block query.
        let have =
          try:
            await e.address in self.localStore
          except CatchableError:
            false
        if have:
          presence.add(
            BlockPresence(
              address: e.address,
              kind: BlockPresenceType.HaveRange,
              ranges: @[(e.address.index.uint64, 1'u64)],
              downloadId: e.downloadId,
            )
          )
        elif e.sendDontHave:
          presence.add(
            BlockPresence(
              address: e.address,
              kind: BlockPresenceType.DontHave,
              downloadId: e.downloadId,
            )
          )
    if presence.len > 0:
      trace "Sending presence to remote", items = presence.len
      try:
        await self.network.request.sendPresence(peer, presence).wait(
          DefaultWantHaveSendTimeout
        )
      except AsyncTimeoutError:
        warn "Presence response send timed out", peer = peer
  except CancelledError as exc:
    warn "Want list handling cancelled", error = exc.msg
proc peerAddedHandler*(
    self: BlockExcEngine, peer: PeerId
) {.async: (raises: [CancelledError]).} =
  ## Register a newly connected peer with the block-exchange peer store.
  ## A peer that is already tracked is left untouched.
  trace "Setting up peer", peer
  if peer in self.peers:
    return
  self.peers.add(PeerContext.new(peer))
proc localLookup(
    self: BlockExcEngine, address: BlockAddress
): Future[?!BlockDelivery] {.async: (raises: [CancelledError]).} =
  ## Fetch a block and its merkle proof from the local store and package
  ## them as a BlockDelivery; failures from the store pass through as-is.
  (await self.localStore.getBlockAndProof(address.treeCid, address.index)).map(
    (blkAndProof: (Block, StorageMerkleProof)) =>
      BlockDelivery(address: address, blk: blkAndProof[0], proof: blkAndProof[1].some)
  )
proc requestWantBlocks*(
    self: BlockExcEngine, peer: PeerId, blockRange: BlockRange
): Future[WantBlocksResult[seq[BlockDeliveryView]]] {.
    async: (raises: [CancelledError])
.} =
  ## Send a want-blocks request for `blockRange` to `peer` and convert the
  ## response entries into zero-copy delivery views over the shared buffer.
  ## Entries that fail conversion are logged and skipped; a successful but
  ## empty response is returned as an empty (ok) seq.
  let response = ?await self.network.sendWantBlocksRequest(peer, blockRange)
  var blockViews: seq[BlockDeliveryView]
  for btBlock in response.blocks:
    let viewResult =
      toBlockDeliveryView(btBlock, response.treeCid, response.sharedBuffer)
    if viewResult.isOk:
      blockViews.add(viewResult.get)
    else:
      warn "Failed to convert block entry to view", error = viewResult.error.msg
  if blockViews.len == 0:
    trace "Request succeeded but received zero blocks",
      peer = peer,
      treeCid = blockRange.treeCid,
      rangeCount = blockRange.ranges.len,
      responseBlockCount = response.blocks.len
  return ok(blockViews)
proc new*(
    T: type BlockExcEngine,
    localStore: BlockStore,
    network: BlockExcNetwork,
    discovery: DiscoveryEngine,
    advertiser: Advertiser,
    peerStore: PeerContextStore,
    downloadManager: DownloadManager,
    selectionPolicy = spSequential,
): BlockExcEngine =
  ## Create new block exchange engine instance
  ##
  ## Wires the engine's handler procs into `network.handlers` so incoming
  ## protocol events are dispatched to this instance.
  let self = BlockExcEngine(
    localStore: localStore,
    peers: peerStore,
    downloadManager: downloadManager,
    network: network,
    trackedFutures: TrackedFutures(),
    discovery: discovery,
    advertiser: advertiser,
    selectionPolicy: selectionPolicy,
    activeDownloads: initHashSet[uint64](),
  )

  # Thin closures adapting the network callback signatures to engine methods.
  proc blockWantListHandler(
      peer: PeerId, wantList: WantList
  ): Future[void] {.async: (raises: []).} =
    # NOTE(review): the future returned by wantListHandler is not awaited
    # here (compare peerAddedHandler below) — confirm this is intentional.
    self.wantListHandler(peer, wantList)

  proc blockPresenceHandler(
      peer: PeerId, presence: seq[BlockPresence]
  ): Future[void] {.async: (raises: []).} =
    # NOTE(review): not awaited — see note on blockWantListHandler.
    self.blockPresenceHandler(peer, presence)

  proc peerAddedHandler(
      peer: PeerId
  ): Future[void] {.async: (raises: [CancelledError]).} =
    await self.peerAddedHandler(peer)

  proc peerDepartedHandler(
      peer: PeerId
  ): Future[void] {.async: (raises: [CancelledError]).} =
    self.evictPeer(peer)

  proc wantBlocksRequestHandler(
      peer: PeerId, req: WantBlocksRequest
  ): Future[seq[BlockDelivery]] {.async: (raises: [CancelledError]).} =
    # Validate every requested range before touching the store: reject
    # empty ranges, ranges over the per-batch cap, and arithmetic that
    # would overflow when materializing indices (DoS hardening).
    let maxIndex = high(Natural).uint64
    var totalCount: uint64 = 0
    for r in req.ranges:
      if r.count == 0 or r.count > MaxBlocksPerBatch or r.start > maxIndex or
          r.count - 1 > maxIndex - r.start or r.start > uint64.high - r.count or
          r.count > uint64.high - totalCount:
        warn "Rejecting WantBlocks request: invalid range",
          peer = peer, start = r.start, count = r.count
        return @[]
      totalCount += r.count
    if totalCount > MaxBlocksPerBatch:
      warn "Rejecting WantBlocks request: total blocks exceeds cap",
        peer = peer, total = totalCount
      return @[]
    var
      blockDeliveries: seq[BlockDelivery]
      notFoundCount = 0
      totalRequested: uint64 = 0
    for (start, count) in req.ranges:
      totalRequested += count
      for i in start ..< start + count:
        let address = BlockAddress(treeCid: req.treeCid, index: i.Natural)
        let res = await self.localLookup(address)
        if res.isOk:
          blockDeliveries.add(res.get)
        else:
          # Missing blocks are simply omitted from the response.
          notFoundCount += 1
    if notFoundCount > 0:
      warn "Some blocks not found in WantBlocks request",
        peer = peer,
        treeCid = req.treeCid,
        requested = totalRequested,
        found = blockDeliveries.len,
        notFound = notFoundCount
    storage_block_exchange_blocks_sent.inc(blockDeliveries.len.int64)
    return blockDeliveries

  network.handlers = BlockExcHandlers(
    onWantList: blockWantListHandler,
    onPresence: blockPresenceHandler,
    onWantBlocksRequest: wantBlocksRequestHandler,
    onPeerJoined: peerAddedHandler,
    onPeerDeparted: peerDepartedHandler,
  )
  return self