Merge 1d1c7e683486d901e1311f469f0903ce9c48d506 into 89160b58d4929691df95296f15d8eaad8c50f1dc

This commit is contained in:
Akhil 2025-03-10 16:07:06 +05:30 committed by GitHub
commit ce5104dc26
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 383 additions and 11 deletions

View File

@ -1,4 +1,4 @@
import std/times
import std/[times, options, sets]
type
SdsMessageID* = seq[byte]
@ -8,7 +8,7 @@ type
messageId*: SdsMessageID
lamportTimestamp*: int64
causalHistory*: seq[SdsMessageID]
channelId*: SdsChannelID
channelId*: Option[SdsChannelID]
content*: seq[byte]
bloomFilter*: seq[byte]
@ -17,6 +17,10 @@ type
sendTime*: Time
resendAttempts*: int
IncomingMessage* = object
message*: SdsMessage
missingDeps*: HashSet[SdsMessageID]
const
DefaultMaxMessageHistory* = 1000
DefaultMaxCausalHistory* = 10

View File

@ -12,7 +12,8 @@ proc encode*(msg: SdsMessage): ProtoBuffer =
for hist in msg.causalHistory:
pb.write(3, hist)
pb.write(4, msg.channelId)
if msg.channelId.isSome():
pb.write(4, msg.channelId.get())
pb.write(5, msg.content)
pb.write(6, msg.bloomFilter)
pb.finish()
@ -36,8 +37,11 @@ proc decode*(T: type SdsMessage, buffer: seq[byte]): ProtobufResult[T] =
if histResult.isOk:
msg.causalHistory = causalHistory
if not ?pb.getField(4, msg.channelId):
return err(ProtobufError.missingRequiredField("channelId"))
var channelId: seq[byte]
if ?pb.getField(4, channelId):
msg.channelId = some(channelId)
else:
msg.channelId = none[SdsChannelID]()
if not ?pb.getField(5, msg.content):
return err(ProtobufError.missingRequiredField("content"))

354
src/reliability.nim Normal file
View File

@ -0,0 +1,354 @@
import std/[times, locks, tables, sets, options]
import chronos, results, chronicles
import ./[message, protobuf, reliability_utils, rolling_bloom_filter]
proc newReliabilityManager*(
    channelId: Option[SdsChannelID], config: ReliabilityConfig = defaultConfig()
): Result[ReliabilityManager, ReliabilityError] =
  ## Creates a new ReliabilityManager with the specified channel ID and configuration.
  ##
  ## Parameters:
  ##   - channelId: A unique identifier for the communication channel.
  ##     Must be `some(...)`; `none` is rejected with `reInvalidArgument`.
  ##   - config: Configuration options for the ReliabilityManager. If not
  ##     provided, default configuration is used.
  ##
  ## Returns:
  ##   A Result containing either a new ReliabilityManager instance or an error.
  if channelId.isNone():
    # Option type allows absence at the protocol level, but a manager
    # must be bound to a concrete channel.
    return err(ReliabilityError.reInvalidArgument)
  try:
    let bloomFilter =
      newRollingBloomFilter(config.bloomFilterCapacity, config.bloomFilterErrorRate)
    let rm = ReliabilityManager(
      lamportTimestamp: 0,
      messageHistory: @[],
      bloomFilter: bloomFilter,
      outgoingBuffer: @[],
      incomingBuffer: initTable[SdsMessageID, IncomingMessage](),
      channelId: channelId,
      config: config,
    )
    initLock(rm.lock)
    return ok(rm)
  except Exception:
    # NOTE(review): every construction failure is mapped to reOutOfMemory;
    # consider a more specific error code if other failure modes exist.
    error "Failed to create ReliabilityManager", msg = getCurrentExceptionMsg()
    return err(ReliabilityError.reOutOfMemory)
proc isAcknowledged*(
    msg: UnacknowledgedMessage,
    causalHistory: seq[SdsMessageID],
    rbf: Option[RollingBloomFilter],
): bool =
  ## Reports whether an in-flight message has been acknowledged by a peer:
  ## either its ID appears in the peer's causal history, or the peer's
  ## (optional) bloom filter claims to contain it.
  let id = msg.message.messageId
  if id in causalHistory:
    return true
  rbf.isSome() and rbf.get().contains(id)
proc reviewAckStatus(rm: ReliabilityManager, msg: SdsMessage) {.gcsafe.} =
  ## Checks every message in the outgoing buffer against an incoming
  ## message's causal history and bloom filter; acknowledged entries are
  ## reported via onMessageSent and dropped from the buffer.
  # Reconstruct the sender's rolling bloom filter; absent or undecodable
  # filters degrade to none and only the causal history is consulted.
  var rbf = none[RollingBloomFilter]()
  if msg.bloomFilter.len > 0:
    let bfResult = deserializeBloomFilter(msg.bloomFilter)
    if bfResult.isOk():
      let bf = bfResult.get()
      let lowerCap =
        (bf.capacity.float * (100 - CapacityFlexPercent).float / 100.0).int
      let upperCap =
        (bf.capacity.float * (100 + CapacityFlexPercent).float / 100.0).int
      rbf = some(
        RollingBloomFilter(
          filter: bf,
          capacity: bf.capacity,
          minCapacity: lowerCap,
          maxCapacity: upperCap,
          messages: @[],
        )
      )
    else:
      error "Failed to deserialize bloom filter", error = bfResult.error
  # Rebuild the buffer keeping only unacknowledged entries; notify for
  # each acknowledged one in original order.
  var stillPending: seq[UnacknowledgedMessage] = @[]
  for outMsg in rm.outgoingBuffer:
    if outMsg.isAcknowledged(msg.causalHistory, rbf):
      if not rm.onMessageSent.isNil():
        rm.onMessageSent(outMsg.message.messageId)
    else:
      stillPending.add(outMsg)
  rm.outgoingBuffer = stillPending
proc wrapOutgoingMessage*(
    rm: ReliabilityManager, message: seq[byte], messageId: SdsMessageID
): Result[seq[byte], ReliabilityError] =
  ## Wraps an outgoing message with reliability metadata.
  ##
  ## Parameters:
  ##   - message: The content of the message to be sent.
  ##   - messageId: Unique identifier for the message
  ##
  ## Returns:
  ##   A Result containing either wrapped message bytes or an error.
  # Validate outside the lock: empty and oversized payloads are rejected.
  if message.len == 0:
    return err(ReliabilityError.reInvalidArgument)
  if message.len > MaxMessageSize:
    return err(ReliabilityError.reMessageTooLarge)
  withLock rm.lock:
    try:
      # Advance the Lamport clock using wall-clock seconds as a floor.
      rm.updateLamportTimestamp(getTime().toUnix)
      let bfResult = serializeBloomFilter(rm.bloomFilter.filter)
      if bfResult.isErr:
        error "Failed to serialize bloom filter"
        return err(ReliabilityError.reSerializationError)
      let msg = SdsMessage(
        messageId: messageId,
        lamportTimestamp: rm.lamportTimestamp,
        # The most recent delivered IDs become this message's causal history.
        causalHistory: rm.getRecentSdsMessageIDs(rm.config.maxCausalHistory),
        channelId: rm.channelId,
        content: message,
        bloomFilter: bfResult.get(),
      )
      # Add to outgoing buffer
      rm.outgoingBuffer.add(
        UnacknowledgedMessage(message: msg, sendTime: getTime(), resendAttempts: 0)
      )
      # Add to causal history and bloom filter
      rm.bloomFilter.add(msg.messageId)
      rm.addToHistory(msg.messageId)
      return serializeMessage(msg)
    except Exception:
      error "Failed to wrap message", msg = getCurrentExceptionMsg()
      return err(ReliabilityError.reSerializationError)
proc processIncomingBuffer(rm: ReliabilityManager) {.gcsafe.} =
  ## Drains the incoming buffer: delivers every buffered message whose
  ## missing-dependency set is empty, and propagates readiness transitively
  ## (delivering one message may unblock others still buffered).
  withLock rm.lock:
    if rm.incomingBuffer.len == 0:
      return
    var processed = initHashSet[SdsMessageID]()
    var readyToProcess = newSeq[SdsMessageID]()
    # Find initially ready messages
    for msgId, entry in rm.incomingBuffer:
      if entry.missingDeps.len == 0:
        readyToProcess.add(msgId)
    # Worklist loop: each delivered ID is removed from the missingDeps of
    # the remaining entries; entries that reach zero join the worklist.
    while readyToProcess.len > 0:
      let msgId = readyToProcess.pop()
      if msgId in processed:
        continue
      if msgId in rm.incomingBuffer:
        rm.addToHistory(msgId)
        if not rm.onMessageReady.isNil():
          rm.onMessageReady(msgId)
        processed.incl(msgId)
        # Update dependencies for remaining messages
        for remainingId, entry in rm.incomingBuffer:
          if remainingId notin processed:
            if msgId in entry.missingDeps:
              # In-place value mutation only; no keys are added or removed
              # while iterating the table.
              rm.incomingBuffer[remainingId].missingDeps.excl(msgId)
              if rm.incomingBuffer[remainingId].missingDeps.len == 0:
                readyToProcess.add(remainingId)
    # Remove processed messages
    for msgId in processed:
      rm.incomingBuffer.del(msgId)
proc unwrapReceivedMessage*(
    rm: ReliabilityManager, message: seq[byte]
): Result[tuple[message: seq[byte], missingDeps: seq[SdsMessageID]], ReliabilityError] =
  ## Unwraps a received message and processes its reliability metadata.
  ##
  ## Parameters:
  ##   - message: The received message bytes
  ##
  ## Returns:
  ##   A Result containing either tuple of (processed message, missing dependencies) or an error.
  ##
  ## NOTE(review): this proc mutates rm state (bloom filter, buffers,
  ## history) without taking rm.lock, unlike wrapOutgoingMessage. Adding
  ## withLock here would self-deadlock because processIncomingBuffer locks
  ## internally — confirm the intended threading model.
  try:
    let msg = deserializeMessage(message).valueOr:
      return err(ReliabilityError.reDeserializationError)
    # Duplicate delivery: already in history, return content with no deps.
    if msg.messageId in rm.messageHistory:
      return ok((msg.content, @[]))
    rm.bloomFilter.add(msg.messageId)
    # Update Lamport timestamp
    rm.updateLamportTimestamp(msg.lamportTimestamp)
    # Review ACK status for outgoing messages
    rm.reviewAckStatus(msg)
    var missingDeps = rm.checkDependencies(msg.causalHistory)
    if missingDeps.len == 0:
      # Check if any dependencies are still in incoming buffer
      var depsInBuffer = false
      for msgId, entry in rm.incomingBuffer.pairs():
        if msgId in msg.causalHistory:
          depsInBuffer = true
          break
      if depsInBuffer:
        # Preserve causal order: a dependency is buffered (undelivered),
        # so buffer this message too, with no missing deps of its own.
        rm.incomingBuffer[msg.messageId] = IncomingMessage(
          message: msg,
          missingDeps: initHashSet[SdsMessageID]()
        )
      else:
        # All dependencies met, add to history
        rm.addToHistory(msg.messageId)
        rm.processIncomingBuffer()
        if not rm.onMessageReady.isNil():
          rm.onMessageReady(msg.messageId)
    else:
      rm.incomingBuffer[msg.messageId] = IncomingMessage(
        message: msg,
        missingDeps: missingDeps.toHashSet()
      )
      if not rm.onMissingDependencies.isNil():
        rm.onMissingDependencies(msg.messageId, missingDeps)
    return ok((msg.content, missingDeps))
  except Exception:
    error "Failed to unwrap message", msg = getCurrentExceptionMsg()
    return err(ReliabilityError.reDeserializationError)
proc markDependenciesMet*(
    rm: ReliabilityManager, messageIds: seq[SdsMessageID]
): Result[void, ReliabilityError] =
  ## Marks the specified message dependencies as met.
  ##
  ## Parameters:
  ##   - messageIds: A sequence of message IDs to mark as met.
  ##
  ## Returns:
  ##   A Result indicating success or an error.
  ##
  ## NOTE(review): touches rm.bloomFilter and rm.incomingBuffer without
  ## rm.lock (processIncomingBuffer locks internally, so locking here
  ## would self-deadlock) — confirm the intended threading model.
  try:
    # Add all messageIds to bloom filter
    for msgId in messageIds:
      if not rm.bloomFilter.contains(msgId):
        rm.bloomFilter.add(msgId)
        # rm.addToHistory(msgId) -- not needed as this proc usually called when msg in long-term storage of application?
      # Update any pending messages that depend on this one
      for pendingId, entry in rm.incomingBuffer:
        if msgId in entry.missingDeps:
          # In-place value mutation only; no keys added/removed mid-iteration.
          rm.incomingBuffer[pendingId].missingDeps.excl(msgId)
    # Deliver anything that just became fully satisfied.
    rm.processIncomingBuffer()
    return ok()
  except Exception:
    error "Failed to mark dependencies as met", msg = getCurrentExceptionMsg()
    return err(ReliabilityError.reInternalError)
proc setCallbacks*(
    rm: ReliabilityManager,
    onMessageReady: proc(messageId: SdsMessageID) {.gcsafe.},
    onMessageSent: proc(messageId: SdsMessageID) {.gcsafe.},
    onMissingDependencies:
      proc(messageId: SdsMessageID, missingDeps: seq[SdsMessageID]) {.gcsafe.},
    onPeriodicSync: PeriodicSyncCallback = nil,
) =
  ## Installs the event callbacks, replacing any previously registered ones.
  ##
  ## Parameters:
  ##   - onMessageReady: invoked once a message's dependencies are met and
  ##     it can be handed to the application.
  ##   - onMessageSent: invoked when an outgoing message is confirmed as sent.
  ##   - onMissingDependencies: invoked when a received message has unmet
  ##     causal dependencies.
  ##   - onPeriodicSync: optional periodic-sync notification (may be nil).
  withLock rm.lock:
    rm.onMessageReady = onMessageReady
    rm.onMessageSent = onMessageSent
    rm.onMissingDependencies = onMissingDependencies
    rm.onPeriodicSync = onPeriodicSync
proc checkUnacknowledgedMessages(rm: ReliabilityManager) {.gcsafe.} =
  ## Checks and processes unacknowledged messages in the outgoing buffer:
  ## entries past the resend interval are re-queued with a bumped attempt
  ## count, or retired (with onMessageSent fired) once maxResendAttempts
  ## is exhausted.
  withLock rm.lock:
    let now = getTime()
    var kept: seq[UnacknowledgedMessage] = @[]
    for entry in rm.outgoingBuffer:
      if now - entry.sendTime <= rm.config.resendInterval:
        # Not yet due for a resend; keep as-is.
        kept.add(entry)
      elif entry.resendAttempts < rm.config.maxResendAttempts:
        # Due for another attempt: bump the counter and restart the timer.
        var bumped = entry
        inc bumped.resendAttempts
        bumped.sendTime = now
        kept.add(bumped)
      else:
        # Out of attempts: retire the entry and notify.
        if not rm.onMessageSent.isNil():
          rm.onMessageSent(entry.message.messageId)
    rm.outgoingBuffer = kept
proc periodicBufferSweep(
    rm: ReliabilityManager
) {.async: (raises: [CancelledError]), gcsafe.} =
  ## Endless maintenance loop: retries/retires unacknowledged messages and
  ## prunes the rolling bloom filter, then sleeps for the configured interval.
  while true:
    try:
      checkUnacknowledgedMessages(rm)
      cleanBloomFilter(rm)
    except Exception:
      error "Error in periodic buffer sweep", msg = getCurrentExceptionMsg()
    let sweepMs = rm.config.bufferSweepInterval.inMilliseconds
    await sleepAsync(chronos.milliseconds(sweepMs))
proc periodicSyncMessage(
    rm: ReliabilityManager
) {.async: (raises: [CancelledError]), gcsafe.} =
  ## Periodically invokes the registered onPeriodicSync callback (if any)
  ## to prompt the application to send a sync message.
  while true:
    try:
      let syncCb = rm.onPeriodicSync
      if not syncCb.isNil():
        syncCb()
    except Exception:
      error "Error in periodic sync", msg = getCurrentExceptionMsg()
    await sleepAsync(chronos.seconds(rm.config.syncMessageInterval.inSeconds))
proc startPeriodicTasks*(rm: ReliabilityManager) =
  ## Starts the periodic tasks for buffer sweeping and sync message sending.
  ##
  ## This procedure should be called after creating a ReliabilityManager to enable automatic maintenance.
  # Fire-and-forget futures; presumably scheduled on the chronos
  # dispatcher — TODO confirm a dispatcher is running when this is called.
  asyncSpawn rm.periodicBufferSweep()
  asyncSpawn rm.periodicSyncMessage()
proc resetReliabilityManager*(rm: ReliabilityManager): Result[void, ReliabilityError] =
  ## Restores the manager to a freshly-constructed state: zeroes the
  ## Lamport clock, empties history and both buffers, and rebuilds the
  ## rolling bloom filter from the configured parameters.
  withLock rm.lock:
    try:
      rm.lamportTimestamp = 0
      rm.messageHistory = @[]
      rm.outgoingBuffer = @[]
      rm.incomingBuffer = initTable[SdsMessageID, IncomingMessage]()
      rm.bloomFilter =
        newRollingBloomFilter(rm.config.bloomFilterCapacity, rm.config.bloomFilterErrorRate)
      return ok()
    except Exception:
      error "Failed to reset ReliabilityManager", msg = getCurrentExceptionMsg()
      return err(ReliabilityError.reInternalError)

View File

@ -1,4 +1,4 @@
import std/[times, locks]
import std/[times, locks, options]
import chronicles
import ./[rolling_bloom_filter, message]
@ -20,8 +20,8 @@ type
messageHistory*: seq[SdsMessageID]
bloomFilter*: RollingBloomFilter
outgoingBuffer*: seq[UnacknowledgedMessage]
incomingBuffer*: seq[SdsMessage]
channelId*: SdsChannelID
incomingBuffer*: Table[SdsMessageID, IncomingMessage]
channelId*: Option[SdsChannelID]
config*: ReliabilityConfig
lock*: Lock
onMessageReady*: proc(messageId: SdsMessageID) {.gcsafe.}
@ -59,7 +59,7 @@ proc cleanup*(rm: ReliabilityManager) {.raises: [].} =
try:
withLock rm.lock:
rm.outgoingBuffer.setLen(0)
rm.incomingBuffer.setLen(0)
rm.incomingBuffer.clear()
rm.messageHistory.setLen(0)
except Exception:
error "Error during cleanup", error = getCurrentExceptionMsg()
@ -84,6 +84,15 @@ proc updateLamportTimestamp*(
proc getRecentSdsMessageIDs*(rm: ReliabilityManager, n: int): seq[SdsMessageID] =
result = rm.messageHistory[max(0, rm.messageHistory.len - n) .. ^1]
proc checkDependencies*(
    rm: ReliabilityManager, deps: seq[SdsMessageID]
): seq[SdsMessageID] =
  ## Returns the subset of `deps` not yet present in the message history,
  ## preserving the input order.
  result = @[]
  for dep in deps:
    if dep notin rm.messageHistory:
      result.add(dep)
proc getMessageHistory*(rm: ReliabilityManager): seq[SdsMessageID] =
withLock rm.lock:
result = rm.messageHistory
@ -92,6 +101,8 @@ proc getOutgoingBuffer*(rm: ReliabilityManager): seq[UnacknowledgedMessage] =
withLock rm.lock:
result = rm.outgoingBuffer
proc getIncomingBuffer*(rm: ReliabilityManager): seq[SdsMessage] =
proc getIncomingBuffer*(
rm: ReliabilityManager
): Table[SdsMessageID, message.IncomingMessage] =
withLock rm.lock:
result = rm.incomingBuffer

View File

@ -1,7 +1,6 @@
import unittest, results, strutils
import ../src/bloom
from random import rand, randomize
import ../src/[message, protobuf, protobufutil, reliability_utils, rolling_bloom_filter]
suite "bloom filter":
setup: