mirror of
https://github.com/logos-messaging/nim-sds.git
synced 2026-06-11 19:59:28 +00:00
Refactor persistency - follow up - review fixes (#73)
removal of persistency of retrievalHints because its never read. The PR also covers some leftovers of #72 - small interface and code style changes.
This commit is contained in:
parent
4ccdd122fc
commit
abdd40cc64
@ -1,6 +1,7 @@
|
||||
<!-- gitnexus:start -->
|
||||
# GitNexus — Code Intelligence
|
||||
|
||||
|
||||
This project is indexed by GitNexus as **nim-sds** (1100 symbols, 1820 relationships, 66 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
||||
|
||||
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
|
||||
|
||||
2
sds.nim
2
sds.nim
@ -106,7 +106,7 @@ proc wrapOutgoingMessage*(
|
||||
return err(error)
|
||||
|
||||
let bfResult = serializeBloomFilter(channel.bloomFilter.filter)
|
||||
if bfResult.isErr:
|
||||
if bfResult.isErr():
|
||||
error "Failed to serialize bloom filter", channelId = channelId
|
||||
return err(ReliabilityError.reSerializationError)
|
||||
|
||||
|
||||
@ -15,22 +15,7 @@ export
|
||||
proc defaultConfig*(): ReliabilityConfig =
|
||||
return ReliabilityConfig.init()
|
||||
|
||||
proc reliabilityErr*(detail: string): ReliabilityError {.gcsafe, raises: [].} =
|
||||
## Maps a backend-supplied persistence error string onto the
|
||||
## `rePersistenceError` enum value. The enum carries no payload, so the
|
||||
## original detail is logged here — this is the single point where a
|
||||
## persistence failure is recorded, while the enum value travels up the
|
||||
## `Result` chain to the public API caller, who decides what to do.
|
||||
##
|
||||
## With the snapshot-based Persistence interface, most protocol ops no
|
||||
## longer propagate persistence errors at all — they log and continue
|
||||
## (see PLAN_SNAPSHOT_PERSISTENCE.md §8). This helper is still used by
|
||||
## the durability-intent ops (removeChannel, resetReliabilityManager,
|
||||
## getOrCreateChannel) that retain err-on-failure semantics.
|
||||
warn "persistence operation failed", detail = detail
|
||||
ReliabilityError.rePersistenceError
|
||||
|
||||
proc snapshotMeta*(channel: ChannelContext): ChannelMeta {.gcsafe, raises: [].} =
|
||||
proc snapshotMeta(channel: ChannelContext): ChannelMeta {.gcsafe, raises: [].} =
|
||||
## Captures the current in-memory state of a `ChannelContext` as a
|
||||
## `ChannelMeta` blob, suitable for `Persistence.saveChannelMeta`.
|
||||
##
|
||||
@ -39,16 +24,17 @@ proc snapshotMeta*(channel: ChannelContext): ChannelMeta {.gcsafe, raises: [].}
|
||||
## (see PLAN §6). The bloom filter and message history are intentionally
|
||||
## excluded — the former is rebuilt from the latter on bootstrap, and
|
||||
## the latter is persisted separately via `updateHistory`.
|
||||
result = ChannelMeta.init()
|
||||
result.lamportTimestamp = channel.lamportTimestamp
|
||||
var meta = ChannelMeta.init()
|
||||
meta.lamportTimestamp = channel.lamportTimestamp
|
||||
for u in channel.outgoingBuffer:
|
||||
result.outgoingBuffer.add(u)
|
||||
meta.outgoingBuffer.add(u)
|
||||
for _, m in channel.incomingBuffer.pairs:
|
||||
result.incomingBuffer.add(m)
|
||||
meta.incomingBuffer.add(m)
|
||||
for id, e in channel.outgoingRepairBuffer.pairs:
|
||||
result.outgoingRepairBuffer.add(OutgoingRepairKV(messageId: id, entry: e))
|
||||
meta.outgoingRepairBuffer.add(OutgoingRepairKV(messageId: id, entry: e))
|
||||
for id, e in channel.incomingRepairBuffer.pairs:
|
||||
result.incomingRepairBuffer.add(IncomingRepairKV(messageId: id, entry: e))
|
||||
meta.incomingRepairBuffer.add(IncomingRepairKV(messageId: id, entry: e))
|
||||
return meta
|
||||
|
||||
proc trySaveMeta*(
|
||||
rm: ReliabilityManager, channelId: SdsChannelID, channel: ChannelContext
|
||||
@ -59,12 +45,11 @@ proc trySaveMeta*(
|
||||
##
|
||||
## This helper is the single point where snapshot-save failures are
|
||||
## logged; callers do not need to handle the Result.
|
||||
let res = await rm.persistence.saveChannelMeta(channelId, snapshotMeta(channel))
|
||||
if res.isErr:
|
||||
(await rm.persistence.saveChannelMeta(channelId, snapshotMeta(channel))).isOkOr:
|
||||
warn "snapshot save failed; in-memory state authoritative, next op will retry",
|
||||
channelId = channelId, detail = res.error
|
||||
channelId = channelId, detail = error
|
||||
|
||||
proc queueHistoryAppend*(channel: ChannelContext, msgId: SdsMessageID) =
|
||||
proc queueHistoryAppend(channel: ChannelContext, msgId: SdsMessageID) =
|
||||
## Push an append onto the pending history queue. Only the id is
|
||||
## stored — the full SdsMessage is looked up from `messageHistory` at
|
||||
## flush time (invariant: every queued id is present in messageHistory).
|
||||
@ -76,7 +61,7 @@ proc queueHistoryAppend*(channel: ChannelContext, msgId: SdsMessageID) =
|
||||
channel.pendingHistoryEvicts.excl(msgId)
|
||||
channel.pendingHistoryAppends.incl(msgId)
|
||||
|
||||
proc queueHistoryEvict*(channel: ChannelContext, msgId: SdsMessageID) =
|
||||
proc queueHistoryEvict(channel: ChannelContext, msgId: SdsMessageID) =
|
||||
## Push an evict onto the pending history queue. Merge rule symmetric
|
||||
## with `queueHistoryAppend`: cancels any pending append for the same
|
||||
## id (the just-evicted message no longer needs to be persisted as an
|
||||
@ -119,8 +104,7 @@ proc tryUpdateHistory*(
|
||||
except KeyError:
|
||||
return # checked `in` above; unreachable, but tables can raise per spec
|
||||
|
||||
if channel.pendingHistoryAppends.len == 0 and
|
||||
channel.pendingHistoryEvicts.len == 0:
|
||||
if channel.pendingHistoryAppends.len == 0 and channel.pendingHistoryEvicts.len == 0:
|
||||
return # nothing to flush — no round-trip cost
|
||||
|
||||
var batch = HistoryUpdate.init()
|
||||
@ -151,8 +135,7 @@ proc tryUpdateHistory*(
|
||||
detail = res.error
|
||||
if channel.pendingHistoryAppends.len > rm.config.maxMessageHistory:
|
||||
warn "pending history queue exceeds maxMessageHistory; backend may be stuck",
|
||||
channelId = channelId,
|
||||
pendingAppends = channel.pendingHistoryAppends.len
|
||||
channelId = channelId, pendingAppends = channel.pendingHistoryAppends.len
|
||||
|
||||
proc dropChannelFromPersistence*(
|
||||
rm: ReliabilityManager, channelId: SdsChannelID
|
||||
@ -165,7 +148,8 @@ proc dropChannelFromPersistence*(
|
||||
## err on failure (durability is the semantic intent — the caller asked
|
||||
## us to confirm a disk wipe; we cannot silently lie). See PLAN §8.
|
||||
(await rm.persistence.dropChannel(channelId)).isOkOr:
|
||||
return err(reliabilityErr(error))
|
||||
warn "persistence operation failed", cause = error
|
||||
return err(ReliabilityError.rePersistenceError)
|
||||
ok()
|
||||
|
||||
proc cleanup*(rm: ReliabilityManager) {.async: (raises: []).} =
|
||||
@ -344,16 +328,6 @@ proc getRecentHistoryEntries*(
|
||||
if not rm.onRetrievalHint.isNil():
|
||||
{.cast(raises: []).}:
|
||||
entry.retrievalHint = rm.onRetrievalHint(msgId)
|
||||
if entry.retrievalHint.len > 0:
|
||||
# Phase 2B: best-effort hint persistence via V2. Non-fatal —
|
||||
# hints are an optimisation; a missing hint just means the
|
||||
# peer falls back to slower retrieval.
|
||||
let hintRes = await rm.persistence.setRetrievalHint(
|
||||
msgId, entry.retrievalHint
|
||||
)
|
||||
if hintRes.isErr:
|
||||
warn "retrieval hint save failed; continuing",
|
||||
msgId = msgId, detail = hintRes.error
|
||||
entry.senderId = channel.messageHistory[msgId].senderId
|
||||
entries.add(entry)
|
||||
ok(entries)
|
||||
@ -456,7 +430,9 @@ proc getOrCreateChannel*(
|
||||
)
|
||||
)
|
||||
let data = (await rm.persistence.loadChannel(channelId)).valueOr:
|
||||
return err(reliabilityErr(error))
|
||||
warn "persistence operation failed", cause = error
|
||||
return err(ReliabilityError.rePersistenceError)
|
||||
|
||||
channel.lamportTimestamp = data.meta.lamportTimestamp
|
||||
# Backend contract: messageHistory MUST be ordered oldest-first.
|
||||
# If a backend violates this, FIFO eviction breaks across restarts.
|
||||
|
||||
@ -27,7 +27,6 @@ export results, sds_message_id, channel_meta, history_update
|
||||
type Persistence* = object
|
||||
## Pluggable durability backend. Supplied at `newReliabilityManager`
|
||||
## construction time; defaults to `noOpPersistence()` when not given.
|
||||
|
||||
saveChannelMeta*: proc(
|
||||
channelId: SdsChannelID, meta: ChannelMeta
|
||||
): Future[Result[void, string]] {.async: (raises: []), gcsafe.}
|
||||
@ -42,30 +41,21 @@ type Persistence* = object
|
||||
## maxMessageHistory cap. Callers SHOULD skip this call entirely when
|
||||
## `update.isEmpty`.
|
||||
|
||||
loadChannel*: proc(
|
||||
channelId: SdsChannelID
|
||||
): Future[Result[ChannelData, string]] {.async: (raises: []), gcsafe.}
|
||||
loadChannel*: proc(channelId: SdsChannelID): Future[Result[ChannelData, string]] {.
|
||||
async: (raises: []), gcsafe
|
||||
.}
|
||||
## Bootstrap on `getOrCreateChannel`. Returns the full prior state, or
|
||||
## an empty `ChannelData` if the channel is new on disk. Failure
|
||||
## propagates to the caller — bootstrap is a durability-intent op.
|
||||
|
||||
dropChannel*: proc(
|
||||
channelId: SdsChannelID
|
||||
): Future[Result[void, string]] {.async: (raises: []), gcsafe.}
|
||||
dropChannel*: proc(channelId: SdsChannelID): Future[Result[void, string]] {.
|
||||
async: (raises: []), gcsafe
|
||||
.}
|
||||
## Wipe all persisted state for a channel. Called by `removeChannel` /
|
||||
## `resetReliabilityManager`. Backends SHOULD execute atomically.
|
||||
## Failure propagates to the caller — the caller asked us to confirm a
|
||||
## disk wipe and we cannot silently lie.
|
||||
|
||||
setRetrievalHint*: proc(
|
||||
msgId: SdsMessageID, hint: seq[byte]
|
||||
): Future[Result[void, string]] {.async: (raises: []), gcsafe.}
|
||||
## Record a retrieval hint for a message id. Called from
|
||||
## `getRecentHistoryEntries` when an application-supplied hint
|
||||
## provider returns a non-empty hint. Out-of-band from the
|
||||
## snapshot/history write path because hints are populated lazily
|
||||
## during read. Non-fatal on failure.
|
||||
|
||||
proc noOpPersistence*(): Persistence =
|
||||
## Default backend: discards all writes, returns an empty snapshot on
|
||||
## load. Used when no real backend is supplied (existing tests and
|
||||
@ -87,8 +77,4 @@ proc noOpPersistence*(): Persistence =
|
||||
channelId: SdsChannelID
|
||||
): Future[Result[void, string]] {.async: (raises: []).} =
|
||||
ok(),
|
||||
setRetrievalHint: proc(
|
||||
msgId: SdsMessageID, hint: seq[byte]
|
||||
): Future[Result[void, string]] {.async: (raises: []).} =
|
||||
ok(),
|
||||
)
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
##
|
||||
## `failingOps` injects backend failures. Op names match the `Persistence`
|
||||
## field names: "saveChannelMeta", "updateHistory", "loadChannel",
|
||||
## "dropChannel", "setRetrievalHint".
|
||||
## "dropChannel".
|
||||
|
||||
import std/[tables, sets]
|
||||
import chronos
|
||||
@ -15,7 +15,6 @@ import sds
|
||||
type InMemoryStore* = ref object
|
||||
lamports*: Table[SdsChannelID, int64]
|
||||
log*: Table[SdsChannelID, OrderedTable[SdsMessageID, SdsMessage]]
|
||||
hints*: Table[SdsMessageID, seq[byte]]
|
||||
outgoing*: Table[SdsChannelID, OrderedTable[SdsMessageID, UnacknowledgedMessage]]
|
||||
incoming*: Table[SdsChannelID, OrderedTable[SdsMessageID, IncomingMessage]]
|
||||
outgoingRepair*: Table[SdsChannelID, OrderedTable[SdsMessageID, OutgoingRepairEntry]]
|
||||
@ -23,8 +22,7 @@ type InMemoryStore* = ref object
|
||||
dropChannelCalls*: Table[SdsChannelID, int]
|
||||
## Per-channel counter; lets tests assert dropChannel is invoked
|
||||
## exactly once per logical drop.
|
||||
failingOps*: HashSet[string]
|
||||
## Op names that should return an injected backend error.
|
||||
failingOps*: HashSet[string] ## Op names that should return an injected backend error.
|
||||
|
||||
proc newInMemoryStore*(): InMemoryStore =
|
||||
InMemoryStore(failingOps: initHashSet[string]())
|
||||
@ -48,8 +46,7 @@ proc newInMemoryPersistence*(store: InMemoryStore): Persistence =
|
||||
store.outgoing[channelId][u.message.messageId] = u
|
||||
|
||||
# Incoming buffer.
|
||||
store.incoming[channelId] =
|
||||
initOrderedTable[SdsMessageID, IncomingMessage]()
|
||||
store.incoming[channelId] = initOrderedTable[SdsMessageID, IncomingMessage]()
|
||||
for m in meta.incomingBuffer:
|
||||
store.incoming[channelId][m.message.messageId] = m
|
||||
|
||||
@ -120,11 +117,4 @@ proc newInMemoryPersistence*(store: InMemoryStore): Persistence =
|
||||
store.dropChannelCalls[channelId] =
|
||||
store.dropChannelCalls.getOrDefault(channelId) + 1
|
||||
ok(),
|
||||
setRetrievalHint: proc(
|
||||
msgId: SdsMessageID, hint: seq[byte]
|
||||
): Future[Result[void, string]] {.async: (raises: []).} =
|
||||
if "setRetrievalHint" in store.failingOps:
|
||||
return err("injected backend failure: setRetrievalHint")
|
||||
store.hints[msgId] = hint
|
||||
ok(),
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user