mirror of https://github.com/waku-org/nwaku.git
chore: simple PR to enhance postgres and retention policy logs (#2884)
This commit is contained in:
parent
241fb8cb97
commit
71ee42de56
|
@ -87,7 +87,6 @@ proc handleMessage*(
|
|||
return
|
||||
|
||||
let msgHash = computeMessageHash(pubsubTopic, msg)
|
||||
|
||||
let insertStartTime = getTime().toUnixFloat()
|
||||
|
||||
(await self.driver.put(msgHash, pubsubTopic, msg)).isOkOr:
|
||||
|
@ -99,7 +98,7 @@ proc handleMessage*(
|
|||
timestamp = msg.timestamp,
|
||||
error = error
|
||||
|
||||
notice "message archived",
|
||||
trace "message archived",
|
||||
hash_hash = msgHash.to0xHex(),
|
||||
pubsubTopic = pubsubTopic,
|
||||
contentTopic = msg.contentTopic,
|
||||
|
@ -186,11 +185,10 @@ proc findMessages*(
|
|||
)
|
||||
|
||||
proc periodicRetentionPolicy(self: WakuArchive) {.async.} =
|
||||
debug "executing message retention policy"
|
||||
|
||||
let policy = self.retentionPolicy.get()
|
||||
|
||||
while true:
|
||||
debug "executing message retention policy"
|
||||
(await policy.execute(self.driver)).isOkOr:
|
||||
waku_archive_errors.inc(labelValues = [retPolicyFailure])
|
||||
error "failed execution of retention policy", error = error
|
||||
|
|
|
@ -346,6 +346,7 @@ proc getPartitionsList(
|
|||
## Retrieves the seq of partition table names.
|
||||
## e.g: @["messages_1708534333_1708534393", "messages_1708534273_1708534333"]
|
||||
|
||||
debug "beginning getPartitionsList"
|
||||
var partitions: seq[string]
|
||||
proc rowCallback(pqResult: ptr PGresult) =
|
||||
for iRow in 0 ..< pqResult.pqNtuples():
|
||||
|
@ -404,6 +405,7 @@ proc getMessagesArbitraryQuery(
|
|||
): Future[ArchiveDriverResult[seq[ArchiveRow]]] {.async.} =
|
||||
## This proc allows to handle atypical queries. We don't use prepared statements for those.
|
||||
|
||||
debug "beginning getMessagesArbitraryQuery"
|
||||
var query = SelectClause
|
||||
var statements: seq[string]
|
||||
var args: seq[string]
|
||||
|
@ -484,7 +486,9 @@ proc getMessageHashesArbitraryQuery(
|
|||
.} =
|
||||
## This proc allows to handle atypical queries. We don't use prepared statements for those.
|
||||
|
||||
debug "beginning of getMessagesV2ArbitraryQuery"
|
||||
var query = """SELECT messageHash FROM messages"""
|
||||
|
||||
var statements: seq[string]
|
||||
var args: seq[string]
|
||||
|
||||
|
@ -565,6 +569,7 @@ proc getMessagesPreparedStmt(
|
|||
|
||||
var rows: seq[(WakuMessageHash, PubsubTopic, WakuMessage)]
|
||||
|
||||
debug "beginning of getMessagesPreparedStmt"
|
||||
proc rowCallback(pqResult: ptr PGresult) =
|
||||
rowCallbackImpl(pqResult, rows)
|
||||
|
||||
|
@ -653,6 +658,7 @@ proc getMessageHashesPreparedStmt(
|
|||
|
||||
var rows: seq[(WakuMessageHash, PubsubTopic, WakuMessage)]
|
||||
|
||||
debug "beginning of getMessagesV2PreparedStmt"
|
||||
proc rowCallback(pqResult: ptr PGresult) =
|
||||
hashCallbackImpl(pqResult, rows)
|
||||
|
||||
|
@ -742,6 +748,8 @@ method getMessages*(
|
|||
maxPageSize = DefaultPageSize,
|
||||
ascendingOrder = true,
|
||||
): Future[ArchiveDriverResult[seq[ArchiveRow]]] {.async.} =
|
||||
debug "beginning of getMessages"
|
||||
|
||||
let hexHashes = hashes.mapIt(toHex(it))
|
||||
|
||||
if contentTopics.len > 0 and hexHashes.len > 0 and pubsubTopic.isSome() and
|
||||
|
@ -787,6 +795,8 @@ proc getStr(
|
|||
): Future[ArchiveDriverResult[string]] {.async.} =
|
||||
# Performs a query that is expected to return a single string
|
||||
|
||||
debug "beginning of getStr"
|
||||
|
||||
var ret: string
|
||||
proc rowCallback(pqResult: ptr PGresult) =
|
||||
if pqResult.pqnfields() != 1:
|
||||
|
@ -809,6 +819,7 @@ proc getInt(
|
|||
): Future[ArchiveDriverResult[int64]] {.async.} =
|
||||
# Performs a query that is expected to return a single numeric value (int64)
|
||||
|
||||
debug "beginning of getInt"
|
||||
var retInt = 0'i64
|
||||
let str = (await s.getStr(query)).valueOr:
|
||||
return err("could not get str in getInt: " & $error)
|
||||
|
@ -826,6 +837,8 @@ proc getInt(
|
|||
method getDatabaseSize*(
|
||||
s: PostgresDriver
|
||||
): Future[ArchiveDriverResult[int64]] {.async.} =
|
||||
debug "beginning of getDatabaseSize"
|
||||
|
||||
let intRes = (await s.getInt("SELECT pg_database_size(current_database())")).valueOr:
|
||||
return err("error in getDatabaseSize: " & error)
|
||||
|
||||
|
@ -835,6 +848,8 @@ method getDatabaseSize*(
|
|||
method getMessagesCount*(
|
||||
s: PostgresDriver
|
||||
): Future[ArchiveDriverResult[int64]] {.async.} =
|
||||
debug "beginning of getMessagesCount"
|
||||
|
||||
let intRes = await s.getInt("SELECT COUNT(1) FROM messages")
|
||||
if intRes.isErr():
|
||||
return err("error in getMessagesCount: " & intRes.error)
|
||||
|
@ -847,6 +862,8 @@ method getOldestMessageTimestamp*(
|
|||
## In some cases it could happen that we have
|
||||
## empty partitions which are older than the current stored rows.
|
||||
## In those cases we want to consider those older partitions as the oldest considered timestamp.
|
||||
debug "beginning of getOldestMessageTimestamp"
|
||||
|
||||
let oldestPartition = s.partitionMngr.getOldestPartition().valueOr:
|
||||
return err("could not get oldest partition: " & $error)
|
||||
|
||||
|
@ -862,7 +879,9 @@ method getOldestMessageTimestamp*(
|
|||
method getNewestMessageTimestamp*(
|
||||
s: PostgresDriver
|
||||
): Future[ArchiveDriverResult[Timestamp]] {.async.} =
|
||||
debug "beginning of getNewestMessageTimestamp"
|
||||
let intRes = await s.getInt("SELECT MAX(timestamp) FROM messages")
|
||||
|
||||
if intRes.isErr():
|
||||
return err("error in getNewestMessageTimestamp: " & intRes.error)
|
||||
|
||||
|
@ -871,6 +890,8 @@ method getNewestMessageTimestamp*(
|
|||
method deleteOldestMessagesNotWithinLimit*(
|
||||
s: PostgresDriver, limit: int
|
||||
): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
debug "beginning of deleteOldestMessagesNotWithinLimit"
|
||||
|
||||
let execRes = await s.writeConnPool.pgQuery(
|
||||
"""DELETE FROM messages WHERE messageHash NOT IN
|
||||
(
|
||||
|
@ -881,9 +902,12 @@ method deleteOldestMessagesNotWithinLimit*(
|
|||
if execRes.isErr():
|
||||
return err("error in deleteOldestMessagesNotWithinLimit: " & execRes.error)
|
||||
|
||||
debug "end of deleteOldestMessagesNotWithinLimit"
|
||||
return ok()
|
||||
|
||||
method close*(s: PostgresDriver): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
debug "beginning of postgres close"
|
||||
|
||||
## Cancel the partition factory loop
|
||||
s.futLoopPartitionFactory.cancelSoon()
|
||||
|
||||
|
@ -929,6 +953,9 @@ proc acquireDatabaseLock*(
|
|||
## approach is using the "performWriteQueryWithLock" proc. However, we can't use
|
||||
## "performWriteQueryWithLock" in the migrations process because we can't nest two PL/SQL
|
||||
## scripts.
|
||||
|
||||
debug "beginning of acquireDatabaseLock", lockId
|
||||
|
||||
let locked = (
|
||||
await s.getStr(
|
||||
fmt"""
|
||||
|
@ -947,6 +974,7 @@ proc releaseDatabaseLock*(
|
|||
s: PostgresDriver, lockId: int = 841886
|
||||
): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
## Release an advisory lock (useful to avoid more than one application running migrations at the same time)
|
||||
debug "beginning of releaseDatabaseLock", lockId
|
||||
let unlocked = (
|
||||
await s.getStr(
|
||||
fmt"""
|
||||
|
@ -976,9 +1004,7 @@ const COULD_NOT_ACQUIRE_ADVISORY_LOCK* = "could not acquire advisory lock"
|
|||
proc performWriteQueryWithLock*(
|
||||
self: PostgresDriver, queryToProtect: string
|
||||
): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
## This wraps the original query in a script so that we make sure a pg_advisory lock protects it.
|
||||
## The purpose of this proc is to protect write queries that might be performed simultaneously
|
||||
## to the same database, from different store nodes.
|
||||
## This wraps the original query in a script so that we make sure a pg_advisory lock protects it
|
||||
debug "performWriteQueryWithLock", queryToProtect
|
||||
let query =
|
||||
fmt"""
|
||||
|
@ -1042,6 +1068,7 @@ proc addPartition(
|
|||
## Creates a partition table that will store the messages that fall in the range
|
||||
## `startTime` <= timestamp < `startTime + duration`.
|
||||
## `startTime` is measured in seconds since epoch
|
||||
debug "beginning of addPartition"
|
||||
|
||||
let beginning = startTime
|
||||
let `end` = partitions_manager.calcEndPartitionTime(startTime)
|
||||
|
@ -1139,7 +1166,7 @@ proc loopPartitionFactory(
|
|||
debug "starting loopPartitionFactory"
|
||||
|
||||
while true:
|
||||
trace "Check if we need to create a new partition"
|
||||
trace "Check if a new partition is needed"
|
||||
|
||||
## Let's make the 'partition_manager' aware of the current partitions
|
||||
(await self.refreshPartitionsInfo()).isOkOr:
|
||||
|
@ -1184,6 +1211,7 @@ proc getTableSize*(
|
|||
): Future[ArchiveDriverResult[string]] {.async.} =
|
||||
## Returns a human-readable representation of the size for the requested table.
|
||||
## tableName - table of interest.
|
||||
debug "beginning of getTableSize"
|
||||
|
||||
let tableSize = (
|
||||
await self.getStr(
|
||||
|
@ -1200,6 +1228,8 @@ proc getTableSize*(
|
|||
proc removePartition(
|
||||
self: PostgresDriver, partitionName: string
|
||||
): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
debug "beginning of removePartition", partitionName
|
||||
|
||||
var partSize = ""
|
||||
let partSizeRes = await self.getTableSize(partitionName)
|
||||
if partSizeRes.isOk():
|
||||
|
@ -1229,6 +1259,7 @@ proc removePartitionsOlderThan(
|
|||
## Removes old partitions that don't contain the specified timestamp
|
||||
|
||||
let tsInSec = Timestamp(float(tsInNanoSec) / 1_000_000_000)
|
||||
debug "beginning of removePartitionsOlderThan", tsInSec
|
||||
|
||||
var oldestPartition = self.partitionMngr.getOldestPartition().valueOr:
|
||||
return err("could not get oldest partition in removePartitionOlderThan: " & $error)
|
||||
|
@ -1249,6 +1280,7 @@ proc removeOldestPartition(
|
|||
self: PostgresDriver, forceRemoval: bool = false, ## To allow cleanup in tests
|
||||
): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
## Indirectly called from the retention policy
|
||||
debug "beginning of removeOldestPartition"
|
||||
|
||||
let oldestPartition = self.partitionMngr.getOldestPartition().valueOr:
|
||||
return err("could not remove oldest partition: " & $error)
|
||||
|
@ -1271,6 +1303,8 @@ proc containsAnyPartition*(self: PostgresDriver): bool =
|
|||
method decreaseDatabaseSize*(
|
||||
driver: PostgresDriver, targetSizeInBytes: int64, forceRemoval: bool = false
|
||||
): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
debug "beginning of decreaseDatabaseSize"
|
||||
|
||||
var dbSize = (await driver.getDatabaseSize()).valueOr:
|
||||
return err("decreaseDatabaseSize failed to get database size: " & $error)
|
||||
|
||||
|
@ -1337,6 +1371,8 @@ method existsTable*(
|
|||
proc getCurrentVersion*(
|
||||
s: PostgresDriver
|
||||
): Future[ArchiveDriverResult[int64]] {.async.} =
|
||||
debug "beginning of getCurrentVersion"
|
||||
|
||||
let existsVersionTable = (await s.existsTable("version")).valueOr:
|
||||
return err("error in getCurrentVersion-existsTable: " & $error)
|
||||
|
||||
|
@ -1353,6 +1389,8 @@ method deleteMessagesOlderThanTimestamp*(
|
|||
): Future[ArchiveDriverResult[void]] {.async.} =
|
||||
## First of all, let's remove the older partitions so that we can reduce
|
||||
## the database size.
|
||||
debug "beginning of deleteMessagesOlderThanTimestamp"
|
||||
|
||||
(await s.removePartitionsOlderThan(tsNanoSec)).isOkOr:
|
||||
return err("error while removing older partitions: " & $error)
|
||||
|
||||
|
|
|
@ -53,6 +53,8 @@ proc new*(T: type CapacityRetentionPolicy, capacity = DefaultCapacity): T =
|
|||
method execute*(
|
||||
p: CapacityRetentionPolicy, driver: ArchiveDriver
|
||||
): Future[RetentionPolicyResult[void]] {.async.} =
|
||||
debug "beginning executing message retention policy - capacity"
|
||||
|
||||
let numMessages = (await driver.getMessagesCount()).valueOr:
|
||||
return err("failed to get messages count: " & error)
|
||||
|
||||
|
@ -62,4 +64,6 @@ method execute*(
|
|||
(await driver.deleteOldestMessagesNotWithinLimit(limit = p.capacity + p.deleteWindow)).isOkOr:
|
||||
return err("deleting oldest messages failed: " & error)
|
||||
|
||||
debug "end executing message retention policy - capacity"
|
||||
|
||||
return ok()
|
||||
|
|
|
@ -18,7 +18,10 @@ proc new*(T: type SizeRetentionPolicy, size = DefaultRetentionSize): T =
|
|||
method execute*(
|
||||
p: SizeRetentionPolicy, driver: ArchiveDriver
|
||||
): Future[RetentionPolicyResult[void]] {.async.} =
|
||||
debug "beginning of executing message retention policy - size"
|
||||
|
||||
(await driver.decreaseDatabaseSize(p.sizeLimit)).isOkOr:
|
||||
return err("decreaseDatabaseSize failed: " & $error)
|
||||
|
||||
debug "end of executing message retention policy - size"
|
||||
return ok()
|
||||
|
|
|
@ -18,6 +18,7 @@ method execute*(
|
|||
p: TimeRetentionPolicy, driver: ArchiveDriver
|
||||
): Future[RetentionPolicyResult[void]] {.async.} =
|
||||
## Delete messages that exceed the retention time by 10% and more (batch delete for efficiency)
|
||||
debug "beginning of executing message retention policy - time"
|
||||
|
||||
let omtRes = await driver.getOldestMessageTimestamp()
|
||||
if omtRes.isErr():
|
||||
|
@ -34,4 +35,5 @@ method execute*(
|
|||
if res.isErr():
|
||||
return err("failed to delete oldest messages: " & res.error)
|
||||
|
||||
debug "end of executing message retention policy - time"
|
||||
return ok()
|
||||
|
|
Loading…
Reference in New Issue