chore: per limit split of PostgreSQL queries (#3008)

This commit is contained in:
Simon-Pierre Vivier 2024-09-04 10:17:28 -04:00 committed by GitHub
parent 8baf627feb
commit e1e05afb02
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 83 additions and 50 deletions

View File

@ -1,7 +1,7 @@
{.push raises: [].} {.push raises: [].}
import import
std/[nre, options, sequtils, strutils, strformat, times], std/[nre, options, sequtils, strutils, strformat, times, sugar],
stew/[byteutils, arrayops], stew/[byteutils, arrayops],
results, results,
chronos, chronos,
@ -128,7 +128,9 @@ const SelectCursorByHashDef =
"""SELECT timestamp FROM messages """SELECT timestamp FROM messages
WHERE messageHash = $1""" WHERE messageHash = $1"""
# Default cap on the number of simultaneous connections kept in the
# Postgres connection pool.
const DefaultMaxNumConns = 50

# Upper bound on how many message hashes a single SQL query may filter on;
# larger requests must be split into multiple queries.
const MaxHashesPerQuery = 100
proc new*( proc new*(
T: type PostgresDriver, T: type PostgresDriver,
@ -815,6 +817,70 @@ proc getMessagesByMessageHashes(
debug "end of getMessagesByMessageHashes" debug "end of getMessagesByMessageHashes"
return ok(rows) return ok(rows)
proc getMessagesWithinLimits(
    self: PostgresDriver,
    includeData: bool,
    contentTopics: seq[ContentTopic],
    pubsubTopic: Option[PubsubTopic],
    cursor: Option[ArchiveCursor],
    startTime: Option[Timestamp],
    endTime: Option[Timestamp],
    hashes: seq[WakuMessageHash],
    maxPageSize: uint,
    ascendingOrder: bool,
    requestId: string,
): Future[ArchiveDriverResult[seq[ArchiveRow]]] {.async.} =
  ## Runs one archive query whose hash filter is guaranteed to stay within
  ## `MaxHashesPerQuery`, dispatching to the most suitable SQL path:
  ## hash-only lookup, prepared statement, or dynamically built query.
  if hashes.len > MaxHashesPerQuery:
    return err(fmt"can not attend queries with more than {MaxHashesPerQuery} hashes")

  let encodedHashes = hashes.mapIt(toHex(it))

  # Pure hash lookup: no other filter is set, so the dedicated
  # by-hash query can be used directly.
  let hashesOnly =
    cursor.isNone() and pubsubTopic.isNone() and contentTopics.len == 0 and
    startTime.isNone() and endTime.isNone() and encodedHashes.len > 0

  if hashesOnly:
    return await self.getMessagesByMessageHashes(
      "'" & encodedHashes.join("','") & "'", maxPageSize, requestId
    )

  # Every filter present: considered the most common query, therefore it is
  # served through prepared statements to optimize it.
  let fullySpecified =
    contentTopics.len > 0 and encodedHashes.len > 0 and pubsubTopic.isSome() and
    startTime.isSome() and endTime.isSome()

  if fullySpecified:
    if includeData:
      return await self.getMessagesPreparedStmt(
        contentTopics.join(","),
        PubsubTopic(pubsubTopic.get()),
        cursor,
        startTime.get(),
        endTime.get(),
        encodedHashes.join(","),
        maxPageSize,
        ascendingOrder,
        requestId,
      )

    return await self.getMessageHashesPreparedStmt(
      contentTopics.join(","),
      PubsubTopic(pubsubTopic.get()),
      cursor,
      startTime.get(),
      endTime.get(),
      encodedHashes.join(","),
      maxPageSize,
      ascendingOrder,
      requestId,
    )

  # Atypical filter combination: fall back to a dynamically assembled query
  # (no prepared statements).
  if includeData:
    return await self.getMessagesArbitraryQuery(
      contentTopics, pubsubTopic, cursor, startTime, endTime, encodedHashes,
      maxPageSize, ascendingOrder, requestId,
    )

  return await self.getMessageHashesArbitraryQuery(
    contentTopics, pubsubTopic, cursor, startTime, endTime, encodedHashes,
    maxPageSize, ascendingOrder, requestId,
  )
method getMessages*( method getMessages*(
s: PostgresDriver, s: PostgresDriver,
includeData = true, includeData = true,
@ -830,57 +896,24 @@ method getMessages*(
): Future[ArchiveDriverResult[seq[ArchiveRow]]] {.async.} = ): Future[ArchiveDriverResult[seq[ArchiveRow]]] {.async.} =
debug "beginning of getMessages" debug "beginning of getMessages"
const MAX_ALLOWED_HASHES = 100 let rows = collect(newSeq):
if hashes.len > MAX_ALLOWED_HASHES: for i in countup(0, hashes.len, MaxHashesPerQuery):
return err(fmt"can not attend queries with more than {MAX_ALLOWED_HASHES} hashes") let stop = min(i + MaxHashesPerQuery, hashes.len)
let hexHashes = hashes.mapIt(toHex(it)) let splittedHashes = hashes[i ..< stop]
if cursor.isNone() and pubsubTopic.isNone() and contentTopics.len == 0 and let subRows =
startTime.isNone() and endTime.isNone() and hexHashes.len > 0: ?await s.getMessagesWithinLimits(
return await s.getMessagesByMessageHashes( includeData, contentTopics, pubsubTopic, cursor, startTime, endTime,
"'" & hexHashes.join("','") & "'", maxPageSize, requestId splittedHashes, maxPageSize, ascendingOrder, requestId,
) )
if contentTopics.len > 0 and hexHashes.len > 0 and pubsubTopic.isSome() and for row in subRows:
startTime.isSome() and endTime.isSome(): row
## Considered the most common query. Therefore, we use prepared statements to optimize it.
if includeData: debug "end of getMessages"
return await s.getMessagesPreparedStmt(
contentTopics.join(","), return ok(rows)
PubsubTopic(pubsubTopic.get()),
cursor,
startTime.get(),
endTime.get(),
hexHashes.join(","),
maxPageSize,
ascendingOrder,
requestId,
)
else:
return await s.getMessageHashesPreparedStmt(
contentTopics.join(","),
PubsubTopic(pubsubTopic.get()),
cursor,
startTime.get(),
endTime.get(),
hexHashes.join(","),
maxPageSize,
ascendingOrder,
requestId,
)
else:
if includeData:
## We will run atypical query. In this case we don't use prepared statemets
return await s.getMessagesArbitraryQuery(
contentTopics, pubsubTopic, cursor, startTime, endTime, hexHashes, maxPageSize,
ascendingOrder, requestId,
)
else:
return await s.getMessageHashesArbitraryQuery(
contentTopics, pubsubTopic, cursor, startTime, endTime, hexHashes, maxPageSize,
ascendingOrder, requestId,
)
proc getStr( proc getStr(
s: PostgresDriver, query: string s: PostgresDriver, query: string