2023-08-01 09:05:16 -04:00
|
|
|
|
## Waku autosharding utils
|
|
|
|
|
##
|
|
|
|
|
## See 51/WAKU2-RELAY-SHARDING RFC: https://rfc.vac.dev/spec/51/#automatic-sharding
|
|
|
|
|
|
2024-06-28 16:04:57 +05:30
|
|
|
|
{.push raises: [].}
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
2024-07-09 13:14:28 +02:00
|
|
|
|
import nimcrypto, std/options, std/tables, stew/endians2, results, stew/byteutils
|
2024-03-16 00:08:47 +01:00
|
|
|
|
|
|
|
|
|
import ./content_topic, ./pubsub_topic
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
2024-03-13 10:58:13 +01:00
|
|
|
|
type
  Sharding* = object
    ## Parameters used by the autosharding procs of this module.
    clusterId*: uint16
      ## Cluster id handed to `RelayShard.staticSharding` when a shard is built.
    # TODO: generations could be stored in a table here
    shardCountGenZero*: uint32
      ## Shard count used when resolving generation-zero content topics.
|
2023-08-17 08:11:18 -04:00
|
|
|
|
|
2024-06-05 15:32:35 +02:00
|
|
|
|
proc new*(T: type Sharding, clusterId: uint16, shardCount: uint32): T =
  ## Builds a `Sharding` value for `clusterId`, storing `shardCount` as the
  ## generation-zero shard count.
  Sharding(clusterId: clusterId, shardCountGenZero: shardCount)
|
|
|
|
|
|
2024-08-19 11:29:35 +02:00
|
|
|
|
proc getGenZeroShard*(s: Sharding, topic: NsContentTopic, count: int): RelayShard =
  ## Maps `topic` onto one of `count` generation-zero shards of the cluster
  ## `s.clusterId`.
  ##
  ## The shard index is derived from the SHA-256 digest of the topic's
  ## application and version fields, reduced modulo `count`.
  ## `count` is expected to be positive.
  let bytes = toBytes(topic.application) & toBytes(topic.version)

  let hash = sha256.digest(bytes)

  # We only use the last 64 bits of the hash as having more shards is unlikely.
  let hashValue = uint64.fromBytesBE(hash.data[24 .. 31])

  let shard =
    if count > 0 and (count and (count - 1)) != 0:
      # Masking with `count - 1` only equals `mod count` when `count` is a
      # power of two; for any other count the mask would leave some shards
      # unreachable, so fall back to a real modulo.
      hashValue mod uint64(count)
    else:
      # Power-of-two count: this is equivalent to modulo shard count but
      # faster. Non-positive counts also take this path, unchanged from the
      # previous behaviour.
      hashValue and uint64(count - 1)

  RelayShard.staticSharding(s.clusterId, uint16(shard))
|
2024-03-13 10:58:13 +01:00
|
|
|
|
|
2024-08-19 11:29:35 +02:00
|
|
|
|
proc getShard*(s: Sharding, topic: NsContentTopic): Result[RelayShard, string] =
  ## Resolves the relay shard (pubsub topic) that `topic` maps to.
  ##
  ## A content topic without an explicit generation is handled as
  ## generation 0. Any other generation value yields an error.

  let isGenZero = topic.generation.isNone() or topic.generation.get() == 0
  if not isGenZero:
    return err("Generation > 0 are not supported yet")

  ok(s.getGenZeroShard(topic, int(s.shardCountGenZero)))
|
2023-08-17 08:11:18 -04:00
|
|
|
|
|
2024-03-13 10:58:13 +01:00
|
|
|
|
proc getShard*(s: Sharding, topic: ContentTopic): Result[PubsubTopic, string] =
  ## Parses the raw content topic, resolves its shard and returns that shard
  ## in its string (pubsub topic) form. Parse and sharding failures are
  ## returned as error strings.
  let parsed = NsContentTopic.parse(topic)
  if parsed.isErr():
    return err($parsed.error)

  let shardRes = s.getShard(parsed.get())
  if shardRes.isErr():
    return err(shardRes.error)

  ok($shardRes.get())
|
|
|
|
|
|
2024-03-16 00:08:47 +01:00
|
|
|
|
proc parseSharding*(
    s: Sharding,
    pubsubTopic: Option[PubsubTopic],
    contentTopics: ContentTopic | seq[ContentTopic],
): Result[Table[RelayShard, seq[NsContentTopic]], string] =
  ## Groups the given content topic(s) by relay shard.
  ##
  ## When `pubsubTopic` is set, every content topic is filed under the shard
  ## parsed from it; otherwise each content topic's shard is computed with
  ## `s.getShard`. Processing stops at the first content topic, pubsub topic
  ## or autosharding failure, which is returned as an error string.

  # Normalise the single-topic and multi-topic call forms to one sequence.
  let topics =
    when contentTopics is seq[ContentTopic]:
      contentTopics
    else:
      @[contentTopics]

  var
    topicMap = initTable[RelayShard, seq[NsContentTopic]]()
    # The explicit pubsub topic is the same for every content topic, so it is
    # parsed at most once. It is parsed lazily (on first use inside the loop)
    # so that an empty `topics` never touches it and a bad content topic is
    # still reported before a bad pubsub topic.
    explicitShard = none(RelayShard)

  for contentTopic in topics:
    let content = NsContentTopic.parse(contentTopic).valueOr:
      return err("Cannot parse content topic: " & $error)

    let shard =
      if pubsubTopic.isSome():
        if explicitShard.isNone():
          let parsed = RelayShard.parse(pubsubTopic.get()).valueOr:
            return err("Cannot parse pubsub topic: " & $error)
          explicitShard = some(parsed)
        explicitShard.get()
      else:
        let autoShard = s.getShard(content).valueOr:
          return err("Cannot autoshard content topic: " & $error)
        autoShard

    # Single lookup that creates the bucket on demand; unlike `[]` it cannot
    # raise `KeyError`, so no try/except is needed.
    topicMap.mgetOrPut(shard, newSeq[NsContentTopic]()).add(content)

  ok(topicMap)
|
|
|
|
|
|
2024-08-19 11:29:35 +02:00
|
|
|
|
#type ShardsPriority = seq[tuple[topic: RelayShard, value: float64]]
|
2023-08-17 08:11:18 -04:00
|
|
|
|
|
|
|
|
|
#[ proc shardCount*(topic: NsContentTopic): Result[int, string] =
|
|
|
|
|
## Returns the total shard count from the content topic.
|
2023-08-01 09:05:16 -04:00
|
|
|
|
let shardCount =
|
|
|
|
|
if topic.generation.isNone():
|
|
|
|
|
## Implicit generation # is 0 for all content topic
|
|
|
|
|
GenerationZeroShardsCount
|
|
|
|
|
else:
|
|
|
|
|
case topic.generation.get():
|
|
|
|
|
of 0:
|
|
|
|
|
GenerationZeroShardsCount
|
|
|
|
|
else:
|
|
|
|
|
return err("Generation > 0 are not supported yet")
|
|
|
|
|
|
2023-08-17 08:11:18 -04:00
|
|
|
|
ok((shardCount)) ]#
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
2023-08-17 08:11:18 -04:00
|
|
|
|
#[ proc applyWeight(hashValue: uint64, weight: float64): float64 =
|
|
|
|
|
(-weight) / math.ln(float64(hashValue) / float64(high(uint64))) ]#
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
2024-08-19 11:29:35 +02:00
|
|
|
|
#[ proc hashOrder*(x, y: (RelayShard, float64)): int =
|
2023-08-17 08:11:18 -04:00
|
|
|
|
cmp(x[1], y[1]) ]#
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
2023-08-17 08:11:18 -04:00
|
|
|
|
#[ proc weightedShardList*(topic: NsContentTopic, shardCount: int, weightList: seq[float64]): Result[ShardsPriority, string] =
|
2023-08-01 09:05:16 -04:00
|
|
|
|
## Returns the ordered list of shards and their priority values.
|
|
|
|
|
if weightList.len < shardCount:
|
|
|
|
|
return err("Must provide weights for every shards")
|
|
|
|
|
|
|
|
|
|
let shardsNWeights = zip(toSeq(0..shardCount), weightList)
|
|
|
|
|
|
2024-08-19 11:29:35 +02:00
|
|
|
|
var list = newSeq[(RelayShard, float64)](shardCount)
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
|
|
|
|
for (shard, weight) in shardsNWeights:
|
2024-08-19 11:29:35 +02:00
|
|
|
|
let pubsub = RelayShard.staticSharding(ClusterId, uint16(shard))
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
2023-10-05 08:37:05 -04:00
|
|
|
|
let clusterBytes = toBytesBE(uint16(ClusterId))
|
2023-08-01 09:05:16 -04:00
|
|
|
|
let shardBytes = toBytesBE(uint16(shard))
|
|
|
|
|
let bytes = toBytes(topic.application) & toBytes(topic.version) & @clusterBytes & @shardBytes
|
|
|
|
|
let hash = sha256.digest(bytes)
|
|
|
|
|
let hashValue = uint64.fromBytesBE(hash.data)
|
|
|
|
|
let value = applyWeight(hashValue, weight)
|
|
|
|
|
|
|
|
|
|
list[shard] = (pubsub, value)
|
|
|
|
|
|
|
|
|
|
list.sort(hashOrder)
|
|
|
|
|
|
2023-08-17 08:11:18 -04:00
|
|
|
|
ok(list) ]#
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
2024-08-19 11:29:35 +02:00
|
|
|
|
#[ proc singleHighestWeigthShard*(topic: NsContentTopic): Result[RelayShard, string] =
|
2023-08-01 09:05:16 -04:00
|
|
|
|
let count = ? shardCount(topic)
|
|
|
|
|
|
2023-08-17 08:11:18 -04:00
|
|
|
|
let weights = repeat(1.0, count)
|
2023-08-01 09:05:16 -04:00
|
|
|
|
|
|
|
|
|
let list = ? weightedShardList(topic, count, weights)
|
|
|
|
|
|
|
|
|
|
let (pubsub, _) = list[list.len - 1]
|
|
|
|
|
|
2024-03-13 10:58:13 +01:00
|
|
|
|
ok(pubsub) ]#
|