feat: autosharding core algorithm (#1854)

- basic rendezvous hashing
- content topic parsing
- sharding config
- tests
This commit is contained in:
Simon-Pierre Vivier 2023-08-01 09:05:16 -04:00 committed by GitHub
parent 0b2cfae5a4
commit bbff1ac138
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 446 additions and 29 deletions

View File

@ -4,6 +4,7 @@ else:
{.push raises: [].}
import
std/options,
stew/[byteutils, results],
libp2p/crypto/crypto
import
@ -35,6 +36,8 @@ proc toV2ContentTopic*(v1Topic: waku_protocol.Topic): ContentTopic =
## <v1-topic-bytes-as-hex> should be prefixed with `0x`
var namespacedTopic = NsContentTopic()
namespacedTopic.generation = none(int)
namespacedTopic.bias = Unbiased
namespacedTopic.application = ContentTopicApplication
namespacedTopic.version = ContentTopicAppVersion
namespacedTopic.name = v1Topic.to0xHex()

View File

@ -3,7 +3,8 @@
import
testutils/unittests
import
../../waku/v2/waku_core,
../../waku/v2/waku_core/message,
../../waku/v2/waku_core/time,
../../waku/v2/utils/compat,
./testlib/common

View File

@ -1,6 +1,7 @@
{.used.}
import
std/options,
stew/results,
testutils/unittests
import
@ -11,6 +12,8 @@ suite "Waku Message - Content topics namespacing":
test "Stringify namespaced content topic":
## Given
var ns = NsContentTopic()
ns.generation = none(int)
ns.bias = Unbiased
ns.application = "toychat"
ns.version = "2"
ns.name = "huilong"
@ -31,10 +34,31 @@ suite "Waku Message - Content topics namespacing":
let nsRes = NsContentTopic.parse(topic)
## Then
check nsRes.isOk()
assert nsRes.isOk(), $nsRes.error
let ns = nsRes.get()
check:
ns.generation == none(int)
ns.bias == Unbiased
ns.application == "toychat"
ns.version == "2"
ns.name == "huilong"
ns.encoding == "proto"
test "Parse content topic string - Valid string with sharding":
## Given
let topic = "/0/lower20/toychat/2/huilong/proto"
## When
let nsRes = NsContentTopic.parse(topic)
## Then
assert nsRes.isOk(), $nsRes.error
let ns = nsRes.get()
check:
ns.generation == some(0)
ns.bias == Lower20
ns.application == "toychat"
ns.version == "2"
ns.name == "huilong"
@ -48,7 +72,8 @@ suite "Waku Message - Content topics namespacing":
let ns = NsContentTopic.parse(topic)
## Then
check ns.isErr()
assert ns.isErr(), $ns.get()
let err = ns.tryError()
check:
err.kind == ParsingErrorKind.InvalidFormat
@ -62,13 +87,13 @@ suite "Waku Message - Content topics namespacing":
let ns = NsContentTopic.parse(topic)
## Then
check ns.isErr()
assert ns.isErr(), $ns.get()
let err = ns.tryError()
check:
err.kind == ParsingErrorKind.InvalidFormat
err.cause == "invalid topic structure"
test "Parse content topic string - Invalid string: missing encoding part":
## Given
let topic = "/toychat/2/huilong"
@ -77,13 +102,14 @@ suite "Waku Message - Content topics namespacing":
let ns = NsContentTopic.parse(topic)
## Then
check ns.isErr()
assert ns.isErr(), $ns.get()
let err = ns.tryError()
check:
err.kind == ParsingErrorKind.InvalidFormat
err.cause == "invalid topic structure"
test "Parse content topic string - Invalid string: too many parts":
test "Parse content topic string - Invalid string: wrong extra parts":
## Given
let topic = "/toychat/2/huilong/proto/33"
@ -91,12 +117,42 @@ suite "Waku Message - Content topics namespacing":
let ns = NsContentTopic.parse(topic)
## Then
check ns.isErr()
assert ns.isErr(), $ns.get()
let err = ns.tryError()
check:
err.kind == ParsingErrorKind.InvalidFormat
err.cause == "invalid topic structure"
test "Parse content topic string - Invalid string: non numeric generation":
## Given
let topic = "/first/unbiased/toychat/2/huilong/proto"
## When
let ns = NsContentTopic.parse(topic)
## Then
assert ns.isErr(), $ns.get()
let err = ns.tryError()
check:
err.kind == ParsingErrorKind.InvalidFormat
err.cause == "generation should be a numeric value"
test "Parse content topic string - Invalid string: invalid bias":
## Given
let topic = "/0/no/toychat/2/huilong/proto"
## When
let ns = NsContentTopic.parse(topic)
## Then
assert ns.isErr(), $ns.get()
let err = ns.tryError()
check:
err.kind == ParsingErrorKind.InvalidFormat
err.cause == "bias should be one of; unbiased, lower20 or higher80"
suite "Waku Message - Pub-sub topics namespacing":
@ -178,7 +234,6 @@ suite "Waku Message - Pub-sub topics namespacing":
err.kind == ParsingErrorKind.MissingPart
err.part == "shard_cluster_index"
test "Parse static sharding pub-sub topic string - Invalid string: cluster value":
## Given
let topic = "/waku/2/rs/xx/77"

View File

@ -0,0 +1,178 @@
{.used.}
import
std/options,
std/strutils,
std/sugar,
std/algorithm,
std/random,
stew/results,
testutils/unittests
import
../../../waku/v2/waku_core/topics
suite "Waku Sharding":
  # Tests for the autosharding helpers: shard count lookup, bias weights,
  # weighted shard ordering and the final content-topic -> shard mapping.

  # Seeds the default RNG without a fixed seed, so the topics sampled by
  # `randomContentTopic` differ from one run to the next.
  randomize()

  # Length of the random application and topic names generated below.
  const WordLength = 5

  proc randomContentTopic(): NsContentTopic =
    # Builds an unbiased content topic without generation, using a random
    # application name and a random topic name of `WordLength` letters each,
    # a fixed version "1" and a fixed encoding "cbor".
    var app = ""

    for n in 0..<WordLength:
      let letter = sample(Letters)
      app.add(letter)

    let version = "1"

    var name = ""

    for n in 0..<WordLength:
      let letter = sample(Letters)
      name.add(letter)

    let enc = "cbor"

    NsContentTopic.init(none(int), Unbiased, app, version, name, enc)

  # A topic without the generation/bias prefix resolves to the
  # generation-zero shard count and parses as unbiased.
  test "Implicit content topic generation":
    ## Given
    let topic = "/toychat/2/huilong/proto"

    ## When
    let ns = NsContentTopic.parse(topic).expect("Parsing")

    let paramRes = shardCount(ns)

    ## Then
    assert paramRes.isOk(), paramRes.error

    let count = paramRes.get()
    check:
      count == GenerationZeroShardsCount
      ns.bias == Unbiased

  # An explicit generation 0 with a bias resolves to the same shard count
  # and keeps the parsed bias.
  test "Valid content topic":
    ## Given
    let topic = "/0/lower20/toychat/2/huilong/proto"

    ## When
    let ns = NsContentTopic.parse(topic).expect("Parsing")

    let paramRes = shardCount(ns)

    ## Then
    assert paramRes.isOk(), paramRes.error

    let count = paramRes.get()
    check:
      count == GenerationZeroShardsCount
      ns.bias == Lower20

  # Generation 1 parses fine but is rejected by `shardCount`.
  test "Invalid content topic generation":
    ## Given
    let topic = "/1/unbiased/toychat/2/huilong/proto"

    ## When
    let ns = NsContentTopic.parse(topic).expect("Parsing")

    let paramRes = shardCount(ns)

    ## Then
    assert paramRes.isErr(), $paramRes.get()

    let err = paramRes.error
    check:
      err == "Generation > 0 are not supported yet"

  # With 5 shards, Lower20 doubles only the first weight and Higher80
  # doubles the remaining four.
  test "Weigths bias":
    ## Given
    let count = 5

    ## When
    let anonWeigths = biasedWeights(count, ShardingBias.Lower20)
    let speedWeigths = biasedWeights(count, ShardingBias.Higher80)

    ## Then
    check:
      anonWeigths[0] == 2.0
      anonWeigths[1] == 1.0
      anonWeigths[2] == 1.0
      anonWeigths[3] == 1.0
      anonWeigths[4] == 1.0

      speedWeigths[0] == 1.0
      speedWeigths[1] == 2.0
      speedWeigths[2] == 2.0
      speedWeigths[3] == 2.0
      speedWeigths[4] == 2.0

  # The list returned by `weightedShardList` has one entry per shard and is
  # ordered according to `hashOrder`.
  test "Sorted shard list":
    ## Given
    let topic = "/0/unbiased/toychat/2/huilong/proto"

    ## When
    let contentTopic = NsContentTopic.parse(topic).expect("Parsing")
    let count = shardCount(contentTopic).expect("Valid parameters")
    let weights = biasedWeights(count, contentTopic.bias)

    let shardsRes = weightedShardList(contentTopic, count, weights)

    ## Then
    assert shardsRes.isOk(), shardsRes.error

    let shards = shardsRes.get()
    check:
      shards.len == count
      isSorted(shards, hashOrder)

  # Pins the mapping of a known content topic to static shard 3 of
  # `ClusterIndex`, guarding against accidental changes of the hashing.
  test "Shard Choice Reproducibility":
    ## Given
    let topic = "/toychat/2/huilong/proto"

    ## When
    let contentTopic = NsContentTopic.parse(topic).expect("Parsing")

    let res = singleHighestWeigthShard(contentTopic)

    ## Then
    assert res.isOk(), res.error

    let pubsubTopic = res.get()

    check:
      pubsubTopic == NsPubsubTopic.staticSharding(ClusterIndex, 3)

  # Maps 100000 random topics to shards and checks that the hit counts of
  # neighbouring shards stay within roughly 5% of each other.
  test "Shard Choice Simulation":
    ## Given
    let topics = collect:
      for i in 0..<100000:
        randomContentTopic()

    # One hit counter per generation-zero shard.
    var counts = newSeq[0](GenerationZeroShardsCount)

    ## When
    for topic in topics:
      let pubsub = singleHighestWeigthShard(topic).expect("Valid Topic")
      counts[pubsub.shard] += 1

    ## Then
    for i in 1..<GenerationZeroShardsCount:
      check:
        float64(counts[i - 1]) <= (float64(counts[i]) * 1.05)
        float64(counts[i]) <= (float64(counts[i - 1]) * 1.05)
        float64(counts[i - 1]) >= (float64(counts[i]) * 0.95)
        float64(counts[i]) >= (float64(counts[i - 1]) * 0.95)

    #echo counts

View File

@ -1,7 +1,9 @@
import
./topics/content_topic,
./topics/pubsub_topic
./topics/pubsub_topic,
./topics/sharding
export
content_topic,
pubsub_topic
pubsub_topic,
sharding

View File

@ -8,6 +8,7 @@ else:
{.push raises: [].}
import
std/options,
std/strutils,
stew/results
import
@ -25,63 +26,115 @@ const DefaultContentTopic* = ContentTopic("/waku/2/default-content/proto")
## Namespaced content topic
type ShardingBias* = enum
  ## Shard selection bias of a content topic. The string value of each
  ## member is its textual form in a content topic string.
  Unbiased = "unbiased"
  Lower20 = "lower20"
  Higher80 = "higher80"

type
  NsContentTopic* = object
    ## Content topic split into its namespaced parts.
    generation*: Option[int] ## autosharding generation; `none` when the topic has no prefix
    bias*: ShardingBias ## autosharding bias
    application*: string
    version*: string
    name*: string
    encoding*: string
proc init*(T: type NsContentTopic, application, version, name, encoding: string): T =
proc init*(T: type NsContentTopic, generation: Option[int], bias: ShardingBias,
application: string, version: string, name: string, encoding: string): T =
NsContentTopic(
generation: generation,
bias: bias,
application: application,
version: version,
name: name,
encoding: encoding
)
# Serialization
proc `$`*(topic: NsContentTopic): string =
## Returns a string representation of a namespaced topic
## in the format `/<application>/<version>/<topic-name>/<encoding>`
"/" & topic.application & "/" & topic.version & "/" & topic.name & "/" & topic.encoding
## Autosharding adds 2 optional prefixes: `/<gen#>/<bias>`
var formatted = ""
if topic.generation.isSome():
formatted = formatted & "/" & $topic.generation.get()
if topic.bias != ShardingBias.Unbiased:
formatted = formatted & "/" & $topic.bias
formatted & "/" & topic.application & "/" & topic.version & "/" & topic.name & "/" & topic.encoding
# Deserialization
proc parse*(T: type NsContentTopic, topic: ContentTopic|string): ParsingResult[NsContentTopic] =
## Splits a namespaced topic string into its constituent parts.
## The topic string has to be in the format `/<application>/<version>/<topic-name>/<encoding>`
## Autosharding adds 2 optional prefixes: `/<gen#>/<bias>`
if not topic.startsWith("/"):
return err(ParsingError.invalidFormat("topic must start with slash"))
let parts = topic[1..<topic.len].split("/")
if parts.len != 4:
return err(ParsingError.invalidFormat("invalid topic structure"))
case parts.len:
of 4:
let app = parts[0]
if app.len == 0:
return err(ParsingError.missingPart("appplication"))
let app = parts[0]
if app.len == 0:
return err(ParsingError.missingPart("appplication"))
let ver = parts[1]
if ver.len == 0:
return err(ParsingError.missingPart("version"))
let ver = parts[1]
if ver.len == 0:
return err(ParsingError.missingPart("version"))
let name = parts[2]
if name.len == 0:
return err(ParsingError.missingPart("topic-name"))
let name = parts[2]
if name.len == 0:
return err(ParsingError.missingPart("topic-name"))
let enc = parts[3]
if enc.len == 0:
return err(ParsingError.missingPart("encoding"))
let enc = parts[3]
if enc.len == 0:
return err(ParsingError.missingPart("encoding"))
return ok(NsContentTopic.init(none(int), Unbiased, app, ver, name, enc))
of 6:
if parts[0].len == 0:
return err(ParsingError.missingPart("generation"))
let gen = try:
parseInt(parts[0])
except ValueError:
return err(ParsingError.invalidFormat("generation should be a numeric value"))
ok(NsContentTopic.init(app, ver, name, enc))
if parts[1].len == 0:
return err(ParsingError.missingPart("sharding-bias"))
let bias = try:
parseEnum[ShardingBias](parts[1])
except ValueError:
return err(ParsingError.invalidFormat("bias should be one of; unbiased, lower20 or higher80"))
let app = parts[2]
if app.len == 0:
return err(ParsingError.missingPart("appplication"))
let ver = parts[3]
if ver.len == 0:
return err(ParsingError.missingPart("version"))
let name = parts[4]
if name.len == 0:
return err(ParsingError.missingPart("topic-name"))
let enc = parts[5]
if enc.len == 0:
return err(ParsingError.missingPart("encoding"))
return ok(NsContentTopic.init(some(gen), bias, app, ver, name, enc))
else:
return err(ParsingError.invalidFormat("invalid topic structure"))
# Content topic compatibility

View File

@ -116,3 +116,23 @@ proc parse*(T: type NsPubsubTopic, topic: PubsubTopic|string): ParsingResult[NsP
converter toPubsubTopic*(topic: NsPubsubTopic): PubsubTopic =
  ## Implicit conversion of a namespaced pub-sub topic to its
  ## stringified form, as produced by `$`.
  result = $topic
proc `==`*[T: NsPubsubTopic](x, y: T): bool =
  ## Structural equality of two namespaced pub-sub topics.
  ##
  ## Topics of different kinds are never equal. Static sharding topics
  ## are equal when both cluster and shard match; named sharding topics
  ## are equal when their names match.

  # Compare the kinds first so that only the fields belonging to the
  # shared variant are ever read.
  if x.kind != y.kind:
    return false

  case y.kind
  of NsPubsubTopicKind.StaticSharding:
    x.cluster == y.cluster and x.shard == y.shard
  of NsPubsubTopicKind.NamedSharding:
    x.name == y.name

View File

@ -0,0 +1,105 @@
## Waku autosharding utils
##
## See 51/WAKU2-RELAY-SHARDING RFC: https://rfc.vac.dev/spec/51/#automatic-sharding
when (NimMajor, NimMinor) < (1, 4):
{.push raises: [Defect].}
else:
{.push raises: [].}
import
nimcrypto,
std/options,
std/math,
std/sequtils,
std/algorithm,
stew/endians2,
stew/results,
stew/byteutils
import
./content_topic,
./pubsub_topic
## For indices allocation and other magic numbers refer to RFC 51
## Cluster index passed to `NsPubsubTopic.staticSharding` for every shard
## built by `weightedShardList`.
const ClusterIndex* = 49152
## Shard count reported by `shardCount` for generation zero, whether the
## generation is explicit or implicit.
const GenerationZeroShardsCount* = 5

## Pub-sub topics paired with a `float64` value; this is the result type
## of `weightedShardList`.
type ShardsPriority = seq[tuple[topic: NsPubsubTopic, value: float64]]
proc shardCount*(topic: NsContentTopic): Result[int, string] =
  ## Returns the total number of shards available for the generation of
  ## the given content topic.
  ##
  ## Only generation zero is handled; any other explicit generation
  ## yields an error.

  # A content topic without an explicit generation is implicitly
  # generation zero.
  let generation = topic.generation.get(0)

  if generation != 0:
    return err("Generation > 0 are not supported yet")

  ok(GenerationZeroShardsCount)
proc biasedWeights*(shardCount: int, bias: ShardingBias): seq[float64] =
  ## Returns one weight per shard: 1.0 by default, 2.0 for the shards
  ## favoured by `bias`.
  ##
  ## * `Unbiased` leaves every weight at 1.0.
  ## * `Lower20` doubles the first `shardCount div 5` weights.
  ## * `Higher80` doubles every weight from index `shardCount div 5` on.
  result = repeat(1.0, shardCount)

  # Index separating the lower fifth of the shards from the rest.
  let boundary = shardCount div 5

  case bias
  of Unbiased:
    discard
  of Lower20:
    for idx in 0 ..< boundary:
      result[idx] *= 2.0
  of Higher80:
    for idx in boundary ..< shardCount:
      result[idx] *= 2.0
proc applyWeight(hashValue: uint64, weight: float64): float64 =
  ## Combines a hash value with a weight into a single priority value:
  ## `-weight / ln(hashValue / high(uint64))`.

  # Scale the hash by the largest `uint64` value before taking the log.
  let ratio = float64(hashValue) / float64(high(uint64))
  let negated = -weight
  negated / math.ln(ratio)
proc hashOrder*(x, y: (NsPubsubTopic, float64)): int =
  ## Comparison callback ordering (topic, value) pairs by their value
  ## only; the topic itself takes no part in the ordering.
  let (_, left) = x
  let (_, right) = y
  cmp(left, right)
proc weightedShardList*(topic: NsContentTopic, shardCount: int, weightList: seq[float64]): Result[ShardsPriority, string] =
  ## Returns the list of shards and their priority values, sorted in
  ## ascending order of value (see `hashOrder`).
  ##
  ## For every shard index in `0 ..< shardCount` the value is derived
  ## from the SHA-256 digest of the topic application, the topic version,
  ## the big-endian cluster index and the big-endian shard index, then
  ## combined with the weight found at the same index of `weightList`.
  ##
  ## Fails when `weightList` holds fewer than `shardCount` weights.
  ## Weights beyond `shardCount` are ignored.
  if weightList.len < shardCount:
    return err("Must provide weights for every shards")

  # Everything but the shard index is the same for all shards, so the
  # common part of the hash input is built only once.
  let clusterBytes = toBytesBE(uint16(ClusterIndex))
  let prefix = toBytes(topic.application) & toBytes(topic.version) & @clusterBytes

  var list = newSeq[(NsPubsubTopic, float64)](shardCount)

  # Iterate over exactly `shardCount` indices. An inclusive `0..shardCount`
  # range would produce one index too many and write past the end of
  # `list` whenever more than `shardCount` weights are supplied.
  for shard in 0 ..< shardCount:
    let pubsub = NsPubsubTopic.staticSharding(ClusterIndex, uint16(shard))

    let shardBytes = toBytesBE(uint16(shard))
    let bytes = prefix & @shardBytes

    let hash = sha256.digest(bytes)
    # Only the leading 8 bytes of the digest are needed for a uint64.
    let hashValue = uint64.fromBytesBE(hash.data)
    let value = applyWeight(hashValue, weightList[shard])

    list[shard] = (pubsub, value)

  list.sort(hashOrder)

  ok(list)
proc singleHighestWeigthShard*(topic: NsContentTopic): Result[NsPubsubTopic, string] =
  ## Returns the pub-sub topic of the shard with the highest priority
  ## value for the given content topic.
  ##
  ## Errors from `shardCount` and `weightedShardList` are propagated.
  let total = ? shardCount(topic)
  let ranked = ? weightedShardList(topic, total, biasedWeights(total, topic.bias))

  # The list is sorted in ascending order of value, so the best shard
  # is the last entry.
  ok(ranked[^1][0])