add repeatable seeder sets

This commit is contained in:
gmega 2024-12-09 16:27:14 -03:00
parent 688ea63d55
commit 3d202d281e
No known key found for this signature in database
GPG Key ID: 6290D34EAD824B18
4 changed files with 71 additions and 18 deletions

View File

@ -49,12 +49,17 @@ DelugeDisseminationExperiment = IteratedExperiment[BoundExperiment[StaticDissemi
class DelugeExperimentConfig(ExperimentBuilder[DelugeDisseminationExperiment]):
repetitions: int = Field(gt=0)
file_size: int = Field(gt=0)
seeders: int = Field(gt=0)
shared_volume_path: Path
tracker_announce_url: HttpUrl
nodes: List[DelugeNodeConfig] | DelugeNodeSetConfig
seeder_sets: int = Field(gt=0, default=1, description='Number of distinct seeder sets to experiment with')
seeders: int = Field(gt=0, description='Number of seeders per seeder set')
repetitions: int = Field(gt=0, description='How many experiment repetitions to run for each seeder set')
file_size: int = Field(gt=0, description='File size, in bytes')
shared_volume_path: Path = Field(description='Path to the volume shared between clients and experiment runner')
tracker_announce_url: HttpUrl = Field(description='URL to the tracker announce endpoint')
nodes: List[DelugeNodeConfig] | DelugeNodeSetConfig = Field(
description='Configuration for the nodes that make up the network')
def build(self) -> DelugeDisseminationExperiment:
nodes_specs = self.nodes.nodes if isinstance(self.nodes, DelugeNodeSetConfig) else self.nodes
@ -76,15 +81,16 @@ class DelugeExperimentConfig(ExperimentBuilder[DelugeDisseminationExperiment]):
polling_interval=0.5,
)
repetitions = (
env.bind(StaticDisseminationExperiment(
network=network,
seeders=list(islice(sample(len(network)), self.seeders)),
data=RandomTempData(size=self.file_size,
meta=DelugeMeta(f'dataset-{experiment_run}',
announce_url=tracker.announce_url))
))
for experiment_run in range(self.repetitions)
)
def repetitions():
for seeder_set in range(self.seeder_sets):
seeders = list(islice(sample(len(network)), self.seeders))
for experiment_run in range(self.repetitions):
yield env.bind(StaticDisseminationExperiment(
network=network,
seeders=seeders,
data=RandomTempData(size=self.file_size,
meta=DelugeMeta(f'dataset-{seeder_set}',
announce_url=tracker.announce_url))
))
return IteratedExperiment(repetitions)
return IteratedExperiment(repetitions())

View File

@ -4,6 +4,7 @@ from unittest.mock import patch
import yaml
from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
from benchmarks.deluge.config import DelugeNodeSetConfig, DelugeNodeConfig, DelugeExperimentConfig
from benchmarks.deluge.deluge_node import DelugeNode
@ -44,6 +45,7 @@ def test_should_expand_node_sets_into_simple_nodes():
),
]
def test_should_respect_first_node_index():
nodeset = DelugeNodeSetConfig(
name='deluge-{node_index}',
@ -69,6 +71,7 @@ def test_should_respect_first_node_index():
),
]
def test_should_build_experiment_from_config():
config_file = StringIO("""
deluge_experiment:
@ -99,3 +102,40 @@ def test_should_build_experiment_from_config():
assert cast(DelugeNode, repetitions[0].experiment.nodes[5]).daemon_args['port'] == 6890
def test_should_create_n_repetitions_per_seeder_set():
    """Check that the built experiment runs ``repetitions`` experiments per seeder set.

    With ``seeder_sets: 2`` and ``repetitions: 3`` the builder must produce six
    experiments in total. Experiments are expected in seeder-set order: the first
    three share one seeder list, the last three share another.
    """
    seeder_sets = 2
    repetitions_per_set = 3

    config_file = StringIO("""
    deluge_experiment:
      seeder_sets: 2
      repetitions: 3
      seeders: 3
      tracker_announce_url: http://localhost:2020/announce
      file_size: 1024
      shared_volume_path: /var/lib/deluge
      nodes:
        network_size: 100
        name: 'deluge-{node_index}'
        address: 'node-{node_index}.deluge.codexbenchmarks.svc.cluster.local'
        daemon_port: 6890
        listen_ports: [ 6891, 6892 ]
    """)

    config = DelugeExperimentConfig.model_validate(yaml.safe_load(config_file)['deluge_experiment'])

    # Need to patch mkdir, or we'll try to actually create the folder when DelugeNode gets initialized.
    with patch('pathlib.Path.mkdir'):
        experiment = config.build()

    repetitions = list(experiment.experiments)
    assert len(repetitions) == seeder_sets * repetitions_per_set

    seeders_per_run = [
        cast(StaticDisseminationExperiment, repetition.experiment).seeders
        for repetition in repetitions
    ]
    first_set = seeders_per_run[:repetitions_per_set]
    second_set = seeders_per_run[repetitions_per_set:]

    # Every repetition within a seeder set must reuse exactly the same seeders;
    # check all of them rather than only the first and last run of the first set.
    assert all(seeders == first_set[0] for seeders in first_set)
    assert all(seeders == second_set[0] for seeders in second_set)

    # FIXME: this assertion can fail, with very low probability, if the two seeder
    #   sets happen to be drawn identically (3 seeders out of 100 nodes, drawn twice).
    #   Using a deterministic sampler in the test would remove the flakiness.
    assert first_set[0] != second_set[0]

View File

@ -1,4 +1,5 @@
deluge_experiment:
seeder_sets: ${SEEDER_SETS}
seeders: ${SEEDERS}
tracker_announce_url: ${TRACKER_ANNOUNCE_URL}
file_size: ${FILE_SIZE}

View File

@ -5,11 +5,13 @@ metadata:
namespace: codex-benchmarks
labels:
app: testrunner
spec:
template:
metadata:
labels:
app: testrunner
spec:
containers:
- name: testrunner
@ -18,11 +20,13 @@ spec:
args: [ "run", "deluge_experiment" ]
env:
- name: NETWORK_SIZE
value: "100"
value: "10"
- name: SEEDERS
value: "4"
- name: REPETITIONS
value: "10"
- name: SEEDER_SETS
value: "2"
- name: FILE_SIZE
value: "104857600"
- name: TRACKER_ANNOUNCE_URL
@ -47,9 +51,11 @@ spec:
limits:
cpu: "1"
memory: "2Gi"
volumes:
- name: benchmark-volume
persistentVolumeClaim:
claimName: deluge-pvc
restartPolicy: Never
backoffLimit: 0