diff --git a/benchmarks/deluge/config.py b/benchmarks/deluge/config.py
index 6b1cca3..aeb8566 100644
--- a/benchmarks/deluge/config.py
+++ b/benchmarks/deluge/config.py
@@ -49,12 +49,17 @@ DelugeDisseminationExperiment = IteratedExperiment[BoundExperiment[StaticDissemi
 
 
 class DelugeExperimentConfig(ExperimentBuilder[DelugeDisseminationExperiment]):
-    repetitions: int = Field(gt=0)
-    file_size: int = Field(gt=0)
-    seeders: int = Field(gt=0)
-    shared_volume_path: Path
-    tracker_announce_url: HttpUrl
-    nodes: List[DelugeNodeConfig] | DelugeNodeSetConfig
+    seeder_sets: int = Field(gt=0, default=1, description='Number of distinct seeder sets to experiment with')
+    seeders: int = Field(gt=0, description='Number of seeders per seeder set')
+
+    repetitions: int = Field(gt=0, description='How many experiment repetitions to run for each seeder set')
+    file_size: int = Field(gt=0, description='File size, in bytes')
+
+    shared_volume_path: Path = Field(description='Path to the volume shared between clients and experiment runner')
+    tracker_announce_url: HttpUrl = Field(description='URL to the tracker announce endpoint')
+
+    nodes: List[DelugeNodeConfig] | DelugeNodeSetConfig = Field(
+        description='Configuration for the nodes that make up the network')
 
     def build(self) -> DelugeDisseminationExperiment:
         nodes_specs = self.nodes.nodes if isinstance(self.nodes, DelugeNodeSetConfig) else self.nodes
@@ -76,15 +81,18 @@ class DelugeExperimentConfig(ExperimentBuilder[DelugeDisseminationExperiment]):
             polling_interval=0.5,
         )
 
-        repetitions = (
-            env.bind(StaticDisseminationExperiment(
-                network=network,
-                seeders=list(islice(sample(len(network)), self.seeders)),
-                data=RandomTempData(size=self.file_size,
-                                    meta=DelugeMeta(f'dataset-{experiment_run}',
-                                                    announce_url=tracker.announce_url))
-            ))
-            for experiment_run in range(self.repetitions)
-        )
+        def repetitions():
+            # Seeders are sampled once per seeder set and reused for all of that set's repetitions.
+            for seeder_set in range(self.seeder_sets):
+                seeders = list(islice(sample(len(network)), self.seeders))
+                for experiment_run in range(self.repetitions):
+                    # The run index is part of the dataset name so that each repetition gets its own name.
+                    yield env.bind(StaticDisseminationExperiment(
+                        network=network,
+                        seeders=seeders,
+                        data=RandomTempData(size=self.file_size,
+                                            meta=DelugeMeta(f'dataset-{seeder_set}-{experiment_run}',
+                                                            announce_url=tracker.announce_url))
+                    ))
 
-        return IteratedExperiment(repetitions)
+        return IteratedExperiment(repetitions())
diff --git a/benchmarks/deluge/tests/test_config.py b/benchmarks/deluge/tests/test_config.py
index c20b26e..d84baf0 100644
--- a/benchmarks/deluge/tests/test_config.py
+++ b/benchmarks/deluge/tests/test_config.py
@@ -4,6 +4,7 @@ from unittest.mock import patch
 
 import yaml
 
+from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
 from benchmarks.deluge.config import DelugeNodeSetConfig, DelugeNodeConfig, DelugeExperimentConfig
 from benchmarks.deluge.deluge_node import DelugeNode
 
@@ -44,6 +45,7 @@ def test_should_expand_node_sets_into_simple_nodes():
         ),
     ]
 
+
 def test_should_respect_first_node_index():
     nodeset = DelugeNodeSetConfig(
         name='deluge-{node_index}',
@@ -69,6 +71,7 @@ def test_should_respect_first_node_index():
         ),
     ]
 
+
 def test_should_build_experiment_from_config():
     config_file = StringIO("""
     deluge_experiment:
@@ -99,3 +102,40 @@ def test_should_build_experiment_from_config():
     assert cast(DelugeNode, repetitions[0].experiment.nodes[5]).daemon_args['port'] == 6890
 
 
+def test_should_create_n_repetitions_per_seeder_set():
+    config_file = StringIO("""
+    deluge_experiment:
+      seeder_sets: 2
+      repetitions: 3
+      seeders: 3
+      tracker_announce_url: http://localhost:2020/announce
+      file_size: 1024
+      shared_volume_path: /var/lib/deluge
+
+      nodes:
+        network_size: 100
+        name: 'deluge-{node_index}'
+        address: 'node-{node_index}.deluge.codexbenchmarks.svc.cluster.local'
+        daemon_port: 6890
+        listen_ports: [ 6891, 6892 ]
+    """)
+
+    config = DelugeExperimentConfig.model_validate(yaml.safe_load(config_file)['deluge_experiment'])
+
+    # Need to patch mkdir, or we'll try to actually create the folder when DelugeNode gets initialized.
+    with patch('pathlib.Path.mkdir'):
+        experiment = config.build()
+        repetitions = list(experiment.experiments)
+
+    assert len(repetitions) == 3 * 2
+
+    experiment_set1_1 = cast(StaticDisseminationExperiment, repetitions[0].experiment)
+    experiment_set1_2 = cast(StaticDisseminationExperiment, repetitions[2].experiment)
+    experiment_set2_1 = cast(StaticDisseminationExperiment, repetitions[3].experiment)
+
+    # FIXME Ehm... this test might actually fail with a very low probability if the seeder sets end
+    #   up being the same by chance, but the probability is very small (you're drawing 3 out of 100 twice
+    #   and the result needs to be the same). The fix would be having a deterministic sampler but I feel
+    #   lazy right now. :-)
+    assert experiment_set1_1.seeders == experiment_set1_2.seeders
+    assert experiment_set1_1.seeders != experiment_set2_1.seeders
diff --git a/experiments.k8s.yaml b/experiments.k8s.yaml
index 54ed95f..bd25bcb 100644
--- a/experiments.k8s.yaml
+++ b/experiments.k8s.yaml
@@ -1,4 +1,5 @@
 deluge_experiment:
+  seeder_sets: ${SEEDER_SETS}
   seeders: ${SEEDERS}
   tracker_announce_url: ${TRACKER_ANNOUNCE_URL}
   file_size: ${FILE_SIZE}
diff --git a/k8s/testrunner-job.yaml b/k8s/testrunner-job.yaml
index ed527b7..1fd66eb 100644
--- a/k8s/testrunner-job.yaml
+++ b/k8s/testrunner-job.yaml
@@ -5,11 +5,13 @@ metadata:
   namespace: codex-benchmarks
   labels:
     app: testrunner
+
 spec:
   template:
     metadata:
       labels:
         app: testrunner
+
     spec:
       containers:
         - name: testrunner
@@ -18,11 +20,13 @@ spec:
           args: [ "run", "deluge_experiment" ]
           env:
             - name: NETWORK_SIZE
-              value: "100"
+              value: "10"
             - name: SEEDERS
               value: "4"
             - name: REPETITIONS
               value: "10"
+            - name: SEEDER_SETS
+              value: "2"
             - name: FILE_SIZE
               value: "104857600"
             - name: TRACKER_ANNOUNCE_URL
@@ -47,9 +51,11 @@ spec:
             limits:
               cpu: "1"
               memory: "2Gi"
+
       volumes:
         - name: benchmark-volume
           persistentVolumeClaim:
             claimName: deluge-pvc
       restartPolicy: Never
+
   backoffLimit: 0