From aebd070f3eacb57c5ea04af04025546e295b5d13 Mon Sep 17 00:00:00 2001 From: gmega Date: Mon, 25 Nov 2024 16:08:22 -0300 Subject: [PATCH] add setup/teardown sequences to experiments, add generated file cleanup at end --- benchmarks/core/network.py | 1 - benchmarks/core/utils.py | 28 ++++++- benchmarks/experiments/experiments.py | 30 ++++++++ benchmarks/experiments/static_experiment.py | 74 ++++++++++++------- .../tests/test_static_experiment.py | 45 +++++++---- benchmarks/experiments/tests/utils.py | 30 ++++++++ 6 files changed, 164 insertions(+), 44 deletions(-) create mode 100644 benchmarks/experiments/experiments.py create mode 100644 benchmarks/experiments/tests/utils.py diff --git a/benchmarks/core/network.py b/benchmarks/core/network.py index 0df4197..ae7f689 100644 --- a/benchmarks/core/network.py +++ b/benchmarks/core/network.py @@ -1,7 +1,6 @@ import shutil from abc import abstractmethod, ABC from pathlib import Path -from typing import Sequence from typing_extensions import Generic, TypeVar, Union diff --git a/benchmarks/core/utils.py b/benchmarks/core/utils.py index 8229b2d..09c050d 100644 --- a/benchmarks/core/utils.py +++ b/benchmarks/core/utils.py @@ -1,12 +1,36 @@ import random +from abc import ABC, abstractmethod +from dataclasses import dataclass from pathlib import Path from typing import Callable, Iterator, Tuple +from typing_extensions import Generic + +from benchmarks.core.network import TInitialMetadata + # A Sampler samples without replacement from [0, ..., n]. type Sampler = Callable[[int], Iterator[int]] -# A DataGenerator generates files for experiments. 
-type DataGenerator[TInitialMetadata] = Callable[[], Tuple[TInitialMetadata, Path]] + +@dataclass +class DataHandle(Generic[TInitialMetadata], ABC): + """A :class:`DataHandle` knows how to clean up data and metadata that has been generated + by a :class:`DataGenerator`.""" + meta: TInitialMetadata + data: Path + + def cleanup(self): + if self.data.exists(): + self.data.unlink() + + +class DataGenerator(Generic[TInitialMetadata], ABC): + """A :class:`DataGenerator` knows how to generate data for an :class:`Experiment`.""" + + @abstractmethod + def generate(self) -> DataHandle[TInitialMetadata]: + """Generates fresh data and metadata and returns a :class:`DataHandle`.""" + pass def sample(n: int) -> Iterator[int]: diff --git a/benchmarks/experiments/experiments.py b/benchmarks/experiments/experiments.py new file mode 100644 index 0000000..63afd1f --- /dev/null +++ b/benchmarks/experiments/experiments.py @@ -0,0 +1,30 @@ +from abc import ABC, abstractmethod + +from typing_extensions import Generic, TypeVar + +TRunnableExperiment = TypeVar('TRunnableExperiment', bound='RunnableExperiment') + + +class Experiment(Generic[TRunnableExperiment]): + """An :class:`Experiment` represents a self-contained experimental unit which may be repeated + multiple times. 
Unlike tests, :class:`Experiment`s are run for the metrics they generate as a side effect; + those metrics are their main outcome.""" + + @abstractmethod + def setup(self) -> TRunnableExperiment: + pass + + +class RunnableExperiment(ABC): + def run(self): + try: + self._run() + finally: + self.teardown() + + @abstractmethod + def _run(self): + pass + + def teardown(self): + pass diff --git a/benchmarks/experiments/static_experiment.py b/benchmarks/experiments/static_experiment.py index 9eb2afa..b9dc068 100644 --- a/benchmarks/experiments/static_experiment.py +++ b/benchmarks/experiments/static_experiment.py @@ -1,43 +1,65 @@ +from pathlib import Path + from typing_extensions import Generic, List from benchmarks.core.network import TInitialMetadata, TNetworkHandle, Node -from benchmarks.core.utils import Sampler, DataGenerator +from benchmarks.core.utils import Sampler, DataGenerator, DataHandle +from benchmarks.experiments.experiments import Experiment, RunnableExperiment, TRunnableExperiment -class StaticDisseminationExperiment(Generic[TNetworkHandle, TInitialMetadata]): +class _RunnableSDE(RunnableExperiment, Generic[TNetworkHandle, TInitialMetadata]): + def __init__( + self, + network: List[Node[TNetworkHandle, TInitialMetadata]], + seeders: List[int], + data_handle: DataHandle[TInitialMetadata], + ): + self.nodes = network + self.seeders = seeders + self.data_handle = data_handle + + def _run(self): + seeders, leechers = ( + [ + self.nodes[i] + for i in self.seeders + ], + [ + self.nodes[i] + for i in range(0, len(self.nodes)) + if i not in self.seeders + ] + ) + + handle = self.data_handle.meta + for node in seeders: + handle = node.seed(self.data_handle.data, handle) + + handles = [node.leech(handle) for node in leechers] + for handle in handles: + handle.await_for_completion() + + def teardown(self): + self.data_handle.cleanup() + + +class StaticDisseminationExperiment(Experiment[_RunnableSDE[TNetworkHandle, TInitialMetadata]]): def __init__( self, network: 
List[Node[TNetworkHandle, TInitialMetadata]], seeders: int, sampler: Sampler, - generator: DataGenerator + generator: DataGenerator[TInitialMetadata], ): self.nodes = network self.sampler = sampler - self.generate_data = generator + self.generator = generator self.seeders = seeders - def run(self): + def setup(self) -> _RunnableSDE[TNetworkHandle, TInitialMetadata]: sample = self.sampler(len(self.nodes)) - seeder_indexes = [next(sample) for _ in range(0, self.seeders)] - seeders, leechers = ( - [ - self.nodes[i] - for i in seeder_indexes - ], - [ - self.nodes[i] - for i in range(0, len(self.nodes)) - if i not in seeder_indexes - ] + return _RunnableSDE( + network=self.nodes, + seeders=[next(sample) for _ in range(0, self.seeders)], + data_handle=self.generator.generate() ) - - meta, data = self.generate_data() - - handle = meta - for node in seeders: - handle = node.seed(data, handle) - - handles = [node.leech(handle) for node in leechers] - for handle in handles: - handle.await_for_completion() diff --git a/benchmarks/experiments/tests/test_static_experiment.py b/benchmarks/experiments/tests/test_static_experiment.py index 4a6ca1c..92043cb 100644 --- a/benchmarks/experiments/tests/test_static_experiment.py +++ b/benchmarks/experiments/tests/test_static_experiment.py @@ -1,10 +1,10 @@ from dataclasses import dataclass from pathlib import Path -from typing import Optional, List, Tuple, Union, Sequence +from typing import Optional, List, Tuple, Union from benchmarks.core.network import Node, DownloadHandle -from benchmarks.core.utils import Sampler from benchmarks.experiments.static_experiment import StaticDisseminationExperiment +from benchmarks.experiments.tests.utils import mock_sampler, MockGenerator @dataclass @@ -13,10 +13,6 @@ class MockHandle: name: str -def mock_sampler(elements: List[int]) -> Sampler: - return lambda _: iter(elements) - - class MockNode(Node[MockHandle, str]): def __init__(self) -> None: @@ -56,48 +52,67 @@ def mock_network(n: int) -> 
List[MockNode]: def test_should_place_seeders(): network = mock_network(n=13) - file = Path('/path/to/data') + generator = MockGenerator(meta='data', data=Path('/path/to/data')) seeder_indexes = [9, 6, 3] experiment = StaticDisseminationExperiment( seeders=3, sampler=mock_sampler(seeder_indexes), network=network, - generator=lambda: ('data', Path('/path/to/data')), + generator=generator, ) - experiment.run() + runnable = experiment.setup() + runnable.run() actual_seeders = set() for index, node in enumerate(network): if node.seeding is not None: actual_seeders.add(index) - assert node.seeding[0] == MockHandle(name='data', path=file) + assert node.seeding[0] == MockHandle(name=generator.meta, path=generator.data) assert actual_seeders == set(seeder_indexes) def test_should_download_at_remaining_nodes(): network = mock_network(n=13) - file = Path('/path/to/data') + generator = MockGenerator(meta='data', data=Path('/path/to/data')) seeder_indexes = [9, 6, 3] experiment = StaticDisseminationExperiment( seeders=3, sampler=mock_sampler(seeder_indexes), network=network, - generator=lambda: ('data', Path('/path/to/data')), + generator=generator, ) - experiment.run() + runnable = experiment.setup() + runnable.run() actual_leechers = set() for index, node in enumerate(network): if node.leeching is not None: - assert node.leeching.path == file - assert node.leeching.name == 'data' + assert node.leeching.path == generator.data + assert node.leeching.name == generator.meta assert node.seeding is None assert node.download_was_awaited actual_leechers.add(index) assert actual_leechers == set(range(13)) - set(seeder_indexes) + +def test_should_delete_generated_file_at_end_of_experiment(): + network = mock_network(n=2) + generator = MockGenerator(meta='data', data=Path('/path/to/data')) + seeder_indexes = [1] + + experiment = StaticDisseminationExperiment( + seeders=1, + sampler=mock_sampler(seeder_indexes), + network=network, + generator=generator, + ) + + runnable = 
experiment.setup() + runnable.run() + + assert generator.cleanup_called \ No newline at end of file diff --git a/benchmarks/experiments/tests/utils.py b/benchmarks/experiments/tests/utils.py new file mode 100644 index 0000000..b240282 --- /dev/null +++ b/benchmarks/experiments/tests/utils.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import List + +from benchmarks.core.network import TInitialMetadata +from benchmarks.core.utils import Sampler, DataGenerator, DataHandle + + +def mock_sampler(elements: List[int]) -> Sampler: + return lambda _: iter(elements) + + +class MockGenerator(DataGenerator[TInitialMetadata]): + def __init__(self, meta: TInitialMetadata, data: Path): + self.cleanup_called = False + self.meta = meta + self.data = data + + def generate(self) -> DataHandle[TInitialMetadata]: + return MockHandle(self.meta, self.data, self) + + +class MockHandle(DataHandle[TInitialMetadata]): + def __init__(self, meta: TInitialMetadata, data: Path, parent: MockGenerator): + self.meta = meta + self.data = data + self.parent = parent + + def cleanup(self): + assert not self.parent.cleanup_called + self.parent.cleanup_called = True