add setup/teardown sequences to experiments, add generated file cleanup at end

This commit is contained in:
gmega 2024-11-25 16:08:22 -03:00
parent d2f697e2fa
commit aebd070f3e
No known key found for this signature in database
GPG Key ID: 6290D34EAD824B18
6 changed files with 164 additions and 44 deletions

View File

@ -1,7 +1,6 @@
import shutil
from abc import abstractmethod, ABC
from pathlib import Path
from typing import Sequence
from typing_extensions import Generic, TypeVar, Union

View File

@ -1,12 +1,36 @@
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Iterator, Tuple
from typing_extensions import Generic
from benchmarks.core.network import TInitialMetadata
# A Sampler samples without replacement from [0, ..., n - 1].
type Sampler = Callable[[int], Iterator[int]]
# A DataGenerator generates files for experiments.
type DataGenerator[TInitialMetadata] = Callable[[], Tuple[TInitialMetadata, Path]]
@dataclass
class DataHandle(Generic[TInitialMetadata], ABC):
    """A :class:`DataHandle` knows how to clean up data and metadata that has been generated
    by a :class:`DataGenerator`.

    :ivar meta: the metadata that was generated together with the data.
    :ivar data: path of the generated file.
    """

    meta: TInitialMetadata
    data: Path

    def cleanup(self):
        """Removes the generated file; a file that is already gone is not an error."""
        # EAFP instead of exists()-then-unlink(): the two-step form races with anything that
        # deletes the file in between, and exists() follows symlinks so it skips dangling ones.
        self.data.unlink(missing_ok=True)
class DataGenerator(Generic[TInitialMetadata], ABC):
    """Interface for objects that produce the data consumed by an :class:`Experiment`."""

    @abstractmethod
    def generate(self) -> DataHandle[TInitialMetadata]:
        """Produces new data together with its metadata, wrapped in a :class:`DataHandle`."""
def sample(n: int) -> Iterator[int]:

View File

@ -0,0 +1,30 @@
from abc import ABC, abstractmethod
from typing_extensions import Generic, TypeVar
TRunnableExperiment = TypeVar('TRunnableExperiment', bound='RunnableExperiment')


class Experiment(Generic[TRunnableExperiment], ABC):
    """An :class:`Experiment` represents a self-contained experimental unit which may be repeated
    multiple times. :class:`Experiment`s, unlike tests, have the generation of metrics as a side effect
    as their main outcome."""

    # ABC is required as a base: without it ``@abstractmethod`` is not enforced, and an
    # Experiment lacking ``setup`` could be instantiated.
    @abstractmethod
    def setup(self) -> TRunnableExperiment:
        """Prepares the experiment and returns the runnable object that executes it."""
        pass
class RunnableExperiment(ABC):
    """Base class for something that can be run once and is torn down afterwards."""

    def teardown(self):
        """Hook called by :meth:`run` after :meth:`_run`; does nothing unless overridden."""

    @abstractmethod
    def _run(self):
        """The actual work of the experiment; must be provided by subclasses."""

    def run(self):
        """Calls :meth:`_run`, then :meth:`teardown` whether or not :meth:`_run` raised."""
        try:
            self._run()
        finally:
            self.teardown()

View File

@ -1,43 +1,65 @@
from pathlib import Path
from typing_extensions import Generic, List
from benchmarks.core.network import TInitialMetadata, TNetworkHandle, Node
from benchmarks.core.utils import Sampler, DataGenerator
from benchmarks.core.utils import Sampler, DataGenerator, DataHandle
from benchmarks.experiments.experiments import Experiment, RunnableExperiment, TRunnableExperiment
class StaticDisseminationExperiment(Generic[TNetworkHandle, TInitialMetadata]):
class _RunnableSDE(RunnableExperiment, Generic[TNetworkHandle, TInitialMetadata]):
    """Runnable form of a static dissemination experiment: the nodes at the given seeder indexes
    seed the generated data, every other node leeches it, and the generated data is cleaned up
    on teardown."""

    def __init__(
            self,
            network: List[Node[TNetworkHandle, TInitialMetadata]],
            seeders: List[int],
            data_handle: DataHandle[TInitialMetadata],
    ):
        """
        :param network: all nodes taking part in the experiment.
        :param seeders: indexes into ``network`` of the nodes that will seed.
        :param data_handle: handle to the generated data and its metadata.
        """
        self.nodes = network
        self.seeders = seeders
        self.data_handle = data_handle

    def _run(self):
        """Seeds at the seeder nodes, starts a download at every other node, and waits for all
        downloads to complete."""
        # Build the lookup set once so the leecher filter does not rescan the seeder list per node.
        seeder_indexes = set(self.seeders)
        seeders = [self.nodes[i] for i in self.seeders]
        leechers = [
            node
            for i, node in enumerate(self.nodes)
            if i not in seeder_indexes
        ]

        # Each seeder receives the handle returned by the previous one, starting from the
        # metadata produced by the generator.
        handle = self.data_handle.meta
        for node in seeders:
            handle = node.seed(self.data_handle.data, handle)

        # Start every download before awaiting any of them.
        downloads = [node.leech(handle) for node in leechers]
        for download in downloads:
            download.await_for_completion()

    def teardown(self):
        """Cleans up the generated data once the run is over."""
        self.data_handle.cleanup()
class StaticDisseminationExperiment(Experiment[_RunnableSDE[TNetworkHandle, TInitialMetadata]]):
def __init__(
self,
network: List[Node[TNetworkHandle, TInitialMetadata]],
seeders: int,
sampler: Sampler,
generator: DataGenerator
generator: DataGenerator[TInitialMetadata],
):
self.nodes = network
self.sampler = sampler
self.generate_data = generator
self.generator = generator
self.seeders = seeders
def run(self):
def setup(self) -> _RunnableSDE[TNetworkHandle, TInitialMetadata]:
sample = self.sampler(len(self.nodes))
seeder_indexes = [next(sample) for _ in range(0, self.seeders)]
seeders, leechers = (
[
self.nodes[i]
for i in seeder_indexes
],
[
self.nodes[i]
for i in range(0, len(self.nodes))
if i not in seeder_indexes
]
return _RunnableSDE(
network=self.nodes,
seeders=[next(sample) for _ in range(0, self.seeders)],
data_handle=self.generator.generate()
)
meta, data = self.generate_data()
handle = meta
for node in seeders:
handle = node.seed(data, handle)
handles = [node.leech(handle) for node in leechers]
for handle in handles:
handle.await_for_completion()

View File

@ -1,10 +1,10 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, List, Tuple, Union, Sequence
from typing import Optional, List, Tuple, Union
from benchmarks.core.network import Node, DownloadHandle
from benchmarks.core.utils import Sampler
from benchmarks.experiments.static_experiment import StaticDisseminationExperiment
from benchmarks.experiments.tests.utils import mock_sampler, MockGenerator
@dataclass
@ -13,10 +13,6 @@ class MockHandle:
name: str
def mock_sampler(elements: List[int]) -> Sampler:
    """Builds a :data:`Sampler` that ignores its argument and iterates over ``elements``."""

    def fixed_sample(_):
        return iter(elements)

    return fixed_sample
class MockNode(Node[MockHandle, str]):
def __init__(self) -> None:
@ -56,48 +52,67 @@ def mock_network(n: int) -> List[MockNode]:
def test_should_place_seeders():
network = mock_network(n=13)
file = Path('/path/to/data')
generator = MockGenerator(meta='data', data=Path('/path/to/data'))
seeder_indexes = [9, 6, 3]
experiment = StaticDisseminationExperiment(
seeders=3,
sampler=mock_sampler(seeder_indexes),
network=network,
generator=lambda: ('data', Path('/path/to/data')),
generator=generator,
)
experiment.run()
runnable = experiment.setup()
runnable.run()
actual_seeders = set()
for index, node in enumerate(network):
if node.seeding is not None:
actual_seeders.add(index)
assert node.seeding[0] == MockHandle(name='data', path=file)
assert node.seeding[0] == MockHandle(name=generator.meta, path=generator.data)
assert actual_seeders == set(seeder_indexes)
def test_should_download_at_remaining_nodes():
network = mock_network(n=13)
file = Path('/path/to/data')
generator = MockGenerator(meta='data', data=Path('/path/to/data'))
seeder_indexes = [9, 6, 3]
experiment = StaticDisseminationExperiment(
seeders=3,
sampler=mock_sampler(seeder_indexes),
network=network,
generator=lambda: ('data', Path('/path/to/data')),
generator=generator,
)
experiment.run()
runnable = experiment.setup()
runnable.run()
actual_leechers = set()
for index, node in enumerate(network):
if node.leeching is not None:
assert node.leeching.path == file
assert node.leeching.name == 'data'
assert node.leeching.path == generator.data
assert node.leeching.name == generator.meta
assert node.seeding is None
assert node.download_was_awaited
actual_leechers.add(index)
assert actual_leechers == set(range(13)) - set(seeder_indexes)
def test_should_delete_generated_file_at_end_of_experiment():
    """Running the experiment must end with the generated data being cleaned up."""
    nodes = mock_network(n=2)
    data_generator = MockGenerator(meta='data', data=Path('/path/to/data'))

    runnable = StaticDisseminationExperiment(
        seeders=1,
        sampler=mock_sampler([1]),
        network=nodes,
        generator=data_generator,
    ).setup()
    runnable.run()

    assert data_generator.cleanup_called

View File

@ -0,0 +1,30 @@
from pathlib import Path
from typing import List
from benchmarks.core.network import TInitialMetadata
from benchmarks.core.utils import Sampler, DataGenerator, DataHandle
def mock_sampler(elements: List[int]) -> Sampler:
    """Builds a :data:`Sampler` that ignores its argument and iterates over ``elements``."""

    def fixed_sample(_):
        return iter(elements)

    return fixed_sample
class MockGenerator(DataGenerator[TInitialMetadata]):
    """:class:`DataGenerator` test double that hands out fixed metadata and a fixed path, and
    exposes a ``cleanup_called`` flag that the handles it creates set on cleanup."""

    def __init__(self, meta: TInitialMetadata, data: Path):
        self.meta = meta
        self.data = data
        # Set to True by the MockHandle returned from generate() when it is cleaned up.
        self.cleanup_called = False

    def generate(self) -> DataHandle[TInitialMetadata]:
        """Returns a :class:`MockHandle` carrying this generator's metadata and path."""
        return MockHandle(meta=self.meta, data=self.data, parent=self)
class MockHandle(DataHandle[TInitialMetadata]):
    """:class:`DataHandle` test double that records cleanup on the generator which created it
    and does not touch the file system."""

    def __init__(self, meta: TInitialMetadata, data: Path, parent: MockGenerator):
        self.parent = parent
        self.meta = meta
        self.data = data

    def cleanup(self):
        """Flags the parent generator as cleaned up; fails if that already happened."""
        generator = self.parent
        assert not generator.cleanup_called
        generator.cleanup_called = True