mirror of
https://github.com/codex-storage/bittorrent-benchmarks.git
synced 2025-01-10 03:15:48 +00:00
standardize logging; add experiment log parser
This commit is contained in:
parent
2c17527954
commit
f68a3e714a
2
.gitignore
vendored
2
.gitignore
vendored
@ -5,3 +5,5 @@
|
|||||||
.RData
|
.RData
|
||||||
.Rhistory
|
.Rhistory
|
||||||
.Rproj.user
|
.Rproj.user
|
||||||
|
*.Rproj
|
||||||
|
*.nb.html
|
||||||
|
@ -1,17 +1,23 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict
|
from typing import Dict, Iterable
|
||||||
|
|
||||||
|
from mypy.memprofile import defaultdict
|
||||||
from pydantic_core import ValidationError
|
from pydantic_core import ValidationError
|
||||||
|
|
||||||
from benchmarks.core.config import ConfigParser, ExperimentBuilder
|
from benchmarks.core.config import ConfigParser, ExperimentBuilder
|
||||||
from benchmarks.core.experiments.experiments import Experiment
|
from benchmarks.core.experiments.experiments import Experiment
|
||||||
|
from benchmarks.core.logging import basic_log_parser, LogEntry, LogSplitter
|
||||||
from benchmarks.deluge.config import DelugeExperimentConfig
|
from benchmarks.deluge.config import DelugeExperimentConfig
|
||||||
|
from benchmarks.deluge.logging import DelugeTorrentDownload
|
||||||
|
|
||||||
config_parser = ConfigParser()
|
config_parser = ConfigParser()
|
||||||
config_parser.register(DelugeExperimentConfig)
|
config_parser.register(DelugeExperimentConfig)
|
||||||
|
|
||||||
|
log_parser = basic_log_parser()
|
||||||
|
log_parser.register(DelugeTorrentDownload)
|
||||||
|
|
||||||
|
|
||||||
def cmd_list(experiments: Dict[str, ExperimentBuilder[Experiment]], _):
|
def cmd_list(experiments: Dict[str, ExperimentBuilder[Experiment]], _):
|
||||||
print(f'Available experiments are:')
|
print(f'Available experiments are:')
|
||||||
@ -36,6 +42,25 @@ def cmd_describe(args):
|
|||||||
print(config_parser.experiment_types[args.type].schema_json(indent=2))
|
print(config_parser.experiment_types[args.type].schema_json(indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_logs(log: Path, output: Path):
|
||||||
|
if not log.exists():
|
||||||
|
print(f'Log file {log} does not exist.')
|
||||||
|
sys.exit(-1)
|
||||||
|
|
||||||
|
if not output.parent.exists():
|
||||||
|
print(f'Folder {output.parent} does not exist.')
|
||||||
|
sys.exit(-1)
|
||||||
|
|
||||||
|
output.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
def output_factory(event_type: str):
|
||||||
|
return (output / f'{event_type}.csv').open('w', encoding='utf-8')
|
||||||
|
|
||||||
|
with (log.open('r', encoding='utf-8') as istream,
|
||||||
|
LogSplitter(output_factory) as splitter):
|
||||||
|
splitter.split(log_parser.parse(istream))
|
||||||
|
|
||||||
|
|
||||||
def _parse_config(config: Path) -> Dict[str, ExperimentBuilder[Experiment]]:
|
def _parse_config(config: Path) -> Dict[str, ExperimentBuilder[Experiment]]:
|
||||||
if not config.exists():
|
if not config.exists():
|
||||||
print(f'Config file {config} does not exist.')
|
print(f'Config file {config} does not exist.')
|
||||||
@ -82,6 +107,11 @@ def main():
|
|||||||
|
|
||||||
describe.set_defaults(func=cmd_describe)
|
describe.set_defaults(func=cmd_describe)
|
||||||
|
|
||||||
|
logs = commands.add_parser('logs', help='Parse logs.')
|
||||||
|
logs.add_argument('log', type=Path, help='Path to the log file.')
|
||||||
|
logs.add_argument('output_dir', type=Path, help='Path to an output folder.')
|
||||||
|
logs.set_defaults(func=lambda args: cmd_logs(args.log, args.output_dir))
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
_init_logging()
|
_init_logging()
|
||||||
|
@ -1 +1,2 @@
|
|||||||
from benchmarks.deluge.tests.fixtures import *
|
from benchmarks.deluge.tests.fixtures import *
|
||||||
|
from benchmarks.core.tests.fixtures import *
|
||||||
|
@ -1,48 +1,17 @@
|
|||||||
"""Basic utilities for structuring experiment configurations based on Pydantic schemas."""
|
"""Basic utilities for structuring experiment configurations based on Pydantic schemas."""
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from io import TextIOBase
|
from io import TextIOBase
|
||||||
from typing import Annotated, Type, Dict, TextIO, Callable, cast
|
from typing import Type, Dict, TextIO, Callable, cast
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
from pydantic import BaseModel, IPvAnyAddress, AfterValidator, TypeAdapter
|
|
||||||
from typing_extensions import Generic, overload
|
from typing_extensions import Generic, overload
|
||||||
|
|
||||||
from benchmarks.core.experiments.experiments import TExperiment, Experiment
|
from benchmarks.core.experiments.experiments import TExperiment
|
||||||
|
from benchmarks.core.pydantic import SnakeCaseModel
|
||||||
|
|
||||||
|
|
||||||
def drop_config_suffix(name: str) -> str:
|
class ExperimentBuilder(SnakeCaseModel, Generic[TExperiment]):
|
||||||
return name[:-6] if name.endswith('Config') else name
|
|
||||||
|
|
||||||
|
|
||||||
def to_snake_case(name: str) -> str:
|
|
||||||
return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
|
|
||||||
|
|
||||||
|
|
||||||
class ConfigModel(BaseModel):
|
|
||||||
model_config = {
|
|
||||||
'alias_generator': lambda x: to_snake_case(drop_config_suffix(x))
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# This is a simple regex which is not by any means exhaustive but should cover gross syntax errors.
|
|
||||||
VALID_DOMAIN_NAME = re.compile(r"^localhost$|^(?!-)([A-Za-z0-9-]+\.)+[A-Za-z]{2,6}$")
|
|
||||||
|
|
||||||
|
|
||||||
def is_valid_domain_name(domain_name: str):
|
|
||||||
stripped = domain_name.strip()
|
|
||||||
matches = VALID_DOMAIN_NAME.match(stripped)
|
|
||||||
assert matches is not None
|
|
||||||
return stripped
|
|
||||||
|
|
||||||
|
|
||||||
DomainName = Annotated[str, AfterValidator(is_valid_domain_name)]
|
|
||||||
|
|
||||||
type Host = IPvAnyAddress | DomainName
|
|
||||||
|
|
||||||
|
|
||||||
class ExperimentBuilder(ConfigModel, Generic[TExperiment]):
|
|
||||||
""":class:`ExperimentBuilders` can build real :class:`Experiment`s out of :class:`ConfigModel`s. """
|
""":class:`ExperimentBuilders` can build real :class:`Experiment`s out of :class:`ConfigModel`s. """
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@ -60,10 +29,7 @@ class ConfigParser:
|
|||||||
self.experiment_types = {}
|
self.experiment_types = {}
|
||||||
|
|
||||||
def register(self, root: Type[ExperimentBuilder[TExperiment]]):
|
def register(self, root: Type[ExperimentBuilder[TExperiment]]):
|
||||||
name = root.__name__
|
self.experiment_types[root.alias()] = root
|
||||||
alias = cast(Callable[[str], str],
|
|
||||||
root.model_config.get('alias_generator', lambda x: x))(name)
|
|
||||||
self.experiment_types[alias] = root
|
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def parse(self, data: dict) -> Dict[str, ExperimentBuilder[TExperiment]]:
|
def parse(self, data: dict) -> Dict[str, ExperimentBuilder[TExperiment]]:
|
||||||
|
@ -59,7 +59,6 @@ class ExperimentEnvironment:
|
|||||||
sleep(self.polling_interval)
|
sleep(self.polling_interval)
|
||||||
|
|
||||||
if (timeout != 0) and (time() - start_time > timeout):
|
if (timeout != 0) and (time() - start_time > timeout):
|
||||||
print("timeout")
|
|
||||||
logger.info(f'Some components timed out: {self._component_names(not_ready)}')
|
logger.info(f'Some components timed out: {self._component_names(not_ready)}')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
import logging
|
import logging
|
||||||
from multiprocessing.pool import ThreadPool
|
from multiprocessing.pool import ThreadPool
|
||||||
from typing import Sequence
|
from typing import Sequence, Optional
|
||||||
|
|
||||||
from typing_extensions import Generic, List, Tuple
|
from typing_extensions import Generic, List, Tuple
|
||||||
|
|
||||||
from benchmarks.core.experiments.experiments import Experiment
|
from benchmarks.core.experiments.experiments import Experiment
|
||||||
|
from benchmarks.core.logging import RequestEvent, RequestEventType
|
||||||
from benchmarks.core.network import TInitialMetadata, TNetworkHandle, Node, DownloadHandle
|
from benchmarks.core.network import TInitialMetadata, TNetworkHandle, Node, DownloadHandle
|
||||||
from benchmarks.core.utils import ExperimentData
|
from benchmarks.core.utils import ExperimentData
|
||||||
|
|
||||||
@ -17,11 +18,12 @@ class StaticDisseminationExperiment(Generic[TNetworkHandle, TInitialMetadata], E
|
|||||||
network: Sequence[Node[TNetworkHandle, TInitialMetadata]],
|
network: Sequence[Node[TNetworkHandle, TInitialMetadata]],
|
||||||
seeders: List[int],
|
seeders: List[int],
|
||||||
data: ExperimentData[TInitialMetadata],
|
data: ExperimentData[TInitialMetadata],
|
||||||
|
concurrency: Optional[int] = None
|
||||||
):
|
):
|
||||||
self.nodes = network
|
self.nodes = network
|
||||||
self.seeders = seeders
|
self.seeders = seeders
|
||||||
self.data = data
|
self.data = data
|
||||||
self._pool = ThreadPool(processes=len(network) - len(seeders))
|
self._pool = ThreadPool(processes=len(network) - len(seeders) if concurrency is None else concurrency)
|
||||||
|
|
||||||
def run(self, run: int = 0):
|
def run(self, run: int = 0):
|
||||||
seeders, leechers = self._split_nodes()
|
seeders, leechers = self._split_nodes()
|
||||||
@ -32,13 +34,45 @@ class StaticDisseminationExperiment(Generic[TNetworkHandle, TInitialMetadata], E
|
|||||||
with self.data as (meta, data):
|
with self.data as (meta, data):
|
||||||
cid = None
|
cid = None
|
||||||
for node in seeders:
|
for node in seeders:
|
||||||
logger.info(f'Seeding data: {str(node)}')
|
logger.info(RequestEvent(
|
||||||
|
node='runner',
|
||||||
|
destination=node.name,
|
||||||
|
name='seed',
|
||||||
|
request_id=str(meta),
|
||||||
|
type=RequestEventType.start
|
||||||
|
))
|
||||||
cid = node.seed(data, meta if cid is None else cid)
|
cid = node.seed(data, meta if cid is None else cid)
|
||||||
|
logger.info(RequestEvent(
|
||||||
|
node='runner',
|
||||||
|
destination=node.name,
|
||||||
|
name='seed',
|
||||||
|
request_id=str(meta),
|
||||||
|
type=RequestEventType.end
|
||||||
|
))
|
||||||
|
|
||||||
assert cid is not None # to please mypy
|
assert cid is not None # to please mypy
|
||||||
|
|
||||||
logger.info(f'Setting up leechers: {[str(leecher) for leecher in leechers]}')
|
logger.info(f'Setting up leechers: {[str(leecher) for leecher in leechers]}')
|
||||||
downloads = list(self._pool.imap_unordered(lambda leecher: leecher.leech(cid), leechers))
|
|
||||||
|
def _leech(leecher):
|
||||||
|
logger.info(RequestEvent(
|
||||||
|
node='runner',
|
||||||
|
destination=leecher.name,
|
||||||
|
name='leech',
|
||||||
|
request_id=str(meta),
|
||||||
|
type=RequestEventType.start
|
||||||
|
))
|
||||||
|
download = leecher.leech(cid)
|
||||||
|
logger.info(RequestEvent(
|
||||||
|
node='runner',
|
||||||
|
destination=leecher.name,
|
||||||
|
name='leech',
|
||||||
|
request_id=str(meta),
|
||||||
|
type=RequestEventType.end
|
||||||
|
))
|
||||||
|
return download
|
||||||
|
|
||||||
|
downloads = list(self._pool.imap_unordered(_leech, leechers))
|
||||||
|
|
||||||
logger.info('Now waiting for downloads to complete')
|
logger.info('Now waiting for downloads to complete')
|
||||||
|
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from io import StringIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, List, Tuple, Union
|
from typing import Optional, List, Tuple, Union
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
|
from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
|
||||||
from benchmarks.core.experiments.tests.utils import MockExperimentData
|
from benchmarks.core.experiments.tests.utils import MockExperimentData
|
||||||
|
from benchmarks.core.logging import LogParser, RequestEvent, RequestEventType
|
||||||
from benchmarks.core.network import Node, DownloadHandle
|
from benchmarks.core.network import Node, DownloadHandle
|
||||||
|
|
||||||
|
|
||||||
@ -12,17 +15,21 @@ class MockHandle:
|
|||||||
path: Path
|
path: Path
|
||||||
name: str
|
name: str
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
|
|
||||||
class MockNode(Node[MockHandle, str]):
|
class MockNode(Node[MockHandle, str]):
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self, name='mock_node') -> None:
|
||||||
|
self._name = name
|
||||||
self.seeding: Optional[Tuple[MockHandle, Path]] = None
|
self.seeding: Optional[Tuple[MockHandle, Path]] = None
|
||||||
self.leeching: Optional[MockHandle] = None
|
self.leeching: Optional[MockHandle] = None
|
||||||
self.download_was_awaited = False
|
self.download_was_awaited = False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return 'mock_node'
|
return self._name
|
||||||
|
|
||||||
def seed(
|
def seed(
|
||||||
self,
|
self,
|
||||||
@ -53,7 +60,7 @@ class MockDownloadHandle(DownloadHandle):
|
|||||||
|
|
||||||
|
|
||||||
def mock_network(n: int) -> List[MockNode]:
|
def mock_network(n: int) -> List[MockNode]:
|
||||||
return [MockNode() for _ in range(n)]
|
return [MockNode(f'node-{i}') for i in range(n)]
|
||||||
|
|
||||||
|
|
||||||
def test_should_place_seeders():
|
def test_should_place_seeders():
|
||||||
@ -117,3 +124,76 @@ def test_should_delete_generated_file_at_end_of_experiment():
|
|||||||
experiment.run()
|
experiment.run()
|
||||||
|
|
||||||
assert data.cleanup_called
|
assert data.cleanup_called
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_log_requests_to_seeders_and_leechers(mock_logger):
|
||||||
|
logger, output = mock_logger
|
||||||
|
with patch('benchmarks.core.experiments.static_experiment.logger', logger):
|
||||||
|
network = mock_network(n=3)
|
||||||
|
data = MockExperimentData(meta='dataset-1', data=Path('/path/to/data'))
|
||||||
|
seeders = [1]
|
||||||
|
|
||||||
|
experiment = StaticDisseminationExperiment(
|
||||||
|
seeders=seeders,
|
||||||
|
network=network,
|
||||||
|
data=data,
|
||||||
|
concurrency=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
experiment.run()
|
||||||
|
|
||||||
|
parser = LogParser()
|
||||||
|
parser.register(RequestEvent)
|
||||||
|
|
||||||
|
events = list(parser.parse(StringIO(output.getvalue())))
|
||||||
|
|
||||||
|
assert events == [
|
||||||
|
RequestEvent(
|
||||||
|
destination='node-1',
|
||||||
|
node='runner',
|
||||||
|
name='seed',
|
||||||
|
request_id='dataset-1',
|
||||||
|
type=RequestEventType.start,
|
||||||
|
timestamp=events[0].timestamp,
|
||||||
|
),
|
||||||
|
RequestEvent(
|
||||||
|
destination='node-1',
|
||||||
|
node='runner',
|
||||||
|
name='seed',
|
||||||
|
request_id='dataset-1',
|
||||||
|
type=RequestEventType.end,
|
||||||
|
timestamp=events[1].timestamp,
|
||||||
|
),
|
||||||
|
RequestEvent(
|
||||||
|
destination='node-0',
|
||||||
|
node='runner',
|
||||||
|
name='leech',
|
||||||
|
request_id='dataset-1',
|
||||||
|
type=RequestEventType.start,
|
||||||
|
timestamp=events[2].timestamp,
|
||||||
|
),
|
||||||
|
RequestEvent(
|
||||||
|
destination='node-0',
|
||||||
|
node='runner',
|
||||||
|
name='leech',
|
||||||
|
request_id='dataset-1',
|
||||||
|
type=RequestEventType.end,
|
||||||
|
timestamp=events[3].timestamp,
|
||||||
|
),
|
||||||
|
RequestEvent(
|
||||||
|
destination='node-2',
|
||||||
|
node='runner',
|
||||||
|
name='leech',
|
||||||
|
request_id='dataset-1',
|
||||||
|
type=RequestEventType.start,
|
||||||
|
timestamp=events[4].timestamp,
|
||||||
|
),
|
||||||
|
RequestEvent(
|
||||||
|
destination='node-2',
|
||||||
|
node='runner',
|
||||||
|
name='leech',
|
||||||
|
request_id='dataset-1',
|
||||||
|
type=RequestEventType.end,
|
||||||
|
timestamp=events[5].timestamp,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
124
benchmarks/core/logging.py
Normal file
124
benchmarks/core/logging.py
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from csv import DictWriter
|
||||||
|
from enum import Enum
|
||||||
|
from json import JSONDecodeError
|
||||||
|
from typing import Type, TextIO, Iterable, Callable, Dict, Tuple
|
||||||
|
|
||||||
|
from pydantic import ValidationError, computed_field, Field
|
||||||
|
|
||||||
|
from benchmarks.core.pydantic import SnakeCaseModel
|
||||||
|
|
||||||
|
MARKER = '>>'
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class LogEntry(SnakeCaseModel):
|
||||||
|
def __str__(self):
|
||||||
|
return f"{MARKER}{self.model_dump_json()}"
|
||||||
|
|
||||||
|
@computed_field # type: ignore
|
||||||
|
@property
|
||||||
|
def entry_type(self) -> str:
|
||||||
|
return self.alias()
|
||||||
|
|
||||||
|
|
||||||
|
class LogParser:
|
||||||
|
def __init__(self):
|
||||||
|
self.entry_types = {}
|
||||||
|
self.warn_counts = 10
|
||||||
|
|
||||||
|
def register(self, entry_type: Type[SnakeCaseModel]):
|
||||||
|
self.entry_types[entry_type.alias()] = entry_type
|
||||||
|
|
||||||
|
def parse(self, log: TextIO) -> Iterable[LogEntry]:
|
||||||
|
marker_len = len(MARKER)
|
||||||
|
for line in log:
|
||||||
|
index = line.find(MARKER)
|
||||||
|
if index == -1:
|
||||||
|
continue
|
||||||
|
|
||||||
|
type_tag = '' # just to calm down mypy
|
||||||
|
try:
|
||||||
|
# Should probably test this against a regex for the type tag to see which is faster.
|
||||||
|
json_line = json.loads(line[index + marker_len:])
|
||||||
|
type_tag = json_line.get('entry_type')
|
||||||
|
if not type_tag or (type_tag not in self.entry_types):
|
||||||
|
continue
|
||||||
|
yield self.entry_types[type_tag].model_validate(json_line)
|
||||||
|
except JSONDecodeError:
|
||||||
|
pass
|
||||||
|
except ValidationError as err:
|
||||||
|
# This is usually something we want to know about, as if the message has a type_tag
|
||||||
|
# that we know, then we should probably be able to parse it.
|
||||||
|
self.warn_counts -= 1 # avoid flooding everything with warnings
|
||||||
|
if self.warn_counts > 0:
|
||||||
|
logger.warning(f"Schema failed for line with known type tag {type_tag}: {err}")
|
||||||
|
elif self.warn_counts == 0:
|
||||||
|
logger.warning("Too many errors: suppressing further schema warnings.")
|
||||||
|
|
||||||
|
|
||||||
|
class LogSplitter:
|
||||||
|
def __init__(self, output_factory=Callable[[str], TextIO], output_entry_type=False) -> None:
|
||||||
|
self.output_factory = output_factory
|
||||||
|
self.dump = (
|
||||||
|
(lambda model: model.model_dump())
|
||||||
|
if output_entry_type
|
||||||
|
else (lambda model: model.model_dump(exclude={'entry_type'}))
|
||||||
|
)
|
||||||
|
|
||||||
|
self.outputs: Dict[str, Tuple[DictWriter, TextIO]] = {}
|
||||||
|
|
||||||
|
def split(self, log: Iterable[LogEntry]):
|
||||||
|
for entry in log:
|
||||||
|
writer, _ = self.outputs.get(entry.entry_type, (None, None))
|
||||||
|
entry_dict = self.dump(entry)
|
||||||
|
|
||||||
|
if writer is None:
|
||||||
|
output = self.output_factory(entry.entry_type)
|
||||||
|
writer = DictWriter(output, fieldnames=entry_dict.keys())
|
||||||
|
self.outputs[entry.entry_type] = writer, output
|
||||||
|
writer.writeheader()
|
||||||
|
|
||||||
|
writer.writerow(entry_dict)
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
for _, output in self.outputs.values():
|
||||||
|
output.close()
|
||||||
|
|
||||||
|
|
||||||
|
type NodeId = str
|
||||||
|
|
||||||
|
|
||||||
|
class Event(LogEntry):
|
||||||
|
node: NodeId
|
||||||
|
name: str # XXX this ends up being redundant for custom event schemas... need to think of a better solution.
|
||||||
|
timestamp: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(datetime.UTC))
|
||||||
|
|
||||||
|
|
||||||
|
class Metric(Event):
|
||||||
|
value: int | float
|
||||||
|
|
||||||
|
|
||||||
|
class RequestEventType(Enum):
|
||||||
|
start = 'start'
|
||||||
|
end = 'end'
|
||||||
|
|
||||||
|
|
||||||
|
class RequestEvent(Event):
|
||||||
|
destination: NodeId
|
||||||
|
request_id: str
|
||||||
|
type: RequestEventType
|
||||||
|
|
||||||
|
|
||||||
|
def basic_log_parser() -> LogParser:
|
||||||
|
parser = LogParser()
|
||||||
|
parser.register(Event)
|
||||||
|
parser.register(Metric)
|
||||||
|
parser.register(RequestEvent)
|
||||||
|
return parser
|
38
benchmarks/core/pydantic.py
Normal file
38
benchmarks/core/pydantic.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import re
|
||||||
|
from typing import Annotated
|
||||||
|
|
||||||
|
from pydantic import BaseModel, AfterValidator, IPvAnyAddress
|
||||||
|
|
||||||
|
|
||||||
|
def drop_config_suffix(name: str) -> str:
|
||||||
|
return name[:-6] if name.endswith('Config') else name
|
||||||
|
|
||||||
|
|
||||||
|
def to_snake_case(name: str) -> str:
|
||||||
|
return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
|
||||||
|
|
||||||
|
|
||||||
|
class SnakeCaseModel(BaseModel):
|
||||||
|
model_config = {
|
||||||
|
'alias_generator': lambda x: to_snake_case(drop_config_suffix(x))
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def alias(cls):
|
||||||
|
return cls.model_config['alias_generator'](cls.__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# This is a simple regex which is not by any means exhaustive but should cover gross syntax errors.
|
||||||
|
VALID_DOMAIN_NAME = re.compile(r"^localhost$|^(?!-)([A-Za-z0-9-]+\.)+[A-Za-z]{2,6}$")
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_domain_name(domain_name: str):
|
||||||
|
stripped = domain_name.strip()
|
||||||
|
matches = VALID_DOMAIN_NAME.match(stripped)
|
||||||
|
assert matches is not None
|
||||||
|
return stripped
|
||||||
|
|
||||||
|
|
||||||
|
DomainName = Annotated[str, AfterValidator(is_valid_domain_name)]
|
||||||
|
|
||||||
|
type Host = IPvAnyAddress | DomainName
|
21
benchmarks/core/tests/fixtures.py
Normal file
21
benchmarks/core/tests/fixtures.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
import logging
|
||||||
|
from io import StringIO
|
||||||
|
from typing import Tuple, Generator
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_logger() -> Generator[Tuple[logging.Logger, StringIO], None, None]:
|
||||||
|
output = StringIO()
|
||||||
|
logger = logging.getLogger('test_logger')
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
for handler in logger.handlers:
|
||||||
|
logger.removeHandler(handler)
|
||||||
|
handler = logging.StreamHandler(output)
|
||||||
|
logger.addHandler(handler)
|
||||||
|
|
||||||
|
yield logger, output
|
||||||
|
|
||||||
|
logger.removeHandler(handler)
|
||||||
|
handler.close()
|
@ -1,63 +1,18 @@
|
|||||||
import os
|
import os
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from ipaddress import IPv4Address, IPv6Address
|
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
import pytest
|
|
||||||
import yaml
|
import yaml
|
||||||
from pydantic import ValidationError, TypeAdapter
|
|
||||||
|
|
||||||
from benchmarks.core.config import Host, DomainName, ConfigParser, ConfigModel
|
from benchmarks.core.config import ConfigParser
|
||||||
|
from benchmarks.core.pydantic import SnakeCaseModel
|
||||||
|
|
||||||
|
|
||||||
def test_should_parse_ipv4_address():
|
class Root1(SnakeCaseModel):
|
||||||
h = TypeAdapter(Host).validate_strings('192.168.1.1')
|
|
||||||
assert h == IPv4Address('192.168.1.1')
|
|
||||||
|
|
||||||
|
|
||||||
def test_should_parse_ipv6_address():
|
|
||||||
h = TypeAdapter(Host).validate_strings('2001:0000:130F:0000:0000:09C0:876A:130B')
|
|
||||||
assert h == IPv6Address('2001:0000:130F:0000:0000:09C0:876A:130B')
|
|
||||||
|
|
||||||
|
|
||||||
def test_should_parse_simple_dns_names():
|
|
||||||
h = TypeAdapter(Host).validate_strings('node-1.local.svc')
|
|
||||||
assert h == DomainName('node-1.local.svc')
|
|
||||||
|
|
||||||
|
|
||||||
def test_should_parse_localhost():
|
|
||||||
h = TypeAdapter(Host).validate_strings('localhost')
|
|
||||||
assert h == DomainName('localhost')
|
|
||||||
|
|
||||||
|
|
||||||
def test_should_return_correct_string_representation_for_addresses():
|
|
||||||
h = TypeAdapter(Host).validate_strings('localhost')
|
|
||||||
assert h == DomainName('localhost')
|
|
||||||
|
|
||||||
h = TypeAdapter(Host).validate_strings('192.168.1.1')
|
|
||||||
assert h == IPv4Address('192.168.1.1')
|
|
||||||
|
|
||||||
|
|
||||||
def test_should_fail_invalid_names():
|
|
||||||
invalid_names = [
|
|
||||||
'-node-1.local.svc',
|
|
||||||
'node-1.local..svc',
|
|
||||||
'node-1.local.svc.',
|
|
||||||
'node-1.local.reallylongsubdomain',
|
|
||||||
'node-1.local.s-dash',
|
|
||||||
'notlocalhost',
|
|
||||||
]
|
|
||||||
|
|
||||||
for invalid_name in invalid_names:
|
|
||||||
with pytest.raises(ValidationError):
|
|
||||||
TypeAdapter(Host).validate_strings(invalid_name)
|
|
||||||
|
|
||||||
|
|
||||||
class Root1(ConfigModel):
|
|
||||||
index: int
|
index: int
|
||||||
|
|
||||||
|
|
||||||
class Root2(ConfigModel):
|
class Root2(SnakeCaseModel):
|
||||||
name: str
|
name: str
|
||||||
|
|
||||||
|
|
||||||
|
168
benchmarks/core/tests/test_logging.py
Normal file
168
benchmarks/core/tests/test_logging.py
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
import datetime
|
||||||
|
from collections import defaultdict
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
|
from benchmarks.core.logging import LogEntry, LogParser, LogSplitter
|
||||||
|
from benchmarks.tests.utils import compact
|
||||||
|
|
||||||
|
|
||||||
|
class MetricsEvent(LogEntry):
|
||||||
|
name: str
|
||||||
|
timestamp: datetime.datetime
|
||||||
|
value: float
|
||||||
|
node: str
|
||||||
|
|
||||||
|
|
||||||
|
def test_log_entry_should_serialize_to_expected_format():
|
||||||
|
event = MetricsEvent(
|
||||||
|
name='download',
|
||||||
|
timestamp=datetime.datetime(
|
||||||
|
2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
|
||||||
|
),
|
||||||
|
value=0.245,
|
||||||
|
node='node1',
|
||||||
|
)
|
||||||
|
|
||||||
|
assert str(
|
||||||
|
event) == ('>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,'
|
||||||
|
'"node":"node1","entry_type":"metrics_event"}')
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_parse_logs():
|
||||||
|
log = StringIO("""
|
||||||
|
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,"node":"node1","entry_type":"metrics_event"}
|
||||||
|
[some garbage introduced by the log formatter: bla bla bla] -:>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
|
||||||
|
""")
|
||||||
|
|
||||||
|
parser = LogParser()
|
||||||
|
parser.register(MetricsEvent)
|
||||||
|
|
||||||
|
assert list(parser.parse(log)) == [
|
||||||
|
MetricsEvent(
|
||||||
|
name='download',
|
||||||
|
timestamp=datetime.datetime(
|
||||||
|
2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
|
||||||
|
),
|
||||||
|
value=0.245,
|
||||||
|
node='node1',
|
||||||
|
),
|
||||||
|
MetricsEvent(
|
||||||
|
name='download',
|
||||||
|
timestamp=datetime.datetime(
|
||||||
|
2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
|
||||||
|
),
|
||||||
|
value=0.246,
|
||||||
|
node='node2',
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_skip_unparseable_lines():
|
||||||
|
log = StringIO("""
|
||||||
|
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,"node":"node0","entry_type":"metrics_event"
|
||||||
|
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
|
||||||
|
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node5","entry_type":"metcs_event"}
|
||||||
|
some random gibberish
|
||||||
|
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node3","entry_type":"metrics_event"}
|
||||||
|
""")
|
||||||
|
|
||||||
|
parser = LogParser()
|
||||||
|
parser.register(MetricsEvent)
|
||||||
|
|
||||||
|
assert list(parser.parse(log)) == [
|
||||||
|
MetricsEvent(
|
||||||
|
name='download',
|
||||||
|
timestamp=datetime.datetime(
|
||||||
|
2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
|
||||||
|
),
|
||||||
|
value=0.246,
|
||||||
|
node='node2',
|
||||||
|
),
|
||||||
|
MetricsEvent(
|
||||||
|
name='download',
|
||||||
|
timestamp=datetime.datetime(
|
||||||
|
2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
|
||||||
|
),
|
||||||
|
value=0.246,
|
||||||
|
node='node3',
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class StateChangeEvent(LogEntry):
|
||||||
|
old: str
|
||||||
|
new: str
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_log_events_correctly(mock_logger):
|
||||||
|
logger, output = mock_logger
|
||||||
|
|
||||||
|
events = [
|
||||||
|
StateChangeEvent(old='stopped', new='started'),
|
||||||
|
MetricsEvent(
|
||||||
|
name='download',
|
||||||
|
timestamp=datetime.datetime(
|
||||||
|
2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
|
||||||
|
),
|
||||||
|
value=0.246,
|
||||||
|
node='node3',
|
||||||
|
),
|
||||||
|
StateChangeEvent(old='started', new='stopped'),
|
||||||
|
]
|
||||||
|
|
||||||
|
for event in events:
|
||||||
|
logger.info(event)
|
||||||
|
|
||||||
|
parser = LogParser()
|
||||||
|
parser.register(MetricsEvent)
|
||||||
|
parser.register(StateChangeEvent)
|
||||||
|
|
||||||
|
assert list(parser.parse(StringIO(output.getvalue()))) == events
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleEvent(LogEntry):
|
||||||
|
name: str
|
||||||
|
timestamp: datetime.datetime
|
||||||
|
|
||||||
|
|
||||||
|
class Person(LogEntry):
|
||||||
|
name: str
|
||||||
|
surname: str
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_split_intertwined_logs_by_entry_type():
|
||||||
|
log = StringIO("""
|
||||||
|
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
|
||||||
|
>>{"name":"start","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
|
||||||
|
>>{"name":"John","surname":"Doe","timestamp":"2021-01-01T00:00:00Z","entry_type":"person"}
|
||||||
|
>>{"name":"start2","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
|
||||||
|
""")
|
||||||
|
|
||||||
|
parser = LogParser()
|
||||||
|
parser.register(MetricsEvent)
|
||||||
|
parser.register(SimpleEvent)
|
||||||
|
parser.register(Person)
|
||||||
|
|
||||||
|
outputs = defaultdict(StringIO)
|
||||||
|
|
||||||
|
splitter = LogSplitter(
|
||||||
|
output_factory=lambda entry_type: outputs[entry_type],
|
||||||
|
)
|
||||||
|
|
||||||
|
splitter.split(parser.parse(log))
|
||||||
|
|
||||||
|
assert compact(outputs['metrics_event'].getvalue()) == (compact("""
|
||||||
|
name,timestamp,value,node
|
||||||
|
download,2021-01-01 00:00:00+00:00,0.246,node2
|
||||||
|
"""))
|
||||||
|
|
||||||
|
assert compact(outputs['simple_event'].getvalue()) == (compact("""
|
||||||
|
name,timestamp
|
||||||
|
start,2021-01-01 00:00:00+00:00
|
||||||
|
start2,2021-01-01 00:00:00+00:00
|
||||||
|
"""))
|
||||||
|
|
||||||
|
assert compact(outputs['person'].getvalue()) == (compact("""
|
||||||
|
name,surname
|
||||||
|
John,Doe
|
||||||
|
"""))
|
49
benchmarks/core/tests/test_pydantic.py
Normal file
49
benchmarks/core/tests/test_pydantic.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
from ipaddress import IPv4Address, IPv6Address
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from pydantic import TypeAdapter, ValidationError
|
||||||
|
|
||||||
|
from benchmarks.core.pydantic import DomainName, Host
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_parse_ipv4_address():
|
||||||
|
h = TypeAdapter(Host).validate_strings('192.168.1.1')
|
||||||
|
assert h == IPv4Address('192.168.1.1')
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_parse_ipv6_address():
|
||||||
|
h = TypeAdapter(Host).validate_strings('2001:0000:130F:0000:0000:09C0:876A:130B')
|
||||||
|
assert h == IPv6Address('2001:0000:130F:0000:0000:09C0:876A:130B')
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_parse_simple_dns_names():
|
||||||
|
h = TypeAdapter(Host).validate_strings('node-1.local.svc')
|
||||||
|
assert h == DomainName('node-1.local.svc')
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_parse_localhost():
|
||||||
|
h = TypeAdapter(Host).validate_strings('localhost')
|
||||||
|
assert h == DomainName('localhost')
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_return_correct_string_representation_for_addresses():
|
||||||
|
h = TypeAdapter(Host).validate_strings('localhost')
|
||||||
|
assert h == DomainName('localhost')
|
||||||
|
|
||||||
|
h = TypeAdapter(Host).validate_strings('192.168.1.1')
|
||||||
|
assert h == IPv4Address('192.168.1.1')
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_fail_invalid_names():
|
||||||
|
invalid_names = [
|
||||||
|
'-node-1.local.svc',
|
||||||
|
'node-1.local..svc',
|
||||||
|
'node-1.local.svc.',
|
||||||
|
'node-1.local.reallylongsubdomain',
|
||||||
|
'node-1.local.s-dash',
|
||||||
|
'notlocalhost',
|
||||||
|
]
|
||||||
|
|
||||||
|
for invalid_name in invalid_names:
|
||||||
|
with pytest.raises(ValidationError):
|
||||||
|
TypeAdapter(Host).validate_strings(invalid_name)
|
@ -6,10 +6,10 @@ from pydantic import BaseModel, Field, model_validator, HttpUrl
|
|||||||
from torrentool.torrent import Torrent
|
from torrentool.torrent import Torrent
|
||||||
from urllib3.util import parse_url
|
from urllib3.util import parse_url
|
||||||
|
|
||||||
from benchmarks.core.config import Host, ExperimentBuilder
|
from benchmarks.core.config import ExperimentBuilder
|
||||||
from benchmarks.core.experiments.experiments import IteratedExperiment, ExperimentEnvironment, Experiment, \
|
from benchmarks.core.experiments.experiments import IteratedExperiment, ExperimentEnvironment, BoundExperiment
|
||||||
BoundExperiment
|
|
||||||
from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
|
from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
|
||||||
|
from benchmarks.core.pydantic import Host
|
||||||
from benchmarks.core.utils import sample, RandomTempData
|
from benchmarks.core.utils import sample, RandomTempData
|
||||||
from benchmarks.deluge.deluge_node import DelugeMeta, DelugeNode
|
from benchmarks.deluge.deluge_node import DelugeMeta, DelugeNode
|
||||||
from benchmarks.deluge.tracker import Tracker
|
from benchmarks.deluge.tracker import Tracker
|
||||||
|
5
benchmarks/deluge/logging.py
Normal file
5
benchmarks/deluge/logging.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from benchmarks.core.tests.test_logging import MetricsEvent
|
||||||
|
|
||||||
|
|
||||||
|
class DelugeTorrentDownload(MetricsEvent):
|
||||||
|
torrent_name: str
|
@ -3,3 +3,6 @@ from pathlib import Path
|
|||||||
|
|
||||||
def shared_volume() -> Path:
|
def shared_volume() -> Path:
|
||||||
return Path(__file__).parent.parent.parent.joinpath('volume')
|
return Path(__file__).parent.parent.parent.joinpath('volume')
|
||||||
|
|
||||||
|
def compact(a_string: str) -> str:
|
||||||
|
return '\n'.join([line.strip() for line in a_string.splitlines() if line.strip()])
|
Loading…
x
Reference in New Issue
Block a user