standardize logging; add experiment log parser
parent 2c17527954
commit f68a3e714a
@@ -5,3 +5,5 @@
.RData
.Rhistory
.Rproj.user
*.Rproj
*.nb.html
@@ -1,17 +1,23 @@
import argparse
import sys
from pathlib import Path
from typing import Dict
from typing import Dict, Iterable

from collections import defaultdict
from pydantic_core import ValidationError

from benchmarks.core.config import ConfigParser, ExperimentBuilder
from benchmarks.core.experiments.experiments import Experiment
from benchmarks.core.logging import basic_log_parser, LogEntry, LogSplitter
from benchmarks.deluge.config import DelugeExperimentConfig
from benchmarks.deluge.logging import DelugeTorrentDownload

config_parser = ConfigParser()
config_parser.register(DelugeExperimentConfig)

log_parser = basic_log_parser()
log_parser.register(DelugeTorrentDownload)


def cmd_list(experiments: Dict[str, ExperimentBuilder[Experiment]], _):
    print(f'Available experiments are:')

@@ -36,6 +42,25 @@ def cmd_describe(args):
    print(config_parser.experiment_types[args.type].schema_json(indent=2))


def cmd_logs(log: Path, output: Path):
    if not log.exists():
        print(f'Log file {log} does not exist.')
        sys.exit(-1)

    if not output.parent.exists():
        print(f'Folder {output.parent} does not exist.')
        sys.exit(-1)

    output.mkdir(exist_ok=True)

    def output_factory(event_type: str):
        return (output / f'{event_type}.csv').open('w', encoding='utf-8')

    with (log.open('r', encoding='utf-8') as istream,
          LogSplitter(output_factory) as splitter):
        splitter.split(log_parser.parse(istream))


def _parse_config(config: Path) -> Dict[str, ExperimentBuilder[Experiment]]:
    if not config.exists():
        print(f'Config file {config} does not exist.')

@@ -82,6 +107,11 @@ def main():

    describe.set_defaults(func=cmd_describe)

    logs = commands.add_parser('logs', help='Parse logs.')
    logs.add_argument('log', type=Path, help='Path to the log file.')
    logs.add_argument('output_dir', type=Path, help='Path to an output folder.')
    logs.set_defaults(func=lambda args: cmd_logs(args.log, args.output_dir))

    args = parser.parse_args()

    _init_logging()
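For illustration only (not part of the diff): the new logs subcommand simply forwards to cmd_logs, which splits a structured experiment log into one CSV per registered entry type. The paths below are placeholders.

from pathlib import Path

# Placeholder paths; cmd_logs writes one CSV per entry type
# (e.g. request_event.csv) into the output folder, provided the
# folder's parent already exists.
cmd_logs(log=Path('runs/experiment-1.log'), output=Path('runs/experiment-1-parsed'))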
@@ -1 +1,2 @@
from benchmarks.deluge.tests.fixtures import *
from benchmarks.core.tests.fixtures import *
@@ -1,48 +1,17 @@
"""Basic utilities for structuring experiment configurations based on Pydantic schemas."""
import os
import re
from abc import abstractmethod
from io import TextIOBase
from typing import Annotated, Type, Dict, TextIO, Callable, cast
from typing import Type, Dict, TextIO, Callable, cast

import yaml
from pydantic import BaseModel, IPvAnyAddress, AfterValidator, TypeAdapter
from typing_extensions import Generic, overload

from benchmarks.core.experiments.experiments import TExperiment, Experiment
from benchmarks.core.experiments.experiments import TExperiment
from benchmarks.core.pydantic import SnakeCaseModel


def drop_config_suffix(name: str) -> str:
    return name[:-6] if name.endswith('Config') else name


def to_snake_case(name: str) -> str:
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()


class ConfigModel(BaseModel):
    model_config = {
        'alias_generator': lambda x: to_snake_case(drop_config_suffix(x))
    }


# This is a simple regex which is not by any means exhaustive but should cover gross syntax errors.
VALID_DOMAIN_NAME = re.compile(r"^localhost$|^(?!-)([A-Za-z0-9-]+\.)+[A-Za-z]{2,6}$")


def is_valid_domain_name(domain_name: str):
    stripped = domain_name.strip()
    matches = VALID_DOMAIN_NAME.match(stripped)
    assert matches is not None
    return stripped


DomainName = Annotated[str, AfterValidator(is_valid_domain_name)]

type Host = IPvAnyAddress | DomainName


class ExperimentBuilder(ConfigModel, Generic[TExperiment]):
class ExperimentBuilder(SnakeCaseModel, Generic[TExperiment]):
    """:class:`ExperimentBuilders` can build real :class:`Experiment`s out of :class:`ConfigModel`s. """

    @abstractmethod

@@ -60,10 +29,7 @@ class ConfigParser:
        self.experiment_types = {}

    def register(self, root: Type[ExperimentBuilder[TExperiment]]):
        name = root.__name__
        alias = cast(Callable[[str], str],
                     root.model_config.get('alias_generator', lambda x: x))(name)
        self.experiment_types[alias] = root
        self.experiment_types[root.alias()] = root

    @overload
    def parse(self, data: dict) -> Dict[str, ExperimentBuilder[TExperiment]]:
@@ -59,7 +59,6 @@ class ExperimentEnvironment:
            sleep(self.polling_interval)

            if (timeout != 0) and (time() - start_time > timeout):
                print("timeout")
                logger.info(f'Some components timed out: {self._component_names(not_ready)}')
                return False
@@ -1,10 +1,11 @@
import logging
from multiprocessing.pool import ThreadPool
from typing import Sequence
from typing import Sequence, Optional

from typing_extensions import Generic, List, Tuple

from benchmarks.core.experiments.experiments import Experiment
from benchmarks.core.logging import RequestEvent, RequestEventType
from benchmarks.core.network import TInitialMetadata, TNetworkHandle, Node, DownloadHandle
from benchmarks.core.utils import ExperimentData

@@ -17,11 +18,12 @@ class StaticDisseminationExperiment(Generic[TNetworkHandle, TInitialMetadata], E
            network: Sequence[Node[TNetworkHandle, TInitialMetadata]],
            seeders: List[int],
            data: ExperimentData[TInitialMetadata],
            concurrency: Optional[int] = None
    ):
        self.nodes = network
        self.seeders = seeders
        self.data = data
        self._pool = ThreadPool(processes=len(network) - len(seeders))
        self._pool = ThreadPool(processes=len(network) - len(seeders) if concurrency is None else concurrency)

    def run(self, run: int = 0):
        seeders, leechers = self._split_nodes()

@@ -32,13 +34,45 @@ class StaticDisseminationExperiment(Generic[TNetworkHandle, TInitialMetadata], E
        with self.data as (meta, data):
            cid = None
            for node in seeders:
                logger.info(f'Seeding data: {str(node)}')
                logger.info(RequestEvent(
                    node='runner',
                    destination=node.name,
                    name='seed',
                    request_id=str(meta),
                    type=RequestEventType.start
                ))
                cid = node.seed(data, meta if cid is None else cid)
                logger.info(RequestEvent(
                    node='runner',
                    destination=node.name,
                    name='seed',
                    request_id=str(meta),
                    type=RequestEventType.end
                ))

            assert cid is not None  # to please mypy

            logger.info(f'Setting up leechers: {[str(leecher) for leecher in leechers]}')
            downloads = list(self._pool.imap_unordered(lambda leecher: leecher.leech(cid), leechers))

            def _leech(leecher):
                logger.info(RequestEvent(
                    node='runner',
                    destination=leecher.name,
                    name='leech',
                    request_id=str(meta),
                    type=RequestEventType.start
                ))
                download = leecher.leech(cid)
                logger.info(RequestEvent(
                    node='runner',
                    destination=leecher.name,
                    name='leech',
                    request_id=str(meta),
                    type=RequestEventType.end
                ))
                return download

            downloads = list(self._pool.imap_unordered(_leech, leechers))

            logger.info('Now waiting for downloads to complete')
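The switch from free-form messages to RequestEvent objects means each seed/leech request is written as a single machine-readable line. A rough sketch of the effect (values and field order are illustrative):

# Illustrative only: logging a structured event writes a marker-prefixed JSON line,
# which LogParser.parse can later turn back into a RequestEvent instance.
logger.info(RequestEvent(node='runner', destination='node-1', name='seed',
                         request_id='dataset-1', type=RequestEventType.start))
# emits roughly:
# >>{"node":"runner","name":"seed","timestamp":"2021-01-01T00:00:00Z",
#    "destination":"node-1","request_id":"dataset-1","type":"start","entry_type":"request_event"}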
@@ -1,9 +1,12 @@
from dataclasses import dataclass
from io import StringIO
from pathlib import Path
from typing import Optional, List, Tuple, Union
from unittest.mock import patch

from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
from benchmarks.core.experiments.tests.utils import MockExperimentData
from benchmarks.core.logging import LogParser, RequestEvent, RequestEventType
from benchmarks.core.network import Node, DownloadHandle

@@ -12,17 +15,21 @@ class MockHandle:
    path: Path
    name: str

    def __str__(self):
        return self.name


class MockNode(Node[MockHandle, str]):

    def __init__(self) -> None:
    def __init__(self, name='mock_node') -> None:
        self._name = name
        self.seeding: Optional[Tuple[MockHandle, Path]] = None
        self.leeching: Optional[MockHandle] = None
        self.download_was_awaited = False

    @property
    def name(self) -> str:
        return 'mock_node'
        return self._name

    def seed(
            self,

@@ -53,7 +60,7 @@ class MockDownloadHandle(DownloadHandle):


def mock_network(n: int) -> List[MockNode]:
    return [MockNode() for _ in range(n)]
    return [MockNode(f'node-{i}') for i in range(n)]


def test_should_place_seeders():

@@ -117,3 +124,76 @@ def test_should_delete_generated_file_at_end_of_experiment():
    experiment.run()

    assert data.cleanup_called


def test_should_log_requests_to_seeders_and_leechers(mock_logger):
    logger, output = mock_logger
    with patch('benchmarks.core.experiments.static_experiment.logger', logger):
        network = mock_network(n=3)
        data = MockExperimentData(meta='dataset-1', data=Path('/path/to/data'))
        seeders = [1]

        experiment = StaticDisseminationExperiment(
            seeders=seeders,
            network=network,
            data=data,
            concurrency=1,
        )

        experiment.run()

    parser = LogParser()
    parser.register(RequestEvent)

    events = list(parser.parse(StringIO(output.getvalue())))

    assert events == [
        RequestEvent(
            destination='node-1',
            node='runner',
            name='seed',
            request_id='dataset-1',
            type=RequestEventType.start,
            timestamp=events[0].timestamp,
        ),
        RequestEvent(
            destination='node-1',
            node='runner',
            name='seed',
            request_id='dataset-1',
            type=RequestEventType.end,
            timestamp=events[1].timestamp,
        ),
        RequestEvent(
            destination='node-0',
            node='runner',
            name='leech',
            request_id='dataset-1',
            type=RequestEventType.start,
            timestamp=events[2].timestamp,
        ),
        RequestEvent(
            destination='node-0',
            node='runner',
            name='leech',
            request_id='dataset-1',
            type=RequestEventType.end,
            timestamp=events[3].timestamp,
        ),
        RequestEvent(
            destination='node-2',
            node='runner',
            name='leech',
            request_id='dataset-1',
            type=RequestEventType.start,
            timestamp=events[4].timestamp,
        ),
        RequestEvent(
            destination='node-2',
            node='runner',
            name='leech',
            request_id='dataset-1',
            type=RequestEventType.end,
            timestamp=events[5].timestamp,
        )
    ]
@@ -0,0 +1,124 @@
import datetime
import json
import logging
from csv import DictWriter
from enum import Enum
from json import JSONDecodeError
from typing import Type, TextIO, Iterable, Callable, Dict, Tuple

from pydantic import ValidationError, computed_field, Field

from benchmarks.core.pydantic import SnakeCaseModel

MARKER = '>>'

logger = logging.getLogger(__name__)


class LogEntry(SnakeCaseModel):
    def __str__(self):
        return f"{MARKER}{self.model_dump_json()}"

    @computed_field  # type: ignore
    @property
    def entry_type(self) -> str:
        return self.alias()


class LogParser:
    def __init__(self):
        self.entry_types = {}
        self.warn_counts = 10

    def register(self, entry_type: Type[SnakeCaseModel]):
        self.entry_types[entry_type.alias()] = entry_type

    def parse(self, log: TextIO) -> Iterable[LogEntry]:
        marker_len = len(MARKER)
        for line in log:
            index = line.find(MARKER)
            if index == -1:
                continue

            type_tag = ''  # just to calm down mypy
            try:
                # Should probably test this against a regex for the type tag to see which is faster.
                json_line = json.loads(line[index + marker_len:])
                type_tag = json_line.get('entry_type')
                if not type_tag or (type_tag not in self.entry_types):
                    continue
                yield self.entry_types[type_tag].model_validate(json_line)
            except JSONDecodeError:
                pass
            except ValidationError as err:
                # This is usually something we want to know about, as if the message has a type_tag
                # that we know, then we should probably be able to parse it.
                self.warn_counts -= 1  # avoid flooding everything with warnings
                if self.warn_counts > 0:
                    logger.warning(f"Schema failed for line with known type tag {type_tag}: {err}")
                elif self.warn_counts == 0:
                    logger.warning("Too many errors: suppressing further schema warnings.")


class LogSplitter:
    def __init__(self, output_factory: Callable[[str], TextIO], output_entry_type=False) -> None:
        self.output_factory = output_factory
        self.dump = (
            (lambda model: model.model_dump())
            if output_entry_type
            else (lambda model: model.model_dump(exclude={'entry_type'}))
        )

        self.outputs: Dict[str, Tuple[DictWriter, TextIO]] = {}

    def split(self, log: Iterable[LogEntry]):
        for entry in log:
            writer, _ = self.outputs.get(entry.entry_type, (None, None))
            entry_dict = self.dump(entry)

            if writer is None:
                output = self.output_factory(entry.entry_type)
                writer = DictWriter(output, fieldnames=entry_dict.keys())
                self.outputs[entry.entry_type] = writer, output
                writer.writeheader()

            writer.writerow(entry_dict)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for _, output in self.outputs.values():
            output.close()


type NodeId = str


class Event(LogEntry):
    node: NodeId
    name: str  # XXX this ends up being redundant for custom event schemas... need to think of a better solution.
    timestamp: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(datetime.UTC))


class Metric(Event):
    value: int | float


class RequestEventType(Enum):
    start = 'start'
    end = 'end'


class RequestEvent(Event):
    destination: NodeId
    request_id: str
    type: RequestEventType


def basic_log_parser() -> LogParser:
    parser = LogParser()
    parser.register(Event)
    parser.register(Metric)
    parser.register(RequestEvent)
    return parser
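A minimal sketch of how these pieces compose; the DownloadMetric entry type and the in-memory buffers are made up for illustration, and the tests added below exercise the same flow against real log lines:

from io import StringIO

class DownloadMetric(LogEntry):  # hypothetical entry type, registered under alias 'download_metric'
    node: str
    value: float

parser = LogParser()
parser.register(DownloadMetric)

raw = StringIO(str(DownloadMetric(node='node-1', value=0.5)))  # a single ">>{...}" line

outputs = {}
splitter = LogSplitter(output_factory=lambda entry_type: outputs.setdefault(entry_type, StringIO()))
splitter.split(parser.parse(raw))

# outputs['download_metric'] now holds a CSV with a header row and one data row.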
@@ -0,0 +1,38 @@
import re
from typing import Annotated

from pydantic import BaseModel, AfterValidator, IPvAnyAddress


def drop_config_suffix(name: str) -> str:
    return name[:-6] if name.endswith('Config') else name


def to_snake_case(name: str) -> str:
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()


class SnakeCaseModel(BaseModel):
    model_config = {
        'alias_generator': lambda x: to_snake_case(drop_config_suffix(x))
    }

    @classmethod
    def alias(cls):
        return cls.model_config['alias_generator'](cls.__name__)


# This is a simple regex which is not by any means exhaustive but should cover gross syntax errors.
VALID_DOMAIN_NAME = re.compile(r"^localhost$|^(?!-)([A-Za-z0-9-]+\.)+[A-Za-z]{2,6}$")


def is_valid_domain_name(domain_name: str):
    stripped = domain_name.strip()
    matches = VALID_DOMAIN_NAME.match(stripped)
    assert matches is not None
    return stripped


DomainName = Annotated[str, AfterValidator(is_valid_domain_name)]

type Host = IPvAnyAddress | DomainName
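For reference, the alias generator is what both ConfigParser and LogParser key their registries on: it strips a trailing 'Config' and snake-cases the class name. A small illustration with made-up class names:

class MyExperimentConfig(SnakeCaseModel):  # hypothetical
    repetitions: int

class TorrentDownloadEvent(SnakeCaseModel):  # hypothetical
    node: str

print(MyExperimentConfig.alias())    # -> 'my_experiment'
print(TorrentDownloadEvent.alias())  # -> 'torrent_download_event'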
@@ -0,0 +1,21 @@
import logging
from io import StringIO
from typing import Tuple, Generator

import pytest


@pytest.fixture
def mock_logger() -> Generator[Tuple[logging.Logger, StringIO], None, None]:
    output = StringIO()
    logger = logging.getLogger('test_logger')
    logger.setLevel(logging.INFO)
    for handler in logger.handlers:
        logger.removeHandler(handler)
    handler = logging.StreamHandler(output)
    logger.addHandler(handler)

    yield logger, output

    logger.removeHandler(handler)
    handler.close()
@@ -1,63 +1,18 @@
import os
from io import StringIO
from ipaddress import IPv4Address, IPv6Address
from typing import cast

import pytest
import yaml
from pydantic import ValidationError, TypeAdapter

from benchmarks.core.config import Host, DomainName, ConfigParser, ConfigModel
from benchmarks.core.config import ConfigParser
from benchmarks.core.pydantic import SnakeCaseModel


def test_should_parse_ipv4_address():
    h = TypeAdapter(Host).validate_strings('192.168.1.1')
    assert h == IPv4Address('192.168.1.1')


def test_should_parse_ipv6_address():
    h = TypeAdapter(Host).validate_strings('2001:0000:130F:0000:0000:09C0:876A:130B')
    assert h == IPv6Address('2001:0000:130F:0000:0000:09C0:876A:130B')


def test_should_parse_simple_dns_names():
    h = TypeAdapter(Host).validate_strings('node-1.local.svc')
    assert h == DomainName('node-1.local.svc')


def test_should_parse_localhost():
    h = TypeAdapter(Host).validate_strings('localhost')
    assert h == DomainName('localhost')


def test_should_return_correct_string_representation_for_addresses():
    h = TypeAdapter(Host).validate_strings('localhost')
    assert h == DomainName('localhost')

    h = TypeAdapter(Host).validate_strings('192.168.1.1')
    assert h == IPv4Address('192.168.1.1')


def test_should_fail_invalid_names():
    invalid_names = [
        '-node-1.local.svc',
        'node-1.local..svc',
        'node-1.local.svc.',
        'node-1.local.reallylongsubdomain',
        'node-1.local.s-dash',
        'notlocalhost',
    ]

    for invalid_name in invalid_names:
        with pytest.raises(ValidationError):
            TypeAdapter(Host).validate_strings(invalid_name)


class Root1(ConfigModel):
class Root1(SnakeCaseModel):
    index: int


class Root2(ConfigModel):
class Root2(SnakeCaseModel):
    name: str
@@ -0,0 +1,168 @@
import datetime
from collections import defaultdict
from io import StringIO

from benchmarks.core.logging import LogEntry, LogParser, LogSplitter
from benchmarks.tests.utils import compact


class MetricsEvent(LogEntry):
    name: str
    timestamp: datetime.datetime
    value: float
    node: str


def test_log_entry_should_serialize_to_expected_format():
    event = MetricsEvent(
        name='download',
        timestamp=datetime.datetime(
            2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
        ),
        value=0.245,
        node='node1',
    )

    assert str(event) == (
        '>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,'
        '"node":"node1","entry_type":"metrics_event"}'
    )


def test_should_parse_logs():
    log = StringIO("""
    >>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,"node":"node1","entry_type":"metrics_event"}
    [some garbage introduced by the log formatter: bla bla bla] -:>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
    """)

    parser = LogParser()
    parser.register(MetricsEvent)

    assert list(parser.parse(log)) == [
        MetricsEvent(
            name='download',
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.245,
            node='node1',
        ),
        MetricsEvent(
            name='download',
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node='node2',
        ),
    ]


def test_should_skip_unparseable_lines():
    log = StringIO("""
    >>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,"node":"node0","entry_type":"metrics_event"
    >>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
    >>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node5","entry_type":"metcs_event"}
    some random gibberish
    >>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node3","entry_type":"metrics_event"}
    """)

    parser = LogParser()
    parser.register(MetricsEvent)

    assert list(parser.parse(log)) == [
        MetricsEvent(
            name='download',
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node='node2',
        ),
        MetricsEvent(
            name='download',
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node='node3',
        ),
    ]


class StateChangeEvent(LogEntry):
    old: str
    new: str


def test_should_log_events_correctly(mock_logger):
    logger, output = mock_logger

    events = [
        StateChangeEvent(old='stopped', new='started'),
        MetricsEvent(
            name='download',
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node='node3',
        ),
        StateChangeEvent(old='started', new='stopped'),
    ]

    for event in events:
        logger.info(event)

    parser = LogParser()
    parser.register(MetricsEvent)
    parser.register(StateChangeEvent)

    assert list(parser.parse(StringIO(output.getvalue()))) == events


class SimpleEvent(LogEntry):
    name: str
    timestamp: datetime.datetime


class Person(LogEntry):
    name: str
    surname: str


def test_should_split_intertwined_logs_by_entry_type():
    log = StringIO("""
    >>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
    >>{"name":"start","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
    >>{"name":"John","surname":"Doe","timestamp":"2021-01-01T00:00:00Z","entry_type":"person"}
    >>{"name":"start2","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
    """)

    parser = LogParser()
    parser.register(MetricsEvent)
    parser.register(SimpleEvent)
    parser.register(Person)

    outputs = defaultdict(StringIO)

    splitter = LogSplitter(
        output_factory=lambda entry_type: outputs[entry_type],
    )

    splitter.split(parser.parse(log))

    assert compact(outputs['metrics_event'].getvalue()) == (compact("""
        name,timestamp,value,node
        download,2021-01-01 00:00:00+00:00,0.246,node2
    """))

    assert compact(outputs['simple_event'].getvalue()) == (compact("""
        name,timestamp
        start,2021-01-01 00:00:00+00:00
        start2,2021-01-01 00:00:00+00:00
    """))

    assert compact(outputs['person'].getvalue()) == (compact("""
        name,surname
        John,Doe
    """))
@@ -0,0 +1,49 @@
from ipaddress import IPv4Address, IPv6Address

import pytest
from pydantic import TypeAdapter, ValidationError

from benchmarks.core.pydantic import DomainName, Host


def test_should_parse_ipv4_address():
    h = TypeAdapter(Host).validate_strings('192.168.1.1')
    assert h == IPv4Address('192.168.1.1')


def test_should_parse_ipv6_address():
    h = TypeAdapter(Host).validate_strings('2001:0000:130F:0000:0000:09C0:876A:130B')
    assert h == IPv6Address('2001:0000:130F:0000:0000:09C0:876A:130B')


def test_should_parse_simple_dns_names():
    h = TypeAdapter(Host).validate_strings('node-1.local.svc')
    assert h == DomainName('node-1.local.svc')


def test_should_parse_localhost():
    h = TypeAdapter(Host).validate_strings('localhost')
    assert h == DomainName('localhost')


def test_should_return_correct_string_representation_for_addresses():
    h = TypeAdapter(Host).validate_strings('localhost')
    assert h == DomainName('localhost')

    h = TypeAdapter(Host).validate_strings('192.168.1.1')
    assert h == IPv4Address('192.168.1.1')


def test_should_fail_invalid_names():
    invalid_names = [
        '-node-1.local.svc',
        'node-1.local..svc',
        'node-1.local.svc.',
        'node-1.local.reallylongsubdomain',
        'node-1.local.s-dash',
        'notlocalhost',
    ]

    for invalid_name in invalid_names:
        with pytest.raises(ValidationError):
            TypeAdapter(Host).validate_strings(invalid_name)
@@ -6,10 +6,10 @@ from pydantic import BaseModel, Field, model_validator, HttpUrl
from torrentool.torrent import Torrent
from urllib3.util import parse_url

from benchmarks.core.config import Host, ExperimentBuilder
from benchmarks.core.experiments.experiments import IteratedExperiment, ExperimentEnvironment, Experiment, \
    BoundExperiment
from benchmarks.core.config import ExperimentBuilder
from benchmarks.core.experiments.experiments import IteratedExperiment, ExperimentEnvironment, BoundExperiment
from benchmarks.core.experiments.static_experiment import StaticDisseminationExperiment
from benchmarks.core.pydantic import Host
from benchmarks.core.utils import sample, RandomTempData
from benchmarks.deluge.deluge_node import DelugeMeta, DelugeNode
from benchmarks.deluge.tracker import Tracker
@@ -0,0 +1,5 @@
from benchmarks.core.tests.test_logging import MetricsEvent


class DelugeTorrentDownload(MetricsEvent):
    torrent_name: str
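The CLI registers this event type with the shared log parser, so entries like the sketch below (all values invented, assuming a module-level logger on the Deluge side) end up in their own deluge_torrent_download.csv when the log is split:

import datetime

# Illustrative only: emitting a Deluge download metric as a structured log line.
logger.info(DelugeTorrentDownload(
    name='download',                                          # hypothetical metric name
    timestamp=datetime.datetime.now(datetime.timezone.utc),
    value=12.5,                                               # hypothetical value
    node='deluge-1',                                          # hypothetical node id
    torrent_name='dataset-1',                                 # hypothetical torrent name
))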
@@ -2,4 +2,7 @@ from pathlib import Path


def shared_volume() -> Path:
    return Path(__file__).parent.parent.parent.joinpath('volume')
    return Path(__file__).parent.parent.parent.joinpath('volume')


def compact(a_string: str) -> str:
    return '\n'.join([line.strip() for line in a_string.splitlines() if line.strip()])