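"""Tests for the structured logging helpers: serialization of LogEntry models,
recovery of typed entries from raw log streams via LogParser, and routing of
parsed entries into per-type outputs via LogSplitter."""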

import datetime
from collections import defaultdict
from io import StringIO

from benchmarks.logging.logging import (
    LogEntry,
    LogParser,
    LogSplitter,
    LogSplitterFormats,
)
from benchmarks.core.pydantic import SnakeCaseModel
from benchmarks.tests.utils import compact


class MetricsEvent(LogEntry):
    name: str
    timestamp: datetime.datetime
    value: float
    node: str


def test_log_entry_should_serialize_to_expected_format():
    event = MetricsEvent(
        name="download",
        timestamp=datetime.datetime(
            2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
        ),
        value=0.245,
        node="node1",
    )

    assert str(event) == (
        '>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,'
        '"node":"node1","entry_type":"metrics_event"}'
    )
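

# LogParser.parse locates the ">>" marker within each line rather than requiring
# it at the start, so prefixes added by log formatters (timestamps, level tags,
# and similar) do not prevent an entry from being recovered.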
def test_should_parse_logs():
    log = StringIO("""
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,"node":"node1","entry_type":"metrics_event"}
[some garbage introduced by the log formatter: bla bla bla] -:>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
""")

    parser = LogParser()
    parser.register(MetricsEvent)

    assert list(parser.parse(log)) == [
        MetricsEvent(
            name="download",
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.245,
            node="node1",
        ),
        MetricsEvent(
            name="download",
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node="node2",
        ),
    ]
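

# Malformed JSON, unknown entry types, and free-form text are skipped rather
# than raising: below, the "node0" line is missing its closing brace, the
# "node5" line carries a misspelled entry_type ("metcs_event"), and the
# gibberish line matches no registered type at all.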
def test_should_skip_unparseable_lines():
    log = StringIO("""
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.245,"node":"node0","entry_type":"metrics_event"
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node5","entry_type":"metcs_event"}
some random gibberish
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node3","entry_type":"metrics_event"}
""")

    parser = LogParser()
    parser.register(MetricsEvent)

    assert list(parser.parse(log)) == [
        MetricsEvent(
            name="download",
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node="node2",
        ),
        MetricsEvent(
            name="download",
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node="node3",
        ),
    ]


class StateChangeEvent(LogEntry):
    old: str
    new: str
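

# Round-trip test: entries logged through a standard logger should parse back
# into equal objects. `mock_logger` is assumed to be a pytest fixture (defined
# in the suite's conftest) yielding a logger plus the StringIO it writes to.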
def test_should_recover_logged_events_at_parsing(mock_logger):
    logger, output = mock_logger

    events = [
        StateChangeEvent(old="stopped", new="started"),
        MetricsEvent(
            name="download",
            timestamp=datetime.datetime(
                2021, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
            ),
            value=0.246,
            node="node3",
        ),
        StateChangeEvent(old="started", new="stopped"),
    ]

    for event in events:
        logger.info(event)

    parser = LogParser()
    parser.register(MetricsEvent)
    parser.register(StateChangeEvent)

    assert list(parser.parse(StringIO(output.getvalue()))) == events


class SimpleEvent(LogEntry):
    name: str
    timestamp: datetime.datetime


class Person(LogEntry):
    name: str
    surname: str
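

# LogSplitter routes each entry type to its own output, writing CSV by default
# (per the expectations below). output_factory receives the entry type name plus
# a second argument (ignored here; presumably the output format) and returns the
# destination stream for that type.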
def test_should_split_intertwined_logs_by_entry_type():
    log = StringIO("""
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
>>{"name":"start","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
>>{"name":"John","surname":"Doe","timestamp":"2021-01-01T00:00:00Z","entry_type":"person"}
>>{"name":"start2","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
""")

    parser = LogParser()
    parser.register(MetricsEvent)
    parser.register(SimpleEvent)
    parser.register(Person)

    outputs = defaultdict(StringIO)
    splitter = LogSplitter(
        output_factory=lambda entry_type, _: outputs[entry_type],
    )

    splitter.split(parser.parse(log))

    assert compact(outputs["metrics_event"].getvalue()) == compact("""
        name,timestamp,value,node
        download,2021-01-01 00:00:00+00:00,0.246,node2
    """)

    assert compact(outputs["simple_event"].getvalue()) == compact("""
        name,timestamp
        start,2021-01-01 00:00:00+00:00
        start2,2021-01-01 00:00:00+00:00
    """)

    assert compact(outputs["person"].getvalue()) == compact("""
        name,surname
        John,Doe
    """)


class SomeConfig(SnakeCaseModel):
    some_field: str
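

# LogEntry.adapt wraps an existing pydantic model type into a LogEntry subclass:
# the adapted type serializes with the usual ">>" prefix, and its entry_type
# ("some_config_log_entry") is derived from the adapted class name.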
def test_should_adapt_existing_model_types_to_logging_types():
    SomeConfigLogEntry = LogEntry.adapt(SomeConfig)

    assert (
        str(SomeConfigLogEntry(some_field="value"))
        == '>>{"some_field":"value","entry_type":"some_config_log_entry"}'
    )


def test_should_adapt_existing_model_instances_to_logging_instances():
    SomeConfigLogEntry = LogEntry.adapt(SomeConfig)
    instance = SomeConfig(some_field="value")

    assert (
        str(SomeConfigLogEntry.adapt_instance(instance))
        == '>>{"some_field":"value","entry_type":"some_config_log_entry"}'
    )
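

# set_format overrides the default CSV output for a given entry type: here
# SimpleEvent entries are written as JSON Lines while MetricsEvent keeps CSV.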
def test_should_store_split_logs_as_jsonl_for_requested_types():
    log = StringIO("""
>>{"name":"download","timestamp":"2021-01-01T00:00:00Z","value":0.246,"node":"node2","entry_type":"metrics_event"}
>>{"name":"start","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
>>{"name":"start2","timestamp":"2021-01-01T00:00:00Z","entry_type":"simple_event"}
""")

    parser = LogParser()
    parser.register(MetricsEvent)
    parser.register(SimpleEvent)

    outputs = defaultdict(StringIO)
    splitter = LogSplitter(
        output_factory=lambda entry_type, _: outputs[entry_type],
    )
    splitter.set_format(SimpleEvent, LogSplitterFormats.jsonl)

    splitter.split(parser.parse(log))

    assert compact(outputs["metrics_event"].getvalue()) == compact("""
        name,timestamp,value,node
        download,2021-01-01 00:00:00+00:00,0.246,node2
    """)

    assert compact(outputs["simple_event"].getvalue()) == compact("""
        {"name":"start","timestamp":"2021-01-01T00:00:00Z"}
        {"name":"start2","timestamp":"2021-01-01T00:00:00Z"}
    """)