Mirror of https://github.com/codex-storage/bittorrent-benchmarks.git, synced 2025-01-24 18:08:50 +00:00

Commit bf844a6305 (parent d716af5d8b): output proper extension for jsonl data
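The diff references a LogSplitterFormats enum whose definition is not part of this commit. Judging from the members used (LogSplitterFormats.csv, LogSplitterFormats.jsonl) and from format.value being interpolated directly into file names, it is presumably a plain Enum whose values double as file extensions. A minimal sketch under that assumption, not the repository's actual definition:

from enum import Enum


class LogSplitterFormats(Enum):
    # Assumed: the enum value is used verbatim as the output file extension,
    # so f'{event_type}.{format.value}' yields e.g. 'some_event.jsonl'.
    csv = 'csv'
    jsonl = 'jsonl'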
@@ -8,7 +8,7 @@ from pydantic_core import ValidationError
 from benchmarks.core.config import ConfigParser, ExperimentBuilder
 from benchmarks.core.experiments.experiments import Experiment
-from benchmarks.core.logging import basic_log_parser, LogSplitter, LogEntry
+from benchmarks.core.logging import basic_log_parser, LogSplitter, LogEntry, LogSplitterFormats
 from benchmarks.deluge.config import DelugeExperimentConfig
 from benchmarks.deluge.logging import DelugeTorrentDownload
@@ -61,11 +61,12 @@ def cmd_logs(log: Path, output: Path):

     output.mkdir(exist_ok=True)

-    def output_factory(event_type: str):
-        return (output / f'{event_type}.csv').open('w', encoding='utf-8')
+    def output_factory(event_type: str, format: LogSplitterFormats):
+        return (output / f'{event_type}.{format.value}').open('w', encoding='utf-8')

     with (log.open('r', encoding='utf-8') as istream,
           LogSplitter(output_factory) as splitter):
+        splitter.set_format(DECLogEntry, LogSplitterFormats.jsonl)
         splitter.split(log_parser.parse(istream))
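The fix this commit makes: previously output_factory hard-coded a .csv extension, so entry types routed to JSON Lines via set_format still landed in files named *.csv; now the splitter passes the negotiated format to the factory and the extension follows it. A quick illustration, reusing the LogSplitterFormats sketch above (file and event names hypothetical):

from pathlib import Path

output = Path('results')
output.mkdir(exist_ok=True)


def output_factory(event_type: str, format: LogSplitterFormats):
    # Same shape as the patched factory: the extension now tracks the format.
    return (output / f'{event_type}.{format.value}').open('w', encoding='utf-8')


stream = output_factory('some_event', LogSplitterFormats.jsonl)
print(stream.name)  # results/some_event.jsonl, not some_event.csv
stream.close()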
@@ -74,6 +74,7 @@ class AdaptedLogEntry(LogEntry, ABC):
     def recover_instance(self) -> SnakeCaseModel:
         pass


 class LogParser:
     """:class:`LogParser` will pick up log entries from a stream and parse them into :class:`LogEntry` instances.
     It works by trying to find a special marker (>>>) in the log line, and then parsing the JSON that follows it.
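Concretely, the marker format the LogParser docstring describes means a raw log line looks roughly like this (logger prefix and field names are illustrative only):

2025-01-24 18:08:50 INFO benchmarks.runner >>> {"entry_type": "deluge_torrent_download", "name": "t1", "value": 12.5}

Everything up to and including >>> is discarded; the JSON payload after it is parsed into a LogEntry instance, dispatched, presumably, on its entry_type field.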
@@ -123,7 +124,8 @@ class LogSplitter:
-    """:class:`LogSplitter` will split parsed logs into different files based on the entry type."""
+    """:class:`LogSplitter` will split parsed logs into different files based on the entry type.
+    The output format can be set for each entry type."""

-    def __init__(self, output_factory=Callable[[str], TextIO], output_entry_type=False) -> None:
+    def __init__(self, output_factory=Callable[[str, LogSplitterFormats], TextIO],
+                 output_entry_type=False) -> None:
         self.output_factory = output_factory
         self.outputs: Dict[str, Tuple[Callable[[LogEntry], None], TextIO]] = {}
+        self.formats: Dict[str, LogSplitterFormats] = {}
@@ -137,8 +139,9 @@ class LogSplitter:
         write, _ = self.outputs.get(entry.entry_type, (None, None))

         if write is None:
-            output_stream = self.output_factory(entry.entry_type)
+            output_format = self.formats.get(entry.entry_type, LogSplitterFormats.csv)
+            output_stream = self.output_factory(entry.entry_type, output_format)

+            write = self._formatting_writer(entry, output_stream, output_format)
             self.outputs[entry.entry_type] = write, output_stream
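Neither set_format nor _formatting_writer is shown in this diff, so the dispatch is inferred: set_format evidently records a per-entry-type format in self.formats (defaulting to csv, per the .get above), and _formatting_writer returns a callable that serializes an entry in the chosen format. A self-contained sketch of that idea, not the repository's implementation:

import csv
import json
from enum import Enum
from typing import Callable, Dict, List, TextIO


class LogSplitterFormats(Enum):
    csv = 'csv'
    jsonl = 'jsonl'


def formatting_writer(fieldnames: List[str], stream: TextIO,
                      fmt: LogSplitterFormats) -> Callable[[Dict], None]:
    # Assumed behavior: jsonl emits one JSON object per line; csv emits a
    # header once, then one row per entry.
    if fmt is LogSplitterFormats.jsonl:
        def write(entry: Dict) -> None:
            stream.write(json.dumps(entry) + '\n')
        return write

    writer = csv.DictWriter(stream, fieldnames=fieldnames)
    writer.writeheader()

    def write_row(entry: Dict) -> None:
        writer.writerow(entry)
    return write_row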
@@ -147,7 +147,7 @@ def test_should_split_intertwined_logs_by_entry_type():
     outputs = defaultdict(StringIO)

     splitter = LogSplitter(
-        output_factory=lambda entry_type: outputs[entry_type],
+        output_factory=lambda entry_type, _: outputs[entry_type],
     )

     splitter.split(parser.parse(log))
@@ -202,7 +202,7 @@ def test_should_store_split_logs_as_jsonl_for_requested_types():
     outputs = defaultdict(StringIO)

     splitter = LogSplitter(
-        output_factory=lambda entry_type: outputs[entry_type],
+        output_factory=lambda entry_type, _: outputs[entry_type],
     )

     splitter.set_format(SimpleEvent, LogSplitterFormats.jsonl)
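End to end, post-patch usage matches the tests above: the factory now takes the format as a second argument and may ignore it (in-memory tests) or honor it (real files). A short sketch, with a hypothetical entry class standing in for SimpleEvent:

from collections import defaultdict
from io import StringIO

from benchmarks.core.logging import LogSplitter, LogSplitterFormats

outputs = defaultdict(StringIO)

# In-memory variant, as in the tests: the format argument is accepted but unused.
splitter = LogSplitter(
    output_factory=lambda entry_type, _: outputs[entry_type],
)

# For files on disk the factory can honor the format instead, e.g.:
#   output_factory=lambda entry_type, fmt: open(f'{entry_type}.{fmt.value}', 'w')
# and any type registered via splitter.set_format(SomeEntry, LogSplitterFormats.jsonl)
# then lands in some_entry.jsonl rather than some_entry.csv.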