From bf844a630552b75fefea5ffcc5f0bbe236511fd0 Mon Sep 17 00:00:00 2001
From: gmega
Date: Thu, 12 Dec 2024 12:24:39 -0300
Subject: [PATCH] output proper extension for jsonl data

---
 benchmarks/cli.py                     | 7 ++++---
 benchmarks/core/logging.py            | 7 +++++--
 benchmarks/core/tests/test_logging.py | 4 ++--
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/benchmarks/cli.py b/benchmarks/cli.py
index c6611ed..51fa49a 100644
--- a/benchmarks/cli.py
+++ b/benchmarks/cli.py
@@ -8,7 +8,7 @@ from pydantic_core import ValidationError
 
 from benchmarks.core.config import ConfigParser, ExperimentBuilder
 from benchmarks.core.experiments.experiments import Experiment
-from benchmarks.core.logging import basic_log_parser, LogSplitter, LogEntry
+from benchmarks.core.logging import basic_log_parser, LogSplitter, LogEntry, LogSplitterFormats
 from benchmarks.deluge.config import DelugeExperimentConfig
 from benchmarks.deluge.logging import DelugeTorrentDownload
 
@@ -61,11 +61,12 @@ def cmd_logs(log: Path, output: Path):
 
     output.mkdir(exist_ok=True)
 
-    def output_factory(event_type: str):
-        return (output / f'{event_type}.csv').open('w', encoding='utf-8')
+    def output_factory(event_type: str, format: LogSplitterFormats):
+        return (output / f'{event_type}.{format.value}').open('w', encoding='utf-8')
 
     with (log.open('r', encoding='utf-8') as istream,
           LogSplitter(output_factory) as splitter):
+        splitter.set_format(DECLogEntry, LogSplitterFormats.jsonl)
         splitter.split(log_parser.parse(istream))
 
 
diff --git a/benchmarks/core/logging.py b/benchmarks/core/logging.py
index 296ed2a..df23af0 100644
--- a/benchmarks/core/logging.py
+++ b/benchmarks/core/logging.py
@@ -74,6 +74,7 @@ class AdaptedLogEntry(LogEntry, ABC):
     def recover_instance(self) -> SnakeCaseModel:
         pass
 
+
 class LogParser:
     """:class:`LogParser` will pick up log entries from a stream and parse them into :class:`LogEntry` instances.
     It works by trying to find a special marker (>>>) in the log line, and then parsing the JSON that follows it.
@@ -123,7 +124,8 @@ class LogSplitter:
     """:class:`LogSplitter` will split parsed logs into different files based on the
     entry type. The output format can be set for each entry type."""
 
-    def __init__(self, output_factory=Callable[[str], TextIO], output_entry_type=False) -> None:
+    def __init__(self, output_factory=Callable[[str, LogSplitterFormats], TextIO],
+                 output_entry_type=False) -> None:
         self.output_factory = output_factory
         self.outputs: Dict[str, Tuple[Callable[[LogEntry], None], TextIO]] = {}
         self.formats: Dict[str, LogSplitterFormats] = {}
@@ -137,8 +139,9 @@ class LogSplitter:
         write, _ = self.outputs.get(entry.entry_type, (None, None))
 
         if write is None:
-            output_stream = self.output_factory(entry.entry_type)
             output_format = self.formats.get(entry.entry_type, LogSplitterFormats.csv)
+            output_stream = self.output_factory(entry.entry_type, output_format)
+
             write = self._formatting_writer(entry, output_stream, output_format)
             self.outputs[entry.entry_type] = write, output_stream
 
diff --git a/benchmarks/core/tests/test_logging.py b/benchmarks/core/tests/test_logging.py
index 50441a3..c33bbd5 100644
--- a/benchmarks/core/tests/test_logging.py
+++ b/benchmarks/core/tests/test_logging.py
@@ -147,7 +147,7 @@ def test_should_split_intertwined_logs_by_entry_type():
 
     outputs = defaultdict(StringIO)
     splitter = LogSplitter(
-        output_factory=lambda entry_type: outputs[entry_type],
+        output_factory=lambda entry_type, _: outputs[entry_type],
     )
 
     splitter.split(parser.parse(log))
@@ -202,7 +202,7 @@ def test_should_store_split_logs_as_jsonl_for_requested_types():
 
     outputs = defaultdict(StringIO)
     splitter = LogSplitter(
-        output_factory=lambda entry_type: outputs[entry_type],
+        output_factory=lambda entry_type, _: outputs[entry_type],
     )
 
     splitter.set_format(SimpleEvent, LogSplitterFormats.jsonl)
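---

Note: a minimal usage sketch of the reworked output_factory contract, mirroring
cmd_logs above. The factory now receives the output format alongside the entry
type, so the file extension can follow the format (csv by default, jsonl where
requested via set_format) instead of being hardcoded to .csv. The paths, the
zero-argument basic_log_parser() call, and the choice of DelugeTorrentDownload
as the jsonl-formatted type are illustrative assumptions, not part of this patch.

    from pathlib import Path

    from benchmarks.core.logging import LogSplitter, LogSplitterFormats, basic_log_parser
    from benchmarks.deluge.logging import DelugeTorrentDownload

    log, out = Path('experiment.log'), Path('out')  # illustrative paths
    out.mkdir(exist_ok=True)

    log_parser = basic_log_parser()  # assumed to mirror cli.py's module-level parser

    # One output file per entry type; the extension now tracks the format
    # that LogSplitter resolves for that type.
    def output_factory(event_type: str, format: LogSplitterFormats):
        return (out / f'{event_type}.{format.value}').open('w', encoding='utf-8')

    with (log.open('r', encoding='utf-8') as istream,
          LogSplitter(output_factory) as splitter):
        # Types registered here are written as .jsonl; all others fall
        # back to LogSplitterFormats.csv.
        splitter.set_format(DelugeTorrentDownload, LogSplitterFormats.jsonl)
        splitter.split(log_parser.parse(istream))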