From 0530b4a10bc4d9a2bc093b233f3b6710548a48bc Mon Sep 17 00:00:00 2001 From: gmega Date: Fri, 14 Feb 2025 18:19:45 -0300 Subject: [PATCH] feat: standardize download metric --- benchmarks/cli.py | 2 - benchmarks/codex/agent/agent.py | 7 +- .../codex/agent/tests/test_codex_agent.py | 81 +++++++++++++------ benchmarks/codex/logging.py | 6 -- benchmarks/deluge/logging.py | 5 -- benchmarks/logging/logging.py | 6 ++ .../benchmarks-2025.01.21-documents.json | 6 +- .../sources/tests/test_logstash_source.py | 19 +++-- 8 files changed, 79 insertions(+), 53 deletions(-) delete mode 100644 benchmarks/codex/logging.py delete mode 100644 benchmarks/deluge/logging.py diff --git a/benchmarks/cli.py b/benchmarks/cli.py index 008de77..2da7d51 100644 --- a/benchmarks/cli.py +++ b/benchmarks/cli.py @@ -17,7 +17,6 @@ from benchmarks.core.config import ConfigParser, Builder from benchmarks.core.experiments.experiments import Experiment, ExperimentBuilder from benchmarks.deluge.agent.api import DelugeAgentConfig from benchmarks.deluge.config import DelugeExperimentConfig -from benchmarks.deluge.logging import DelugeTorrentDownload from benchmarks.logging.logging import ( basic_log_parser, LogSplitter, @@ -41,7 +40,6 @@ agent_config_parser.register(DelugeAgentConfig) agent_config_parser.register(CodexAgentConfig) log_parser = basic_log_parser() -log_parser.register(DelugeTorrentDownload) config_adapters = ConfigToLogAdapters() log_parser.register(config_adapters.adapt(DelugeExperimentConfig)) diff --git a/benchmarks/codex/agent/agent.py b/benchmarks/codex/agent/agent.py index 6141a99..2de32eb 100644 --- a/benchmarks/codex/agent/agent.py +++ b/benchmarks/codex/agent/agent.py @@ -10,8 +10,8 @@ from pydantic import BaseModel from benchmarks.codex.client.async_client import AsyncCodexClient from benchmarks.codex.client.common import Cid from benchmarks.codex.client.common import Manifest -from benchmarks.codex.logging import CodexDownloadMetric from benchmarks.core.utils.random import random_data +from benchmarks.logging.logging import DownloadMetric EMPTY_STREAM_BACKOFF = 2 @@ -60,8 +60,9 @@ class DownloadHandle: if int(self.bytes_downloaded / step_size) > logged_step: logged_step += 1 logger.info( - CodexDownloadMetric( - cid=self.manifest.cid, + DownloadMetric( + dataset_name=self.manifest.filename, + handle=self.manifest.cid, value=step_size * logged_step, node=self.parent.node_id, ) diff --git a/benchmarks/codex/agent/tests/test_codex_agent.py b/benchmarks/codex/agent/tests/test_codex_agent.py index fb94007..a3bba87 100644 --- a/benchmarks/codex/agent/tests/test_codex_agent.py +++ b/benchmarks/codex/agent/tests/test_codex_agent.py @@ -9,10 +9,9 @@ import pytest from benchmarks.codex.agent.agent import CodexAgent, DownloadStatus from benchmarks.codex.client.async_client import AsyncCodexClient from benchmarks.codex.client.common import Manifest, Cid -from benchmarks.codex.logging import CodexDownloadMetric from benchmarks.core.concurrency import await_predicate_async from benchmarks.core.utils.streams import BaseStreamReader -from benchmarks.logging.logging import LogParser +from benchmarks.logging.logging import LogParser, DownloadMetric class FakeCodexClient(AsyncCodexClient): @@ -128,25 +127,42 @@ async def test_should_log_download_progress_as_metric_in_discrete_steps(mock_log await handle.download_task parser = LogParser() - parser.register(CodexDownloadMetric) + parser.register(DownloadMetric) metrics = list(parser.parse(StringIO(output.getvalue()))) assert metrics == [ - CodexDownloadMetric( - cid=cid, value=200, node=codex_agent.node_id, timestamp=metrics[0].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=200, + node=codex_agent.node_id, + timestamp=metrics[0].timestamp, ), - CodexDownloadMetric( - cid=cid, value=400, node=codex_agent.node_id, timestamp=metrics[1].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=400, + node=codex_agent.node_id, + timestamp=metrics[1].timestamp, ), - CodexDownloadMetric( - cid=cid, value=600, node=codex_agent.node_id, timestamp=metrics[2].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=600, + node=codex_agent.node_id, + timestamp=metrics[2].timestamp, ), - CodexDownloadMetric( - cid=cid, value=800, node=codex_agent.node_id, timestamp=metrics[3].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=800, + node=codex_agent.node_id, + timestamp=metrics[3].timestamp, ), - CodexDownloadMetric( - cid=cid, + DownloadMetric( + dataset_name="dataset-1", + handle=cid, value=1000, node=codex_agent.node_id, timestamp=metrics[4].timestamp, @@ -183,25 +199,42 @@ async def test_should_log_download_progress_as_discrete_steps_even_when_underlyi await handle.download_task parser = LogParser() - parser.register(CodexDownloadMetric) + parser.register(DownloadMetric) metrics = list(parser.parse(StringIO(output.getvalue()))) assert metrics == [ - CodexDownloadMetric( - cid=cid, value=200, node=codex_agent.node_id, timestamp=metrics[0].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=200, + node=codex_agent.node_id, + timestamp=metrics[0].timestamp, ), - CodexDownloadMetric( - cid=cid, value=400, node=codex_agent.node_id, timestamp=metrics[1].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=400, + node=codex_agent.node_id, + timestamp=metrics[1].timestamp, ), - CodexDownloadMetric( - cid=cid, value=600, node=codex_agent.node_id, timestamp=metrics[2].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=600, + node=codex_agent.node_id, + timestamp=metrics[2].timestamp, ), - CodexDownloadMetric( - cid=cid, value=800, node=codex_agent.node_id, timestamp=metrics[3].timestamp + DownloadMetric( + dataset_name="dataset-1", + handle=cid, + value=800, + node=codex_agent.node_id, + timestamp=metrics[3].timestamp, ), - CodexDownloadMetric( - cid=cid, + DownloadMetric( + dataset_name="dataset-1", + handle=cid, value=1000, node=codex_agent.node_id, timestamp=metrics[4].timestamp, diff --git a/benchmarks/codex/logging.py b/benchmarks/codex/logging.py deleted file mode 100644 index 0e99515..0000000 --- a/benchmarks/codex/logging.py +++ /dev/null @@ -1,6 +0,0 @@ -from benchmarks.logging.logging import Metric - - -class CodexDownloadMetric(Metric): - name: str = "codex_download" - cid: str diff --git a/benchmarks/deluge/logging.py b/benchmarks/deluge/logging.py deleted file mode 100644 index 0c226c8..0000000 --- a/benchmarks/deluge/logging.py +++ /dev/null @@ -1,5 +0,0 @@ -from benchmarks.logging.logging import Metric - - -class DelugeTorrentDownload(Metric): - torrent_name: str diff --git a/benchmarks/logging/logging.py b/benchmarks/logging/logging.py index 5ff9900..abf8a4b 100644 --- a/benchmarks/logging/logging.py +++ b/benchmarks/logging/logging.py @@ -242,6 +242,11 @@ class Metric(NodeEvent): value: int | float +class DownloadMetric(Metric): + name: str = "download" + dataset_name: str + + class RequestEventType(Enum): start = "start" end = "end" @@ -265,6 +270,7 @@ def basic_log_parser() -> LogParser: parser.register(Event) parser.register(NodeEvent) parser.register(Metric) + parser.register(DownloadMetric) parser.register(RequestEvent) parser.register(ExperimentStatus) return parser diff --git a/benchmarks/logging/sources/tests/benchmarks-2025.01.21-documents.json b/benchmarks/logging/sources/tests/benchmarks-2025.01.21-documents.json index f3deeb7..5e4b172 100644 --- a/benchmarks/logging/sources/tests/benchmarks-2025.01.21-documents.json +++ b/benchmarks/logging/sources/tests/benchmarks-2025.01.21-documents.json @@ -3,7 +3,7 @@ { "@timestamp": "2025-01-21T12:47:57.098459487Z", "file": "/var/log/pods/codex-benchmarks_deluge-nodes-e3-g3-1_70c962b4-8fa3-43be-a664-aefb793c7b8c/deluge-node/0.log", - "message": "12:47:57.098 [INFO ][deluge.core.metrics :49 ] >>{\"entry_type\": \"deluge_torrent_download\", \"timestamp\": \"2025-01-21T12:47:57.098167+00:00\", \"name\": \"deluge_piece_downloaded\", \"value\": 310, \"node\": \"deluge-nodes-e3-g3-1\", \"torrent_name\": \"dataset-0-1\"}", + "message": "12:47:57.098 [INFO ][deluge.core.metrics :49 ] >>{\"entry_type\": \"download_metric\", \"timestamp\": \"2025-01-21T12:47:57.098167+00:00\", \"name\": \"deluge_piece_downloaded\", \"value\": 310, \"node\": \"deluge-nodes-e3-g3-1\", \"dataset_name\": \"dataset-0-1\"}", "pod_labels": { "app.kubernetes.io/component": "deluge-node", "app.kubernetes.io/instance": "e3", @@ -41,7 +41,7 @@ { "@timestamp": "2025-01-21T12:47:15.847325207Z", "file": "/var/log/pods/codex-benchmarks_deluge-nodes-e3-g3-0_c42bf4d9-4f1d-40b2-9654-5de197153ac0/deluge-node/0.log", - "message": "12:47:15.847 [INFO ][deluge.core.metrics :49 ] >>{\"entry_type\": \"deluge_torrent_download\", \"timestamp\": \"2025-01-21T12:47:15.846761+00:00\", \"name\": \"deluge_piece_downloaded\", \"value\": 23, \"node\": \"deluge-nodes-e3-g3-0\", \"torrent_name\": \"dataset-0-0\"}", + "message": "12:47:15.847 [INFO ][deluge.core.metrics :49 ] >>{\"entry_type\": \"download_metric\", \"timestamp\": \"2025-01-21T12:47:15.846761+00:00\", \"name\": \"deluge_piece_downloaded\", \"value\": 23, \"node\": \"deluge-nodes-e3-g3-0\", \"dataset_name\": \"dataset-0-0\"}", "pod_labels": { "app.kubernetes.io/component": "deluge-node", "app.kubernetes.io/instance": "e3", @@ -60,7 +60,7 @@ { "@timestamp": "2025-01-21T12:47:57.123446028Z", "file": "/var/log/pods/codex-benchmarks_deluge-nodes-e3-g3-1_70c962b4-8fa3-43be-a664-aefb793c7b8c/deluge-node/0.log", - "message": "12:47:57.123 [INFO ][deluge.core.metrics :49 ] >>{\"entry_type\": \"deluge_torrent_download\", \"timestamp\": \"2025-01-21T12:47:57.123105+00:00\", \"name\": \"deluge_piece_downloaded\", \"value\": 218, \"node\": \"deluge-nodes-e2-g2-1\", \"torrent_name\": \"dataset-0-1\"}", + "message": "12:47:57.123 [INFO ][deluge.core.metrics :49 ] >>{\"entry_type\": \"download_metric\", \"timestamp\": \"2025-01-21T12:47:57.123105+00:00\", \"name\": \"deluge_piece_downloaded\", \"value\": 218, \"node\": \"deluge-nodes-e2-g2-1\", \"dataset_name\": \"dataset-0-1\"}", "pod_labels": { "app.kubernetes.io/component": "deluge-node", "app.kubernetes.io/instance": "e2", diff --git a/benchmarks/logging/sources/tests/test_logstash_source.py b/benchmarks/logging/sources/tests/test_logstash_source.py index 22efc40..fcae728 100644 --- a/benchmarks/logging/sources/tests/test_logstash_source.py +++ b/benchmarks/logging/sources/tests/test_logstash_source.py @@ -1,8 +1,7 @@ import pytest from elasticsearch import Elasticsearch -from benchmarks.deluge.logging import DelugeTorrentDownload -from benchmarks.logging.logging import LogParser +from benchmarks.logging.logging import LogParser, DownloadMetric from datetime import datetime, timezone, date from benchmarks.logging.sources.logstash import LogstashSource @@ -65,36 +64,36 @@ def test_should_retrieve_logs_for_single_experiment(benchmark_logs_client): ) parser = LogParser() - parser.register(DelugeTorrentDownload) + parser.register(DownloadMetric) entries = parser.parse(_log_lines(source, "e3", "g3")) assert list(entries) == [ - DelugeTorrentDownload( + DownloadMetric( name="deluge_piece_downloaded", timestamp=datetime(2025, 1, 21, 12, 47, 15, 846761, tzinfo=timezone.utc), value=23, node="deluge-nodes-e3-g3-0", - torrent_name="dataset-0-0", + dataset_name="dataset-0-0", ), - DelugeTorrentDownload( + DownloadMetric( name="deluge_piece_downloaded", timestamp=datetime(2025, 1, 21, 12, 47, 57, 98167, tzinfo=timezone.utc), value=310, node="deluge-nodes-e3-g3-1", - torrent_name="dataset-0-1", + dataset_name="dataset-0-1", ), ] entries = parser.parse(_log_lines(source, "e2", "g3")) assert list(entries) == [ - DelugeTorrentDownload( + DownloadMetric( name="deluge_piece_downloaded", timestamp=datetime(2025, 1, 21, 12, 47, 57, 123105, tzinfo=timezone.utc), value=218, node="deluge-nodes-e2-g2-1", - torrent_name="dataset-0-1", + dataset_name="dataset-0-1", ), ] @@ -106,7 +105,7 @@ def test_should_return_empty_data_for_non_existing_experiments(benchmark_logs_cl ) parser = LogParser() - parser.register(DelugeTorrentDownload) + parser.register(DownloadMetric) lines = source.logs(experiment_id="e0", group_id="g0")