mirror of
https://github.com/logos-storage/bittorrent-benchmarks.git
synced 2026-01-02 13:03:13 +00:00
feat: add supporting scripts to run workflow retries
This commit is contained in:
parent
5a9543259b
commit
2061fe6dbe
52
benchmarks/k8s/collect_failed_inputs.py
Normal file
52
benchmarks/k8s/collect_failed_inputs.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""Queries the Argo Workflows API and collects inputs for failed nodes matching a given template name and group ID.
|
||||
We need this because Argo will not respect parallelism in retries, so we spin the retry as a new workflow."""
|
||||
|
||||
import sys
|
||||
from typing import Dict, Any
|
||||
|
||||
import requests
|
||||
import json
|
||||
|
||||
from benchmarks.k8s.parameter_expander import normalize_argo_params
|
||||
|
||||
|
||||
def collect_failed_inputs(group_id: str, template: str, workflows: Dict[str, Any]):
    """Yield normalized input parameters for every failed Argo node that was
    created from *template* and belongs to the workflow group *group_id*.

    :param group_id: value of the ``groupId`` input parameter to match.
    :param template: Argo template name the node must have been spawned from.
    :param workflows: decoded JSON response of the Argo Workflows list API;
        expected to contain an ``items`` list of workflow objects.
    """

    def _belongs_to_group(pars) -> bool:
        # A node belongs to the group when one of its input parameters is
        # named "groupId" and carries the requested group id value.
        return any(
            parameter.get("name") == "groupId" and parameter.get("value") == group_id
            for parameter in pars
        )

    for workflow in workflows["items"]:
        # Only the node objects are needed, so iterate values() rather than
        # items() with an unused key (ruff PERF102).
        for node in workflow["status"].get("nodes", {}).values():
            if node.get("templateName") != template:
                continue

            if node.get("phase") != "Failed":
                continue

            # "parameters" is a list of {"name": ..., "value": ...} dicts, so
            # the empty fallback should be a list, not a dict.
            parameters = node.get("inputs", {}).get("parameters", [])
            if not parameters:
                continue

            if not _belongs_to_group(parameters):
                continue

            yield normalize_argo_params(parameters)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly four CLI arguments are required.
    if len(sys.argv) != 5:
        print(
            "Usage: collect_failed_inputs.py <group_id> <template> <argo_api_host> <argo_api_port>"
        )
        sys.exit(1)

    group_id, template, argo_api_host, argo_api_port = sys.argv[1:]

    # NOTE(review): verify=False disables TLS certificate validation. This is
    # presumably intended for an in-cluster, self-signed Argo endpoint — confirm.
    response = requests.get(
        f"https://{argo_api_host}:{argo_api_port}/api/v1/workflows/argo", verify=False
    )
    workflows = response.json()

    # Emit the collected inputs as a JSON array on stdout.
    print(json.dumps(list(collect_failed_inputs(group_id, template, workflows))))
|
||||
18
benchmarks/k8s/increment_retry_counter.py
Normal file
18
benchmarks/k8s/increment_retry_counter.py
Normal file
@ -0,0 +1,18 @@
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
def increment_retry_counter(group_id: str) -> str:
    """Return *group_id* with its ``-r<N>`` retry suffix incremented.

    A group id without a retry suffix gets ``-r1`` appended; an existing
    ``-r<N>`` suffix is bumped to ``-r<N+1>``.
    """
    suffix_match = re.match(r"(.+)-r(\d+)$", group_id)
    if suffix_match is None:
        # No retry suffix yet — this is the first retry.
        return f"{group_id}-r1"

    base = suffix_match.group(1)
    next_counter = int(suffix_match.group(2)) + 1
    return f"{base}-r{next_counter}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly one CLI argument is required: the group id to bump.
    args = sys.argv[1:]
    if len(args) != 1:
        print("Usage: increment_retry_counter.py <group_id>")
        sys.exit(1)

    print(increment_retry_counter(args[0]))
|
||||
90
benchmarks/k8s/tests/test_collect_failed_inputs.py
Normal file
90
benchmarks/k8s/tests/test_collect_failed_inputs.py
Normal file
@ -0,0 +1,90 @@
|
||||
from benchmarks.k8s.collect_failed_inputs import collect_failed_inputs
|
||||
|
||||
# Canned Argo Workflows list-API response used as a fixture. It covers the
# interesting cases: a workflow without a "nodes" key, failed nodes for two
# different group ids ("g1" and "g3"), a succeeded node that must be skipped,
# and an omitted node with no inputs at all ("cleanup").
API_RESPONSE = {
    "items": [
        {"status": {"no-nodes": []}},
        {
            "status": {
                "nodes": {
                    "codex-benchmark-4pkjd-1084037939": {
                        "templateName": "wrapped-benchmark-experiment",
                        "phase": "Failed",
                        "inputs": {
                            "parameters": [
                                {"name": "groupId", "value": "g1"},
                                {"name": "seeders", "value": "1"},
                                {"name": "repetitions", "value": "5"},
                            ]
                        },
                    },
                    "codex-benchmark-4pkjd-1084037941": {
                        "templateName": "wrapped-benchmark-experiment",
                        "phase": "Failed",
                        "inputs": {
                            "parameters": [
                                {"name": "groupId", "value": "g3"},
                                {"name": "seeders", "value": "1"},
                                {"name": "repetitions", "value": "7"},
                            ]
                        },
                    },
                    "codex-benchmark-4pkjd-1084037940": {
                        "templateName": "some-other-node",
                        "phase": "Succeeded",
                        "inputs": {
                            "parameters": [
                                {"name": "groupId", "value": "g1"},
                                {"name": "seeders", "value": "1"},
                                {"name": "repetitions", "value": "6"},
                            ]
                        },
                    },
                    "codex-benchmark-4pkjd-1118304667": {
                        "templateName": "cleanup",
                        "phase": "Omitted",
                    },
                }
            }
        },
    ]
}
|
||||
|
||||
|
||||
def test_should_extract_parameters_for_failed_nodes_matching_template():
    # Each group id yields exactly the inputs of its own failed node.
    g1_inputs = list(
        collect_failed_inputs(
            group_id="g1",
            template="wrapped-benchmark-experiment",
            workflows=API_RESPONSE,
        )
    )
    assert g1_inputs == [{"groupId": "g1", "seeders": 1, "repetitions": 5}]

    g3_inputs = list(
        collect_failed_inputs(
            group_id="g3",
            template="wrapped-benchmark-experiment",
            workflows=API_RESPONSE,
        )
    )
    assert g3_inputs == [{"groupId": "g3", "seeders": 1, "repetitions": 7}]
|
||||
|
||||
assert list(
|
||||
collect_failed_inputs(
|
||||
group_id="g3",
|
||||
template="wrapped-benchmark-experiment",
|
||||
workflows=API_RESPONSE,
|
||||
)
|
||||
) == [{"groupId": "g3", "seeders": 1, "repetitions": 7}]
|
||||
|
||||
|
||||
def test_should_return_empty_if_no_failing_nodes():
    # The only "some-other-node" node in the fixture succeeded, so nothing
    # should be collected for that template.
    results = collect_failed_inputs(
        group_id="g1", template="some-other-node", workflows=API_RESPONSE
    )
    assert list(results) == []
|
||||
|
||||
|
||||
def test_should_return_empty_if_no_matching_group_id():
    # Use a template that DOES have failed nodes in the fixture, so an empty
    # result can only come from the group-id filter. (The original used
    # "some-other-node", whose only node is Succeeded — that returns empty for
    # any group id and never exercised the group-id filter at all.)
    results = collect_failed_inputs(
        group_id="g5",
        template="wrapped-benchmark-experiment",
        workflows=API_RESPONSE,
    )
    assert list(results) == []
|
||||
12
benchmarks/k8s/tests/test_increment_retry_counter.py
Normal file
12
benchmarks/k8s/tests/test_increment_retry_counter.py
Normal file
@ -0,0 +1,12 @@
|
||||
from benchmarks.k8s.increment_retry_counter import increment_retry_counter
|
||||
|
||||
|
||||
def test_should_add_counter_if_absent():
    # A bare group id gains the first retry suffix.
    result = increment_retry_counter("sometestgroup")
    assert result == "sometestgroup-r1"
|
||||
|
||||
|
||||
def test_should_increment_counter_if_present():
    # Existing -rN suffixes are bumped by one, including multi-digit counters.
    cases = {
        "sometestgroup-r1": "sometestgroup-r2",
        "sometestgroup-r2": "sometestgroup-r3",
        "sometestgroup-r10": "sometestgroup-r11",
        "sometestgroup-r100": "sometestgroup-r101",
    }
    for given, expected in cases.items():
        assert increment_retry_counter(given) == expected
|
||||
Loading…
x
Reference in New Issue
Block a user