feat: add working Codex helm chart

This commit is contained in:
gmega 2025-02-14 11:00:17 -03:00
parent 6681922e00
commit 68ee1bad87
No known key found for this signature in database
GPG Key ID: 6290D34EAD824B18
37 changed files with 604 additions and 76 deletions

View File

@ -33,7 +33,6 @@ deluge-integration:
echo "NOTE: Make sure to have started the Deluge integration harness or this will not work"
poetry run pytest -m "deluge_integration"
codex-integration:
echo "NOTE: Make sure to have started the Codex integration harness or this will not work"
poetry run pytest -m "codex_integration"
@ -41,7 +40,18 @@ codex-integration:
image-test:
docker build -t bittorrent-benchmarks:test -f ./docker/bittorrent-benchmarks.Dockerfile .
image-minikube:
export CODEX_REPO_PATH
codex-image-minikube:
@if [ -z "$(CODEX_REPO_PATH)" ]; then \
echo "Error: CODEX_REPO_PATH environment variable is not set"; \
exit 1; \
fi
eval $$(minikube docker-env) && \
cd ${CODEX_REPO_PATH} && \
docker build -t nim-codex:minikube -f ./docker/codex.Dockerfile .
harness-image-minikube:
eval $$(minikube docker-env) && \
docker build -t bittorrent-benchmarks:minikube \
--build-arg BUILD_TYPE="release" \

View File

@ -11,6 +11,7 @@ from pydantic_core import ValidationError
from typing_extensions import TypeVar
from benchmarks.codex.agent.api import CodexAgentConfig
from benchmarks.codex.config import CodexExperimentConfig
from benchmarks.core.agent import AgentBuilder
from benchmarks.core.config import ConfigParser, Builder
from benchmarks.core.experiments.experiments import Experiment, ExperimentBuilder
@ -20,8 +21,8 @@ from benchmarks.deluge.logging import DelugeTorrentDownload
from benchmarks.logging.logging import (
basic_log_parser,
LogSplitter,
LogEntry,
LogSplitterFormats,
ConfigToLogAdapters,
)
from benchmarks.logging.sources.logstash import LogstashSource
from benchmarks.logging.sources.sources import (
@ -33,6 +34,7 @@ from benchmarks.logging.sources.vector_flat_file import VectorFlatFileSource
experiment_config_parser = ConfigParser[ExperimentBuilder]()
experiment_config_parser.register(DelugeExperimentConfig)
experiment_config_parser.register(CodexExperimentConfig)
agent_config_parser = ConfigParser[AgentBuilder]()
agent_config_parser.register(DelugeAgentConfig)
@ -41,8 +43,9 @@ agent_config_parser.register(CodexAgentConfig)
log_parser = basic_log_parser()
log_parser.register(DelugeTorrentDownload)
DECLogEntry = LogEntry.adapt(DelugeExperimentConfig)
log_parser.register(DECLogEntry)
config_adapters = ConfigToLogAdapters()
log_parser.register(config_adapters.adapt(DelugeExperimentConfig))
log_parser.register(config_adapters.adapt(CodexExperimentConfig))
logger = logging.getLogger(__name__)
@ -59,7 +62,7 @@ def cmd_run_experiment(experiments: Dict[str, ExperimentBuilder[Experiment]], ar
sys.exit(-1)
experiment = experiments[args.experiment]
logger.info(DECLogEntry.adapt_instance(experiment))
logger.info(config_adapters.adapt_instance(experiment))
experiment.build().run()
print(f"Experiment {args.experiment} completed successfully.")
@ -93,7 +96,8 @@ def cmd_parse_single_log(log: Path, output: Path):
log.open("r", encoding="utf-8") as istream,
LogSplitter(output_factory) as splitter,
):
splitter.set_format(DECLogEntry, LogSplitterFormats.jsonl)
for adapted_config in config_adapters.adapted_types():
splitter.set_format(adapted_config, LogSplitterFormats.jsonl)
splitter.split(log_parser.parse(istream))
@ -113,7 +117,10 @@ def cmd_split_log_source(source: LogSource, group_id: str, output_dir: Path):
log_parser,
output_manager,
group_id,
formats=[(DECLogEntry, LogSplitterFormats.jsonl)],
formats=[
(adapted_config, LogSplitterFormats.jsonl)
for adapted_config in config_adapters.adapted_types()
],
)
@ -149,6 +156,7 @@ def _parse_config(
print(f"Config file {config} does not exist.")
sys.exit(-1)
print(f"Read config file: {config}.")
with config.open(encoding="utf-8") as infile:
try:
return parser.parse(infile)
@ -336,6 +344,9 @@ def main():
args = parser.parse_args()
# So we see clearly in logs if this gets restarted.
print("### Start Codex benchmarks CLI", file=sys.stderr)
_init_logging()
args.func(args)

View File

@ -13,7 +13,7 @@ from benchmarks.codex.client.common import Manifest
from benchmarks.codex.logging import CodexDownloadMetric
from benchmarks.core.utils.random import random_data
EMPTY_STREAM_BACKOFF = 0.1
EMPTY_STREAM_BACKOFF = 2
logger = logging.getLogger(__name__)

View File

@ -1,11 +1,12 @@
from urllib3.util import Url, parse_url
import requests
import socket
from benchmarks.codex.agent.agent import DownloadStatus
from benchmarks.core.experiments.experiments import ExperimentComponent
import requests
from requests.exceptions import ConnectionError
from urllib3.util import Url, parse_url
from benchmarks.codex.agent.agent import DownloadStatus
from benchmarks.codex.client.common import Cid
from benchmarks.core.experiments.experiments import ExperimentComponent
class CodexAgentClient(ExperimentComponent):
@ -62,3 +63,6 @@ class CodexAgentClient(ExperimentComponent):
response.raise_for_status()
return response.text
def __str__(self):
return f"CodexAgentClient({self.url.url})"

View File

@ -30,7 +30,6 @@ class FakeCodexClient(AsyncCodexClient):
blockSize=1,
filename=name,
treeCid="",
uploadedAt=0,
protected=False,
)
return cid

View File

@ -12,7 +12,6 @@ class Manifest(BaseModel):
blockSize: int
filename: str
mimetype: str
uploadedAt: int
protected: bool
@staticmethod

View File

@ -6,6 +6,7 @@ from urllib.error import HTTPError
import requests
from attr import dataclass
from requests.exceptions import ConnectionError
from tenacity import (
stop_after_attempt,
wait_exponential,
@ -115,6 +116,9 @@ class CodexNode(Node[Cid, CodexMeta], ExperimentComponent):
def name(self) -> str:
return self.agent.node_id()
def __str__(self):
return f"CodexNode({self.codex_api_url.url, self.agent})"
class CodexDownloadHandle(DownloadHandle):
def __init__(self, parent: CodexNode, monitor_url: Url):

View File

@ -2,7 +2,7 @@ import random
from itertools import islice
from typing import List, cast
from pydantic import Field
from pydantic import Field, model_validator
from pydantic_core import Url
from urllib3.util import parse_url
@ -28,6 +28,33 @@ class CodexNodeConfig(SnakeCaseModel):
agent_url: Url
class CodexNodeSetConfig(SnakeCaseModel):
network_size: int = Field(gt=1)
name: str
address: str
disc_port: int
api_port: int
first_node_index: int = 1
nodes: List[CodexNodeConfig] = []
agent_url: str
@model_validator(mode="after")
def expand_nodes(self):
self.nodes = [
CodexNodeConfig(
name=self.name.format(node_index=str(i)),
address=self.address.format(node_index=str(i)),
disc_port=self.disc_port,
api_port=self.api_port,
agent_url=self.agent_url.format(node_index=str(i)),
)
for i in range(
self.first_node_index, self.first_node_index + self.network_size
)
]
return self
CodexDisseminationExperiment = IteratedExperiment[
StaticDisseminationExperiment[Cid, CodexMeta]
]
@ -52,11 +79,17 @@ class CodexExperimentConfig(ExperimentBuilder[CodexDisseminationExperiment]):
description="Time to wait after the last download completes before tearing down the experiment.",
)
nodes: List[CodexNodeConfig]
nodes: List[CodexNodeConfig] | CodexNodeSetConfig
def build(self) -> CodexDisseminationExperiment:
node_specs = (
self.nodes.nodes
if isinstance(self.nodes, CodexNodeSetConfig)
else self.nodes
)
agents = [
CodexAgentClient(parse_url(str(node.agent_url))) for node in self.nodes
CodexAgentClient(parse_url(str(node.agent_url))) for node in node_specs
]
network = [
@ -64,7 +97,7 @@ class CodexExperimentConfig(ExperimentBuilder[CodexDisseminationExperiment]):
codex_api_url=parse_url(f"http://{str(node.address)}:{node.api_port}"),
agent=agents[i],
)
for i, node in enumerate(self.nodes)
for i, node in enumerate(node_specs)
]
env = ExperimentEnvironment(

View File

@ -0,0 +1,83 @@
from io import StringIO
from typing import cast
import yaml
from urllib3.util import parse_url
from benchmarks.codex.codex_node import CodexNode
from benchmarks.codex.config import (
CodexNodeConfig,
CodexNodeSetConfig,
CodexExperimentConfig,
)
def test_should_expand_node_sets_into_simple_nodes():
    # A node set is a template: the model validator should expand it into
    # `network_size` concrete CodexNodeConfig entries, substituting {node_index}
    # into name/address/agent_url starting from `first_node_index`.
    nodeset = CodexNodeSetConfig(
        network_size=3,
        first_node_index=0,
        name="codex-{node_index}",
        address="codex-{node_index}.local.svc",
        disc_port=6890,
        api_port=6891,
        agent_url="http://codex-{node_index}.local.svc:9000",
    )

    assert nodeset.nodes == [
        CodexNodeConfig(
            name="codex-0",
            address="codex-0.local.svc",
            disc_port=6890,
            api_port=6891,
            agent_url="http://codex-0.local.svc:9000",
        ),
        CodexNodeConfig(
            name="codex-1",
            address="codex-1.local.svc",
            disc_port=6890,
            api_port=6891,
            agent_url="http://codex-1.local.svc:9000",
        ),
        CodexNodeConfig(
            name="codex-2",
            address="codex-2.local.svc",
            disc_port=6890,
            api_port=6891,
            agent_url="http://codex-2.local.svc:9000",
        ),
    ]
def test_should_build_experiment_from_config():
    # End-to-end: a YAML config whose `nodes` entry is a node set should build
    # into repetitions * seeder_sets iterated experiments over the expanded nodes.
    config_file = StringIO("""
    codex_experiment:
      repetitions: 3
      seeders: 3
      seeder_sets: 3
      file_size: 1024
      logging_cooldown: 10
      nodes:
        network_size: 5
        first_node_index: 0
        name: "codex-nodes-{node_index}"
        address: "codex-nodes-{node_index}.codex-nodes-service.codex-benchmarks.svc.cluster.local"
        disc_port: 6890
        api_port: 6891
        agent_url: "http://codex-nodes-{node_index}.codex-nodes-service.codex-benchmarks.svc.cluster.local:9000/"
    """)

    config = CodexExperimentConfig.model_validate(
        yaml.safe_load(config_file)["codex_experiment"]
    )

    experiment = config.build()
    repetitions = list(experiment.experiments)
    # 3 repetitions x 3 seeder sets = 9 iterated experiments.
    assert len(repetitions) == 9
    assert len(repetitions[0].experiment.nodes) == 5
    # Node API URL is derived from the expanded address plus the api_port.
    assert cast(
        CodexNode, repetitions[0].experiment.nodes[4]
    ).codex_api_url == parse_url(
        "http://codex-nodes-4.codex-nodes-service.codex-benchmarks.svc.cluster.local:6891"
    )

View File

@ -83,7 +83,7 @@ class ExperimentEnvironment(ExperimentComponent):
def await_ready(self, timeout: float = 0) -> bool:
"""Awaits for all components to be ready, or until a timeout is reached."""
logging.info(
f"Awaiting for components to be ready: {self._component_names(self.not_ready)}"
f"Awaiting for components to be ready:\n {'\n'.join(self._component_names(self.not_ready))}"
)
if not await_predicate(self.is_ready, timeout, self.polling_interval):

View File

@ -108,6 +108,8 @@ class StaticDisseminationExperiment(
sleep(self.logging_cooldown)
def teardown(self, exception: Optional[Exception] = None):
logger.info("Tearing down experiment.")
def _remove(element: Tuple[int, Node[TNetworkHandle, TInitialMetadata]]):
index, node = element
# This means this node didn't even get to seed anything.

View File

@ -1,6 +1,5 @@
import random
from itertools import islice
from pathlib import Path
from typing import List
from pydantic import BaseModel, Field, model_validator, HttpUrl
@ -76,9 +75,6 @@ class DelugeExperimentConfig(ExperimentBuilder[DelugeDisseminationExperiment]):
)
file_size: int = Field(gt=0, description="File size, in bytes")
shared_volume_path: Path = Field(
description="Path to the volume shared between clients and experiment runner"
)
tracker_announce_url: HttpUrl = Field(
description="URL to the tracker announce endpoint"
)
@ -108,7 +104,6 @@ class DelugeExperimentConfig(ExperimentBuilder[DelugeDisseminationExperiment]):
network = [
DelugeNode(
name=node_spec.name,
volume=self.shared_volume_path,
daemon_port=node_spec.daemon_port,
daemon_address=str(node_spec.address),
agent=agents[i],

View File

@ -1,10 +1,8 @@
import base64
import logging
import shutil
import socket
from dataclasses import dataclass
from io import BytesIO
from pathlib import Path
from typing import List, Optional, Self, Dict, Any
import pathvalidate
@ -47,7 +45,6 @@ class DelugeNode(Node[Torrent, DelugeMeta], ExperimentComponent):
def __init__(
self,
name: str,
volume: Path,
daemon_port: int,
agent: DelugeAgentClient,
daemon_address: str = "localhost",
@ -58,8 +55,6 @@ class DelugeNode(Node[Torrent, DelugeMeta], ExperimentComponent):
raise ValueError(f'Node name must be a valid filename (bad name: "{name}")')
self._name = name
self.downloads_root = volume / "downloads"
self._rpc: Optional[DelugeRPCClient] = None
self.daemon_args = {
"host": daemon_address,
@ -81,16 +76,6 @@ class DelugeNode(Node[Torrent, DelugeMeta], ExperimentComponent):
if errors:
raise Exception(f"There were errors removing torrents: {errors}")
# Wipe download folder to get rid of files that got uploaded but failed
# seeding or deletes.
try:
shutil.rmtree(self.downloads_root)
except FileNotFoundError:
# If the call to remove_torrents succeeds, this might happen. Checking
# for existence won't protect you as the client might still delete the
# folder after your check, so this is the only sane way to do it.
pass
def genseed(
self,
size: int,

View File

@ -1,5 +1,4 @@
import os
from pathlib import Path
from typing import Generator
import pytest
@ -16,7 +15,6 @@ def deluge_node(
) -> Generator[DelugeNode, None, None]:
node = DelugeNode(
name,
volume=Path("/var/lib/deluge"),
daemon_address=address,
daemon_port=port,
agent=DelugeAgentClient(parse_url(agent_url)),

View File

@ -80,6 +80,31 @@ class AdaptedLogEntry(LogEntry, ABC):
pass
class ConfigToLogAdapters:
    """Utility class for managing adapted log entry types. This is mostly used to register different :class:`Experiment`
    configuration classes (typically :class:`ExperimentBuilder` models) so they can be logged and later recovered."""

    def __init__(self) -> None:
        # Cache of adapted models: one AdaptedLogEntry subtype per config model,
        # so repeated calls to adapt() return the same type object.
        self.adapters: Dict[Type[SnakeCaseModel], Type[AdaptedLogEntry]] = {}

    def adapt(self, model: Type[SnakeCaseModel]) -> Type[AdaptedLogEntry]:
        """Returns the :class:`AdaptedLogEntry` type for ``model``, creating and caching it on first use."""
        if model in self.adapters:
            return self.adapters[model]

        adapted = LogEntry.adapt(model)
        self.adapters[model] = adapted
        return adapted

    def adapt_instance(self, instance: SnakeCaseModel) -> AdaptedLogEntry:
        """Wraps ``instance`` into an instance of its adapted log-entry type, adapting the type first if needed."""
        return self.adapt(instance.__class__).adapt_instance(instance)

    def adapted_types(self) -> Iterable[Type[AdaptedLogEntry]]:
        """Returns all adapted types registered so far (in registration order)."""
        return self.adapters.values()

    def __getitem__(self, model: Type[SnakeCaseModel]) -> Type[AdaptedLogEntry]:
        # Indexing is a convenience alias for :meth:`adapt`.
        return self.adapt(model)
type Logs = Iterable[LogEntry]

View File

@ -0,0 +1,38 @@
from benchmarks.core.pydantic import SnakeCaseModel
from benchmarks.logging.logging import LogEntry, ConfigToLogAdapters
class MyConfig(SnakeCaseModel):
my_key: str
my_value: int
class MyOtherConfig(SnakeCaseModel):
my_other_key: str
my_other_value: int
def test_should_adapt_pydantic_model_to_log_entry():
adapters = ConfigToLogAdapters()
Adapted = adapters.adapt(MyConfig)
assert issubclass(Adapted, LogEntry)
instance = Adapted(my_key="key", my_value=1)
assert instance.entry_type == "my_config_log_entry"
def test_should_adapt_instance_by_type():
adapters = ConfigToLogAdapters()
Adapted1 = adapters.adapt(MyConfig)
Adapted2 = adapters.adapt(MyOtherConfig)
config1 = MyConfig(my_key="key", my_value=1)
config2 = MyOtherConfig(my_other_key="key", my_other_value=1)
adapted1 = adapters.adapt_instance(config1)
adapted2 = adapters.adapt_instance(config2)
assert isinstance(adapted1, Adapted1)
assert isinstance(adapted2, Adapted2)

View File

@ -1,12 +1,7 @@
import json
from pathlib import Path
from typing import List, Dict, Any
def shared_volume() -> Path:
return Path(__file__).parent.parent.parent.joinpath("volume")
def compact(a_string: str) -> str:
return "\n".join([line.strip() for line in a_string.splitlines() if line.strip()])

View File

@ -0,0 +1,3 @@
codex_agent:
codex_api_url: ${CODEX_API_URL}
node_id: ${NODE_ID}

View File

@ -0,0 +1,3 @@
codex_agent:
codex_api_url: ${CODEX_API_URL}
node_id: ${NODE_ID}

View File

@ -0,0 +1,16 @@
codex_experiment:
experiment_set_id: ${EXPERIMENT_SET_ID}
seeder_sets: ${SEEDER_SETS}
seeders: ${SEEDERS}
file_size: ${FILE_SIZE}
repetitions: ${REPETITIONS}
logging_cooldown: 10
nodes:
network_size: ${NETWORK_SIZE}
first_node_index: 0
name: "${CODEX_STATEFULSET}-{node_index}"
address: "${CODEX_STATEFULSET}-{node_index}.${CODEX_SERVICE}.${NAMESPACE}.svc.cluster.local"
disc_port: 6890
api_port: 6891
agent_url: "http://${CODEX_STATEFULSET}-{node_index}.${CODEX_SERVICE}.${NAMESPACE}.svc.cluster.local:9001/"

View File

@ -19,7 +19,3 @@ codex_experiment:
disc_port: 6893
api_port: 6894
agent_url: http://${CODEX_AGENT_1:-localhost}:9003/
codex_agent:
codex_api_url: ${CODEX_API_URL}
node_id: ${NODE_ID}

View File

@ -0,0 +1,2 @@
deluge_agent:
torrents_path: /var/lib/deluge/downloads

View File

@ -0,0 +1,2 @@
deluge_agent:
torrents_path: /var/lib/deluge/downloads

View File

@ -5,7 +5,6 @@ deluge_experiment:
tracker_announce_url: ${TRACKER_ANNOUNCE_URL}
file_size: ${FILE_SIZE}
repetitions: ${REPETITIONS}
shared_volume_path: ${SHARED_VOLUME_PATH}
logging_cooldown: 10
nodes:
@ -17,6 +16,3 @@ deluge_experiment:
agent_url: "http://${DELUGE_STATEFULSET}-{node_index}.${DELUGE_SERVICE}.${NAMESPACE}:9001/"
daemon_port: 6890
listen_ports: [ 6891, 6892 ]
deluge_agent:
torrents_path: /var/lib/deluge/downloads

View File

@ -25,6 +25,3 @@ deluge_experiment:
listen_ports: [ 6897, 6898 ]
agent_url: http://${DELUGE_AGENT_3:-localhost}:9003/
deluge_agent:
torrents_path: /var/lib/deluge/downloads

View File

@ -41,7 +41,7 @@ services:
image: bittorrent-benchmarks:test
container_name: codex-agent-1
entrypoint: [ "poetry", "run", "bittorrent-benchmarks",
"agent", "experiments-codex.local.yaml", "codex_agent", "--port", "9000" ]
"agent", "config/codex/agent.local.yaml", "codex_agent", "--port", "9000" ]
environment:
- CODEX_API_URL=http://codex-1:6891
- NODE_ID=codex-1
@ -69,7 +69,7 @@ services:
image: bittorrent-benchmarks:test
container_name: codex-agent-2
entrypoint: [ "poetry", "run", "bittorrent-benchmarks",
"agent", "experiments-codex.local.yaml", "codex_agent", "--port", "9001" ]
"agent", "config/codex/agent.local.yaml", "codex_agent", "--port", "9001" ]
environment:
- CODEX_API_URL=http://codex-2:6893
- NODE_ID=codex-2
@ -97,7 +97,7 @@ services:
image: bittorrent-benchmarks:test
container_name: codex-agent-3
entrypoint: [ "poetry", "run", "bittorrent-benchmarks",
"agent", "experiments-codex.local.yaml", "codex_agent", "--port", "9002" ]
"agent", "config/codex/agent.local.yaml", "codex_agent", "--port", "9002" ]
environment:
- CODEX_API_URL=http://codex-3:6895
- NODE_ID=codex-3

View File

@ -43,7 +43,7 @@ services:
image: bittorrent-benchmarks:test
container_name: agent-1
entrypoint: [ "poetry", "run", "bittorrent-benchmarks",
"agent", "experiments-deluge.local.yaml", "deluge_agent", "--port", "9001" ]
"agent", "config/deluge/agent.local.yaml", "deluge_agent", "--port", "9001" ]
environment:
- TORRENTS_ROOT=/var/lib/deluge/downloads
volumes:
@ -72,7 +72,7 @@ services:
image: bittorrent-benchmarks:test
container_name: agent-2
entrypoint: [ "poetry", "run", "bittorrent-benchmarks", "agent",
"experiments-deluge.local.yaml", "deluge_agent", "--port", "9002" ]
"config/deluge/agent.local.yaml", "deluge_agent", "--port", "9002" ]
environment:
- TORRENTS_ROOT=/var/lib/deluge/downloads
volumes:
@ -100,7 +100,7 @@ services:
deluge-agent-3:
image: bittorrent-benchmarks:test
container_name: agent-3
entrypoint: [ "poetry", "run", "bittorrent-benchmarks", "agent", "experiments-deluge.local.yaml",
entrypoint: [ "poetry", "run", "bittorrent-benchmarks", "agent", "config/deluge/agent.local.yaml",
"deluge_agent", "--port", "9003" ]
environment:
- TORRENTS_ROOT=/var/lib/deluge/downloads

View File

@ -19,5 +19,4 @@ RUN if [ "$BUILD_TYPE" = "release" ]; then \
COPY . .
RUN poetry install --only main
ENTRYPOINT ["poetry", "run", "bittorrent-benchmarks", "experiments", \
"/opt/bittorrent-benchmarks/experiments-deluge.k8s.yaml"]
ENTRYPOINT ["poetry", "run", "bittorrent-benchmarks", "experiments"]

View File

@ -0,0 +1,5 @@
apiVersion: v2
name: codex-benchmarks
description: A Helm chart for running Codex benchmarks.
version: 0.1.0
appVersion: "1.0.0"

View File

@ -0,0 +1,78 @@
{{/*
File-size helpers: parse the human-readable .Values.experiment.fileSize
(e.g. "100MB") into bytes, and derive the Codex storage quota from it.
*/}}
{{- define "filesize.bytes" }}
{{- $sizeNum := regexFind "\\d+" .Values.experiment.fileSize | int -}}
{{- $sizeUnit := regexFind "\\D+" .Values.experiment.fileSize -}}
{{- $size := dict "B" 1 "KB" 1024 "MB" 1048576 "GB" 1073741824 -}}
{{- mul $sizeNum (index $size $sizeUnit) -}}
{{- end -}}

{{/* Storage quota is 1.3x the experiment file size (integer math: x13 / 10). */}}
{{- define "codex.quota" }}
{{- div (mul (include "filesize.bytes" .) 13) 10 -}}
{{- end -}}

{{/* Experiment identity: the id within a group, the group id, and the combined full id. */}}
{{- define "experiment.groupId" -}}
{{- required "A valid .Values.experiment.groupId is required!" .Values.experiment.groupId }}
{{- end }}

{{- define "experiment.id" -}}
{{- default .Release.Name .Values.experiment.id }}
{{- end }}

{{- define "experiment.fullId" -}}
{{- printf "%s-%s" (include "experiment.id" .) (include "experiment.groupId" .) }}
{{- end }}

{{/* Common and selector labels. */}}
{{- define "codex-benchmarks.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{- define "app.name" -}}
{{- default "codex-benchmarks" .Values.deployment.appName | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/* Names for the Codex node headless service and statefulset. */}}
{{- define "codex-nodes.service" -}}
{{ printf "codex-nodes-service-%s" (include "experiment.fullId" .) }}
{{- end -}}

{{- define "codex-nodes.statefulset" -}}
{{ printf "codex-nodes-%s" (include "experiment.fullId" .) }}
{{- end -}}

{{- define "codex-benchmarks.labels" -}}
helm.sh/chart: {{ include "codex-benchmarks.chart" . }}
app.kubernetes.io/name: {{ include "app.name" . }}
{{ include "codex-benchmarks.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{- define "codex-benchmarks.selectorLabels" -}}
app.kubernetes.io/instance: {{ include "experiment.id" . }}
app.kubernetes.io/part-of: {{ include "experiment.groupId" . }}
{{- end }}

{{/* Annotations. */}}
{{- define "codex-benchmarks.pod.annotations" -}}
cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
{{- end }}

{{/* Minikube env: use locally-built :minikube images and never pull. */}}
{{- define "benchmark.harness.image" -}}
{{ .Values.deployment.minikubeEnv | ternary "bittorrent-benchmarks:minikube" "codexstorage/bittorrent-benchmarks:latest" }}
{{- end -}}

{{- define "codex.image" -}}
{{ .Values.deployment.minikubeEnv | ternary "nim-codex:minikube" "codexstorage/nim-codex:latest" }}
{{- end -}}

{{- define "benchmark.harness.imagePullPolicy" -}}
{{ .Values.deployment.minikubeEnv | ternary "Never" "Always" }}
{{- end -}}

View File

@ -0,0 +1,21 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "codex-nodes.service" . }}
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: codex-nodes-service
{{- include "codex-benchmarks.labels" . | nindent 4}}
spec:
clusterIP: None
selector:
app.kubernetes.io/component: codex-node
{{- include "codex-benchmarks.selectorLabels" . | nindent 4}}
ports:
- port: 6890
name: discovery
- port: 6891
name: api
- port: 6892
name: listen

View File

@ -0,0 +1,134 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{ include "codex-nodes.statefulset" . }}
  namespace: {{ .Release.Namespace }}
spec:
  replicas: {{ .Values.experiment.networkSize }}
  serviceName: {{ include "codex-nodes.service" . }}
  # Start all pods at once: nodes only depend on the bootstrap node, not on ordered startup.
  podManagementPolicy: Parallel
  selector:
    matchLabels:
      app.kubernetes.io/component: codex-node
      {{- include "codex-benchmarks.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        app.kubernetes.io/component: codex-node
        {{- include "codex-benchmarks.labels" . | nindent 8 }}
      annotations:
        {{- include "codex-benchmarks.pod.annotations" . | nindent 8 }}
    spec:
      {{- if not .Values.deployment.allowColocation }}
      # Spread Codex nodes across hosts so they don't compete for resources.
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/component
                    operator: In
                    values:
                      - codex-node
              topologyKey: "kubernetes.io/hostname"
      {{- end }}
      {{- with .Values.deployment.codexNodePool }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        # Main container: the Codex node itself.
        - name: codex-node
          image: {{ include "codex.image" . }}
          imagePullPolicy: {{ include "benchmark.harness.imagePullPolicy" . }}
          ports:
            - containerPort: 6890
          resources:
            requests:
              memory: 6Gi
            limits:
              memory: 6Gi
          env:
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: CODEX_NAT
              value: "none"
            - name: CODEX_BLOCK_TTL
              value: {{ .Values.experiment.blockTTL | quote }}
            - name: CODEX_BLOCK_MI
              value: {{ .Values.experiment.blockMaintenanceInterval | quote }}
            - name: CODEX_DISC_PORT
              value: "6890"
            - name: CODEX_API_PORT
              value: "6891"
            - name: CODEX_API_BINDADDR
              value: "0.0.0.0"
            - name: CODEX_STORAGE_QUOTA
              value: {{ include "codex.quota" . | quote }}
            - name: CODEX_DATA_DIR
              value: "/var/lib/codex"
            - name: CODEX_LOG_LEVEL
              # Quoted so YAML never retypes the value (e.g. a bare "off" would become a boolean).
              value: {{ .Values.experiment.codexLogLevel | quote }}
            - name: BOOTSTRAP_NODE
              value: "{{ include "codex-nodes.statefulset" . }}-{{ .Values.experiment.bootstrapNodeIndex }}"
          command: [ "/bin/bash", "--login", "-c" ]
          args:
            - |
              echo " -- K8S config -- "
              echo "Pod name is: ${POD_NAME}"
              echo "Bootstrap node is: ${BOOTSTRAP_NODE}"
              echo "Pod IP is: ${POD_IP}"
              echo "Log level is: ${CODEX_LOG_LEVEL}"

              if [ "${POD_NAME}" != "${BOOTSTRAP_NODE}" ]; then
                export BOOTSTRAP_NODE_URL="http://${BOOTSTRAP_NODE}.{{ include "codex-nodes.service" . }}.{{ .Release.Namespace }}.svc.cluster.local:6891"
                echo "Bootstrap node URL is: ${BOOTSTRAP_NODE_URL}"
              else
                echo "This is the bootstrap node."
              fi

              export CODEX_LISTEN_ADDRS="/ip4/${POD_IP}/tcp/6892"

              # Wipe any stale state left in the data dir from a previous run.
              rm -rf ${CODEX_DATA_DIR}/*

              echo " -- Starting Codex node -- "
              echo "Running Docker entrypoint..."
              /docker-entrypoint.sh codex
          volumeMounts:
            - name: codex-node-storage
              mountPath: /var/lib/codex
        # Sidecar: benchmark agent driving this node through its REST API.
        - name: codex-agent
          image: {{ include "benchmark.harness.image" . }}
          imagePullPolicy: {{ include "benchmark.harness.imagePullPolicy" . }}
          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          command: [ "/bin/bash", "--login", "-c" ]
          args:
            - |
              CODEX_API_URL="http://${POD_NAME}.{{ include "codex-nodes.service" . }}.{{ .Release.Namespace }}.svc.cluster.local:6891"
              export CODEX_API_URL
              poetry run bittorrent-benchmarks agent config/codex/agent.k8s.yaml codex_agent --port 9001
          ports:
            - containerPort: 9001
          volumeMounts:
            - name: codex-node-storage
              mountPath: /var/lib/codex
      volumes:
        - name: codex-node-storage
          emptyDir: { }

View File

@ -0,0 +1,63 @@
{{- if .Values.experiment.testRunner }}
apiVersion: batch/v1
kind: Job
metadata:
  name: codex-experiment-runner-{{ include "experiment.fullId" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: codex-experiment-runner
    {{- include "codex-benchmarks.labels" . | nindent 4 }}
spec:
  template:
    metadata:
      labels:
        app.kubernetes.io/component: codex-experiment-runner
        {{- include "codex-benchmarks.labels" . | nindent 8 }}
      annotations:
        {{- include "codex-benchmarks.pod.annotations" . | nindent 8 }}
    spec:
      {{- with .Values.deployment.runnerNodePool }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        # NOTE(review): was "deluge-experiment-runner" — a copy-paste from the Deluge chart.
        - name: codex-experiment-runner
          image: {{ include "benchmark.harness.image" . }}
          imagePullPolicy: {{ include "benchmark.harness.imagePullPolicy" . }}
          args: [ "/opt/bittorrent-benchmarks/config/codex/experiments.k8s.yaml", "run", "codex_experiment" ]
          env:
            - name: NETWORK_SIZE
              value: {{ .Values.experiment.networkSize | quote }}
            - name: SEEDERS
              value: {{ .Values.experiment.seeders | quote }}
            - name: REPETITIONS
              value: {{ .Values.experiment.repetitions | quote }}
            - name: SEEDER_SETS
              value: {{ .Values.experiment.seederSets | quote }}
            - name: FILE_SIZE
              value: {{ include "filesize.bytes" . | quote }}
            - name: CODEX_STATEFULSET
              value: {{ include "codex-nodes.statefulset" . | quote }}
            - name: CODEX_SERVICE
              # Reuse the service-name helper instead of duplicating the naming pattern.
              value: {{ include "codex-nodes.service" . | quote }}
            - name: EXPERIMENT_SET_ID
              value: {{ include "experiment.fullId" . | quote }}
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          resources:
            requests:
              cpu: "1"
              memory: "2Gi"
            limits:
              cpu: "1"
              memory: "2Gi"
      # Experiments must not be retried: a failed run is a failed experiment.
      restartPolicy: Never
  backoffLimit: 0
{{- end }}

View File

@ -0,0 +1,38 @@
experiment:
  # Number of Codex nodes in the benchmark network.
  networkSize: 5
  # Human-readable size of the file to disseminate (B, KB, MB, or GB suffix).
  fileSize: "100MB"
  seeders: 1
  seederSets: 1
  repetitions: 1
  blockTTL: 360000
  blockMaintenanceInterval: 360000
  # Index of the statefulset pod that acts as the network's bootstrap node.
  bootstrapNodeIndex: 0
  codexLogLevel: debug

  # Experiments are run as part of groups, which may entail several invocations of this Helm chart.
  # We therefore identify both the experiment within a group, and the group itself.

  # Identifies the experiment within the group. If left empty, defaults to the release name.
  id: ""
  # Identifies the experiment group. Needs to be defined by the user.
  groupId: ""
  # If set to false, does not deploy a test runner (useful if you just want the network).
  testRunner: true

deployment:
  appName: ""
  # If false, Codex nodes will not be allowed to run on the same node.
  allowColocation: true
  # Disables pulling of images and uses :minikube tag for the test runner, agents, and Codex.
  minikubeEnv: false
  # Labels for setting Codex node and experiment runner affinity towards node pools.
  # Example:
  #   codexNodePool:
  #     workload: benchmarks
  codexNodePool: {}
  runnerNodePool: {}

View File

@ -14,10 +14,6 @@ Expand the name of the chart.
{{- div (mul $totalSize 12) 10 -}}
{{- end -}}
{{- define "deluge.pvc" }}
{{- default (printf "deluge-%s-pvc" (include "experiment.fullId" .)) .Values.deployment.pvcName }}
{{- end -}}
{{- define "experiment.groupId" -}}
{{- required "A valid .Values.experiment.groupId is required!" .Values.experiment.groupId }}
{{- end }}
@ -63,8 +59,8 @@ cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
{{- define "benchmark.harness.image" -}}
{{ .Values.deployment.minikubeEnv | ternary "bittorrent-benchmarks:minikube" "codexstorage/bittorrent-benchmarks:latest" }}
{{- end }}-}}
{{- end -}}
{{- define "benchmark.harness.imagePullPolicy" -}}
{{ .Values.deployment.minikubeEnv | ternary "Never" "Always" }}
{{- end }}
{{- end -}}

View File

@ -76,7 +76,7 @@ spec:
imagePullPolicy: {{ include "benchmark.harness.imagePullPolicy" . }}
command: [
"poetry", "run", "bittorrent-benchmarks",
"agent", "experiments-deluge.k8s.yaml", "deluge_agent", "--port", "9001"
"agent", "config/deluge/agent.k8s.yaml", "deluge_agent", "--port", "9001"
]
ports:
- containerPort: 9001

View File

@ -26,7 +26,7 @@ spec:
- name: deluge-experiment-runner
image: {{ include "benchmark.harness.image" . }}
imagePullPolicy: {{ include "benchmark.harness.imagePullPolicy" . }}
args: [ "run", "deluge_experiment" ]
args: [ "/opt/bittorrent-benchmarks/config/deluge/experiments.k8s.yaml", "run", "deluge_experiment" ]
env:
- name: NETWORK_SIZE
value: {{ .Values.experiment.networkSize | quote }}
@ -40,8 +40,6 @@ spec:
value: {{ include "filesize.bytes" . | quote }}
- name: TRACKER_ANNOUNCE_URL
value: "http://bittorrent-tracker-service-{{ include "experiment.fullId" . }}.{{ .Release.Namespace }}.svc.cluster.local:8000/announce"
- name: SHARED_VOLUME_PATH
value: "/opt/bittorrent-benchmarks/volume"
- name: DELUGE_STATEFULSET
value: "deluge-nodes-{{ include "experiment.fullId" . }}"
- name: DELUGE_SERVICE