diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000..fdef8fd --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,48 @@ +This folder contains the Kubernetes and Argo Workflows resources required to run experiments in Kubernetes +both in local (e.g. Minikube, Kind) and remote clusters. + +## Prerequisites + +### Argo Workflows + +Whatever cluster you choose must be running [Argo Workflows](https://argo-workflows.readthedocs.io/). + +**Local clusters.** For local clusters, you can follow the instructions in +the [Argo Workflows Quickstart Guide](https://argo-workflows.readthedocs.io/en/latest/quick-start/) to get Argo +Workflows running. + +For remote clusters, it's best to consult the Argo +Workflows [Operator Manual](https://argo-workflows.readthedocs.io/en/latest/installation/). + +**Argo CLI Tool.** You will also need to install the +[Argo CLI tool](https://argo-workflows.readthedocs.io/en/latest/walk-through/argo-cli/) to submit workflows. + +**Permissions.** Codex workflows assume that they are running in a namespace called `codex-benchmarks`. We +have a sample manifest which creates the namespace as well as the proper service account with RBAC +permissions [here](./argo-workflows/codex-workflows-rbac.yaml). For local clusters, you can apply this manifest +as it is. For remote clusters, you might need to customize it to your needs. + +### Logs + +Experiments require logs to be stored for later parsing during analysis. For local clusters, this can be achieved +by running [Vector](https://vector.dev/) and outputting pod logs to a persistent volume. The manifests for setting up the +persistent volume, as well as Vector, +can be found [here](./vector). 
+ +### Submitting Workflows + +Once everything is set up, workflows can be submitted with: + +```bash +argo submit -n argo ./deluge-benchmark-workflow.yaml +``` + +For local clusters, you should add the `--insecure-skip-verify` flag: + +```bash +argo submit -n argo ./deluge-benchmark-workflow.yaml --insecure-skip-verify +``` + +To observe progress, you can use the Argo Workflows UI, which can be accessed by port-forwarding the Argo Workflows +server. + diff --git a/k8s/argo-workflows/deluge-benchmark-workflow.yaml b/k8s/argo-workflows/deluge-benchmark-workflow.yaml index 4c69b79..476aa89 100644 --- a/k8s/argo-workflows/deluge-benchmark-workflow.yaml +++ b/k8s/argo-workflows/deluge-benchmark-workflow.yaml @@ -177,7 +177,7 @@ spec: command: [ "/bin/bash" ] source: | ./docker/bin/kubectl-wait-job\ - --selector=app.kubernetes.io/name=deluge-experiment-runner,\ + --selector=app.kubernetes.io/component=deluge-experiment-runner,\ app.kubernetes.io/instance=e{{inputs.parameters.runId}},\ app.kubernetes.io/part-of=g{{inputs.parameters.groupId}}\ --timeout={{workflow.parameters.maxExperimentDuration}}\ diff --git a/k8s/charts/deluge/templates/_helpers.tpl b/k8s/charts/deluge/templates/_helpers.tpl index ee05097..f02feec 100644 --- a/k8s/charts/deluge/templates/_helpers.tpl +++ b/k8s/charts/deluge/templates/_helpers.tpl @@ -43,6 +43,7 @@ Common and selector labels. {{- define "deluge-benchmarks.labels" -}} helm.sh/chart: {{ include "deluge-benchmarks.chart" . }} +app.kubernetes.io/name: {{ include "app.name" . }} {{ include "deluge-benchmarks.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} diff --git a/k8s/charts/deluge/templates/deluge-service.yaml b/k8s/charts/deluge/templates/deluge-service.yaml index 8623d11..3d5e164 100644 --- a/k8s/charts/deluge/templates/deluge-service.yaml +++ b/k8s/charts/deluge/templates/deluge-service.yaml @@ -4,12 +4,12 @@ metadata: name: deluge-nodes-service-{{ include "experiment.fullId" . 
}} namespace: {{ .Release.Namespace }} labels: - app.kubernetes.io/name: deluge-nodes-service + app.kubernetes.io/component: deluge-nodes-service {{- include "deluge-benchmarks.labels" . | nindent 4}} spec: clusterIP: None selector: - app.kubernetes.io/name: deluge-node + app.kubernetes.io/component: deluge-node {{- include "deluge-benchmarks.selectorLabels" . | nindent 4}} ports: - port: 6890 diff --git a/k8s/charts/deluge/templates/deluge-statefulset.yaml b/k8s/charts/deluge/templates/deluge-statefulset.yaml index bdcba2d..255c5a5 100644 --- a/k8s/charts/deluge/templates/deluge-statefulset.yaml +++ b/k8s/charts/deluge/templates/deluge-statefulset.yaml @@ -10,13 +10,13 @@ spec: podManagementPolicy: Parallel selector: matchLabels: - app.kubernetes.io/name: deluge-node + app.kubernetes.io/component: deluge-node {{- include "deluge-benchmarks.selectorLabels" . | nindent 6 }} template: metadata: labels: - app.kubernetes.io/name: deluge-node + app.kubernetes.io/component: deluge-node {{- include "deluge-benchmarks.labels" . | nindent 8 }} spec: diff --git a/k8s/charts/deluge/templates/testrunner-job.yaml b/k8s/charts/deluge/templates/testrunner-job.yaml index c0496fb..1df8a65 100644 --- a/k8s/charts/deluge/templates/testrunner-job.yaml +++ b/k8s/charts/deluge/templates/testrunner-job.yaml @@ -4,14 +4,14 @@ metadata: name: deluge-experiment-runner-{{ include "experiment.fullId" . }} namespace: {{ .Release.Namespace }} labels: - app.kubernetes.io/name: deluge-experiment-runner + app.kubernetes.io/component: deluge-experiment-runner {{- include "deluge-benchmarks.labels" . | nindent 4 }} spec: template: metadata: labels: - app.kubernetes.io/name: deluge-experiment-runner + app.kubernetes.io/component: deluge-experiment-runner {{- include "deluge-benchmarks.labels" . 
| nindent 8 }} spec: diff --git a/k8s/charts/deluge/templates/tracker-deployment.yaml b/k8s/charts/deluge/templates/tracker-deployment.yaml index 79791c9..54a67db 100644 --- a/k8s/charts/deluge/templates/tracker-deployment.yaml +++ b/k8s/charts/deluge/templates/tracker-deployment.yaml @@ -4,20 +4,20 @@ metadata: name: bittorrent-tracker-{{ include "experiment.fullId" . }} namespace: {{ .Release.Namespace }} labels: - app.kubernetes.io/name: bittorrent-tracker + app.kubernetes.io/component: bittorrent-tracker {{- include "deluge-benchmarks.labels" . | nindent 4 }} spec: replicas: 1 selector: matchLabels: - app.kubernetes.io/name: bittorrent-tracker + app.kubernetes.io/component: bittorrent-tracker {{- include "deluge-benchmarks.selectorLabels" . | nindent 6 }} template: metadata: labels: - app.kubernetes.io/name: bittorrent-tracker + app.kubernetes.io/component: bittorrent-tracker {{- include "deluge-benchmarks.labels" . | nindent 8 }} spec: diff --git a/k8s/charts/deluge/templates/tracker-service.yaml b/k8s/charts/deluge/templates/tracker-service.yaml index dcfe180..c7570ec 100644 --- a/k8s/charts/deluge/templates/tracker-service.yaml +++ b/k8s/charts/deluge/templates/tracker-service.yaml @@ -4,7 +4,7 @@ metadata: name: bittorrent-tracker-service-{{ include "experiment.fullId" . }} namespace: {{ .Release.Namespace }} labels: - app.kubernetes.io/name: bittorrent-tracker-service + app.kubernetes.io/component: bittorrent-tracker-service {{- include "deluge-benchmarks.labels" . | nindent 4 }} spec: @@ -14,5 +14,5 @@ spec: targetPort: 8000 protocol: TCP selector: - app.kubernetes.io/name: bittorrent-tracker + app.kubernetes.io/component: bittorrent-tracker {{- include "deluge-benchmarks.selectorLabels" . 
| nindent 4 }} \ No newline at end of file diff --git a/k8s/local/README.md b/k8s/local/README.md deleted file mode 100644 index b178a78..0000000 --- a/k8s/local/README.md +++ /dev/null @@ -1,2 +0,0 @@ -Configurations which can be applied to a local k8s cluster like Minikube or Kind so that experiment logs can be collected for later parsing. This is a simple version of what one would do in a real cluster: use a log collector and ship everything onto external storage or a service like Logstash. - diff --git a/k8s/local/vector-agents-values.yaml b/k8s/vector/vector-chart-values.yaml similarity index 95% rename from k8s/local/vector-agents-values.yaml rename to k8s/vector/vector-chart-values.yaml index 333499b..863396f 100644 --- a/k8s/local/vector-agents-values.yaml +++ b/k8s/vector/vector-chart-values.yaml @@ -5,7 +5,7 @@ service: extraVolumes: - name: vector-logs persistentVolumeClaim: - claimName: codex-workflow-logs-pvc + claimName: vector-logs-pvc extraVolumeMounts: - name: vector-logs diff --git a/k8s/local/codex-workflow-logs-pv.yaml b/k8s/vector/vector-pv.yaml similarity index 61% rename from k8s/local/codex-workflow-logs-pv.yaml rename to k8s/vector/vector-pv.yaml index 09899e3..f6a4d04 100644 --- a/k8s/local/codex-workflow-logs-pv.yaml +++ b/k8s/vector/vector-pv.yaml @@ -1,22 +1,22 @@ -# We need a persistent volume for Vector to collec the logs, which will then be accessible +# We need a persistent volume for Vector to collect the logs, which will then be accessible # to the log parser. apiVersion: v1 kind: PersistentVolume metadata: - name: codex-workflow-logs-pv + name: vector-logs-pv spec: capacity: storage: 10Gi accessModes: - ReadWriteMany hostPath: - path: /mnt/codex-workflow-logs + path: /mnt/vector-logs --- apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: codex-workflow-logs-pvc + name: vector-logs-pvc spec: accessModes: - ReadWriteMany