feat: add argo workflow sketch

2026-02-19 20:23:11 +00:00 · 2024-12-18 14:34:22 -03:00 · 2024-12-18 14:34:22 -03:00 · bea51a5adf
commit bea51a5adf
parent 1bddfc7426
15 changed files with 265 additions and 5 deletions
--- a/.github/workflows/argo.yaml
+++ b/.github/workflows/argo.yaml
@ -0,0 +1,49 @@
+# Builds the Argo Workflows runner image from DOCKER_FILE and pushes it to
+# DOCKER_REPO on Docker Hub. Runs on pushes to master and on manual dispatch.
+name: Build Argo Workflows Runner Image
+
+on:
+  push:
+    branches:
+      - master
+
+  workflow_dispatch:
+
+# Least privilege for GITHUB_TOKEN: the job only checks out the repository.
+# Registry access uses the Docker Hub secrets below, not GITHUB_TOKEN.
+permissions:
+  contents: read
+
+env:
+  DOCKER_FILE: ./docker/bittorrent-benchmarks-workflows.Dockerfile
+  DOCKER_REPO: codexstorage/bittorrent-benchmarks-workflows
+
+jobs:
+  test-and-build:
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      # Computes the image tags and labels consumed by the build step below:
+      # "latest" is always added, plus a tag derived from the commit SHA.
+      - name: Setup Docker Metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_REPO }}
+          flavor: |
+            latest=true
+          tags: |
+            type=sha
+
+      # The push expression is always true for the triggers declared above; it
+      # only matters if a pull_request trigger is added later.
+      - name: Build and Push Prod. Image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ${{ env.DOCKER_FILE }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -1,4 +1,4 @@
-name: Test and Build
+name: Test and Build Experiment Runner Image

 on:
  push:
@ -18,9 +18,6 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v4

-      - name: Use Docker in rootless mode.
-        uses: ScribeMD/rootless-docker@0.2.2
-
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

@ -74,4 +71,4 @@ jobs:
          file: ${{ env.DOCKER_FILE }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
+          labels: ${{ steps.meta.outputs.labels }}
--- a/docker/bin/kubectl-wait-job
+++ b/docker/bin/kubectl-wait-job
@ -0,0 +1,101 @@
+#!/bin/bash
+#
+# This is copied from: https://github.com/brianpursley/kubectl-wait-job
+#
+# This code is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License.
+# To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/4.0/
+#
+# Attribution: This code was inspired by an answer on Stack Overflow licensed under CC BY-SA 4.0.
+# Original answer: https://stackoverflow.com/a/60286538/5074828 by Sebastian N (https://stackoverflow.com/users/3745474/sebastian-n)
+#
+
+# Show the usage text and exit successfully if --help appears anywhere in the arguments
+for opt in "$@"; do
+    case "$opt" in
+        --help)
+            printf '%s\n' \
+                "Usage: kubectl wait-job [ARGS] [OPTIONS]" \
+                "" \
+                "This plugin waits for a Kubernetes job to either complete or fail." \
+                "" \
+                "Arguments:" \
+                "  [kubectl args]  Any args will be passed to kubectl wait." \
+                "" \
+                "Options:" \
+                "  [kubectl options]  Any options will be passed to kubectl wait." \
+                "" \
+                "Example:" \
+                "  kubectl wait-job job-name" \
+                ""
+            exit 0
+            ;;
+    esac
+done
+
+# Reject --for in either form (--for VALUE or --for=VALUE): this plugin supplies its own
+for opt in "$@"; do
+    case "$opt" in
+        --for | --for=*)
+            echo "Error: The '--for' flag cannot be used with this plugin."
+            exit 2
+            ;;
+    esac
+done
+
+# Cleanup
+cleanup() {
+    if [[ -n $COMPLETE_STDERR ]]; then
+        rm -f "$COMPLETE_STDERR" 2> /dev/null
+    fi
+    if [[ -n $FAILED_STDERR ]]; then
+        rm -f "$FAILED_STDERR" 2> /dev/null
+    fi
+    if [[ -n $COMPLETE_PID ]]; then
+        kill "$COMPLETE_PID" 2> /dev/null
+    fi
+    if [[ -n $FAILED_PID ]]; then
+        kill "$FAILED_PID" 2> /dev/null
+    fi
+}
+trap cleanup EXIT
+
+# Create temporary files to store the stderr output of each kubectl wait process
+# (exit code 3 if a temp file cannot be created)
+COMPLETE_STDERR=$(mktemp -t kubectl-wait-job-stderr.XXXXXXXXXX) || { echo "error: failed to create temp file"; exit 3; }
+FAILED_STDERR=$(mktemp -t kubectl-wait-job-stderr.XXXXXXXXXX) || { echo "error: failed to create temp file"; exit 3; }
+
+# Wait for complete and failed conditions in parallel; all script arguments are
+# forwarded to both kubectl wait invocations
+kubectl wait job "$@" --for=condition=complete > /dev/null 2> "$COMPLETE_STDERR" &
+COMPLETE_PID=$!
+kubectl wait job "$@" --for=condition=failed > /dev/null 2> "$FAILED_STDERR" &
+FAILED_PID=$!
+
+# Wait for one of the processes to exit (using loop instead of wait -n for compatibility)
+#
+# Exit codes of this script:
+#   0 - the "complete" waiter succeeded (job completed)
+#   1 - the "failed" waiter succeeded (job failed)
+#   3 - a waiter exited with a non-zero status (kubectl wait itself failed)
+while true; do
+    # Check if the process waiting for the job to complete has exited
+    # (kill -0 only probes whether the PID is still alive)
+    unset COMPLETE_RESULT
+    if ! kill -0 "$COMPLETE_PID" 2>/dev/null; then
+        # Collect the exit status of the finished background process
+        wait $COMPLETE_PID;
+        COMPLETE_RESULT=$?
+        if [[ $COMPLETE_RESULT -eq 0 ]]; then
+            echo "Job completed successfully"
+            exit 0
+        fi
+    fi
+
+    # Check if the process waiting for the job to fail has exited
+    unset FAILED_RESULT
+    if ! kill -0 "$FAILED_PID" 2>/dev/null; then
+        # Collect the exit status of the finished background process
+        wait $FAILED_PID
+        FAILED_RESULT=$?
+        if [[ $FAILED_RESULT -eq 0 ]]; then
+            echo "Job failed"
+            exit 1
+        fi
+    fi
+
+    # A result variable is only set here when its process exited with a non-zero
+    # status (the zero case exited above): print the captured stderr and exit
+    if [[ -n $COMPLETE_RESULT || -n $FAILED_RESULT ]]; then
+        cat "$COMPLETE_STDERR" 2> /dev/null
+        cat "$FAILED_STDERR" 2> /dev/null
+        echo "error: kubectl wait failed"
+        exit 3
+    fi
+
+    # Sleep for a short time before checking again
+    sleep 0.1
+done
--- a/docker/bittorrent-benchmarks-workflows.Dockerfile
+++ b/docker/bittorrent-benchmarks-workflows.Dockerfile
@ -0,0 +1,16 @@
+# Image used by the benchmark workflows: Debian with kubectl, Helm 3, and this
+# repository's k8s/ and docker/ directories under /opt/bittorrent-benchmarks.
+
+# This stage only supplies the kubectl binary.
+FROM bitnami/kubectl:1.31.1 AS kubectl
+
+FROM debian:bookworm-slim
+
+COPY --from=kubectl /opt/bitnami/kubectl/bin/kubectl /usr/local/bin/kubectl
+
+# curl downloads the Helm installer, ca-certificates is required for HTTPS, and
+# openssl is used by the installer to verify the download checksum. The apt
+# lists are removed in the same layer so they do not end up in the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates curl openssl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Download, run and remove the Helm 3 installer in a single layer.
+RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
+    && chmod 700 get_helm.sh \
+    && ./get_helm.sh \
+    && rm -f get_helm.sh
+
+# WORKDIR creates the directory if it does not exist.
+WORKDIR /opt/bittorrent-benchmarks
+
+# COPY of a directory copies its contents, not the directory itself, so the
+# destination directories are named explicitly. This keeps paths such as
+# k8s/charts/deluge and docker/bin/kubectl-wait-job valid inside the image.
+COPY ./k8s ./k8s
+COPY ./docker ./docker
+
+# Make sure the wait script is executable regardless of its mode in the build context.
+RUN chmod +x ./docker/bin/kubectl-wait-job
--- a/k8s/argo-workflows/codex-workflows-rbac.yaml
+++ b/k8s/argo-workflows/codex-workflows-rbac.yaml
@ -0,0 +1,49 @@
+# Service account for running the Codex workflows; the documents that follow grant it the
+# permissions it needs. For now, this file must be applied manually to the cluster running
+# Argo Workflows.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: codex-benchmarks-workflows
+  namespace: argo
+---
+# Grants every verb on the core and apps resources listed below, within the argo namespace.
+# NOTE(review): a Role only applies inside its own namespace (argo), but the deluge benchmark
+# workflow operates on -n codex-benchmarks; confirm where these permissions are needed.
+# NOTE(review): the workflow also waits on Jobs (batch API group), which are not listed here;
+# confirm whether a rule for them is required.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: codex-workflows-runner
+  namespace: argo
+rules:
+  # Core API group ("").
+  # NOTE(review): "namespaces" is a cluster-scoped resource; verify a namespaced Role grants
+  # the access intended here.
+  - apiGroups: [ "" ]
+    resources: [ "namespaces", "persistentvolumeclaims", "pods", "services" ]
+    verbs: [ "*" ]
+
+  # Workload controllers from the apps API group.
+  - apiGroups: [ "apps" ]
+    resources: [ "deployments", "statefulsets" ]
+    verbs: [ "*" ]
+---
+# Binds the codex-workflows-runner Role to the codex-benchmarks-workflows service account
+# in the argo namespace.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: codex-workflows-runner
+  namespace: argo
+subjects:
+  - kind: ServiceAccount
+    name: codex-benchmarks-workflows
+    namespace: argo
+roleRef:
+  kind: Role
+  name: codex-workflows-runner
+  apiGroup: rbac.authorization.k8s.io
+---
+# Binds the same service account to a Role named "executor" in the argo namespace.
+# NOTE(review): the "executor" Role is not defined in this file; presumably it is created by
+# the Argo Workflows installation. Confirm it exists before applying.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: codex-workflows-runner-executor
+  namespace: argo
+subjects:
+  - kind: ServiceAccount
+    name: codex-benchmarks-workflows
+    namespace: argo
+roleRef:
+  kind: Role
+  name: executor
+  apiGroup: rbac.authorization.k8s.io
--- a/k8s/argo-workflows/deluge-benchmark-workflow.yaml
+++ b/k8s/argo-workflows/deluge-benchmark-workflow.yaml
@ -0,0 +1,48 @@
+# Argo Workflow for one Deluge benchmark run: install the Helm release, wait for the
+# testrunner pod to become Ready, then wait for the testrunner job to complete or fail.
+# All commands run in the workflows runner image; relative paths are resolved against
+# that image's working directory.
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: deluge-benchmark-
+spec:
+  serviceAccountName: codex-benchmarks-workflows
+  entrypoint: deluge-benchmark-workflow
+  # Exit handler: uninstall the release whether the run succeeded or failed, so the fixed
+  # release name "e1" is free for the next run.
+  onExit: cleanup
+  templates:
+    # Entrypoint: the three steps run strictly one after another.
+    - name: deluge-benchmark-workflow
+      steps:
+        - - name: deploy-experiment
+            template: deploy-experiment
+
+        - - name: wait-for-testrunner
+            template: wait-for-testrunner
+
+        - - name: wait-for-completion
+            template: wait-for-completion
+
+    # Installs the Deluge chart as release "e1" in the codex-benchmarks namespace.
+    - name: deploy-experiment
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          helm install e1 k8s/charts/deluge --namespace codex-benchmarks
+
+    # Blocks for up to 300s until the testrunner pod reports Ready.
+    - name: wait-for-testrunner
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          kubectl wait --for=condition=Ready --selector=app=testrunner pod -n codex-benchmarks --timeout=300s
+
+    # Waits for the testrunner job; the script exits non-zero if the job fails or if
+    # kubectl wait itself fails, which fails this step.
+    - name: wait-for-completion
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          set -e
+          ./docker/bin/kubectl-wait-job --selector=app=testrunner -n codex-benchmarks
+
+    # Removes the Helm release; invoked through spec.onExit.
+    - name: cleanup
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          helm uninstall e1 -n codex-benchmarks
--- a/k8s/charts/deluge/Chart.yaml
+++ b/k8s/charts/deluge/Chart.yaml
--- a/k8s/charts/deluge/templates/_helpers.tpl
+++ b/k8s/charts/deluge/templates/_helpers.tpl
--- a/k8s/charts/deluge/templates/deluge-pvc.yaml
+++ b/k8s/charts/deluge/templates/deluge-pvc.yaml
--- a/k8s/charts/deluge/templates/deluge-service.yaml
+++ b/k8s/charts/deluge/templates/deluge-service.yaml
--- a/k8s/charts/deluge/templates/deluge-statefulset.yaml
+++ b/k8s/charts/deluge/templates/deluge-statefulset.yaml
--- a/k8s/charts/deluge/templates/testrunner-job.yaml
+++ b/k8s/charts/deluge/templates/testrunner-job.yaml
--- a/k8s/charts/deluge/templates/tracker-deployment.yaml
+++ b/k8s/charts/deluge/templates/tracker-deployment.yaml
--- a/k8s/charts/deluge/templates/tracker-service.yaml
+++ b/k8s/charts/deluge/templates/tracker-service.yaml
--- a/k8s/charts/deluge/values.yaml
+++ b/k8s/charts/deluge/values.yaml