From bea51a5adf0012925f81c4aa9cd42cf816e0063b Mon Sep 17 00:00:00 2001
From: gmega
Date: Wed, 18 Dec 2024 14:34:22 -0300
Subject: [PATCH] feat: add argo workflow sketch

---
 .github/workflows/argo.yaml                   |  49 +++++++++
 .github/workflows/ci.yml                      |   7 +-
 docker/bin/kubectl-wait-job                   | 101 ++++++++++++++++++
 ...bittorrent-benchmarks-workflows.Dockerfile |  16 +++
 k8s/argo-workflows/codex-workflows-rbac.yaml  |  49 +++++++++
 .../deluge-benchmark-workflow.yaml            |  48 +++++++++
 {charts => k8s/charts}/deluge/Chart.yaml      |   0
 .../charts}/deluge/templates/_helpers.tpl     |   0
 .../charts}/deluge/templates/deluge-pvc.yaml  |   0
 .../deluge/templates/deluge-service.yaml      |   0
 .../deluge/templates/deluge-statefulset.yaml  |   0
 .../deluge/templates/testrunner-job.yaml      |   0
 .../deluge/templates/tracker-deployment.yaml  |   0
 .../deluge/templates/tracker-service.yaml     |   0
 {charts => k8s/charts}/deluge/values.yaml     |   0
 15 files changed, 265 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/argo.yaml
 create mode 100644 docker/bin/kubectl-wait-job
 create mode 100644 docker/bittorrent-benchmarks-workflows.Dockerfile
 create mode 100644 k8s/argo-workflows/codex-workflows-rbac.yaml
 create mode 100644 k8s/argo-workflows/deluge-benchmark-workflow.yaml
 rename {charts => k8s/charts}/deluge/Chart.yaml (100%)
 rename {charts => k8s/charts}/deluge/templates/_helpers.tpl (100%)
 rename {charts => k8s/charts}/deluge/templates/deluge-pvc.yaml (100%)
 rename {charts => k8s/charts}/deluge/templates/deluge-service.yaml (100%)
 rename {charts => k8s/charts}/deluge/templates/deluge-statefulset.yaml (100%)
 rename {charts => k8s/charts}/deluge/templates/testrunner-job.yaml (100%)
 rename {charts => k8s/charts}/deluge/templates/tracker-deployment.yaml (100%)
 rename {charts => k8s/charts}/deluge/templates/tracker-service.yaml (100%)
 rename {charts => k8s/charts}/deluge/values.yaml (100%)

diff --git a/.github/workflows/argo.yaml b/.github/workflows/argo.yaml
new file mode 100644
index 0000000..4247a60
--- /dev/null
+++ b/.github/workflows/argo.yaml
@@ -0,0 +1,49 @@
+name: Build Argo Workflows Runner Image
+
+on:
+  push:
+    branches:
+      - master
+
+  workflow_dispatch:
+
+env:
+  DOCKER_FILE: ./docker/bittorrent-benchmarks-workflows.Dockerfile
+  DOCKER_REPO: codexstorage/bittorrent-benchmarks-workflows
+
+jobs:
+  test-and-build:
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Setup Docker Metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_REPO }}
+          flavor: |
+            latest=true
+          tags: |
+            type=sha
+
+      - name: Build and Push Prod. Image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ${{ env.DOCKER_FILE }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 54c71db..1544c62 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: Test and Build
+name: Test and Build Experiment Runner Image
 
 on:
   push:
@@ -18,9 +18,6 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
-      - name: Use Docker in rootless mode.
-        uses: ScribeMD/rootless-docker@0.2.2
-
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -74,4 +71,4 @@ jobs:
           file: ${{ env.DOCKER_FILE }}
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/docker/bin/kubectl-wait-job b/docker/bin/kubectl-wait-job
new file mode 100644
index 0000000..7b9b709
--- /dev/null
+++ b/docker/bin/kubectl-wait-job
@@ -0,0 +1,101 @@
+#!/bin/bash
+#
+# This is copied from: https://github.com/brianpursley/kubectl-wait-job
+#
+# This code is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License.
+# To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/4.0/
+#
+# Attribution: This code was inspired by an answer on Stack Overflow licensed under CC BY-SA 4.0.
+# Original answer: https://stackoverflow.com/a/60286538/5074828 by Sebastian N (https://stackoverflow.com/users/3745474/sebastian-n)
+#
+
+# Check if --help is specified in the arguments and display help text
+for arg in "$@"; do
+    if [[ "$arg" == "--help" ]]; then
+        echo "Usage: kubectl wait-job [ARGS] [OPTIONS]"
+        echo ""
+        echo "This plugin waits for a Kubernetes job to either complete or fail."
+        echo ""
+        echo "Arguments:"
+        echo " [kubectl args] Any args will be passed to kubectl wait."
+        echo ""
+        echo "Options:"
+        echo " [kubectl options] Any options will be passed to kubectl wait."
+        echo ""
+        echo "Example:"
+        echo " kubectl wait-job job-name"
+        echo ""
+        exit 0
+    fi
+done
+
+# Make sure there is no --for flag
+for arg in "$@"; do
+    if [[ "$arg" == "--for" || "$arg" == --for=* ]]; then
+        echo "Error: The '--for' flag cannot be used with this plugin."
+        exit 2
+    fi
+done
+
+# Cleanup
+cleanup() {
+    if [[ -n $COMPLETE_STDERR ]]; then
+        rm -f "$COMPLETE_STDERR" 2> /dev/null
+    fi
+    if [[ -n $FAILED_STDERR ]]; then
+        rm -f "$FAILED_STDERR" 2> /dev/null
+    fi
+    if [[ -n $COMPLETE_PID ]]; then
+        kill "$COMPLETE_PID" 2> /dev/null
+    fi
+    if [[ -n $FAILED_PID ]]; then
+        kill "$FAILED_PID" 2> /dev/null
+    fi
+}
+trap cleanup EXIT
+
+# Create temporary files to store stderr output
+COMPLETE_STDERR=$(mktemp -t kubectl-wait-job-stderr.XXXXXXXXXX) || { echo "error: failed to create temp file"; exit 3; }
+FAILED_STDERR=$(mktemp -t kubectl-wait-job-stderr.XXXXXXXXXX) || { echo "error: failed to create temp file"; exit 3; }
+
+# Wait for complete and failed conditions in parallel
+kubectl wait job "$@" --for=condition=complete > /dev/null 2> "$COMPLETE_STDERR" &
+COMPLETE_PID=$!
+kubectl wait job "$@" --for=condition=failed > /dev/null 2> "$FAILED_STDERR" &
+FAILED_PID=$!
+
+# Wait for one of the processes to exit (using loop instead of wait -n for compatibility)
+while true; do
+    # Check if the process waiting for the job to complete has exited
+    unset COMPLETE_RESULT
+    if ! kill -0 "$COMPLETE_PID" 2>/dev/null; then
+        wait $COMPLETE_PID;
+        COMPLETE_RESULT=$?
+        if [[ $COMPLETE_RESULT -eq 0 ]]; then
+            echo "Job completed successfully"
+            exit 0
+        fi
+    fi
+
+    # Check if the process waiting for the job to fail has exited
+    unset FAILED_RESULT
+    if ! kill -0 "$FAILED_PID" 2>/dev/null; then
+        wait $FAILED_PID
+        FAILED_RESULT=$?
+        if [[ $FAILED_RESULT -eq 0 ]]; then
+            echo "Job failed"
+            exit 1
+        fi
+    fi
+
+    # If either process failed, print the stderr output and exit
+    if [[ -n $COMPLETE_RESULT || -n $FAILED_RESULT ]]; then
+        cat "$COMPLETE_STDERR" 2> /dev/null
+        cat "$FAILED_STDERR" 2> /dev/null
+        echo "error: kubectl wait failed"
+        exit 3
+    fi
+
+    # Sleep for a short time before checking again
+    sleep 0.1
+done
\ No newline at end of file
diff --git a/docker/bittorrent-benchmarks-workflows.Dockerfile b/docker/bittorrent-benchmarks-workflows.Dockerfile
new file mode 100644
index 0000000..54455ff
--- /dev/null
+++ b/docker/bittorrent-benchmarks-workflows.Dockerfile
@@ -0,0 +1,16 @@
+FROM bitnami/kubectl:1.31.1 AS kubectl
+
+FROM debian:bookworm-slim
+
+COPY --from=kubectl /opt/bitnami/kubectl/bin/kubectl /usr/local/bin/kubectl
+
+RUN apt-get update && apt-get install -y curl
+
+RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
+RUN chmod 700 get_helm.sh
+RUN ./get_helm.sh
+
+RUN mkdir /opt/bittorrent-benchmarks
+WORKDIR /opt/bittorrent-benchmarks
+COPY ./k8s ./k8s
+COPY ./docker ./docker
diff --git a/k8s/argo-workflows/codex-workflows-rbac.yaml b/k8s/argo-workflows/codex-workflows-rbac.yaml
new file mode 100644
index 0000000..d57915c
--- /dev/null
+++ b/k8s/argo-workflows/codex-workflows-rbac.yaml
@@ -0,0 +1,49 @@
+# This sets up a service account with the required permissions for running the Codex workflows. For now,
+# this needs to be manually applied to the cluster running Argo Workflows.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: codex-benchmarks-workflows
+  namespace: argo
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: codex-workflows-runner
+  namespace: argo
+rules:
+  - apiGroups: [ "" ]
+    resources: [ "namespaces", "persistentvolumeclaims", "pods", "services" ]
+    verbs: [ "*" ]
+
+  - apiGroups: [ "apps" ]
+    resources: [ "deployments", "statefulsets" ]
+    verbs: [ "*" ]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: codex-workflows-runner
+  namespace: argo
+subjects:
+  - kind: ServiceAccount
+    name: codex-benchmarks-workflows
+    namespace: argo
+roleRef:
+  kind: Role
+  name: codex-workflows-runner
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: codex-workflows-runner-executor
+  namespace: argo
+subjects:
+  - kind: ServiceAccount
+    name: codex-benchmarks-workflows
+    namespace: argo
+roleRef:
+  kind: Role
+  name: executor
+  apiGroup: rbac.authorization.k8s.io
\ No newline at end of file
diff --git a/k8s/argo-workflows/deluge-benchmark-workflow.yaml b/k8s/argo-workflows/deluge-benchmark-workflow.yaml
new file mode 100644
index 0000000..2f1113f
--- /dev/null
+++ b/k8s/argo-workflows/deluge-benchmark-workflow.yaml
@@ -0,0 +1,48 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: deluge-benchmark-
+spec:
+  serviceAccountName: codex-benchmarks-workflows
+  entrypoint: deluge-benchmark-workflow
+  templates:
+    - name: deluge-benchmark-workflow
+      steps:
+        - - name: deploy-experiment
+            template: deploy-experiment
+
+        - - name: wait-for-testrunner
+            template: wait-for-testrunner
+
+        - - name: wait-for-completion
+            template: wait-for-completion
+
+    - name: deploy-experiment
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          helm install e1 k8s/charts/deluge --namespace codex-benchmarks
+
+    - name: wait-for-testrunner
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          kubectl wait --for=condition=Ready --selector=app=testrunner pod -n codex-benchmarks --timeout=300s
+
+    - name: wait-for-completion
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          set -e
+          bash docker/bin/kubectl-wait-job --selector=app=testrunner -n codex-benchmarks
+
+    - name: cleanup
+      script:
+        image: codexstorage/bittorrent-benchmarks-workflows:latest
+        command: ["/bin/bash"]
+        source: |
+          helm uninstall e1 -n codex-benchmarks
+
diff --git a/charts/deluge/Chart.yaml b/k8s/charts/deluge/Chart.yaml
similarity index 100%
rename from charts/deluge/Chart.yaml
rename to k8s/charts/deluge/Chart.yaml
diff --git a/charts/deluge/templates/_helpers.tpl b/k8s/charts/deluge/templates/_helpers.tpl
similarity index 100%
rename from charts/deluge/templates/_helpers.tpl
rename to k8s/charts/deluge/templates/_helpers.tpl
diff --git a/charts/deluge/templates/deluge-pvc.yaml b/k8s/charts/deluge/templates/deluge-pvc.yaml
similarity index 100%
rename from charts/deluge/templates/deluge-pvc.yaml
rename to k8s/charts/deluge/templates/deluge-pvc.yaml
diff --git a/charts/deluge/templates/deluge-service.yaml b/k8s/charts/deluge/templates/deluge-service.yaml
similarity index 100%
rename from charts/deluge/templates/deluge-service.yaml
rename to k8s/charts/deluge/templates/deluge-service.yaml
diff --git a/charts/deluge/templates/deluge-statefulset.yaml b/k8s/charts/deluge/templates/deluge-statefulset.yaml
similarity index 100%
rename from charts/deluge/templates/deluge-statefulset.yaml
rename to k8s/charts/deluge/templates/deluge-statefulset.yaml
diff --git a/charts/deluge/templates/testrunner-job.yaml b/k8s/charts/deluge/templates/testrunner-job.yaml
similarity index 100%
rename from charts/deluge/templates/testrunner-job.yaml
rename to k8s/charts/deluge/templates/testrunner-job.yaml
diff --git a/charts/deluge/templates/tracker-deployment.yaml b/k8s/charts/deluge/templates/tracker-deployment.yaml
similarity index 100%
rename from charts/deluge/templates/tracker-deployment.yaml
rename to k8s/charts/deluge/templates/tracker-deployment.yaml
diff --git a/charts/deluge/templates/tracker-service.yaml b/k8s/charts/deluge/templates/tracker-service.yaml
similarity index 100%
rename from charts/deluge/templates/tracker-service.yaml
rename to k8s/charts/deluge/templates/tracker-service.yaml
diff --git a/charts/deluge/values.yaml b/k8s/charts/deluge/values.yaml
similarity index 100%
rename from charts/deluge/values.yaml
rename to k8s/charts/deluge/values.yaml