# bittorrent-benchmarks/k8s/argo-workflows/deluge-benchmark-workflow.yaml
# (Repository listing metadata: 293 lines, 10 KiB, YAML.)

apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: deluge-benchmark-
spec:
  serviceAccountName: codex-benchmarks-workflows
  entrypoint: benchmark-workflow
  # Workflow-level parameters. The whole set is handed to the parameter expander as
  # JSON (see the expand-parameter-matrix template), and a few are also read directly
  # by individual templates. Values are quoted so that every parameter is a string.
  arguments:
    parameters:
      - name: repetitions
        value: "5"
      - name: seederSets
        value: "2"
      - name: fileSize
        value: '["100MB", "1GB"]'
      # NOTE(review): presumably a list of [networkSize, seeders] pairs in which either
      # side may itself be a list of alternatives -- confirm against parameter_expander.
      - name: constrained__networkSize_seeders
        value: "[[2, 1], [8, [1, 2, 4]], [16, [1, 2, 4, 8]], [32, [1, 2, 4, 8, 16]]]"
      # Read by define-image-settings (image / pull policy selection) and by
      # deploy-experiment (whether to apply the devnet values file).
      - name: minikubeEnv
        value: "false"
      # Passed as --timeout to kubectl-wait-job in wait-for-experiment.
      - name: maxExperimentDuration
        value: "144h"
      # Groups the expansion such that all experiments with a given networkSize run together,
      # smallest to largest. This can save significant amounts of time when running on a
      # cluster with autoscaling.
      - name: orderBy
        value: '["networkSize", "seeders", "fileSize"]'
  templates:
- name: benchmark-workflow
  # Entry-point pipeline. Each outer list item under `steps` is one sequential stage:
  # resolve the runner image, mint a group id, expand the parameter matrix, and then
  # run one wrapped experiment per expanded item.
  #
  # parallelism: 1 caps the number of concurrently running children of this template,
  # so the fanned-out experiments execute one at a time.
  parallelism: 1
  steps:
    - - name: define-image-settings
        template: define-image-settings

    - - name: generate-group-id
        template: generate-group-id
        arguments:
          parameters:
            - name: runnerImage
              value: '{{steps.define-image-settings.outputs.parameters.image}}'
            - name: imagePullPolicy
              value: '{{steps.define-image-settings.outputs.parameters.imagePullPolicy}}'

    - - name: expand-parameter-matrix
        template: expand-parameter-matrix
        arguments:
          parameters:
            - name: runnerImage
              value: '{{steps.define-image-settings.outputs.parameters.image}}'
            - name: imagePullPolicy
              value: '{{steps.define-image-settings.outputs.parameters.imagePullPolicy}}'

    # Fan-out: iterates over the list emitted by expand-parameter-matrix; the fields of
    # each element are read through `item`.
    - - name: benchmark-experiment
        template: wrapped-benchmark-experiment
        withParam: '{{steps.expand-parameter-matrix.outputs.result}}'
        arguments:
          parameters:
            - name: groupId
              value: '{{steps.generate-group-id.outputs.result}}'
            - name: runId
              value: '{{item.runId}}'
            - name: fileSize
              value: '{{item.fileSize}}'
            - name: seederSets
              value: '{{item.seederSets}}'
            - name: networkSize
              value: '{{item.networkSize}}'
            - name: seeders
              value: '{{item.seeders}}'
            - name: repetitions
              value: '{{item.repetitions}}'
            - name: runnerImage
              value: '{{steps.define-image-settings.outputs.parameters.image}}'
            - name: imagePullPolicy
              value: '{{steps.define-image-settings.outputs.parameters.imagePullPolicy}}'
- name: define-image-settings
  # Selects the runner image and its pull policy from the `minikubeEnv` workflow
  # parameter. Argo Workflows offers no direct way to choose a parameter value
  # conditionally, so a tiny script writes both choices to files and exposes them as
  # output parameters (`image`, `imagePullPolicy`).
  script:
    image: busybox:latest
    # Argo stores `source` in a file and appends that file's path as the last argument
    # to `command`, so the command must be a plain interpreter (no `-c`).
    command: ["sh"]
    source: |
      # POSIX `[` needs `=` for string comparison and a space before the closing `]`.
      if [ "{{workflow.parameters.minikubeEnv}}" = "true" ]; then
        # Locally built image: it exists only inside minikube, so never pull it.
        echo "bittorrent-benchmarks-workflows:minikube" > /tmp/image.txt
        echo "Never" > /tmp/imagePullPolicy.txt
      else
        echo "codexstorage/bittorrent-benchmarks-workflows:latest" > /tmp/image.txt
        echo "Always" > /tmp/imagePullPolicy.txt
      fi
  outputs:
    parameters:
      - name: image
        valueFrom:
          path: /tmp/image.txt
      - name: imagePullPolicy
        valueFrom:
          path: /tmp/imagePullPolicy.txt
- name: expand-parameter-matrix
  # Runs the `parameter_expander` Python module from the runner image, giving it all
  # workflow parameters as a single JSON argument. The caller consumes this step's
  # result through `withParam`.
  # NOTE(review): the expander's exact output schema is not visible here; the caller
  # reads runId, fileSize, seederSets, networkSize, seeders and repetitions per item.
  inputs:
    parameters:
      - name: runnerImage
      - name: imagePullPolicy
  script:
    image: "{{inputs.parameters.runnerImage}}"
    imagePullPolicy: "{{inputs.parameters.imagePullPolicy}}"
    command:
      - python
      - -m
      - parameter_expander
    args:
      - '{{ workflow.parameters.json }}'
- name: generate-group-id
  # Prints the current Unix time in seconds; the caller reads it from this step's
  # result and uses it as the group id shared by all experiments of this run.
  inputs:
    parameters:
      - name: runnerImage
      - name: imagePullPolicy
  script:
    image: "{{inputs.parameters.runnerImage}}"
    imagePullPolicy: "{{inputs.parameters.imagePullPolicy}}"
    command:
      - /bin/bash
    source: |
      date +%s
# Single-step wrapper around benchmark-experiment whose only purpose is to make exit
# handlers fire per experiment. Calling benchmark-experiment straight from the main flow
# would defer the exit handlers until the entire set of experiments had finished, rather
# than running them as each individual experiment completes.
- name: wrapped-benchmark-experiment
  inputs:
    parameters:
      - name: runnerImage
      - name: imagePullPolicy
      - name: groupId
      - name: runId
      - name: fileSize
      - name: seederSets
      - name: networkSize
      - name: seeders
      - name: repetitions
  steps:
    - - name: benchmark-experiment
        template: benchmark-experiment
        # Every input is forwarded unchanged to the real experiment template.
        arguments:
          parameters:
            - name: groupId
              value: '{{inputs.parameters.groupId}}'
            - name: runId
              value: '{{inputs.parameters.runId}}'
            - name: fileSize
              value: '{{inputs.parameters.fileSize}}'
            - name: seederSets
              value: '{{inputs.parameters.seederSets}}'
            - name: networkSize
              value: '{{inputs.parameters.networkSize}}'
            - name: seeders
              value: '{{inputs.parameters.seeders}}'
            - name: repetitions
              value: '{{inputs.parameters.repetitions}}'
            - name: runnerImage
              value: '{{inputs.parameters.runnerImage}}'
            - name: imagePullPolicy
              value: '{{inputs.parameters.imagePullPolicy}}'
        # Exit hook for this one experiment: invokes the cleanup template with the
        # experiment's runId.
        hooks:
          exit:
            template: cleanup
            arguments:
              parameters:
                - name: runId
                  value: '{{inputs.parameters.runId}}'
                - name: runnerImage
                  value: '{{inputs.parameters.runnerImage}}'
                - name: imagePullPolicy
                  value: '{{inputs.parameters.imagePullPolicy}}'
- name: benchmark-experiment
  # One experiment, in two sequential stages: deploy it (deploy-experiment), then
  # block until it completes (wait-for-experiment).
  inputs:
    parameters:
      - name: runnerImage
      - name: imagePullPolicy
      - name: groupId
      - name: runId
      - name: fileSize
      - name: seederSets
      - name: networkSize
      - name: seeders
      - name: repetitions
  steps:
    # Stage 1: needs the full experiment description.
    - - name: deploy-experiment
        template: deploy-experiment
        arguments:
          parameters:
            - name: runnerImage
              value: '{{inputs.parameters.runnerImage}}'
            - name: imagePullPolicy
              value: '{{inputs.parameters.imagePullPolicy}}'
            - name: groupId
              value: '{{inputs.parameters.groupId}}'
            - name: runId
              value: '{{inputs.parameters.runId}}'
            - name: fileSize
              value: '{{inputs.parameters.fileSize}}'
            - name: seederSets
              value: '{{inputs.parameters.seederSets}}'
            - name: networkSize
              value: '{{inputs.parameters.networkSize}}'
            - name: seeders
              value: '{{inputs.parameters.seeders}}'
            - name: repetitions
              value: '{{inputs.parameters.repetitions}}'

    # Stage 2: only the identifiers are needed to locate the experiment's job.
    - - name: wait-for-experiment
        template: wait-for-experiment
        arguments:
          parameters:
            - name: runnerImage
              value: '{{inputs.parameters.runnerImage}}'
            - name: imagePullPolicy
              value: '{{inputs.parameters.imagePullPolicy}}'
            - name: groupId
              value: '{{inputs.parameters.groupId}}'
            - name: runId
              value: '{{inputs.parameters.runId}}'
- name: deploy-experiment
  # Installs the deluge Helm chart for one experiment as release `e<runId>` in the
  # codex-benchmarks namespace, passing the experiment settings via `--set`.
  inputs:
    parameters:
      - name: groupId
      - name: runId
      - name: fileSize
      - name: seederSets
      - name: networkSize
      - name: seeders
      - name: repetitions
      - name: runnerImage
      - name: imagePullPolicy
  script:
    image: '{{inputs.parameters.runnerImage}}'
    imagePullPolicy: '{{inputs.parameters.imagePullPolicy}}'
    command: ["/bin/bash"]
    source: |
      set -e

      # Extra `-f <values>` arguments for helm. Starts empty so that the expansion
      # below is well defined when no cluster-specific values file applies.
      VALUE_FILE=()
      if [[ "{{workflow.parameters.minikubeEnv}}" == "false" ]]; then
        echo "Using devnet cluster values for deploy."
        VALUE_FILE=(-f "./k8s/clusters/devnet/deluge-chart-values.yaml")
      fi

      # Each continuation has a space before the backslash so that arguments stay
      # separated regardless of how the following line is indented.
      helm install e{{inputs.parameters.runId}} ./k8s/charts/deluge \
        --namespace codex-benchmarks "${VALUE_FILE[@]}" \
        --set experiment.groupId=g{{inputs.parameters.groupId}} \
        --set experiment.repetitions={{inputs.parameters.repetitions}} \
        --set experiment.fileSize={{inputs.parameters.fileSize}} \
        --set experiment.networkSize={{inputs.parameters.networkSize}} \
        --set experiment.seeders={{inputs.parameters.seeders}} \
        --set experiment.seederSets={{inputs.parameters.seederSets}}
- name: wait-for-experiment
  # Blocks until the experiment-runner job of release `e<runId>` in group `g<groupId>`
  # finishes, or until the workflow-wide `maxExperimentDuration` timeout elapses.
  # The step's exit status is that of the kubectl-wait-job helper.
  inputs:
    parameters:
      - name: runnerImage
      - name: imagePullPolicy
      - name: groupId
      - name: runId
  script:
    image: '{{inputs.parameters.runnerImage}}'
    imagePullPolicy: '{{inputs.parameters.imagePullPolicy}}'
    command: ["/bin/bash"]
    source: |
      # Build the label selector as one comma-separated string. Assembling it in a
      # variable (rather than across backslash-continued lines) keeps it a single
      # argument no matter how this script is indented.
      SELECTOR="app.kubernetes.io/component=deluge-experiment-runner"
      SELECTOR="${SELECTOR},app.kubernetes.io/instance=e{{inputs.parameters.runId}}"
      SELECTOR="${SELECTOR},app.kubernetes.io/part-of=g{{inputs.parameters.groupId}}"

      ./docker/bin/kubectl-wait-job \
        --selector="${SELECTOR}" \
        --timeout={{workflow.parameters.maxExperimentDuration}} \
        -n codex-benchmarks
- name: cleanup
  # Removes the Helm release `e<runId>` from the codex-benchmarks namespace. Referenced
  # as the exit hook of the wrapped experiment step.
  inputs:
    parameters:
      - name: runnerImage
      - name: imagePullPolicy
      - name: runId
  script:
    image: "{{inputs.parameters.runnerImage}}"
    imagePullPolicy: "{{inputs.parameters.imagePullPolicy}}"
    command:
      - /bin/bash
    source: |
      helm uninstall e{{inputs.parameters.runId}} -n codex-benchmarks