ci: refactor Jenkinsfile to be a pipeline (#3827)

Changes:
- Name local testnet output folders the same as the `make` target.
- Move both `Jenkinsfile`s to `ci` folder to avoid cluttering repo root.
- Separate builds by platform so logs from macos and linux hosts don't get mixed.
- Detect platform and architecture from Jenkins Job path to use one Jenkinsfile.
- Divide shell commands into as many stages as possible to make debugging easier.
- Generalize running testnets via a `launchLocalTestnet()` Groovy method.
- Handle uploading of results of running testnets on a stage-by-stage basis.
- Use `catchError()` to upload test results while marking job as failed.
- Abort previously started PR build jobs using `disableConcurrentBuilds()`.
- Throttle jobs using the new `throttleJobProperty()` function.

Builds:
- https://ci.status.im/job/nimbus/job/nimbus-eth2/job/platforms/job/linux/job/x86_64/
- https://ci.status.im/job/nimbus/job/nimbus-eth2/job/platforms/job/macos/job/x86_64/
- https://ci.status.im/job/nimbus/job/nimbus-eth2/job/platforms/job/macos/job/aarch64/

Signed-off-by: Jakub Sokołowski <jakub@status.im>
This commit is contained in:
Jakub Sokołowski 2022-07-22 00:34:31 +02:00 committed by GitHub
parent 322d47078b
commit c33989e490
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 140 additions and 115 deletions

113
Jenkinsfile vendored
View File

@ -1,113 +0,0 @@
// https://stackoverflow.com/questions/40760716/jenkins-abort-running-build-if-new-one-is-started
// Older builds are aborted only on PR branches; "stable", "testing" and
// "unstable" are left alone so their CI history stays complete.
if (!(env.BRANCH_NAME in ["stable", "testing", "unstable"])) {
  def thisBuild = env.BUILD_NUMBER as int
  // Passing the previous build's milestone first is what aborts that build.
  if (thisBuild > 1) {
    milestone(thisBuild - 1)
  }
  milestone(thisBuild)
}
/* Runs the full CI sequence inside the `nodeDir` sub-directory:
 * checkout, preparation, build, test suites and two local testnets.
 * The `finally` section packs testnet logs, tries to archive them and
 * calls cleanWs(), whether or not one of the stages failed.
 *
 * nodeDir - directory (created if missing) in which all steps are run. */
def runStages(nodeDir) {
sh "mkdir -p ${nodeDir}"
dir(nodeDir) {
try {
stage("Clone") { timeout(15) {
/* source code checkout */
checkout scm
/* we need to update the submodules before caching kicks in */
sh "git submodule update --init --recursive"
} }
stage("Preparations") { timeout(10) {
sh """#!/bin/bash
set -e
# macOS shows scary warnings if there are old libraries and object files laying around
make clean
# to allow the following parallel stages
make -j${env.NPROC} QUICK_AND_DIRTY_COMPILER=1 update
./scripts/setup_scenarios.sh
"""
} }
stage("Tools") { timeout(30) {
sh """#!/bin/bash
set -e
make -j${env.NPROC} LOG_LEVEL=TRACE
"""
} }
stage("Test suite") { timeout(60) {
sh "make -j${env.NPROC} DISABLE_TEST_FIXTURES_SCRIPT=1 test"
} }
stage("REST test suite") { timeout(5) {
sh "make restapi-test"
} }
stage("Testnet finalization") { timeout(75) {
// EXECUTOR_NUMBER will be 0 or 1, since we have 2 executors per Jenkins node
sh """#!/bin/bash
make local-testnet-minimal
make local-testnet-mainnet
"""
} }
} catch(e) {
// we need to rethrow the exception here
throw e
} finally {
// archive testnet logs
// Each existing data directory has its *.txt files packed into
// "<dir>-<NODE_NAME>.tar.gz"; "|| true" keeps a missing directory or a
// failed tar from failing this step.
sh """#!/bin/bash
for D in local_testnet0_data local_testnet1_data resttest0_data; do
[[ -d "\$D" ]] && tar czf "\${D}-\${NODE_NAME}.tar.gz" "\${D}"/*.txt || true
done
"""
try {
archiveArtifacts("*.tar.gz")
} catch(e) {
println("Couldn't archive artefacts.")
println(e.toString());
// we don't need to re-raise it here; it might be a PR build being cancelled by a newer one
}
// clean the workspace
cleanWs(disableDeferredWipeout: true, deleteDirs: true)
}
} // dir(...)
}
// Fan the same stage sequence out to every supported platform. Each branch
// goes through the 'nimbus-eth2' throttle category, is capped at 24 hours,
// and exports the agent's CPU count as NPROC before calling runStages().
parallel(
  'Linux': {
    throttle(['nimbus-eth2']) {
      timeout(time: 24, unit: 'HOURS') { // includes time in build queue
        node('linux') {
          def cpuCount = sh(returnStdout: true, script: 'nproc').trim()
          withEnv(["NPROC=${cpuCount}"]) {
            runStages('linux')
          }
        }
      }
    }
  },
  'macOS (AMD64)': {
    throttle(['nimbus-eth2']) {
      timeout(time: 24, unit: 'HOURS') { // includes time in build queue
        node('macos && x86_64') {
          def cpuCount = sh(returnStdout: true, script: 'sysctl -n hw.logicalcpu').trim()
          withEnv(["NPROC=${cpuCount}"]) {
            runStages('macos_amd64')
          }
        }
      }
    }
  },
  'macOS (ARM64)': {
    throttle(['nimbus-eth2']) {
      timeout(time: 24, unit: 'HOURS') { // includes time in build queue
        node('macos && arm64') {
          def cpuCount = sh(returnStdout: true, script: 'sysctl -n hw.logicalcpu').trim()
          withEnv(["NPROC=${cpuCount}"]) {
            runStages('macos_arm64')
          }
        }
      }
    }
  },
)

View File

@ -159,12 +159,12 @@ restapi-test:
local-testnet-minimal: local-testnet-minimal:
./scripts/launch_local_testnet.sh \ ./scripts/launch_local_testnet.sh \
--data-dir $@ \
--preset minimal \ --preset minimal \
--nodes 4 \ --nodes 4 \
--stop-at-epoch 5 \ --stop-at-epoch 5 \
--disable-htop \ --disable-htop \
--enable-logtrace \ --enable-logtrace \
--data-dir local_testnet0_data \
--base-port $$(( 9100 + EXECUTOR_NUMBER * 100 )) \ --base-port $$(( 9100 + EXECUTOR_NUMBER * 100 )) \
--base-rest-port $$(( 7100 + EXECUTOR_NUMBER * 100 )) \ --base-rest-port $$(( 7100 + EXECUTOR_NUMBER * 100 )) \
--base-metrics-port $$(( 8108 + EXECUTOR_NUMBER * 100 )) \ --base-metrics-port $$(( 8108 + EXECUTOR_NUMBER * 100 )) \
@ -177,11 +177,11 @@ local-testnet-minimal:
local-testnet-mainnet: local-testnet-mainnet:
./scripts/launch_local_testnet.sh \ ./scripts/launch_local_testnet.sh \
--data-dir $@ \
--nodes 4 \ --nodes 4 \
--stop-at-epoch 5 \ --stop-at-epoch 5 \
--disable-htop \ --disable-htop \
--enable-logtrace \ --enable-logtrace \
--data-dir local_testnet1_data \
--base-port $$(( 9100 + EXECUTOR_NUMBER * 100 )) \ --base-port $$(( 9100 + EXECUTOR_NUMBER * 100 )) \
--base-rest-port $$(( 7100 + EXECUTOR_NUMBER * 100 )) \ --base-rest-port $$(( 7100 + EXECUTOR_NUMBER * 100 )) \
--base-metrics-port $$(( 8108 + EXECUTOR_NUMBER * 100 )) \ --base-metrics-port $$(( 8108 + EXECUTOR_NUMBER * 100 )) \

138
ci/Jenkinsfile vendored Normal file
View File

@ -0,0 +1,138 @@
/* Declarative pipeline: fetch dependencies, build, run the test suites in
 * parallel, run the minimal and mainnet local testnets one after another,
 * then archive the resulting *.tar.gz files and wipe the workspace. */
pipeline {
/* This way we run the same Jenkinsfile on different platforms. */
agent { label params.AGENT_LABEL }
parameters {
/* Defaults to the value from the previous run, or, when there is none,
 * to the label computed by getAgentLabel(). */
string(
name: 'AGENT_LABEL',
description: 'Label for targetted CI slave host: linux/macos',
defaultValue: params.AGENT_LABEL ?: getAgentLabel(),
)
}
options {
timestamps()
/* This also includes wait time in the queue. */
timeout(time: 24, unit: 'HOURS')
/* Limit builds retained. */
buildDiscarder(logRotator(
numToKeepStr: '5',
daysToKeepStr: '30',
artifactNumToKeepStr: '3',
))
/* Throttle number of concurrent builds. */
throttleJobProperty(
categories: ['nimbus-eth2'],
throttleEnabled: true,
throttleOption: 'category'
)
/* Abort old builds for non-main branches. */
disableConcurrentBuilds(
abortPrevious: !isMainBranch()
)
}
environment {
/* NOTE(review): this expression is evaluated by the pipeline's Groovy code,
 * so it presumably reports the CPU count of the Jenkins controller rather
 * than of the selected agent - confirm. */
NPROC = Runtime.getRuntime().availableProcessors()
/* Passes the job count to every `make` call through the environment,
 * which is why the `sh` steps below carry no explicit -j flag. */
MAKEFLAGS = "-j${env.NPROC}"
}
stages {
stage('Deps') {
steps { timeout(20) {
/* To allow the following parallel stages. */
sh 'make QUICK_AND_DIRTY_COMPILER=1 update'
/* Presumably builds remaining dependencies ahead of the later stages;
 * what `deps` covers is defined in the Makefile - confirm. */
sh 'make deps'
/* Download test vectors. */
sh './scripts/setup_scenarios.sh'
} }
}
stage('Build') {
steps { timeout(30) {
sh 'make LOG_LEVEL=TRACE'
} }
}
stage('Tests') {
/* Both test suites run concurrently, each with its own timeout. */
parallel {
stage('General') {
steps { timeout(60) {
sh 'make DISABLE_TEST_FIXTURES_SCRIPT=1 test'
} }
}
stage('REST') {
steps { timeout(5) {
sh 'make restapi-test'
} }
}
}
}
stage('Finalizations') {
/* TODO: Try in parallel. */
stages {
stage('minimal') {
steps { script { timeout(15) {
launchLocalTestnet('minimal')
} } }
}
stage('mainnet') {
steps { script { timeout(45) {
launchLocalTestnet('mainnet')
} } }
}
}
}
stage('Upload') {
/* Publishes the archives written by launchLocalTestnet(). */
steps { timeout(5) {
archiveArtifacts('*.tar.gz')
} }
}
}
post {
/* Wipe the workspace after every build, whatever its result. */
always {
cleanWs(
disableDeferredWipeout: true,
deleteDirs: true
)
}
}
}
/* Runs the `local-testnet-<name>` make target and packs its logs.
 *
 * A failing testnet marks both the stage and the build as FAILURE, but the
 * error is caught so that the logs are still packed afterwards and later
 * stages can archive them.
 *
 * name - testnet flavour; used to build the make target name, the data
 *        directory name and the archive name (`local-testnet-<name>`). */
def launchLocalTestnet(String name) {
  /* We want to mark job as failed, but save the results. */
  catchError(
    message: "Local ${name} testnet finalization failure!",
    buildResult: 'FAILURE',
    stageResult: 'FAILURE'
  ) {
    sh "make local-testnet-${name}"
  }
  /* Archive test results regardless of outcome.
   * Compress with gzip (`z`) so the contents match the `.tar.gz` name;
   * the previous `j` flag produced a bzip2 stream under a gzip extension. */
  sh "tar czf local-testnet-${name}.tar.gz local-testnet-${name}/*.txt"
}
/* True when the build's branch is named 'stable', 'testing' or 'unstable'. */
def isMainBranch() {
  return env.BRANCH_NAME in ['stable', 'testing', 'unstable']
}
/* Lets a single Jenkinsfile serve several platforms by deriving
 * the agent label expression from the name of the job being run. */
def getAgentLabel() {
  /* An explicitly provided parameter always takes precedence. */
  if (params.AGENT_LABEL) { return params.AGENT_LABEL }
  /* The job path is read from the current thread's name, because
   * env is not available before an agent is picked. */
  def pathParts = Thread.currentThread().getName().split('/')
  /* Keep every valid label that appears as a path segment, in this order. */
  def validLabels = ['linux', 'macos', 'x86_64', 'aarch64', 'arm64']
  return validLabels.findAll { pathParts.contains(it) }.join(' && ')
}