First commit

Most of the code from infra-role-beacon-node-linux
This commit is contained in:
Tanguy 2022-04-04 13:51:14 +02:00
commit 7353484dcf
No known key found for this signature in database
GPG Key ID: 7DD8EC6B6CE6C45E
18 changed files with 470 additions and 0 deletions

77
README.md Normal file
View File

@ -0,0 +1,77 @@
# Description
This role provisions a nim Codex installation.
# Introduction
The role will:
* Checkout a branch from the [nim-dagger](https://github.com/status-im/nim-dagger/) repo
* Build it using the [`build.sh`](./templates/build.sh.j2) Bash script
* Schedule regular builds using [Systemd timers](https://www.freedesktop.org/software/systemd/man/systemd.timer.html)
* Start a node by defining a [Systemd service](https://www.freedesktop.org/software/systemd/man/systemd.service.html)
# Ports
The service exposes two ports by default:
* `9000` - LibP2P peering port. Must __ALWAYS__ be public.
* `9200` - Prometheus metrics port. Should not be public.
# Installation
Add to your `requirements.yml` file:
```yaml
- name: infra-role-nim-codex
src: git+git@github.com:status-im/infra-role-nim-codex
scm: git
```
# Configuration
The crucial settings are:
```yaml
# branch which should be built
codex_repo_branch: 'stable'
# optional setting for debug mode
codex_log_level: 'DEBUG'
```
# Management
## Service
Assuming the `main` branch was built you can manage the service with:
```sh
sudo systemctl start codex-main
sudo systemctl status codex-main
sudo systemctl stop codex-main
```
You can view logs under:
```sh
tail -f /data/codex-main/logs/service.log
```
All node data is located in `/data/codex-main/data`.
## Builds
A timer will be installed to build the image:
```sh
> sudo systemctl list-units --type=service '*codex-*'
UNIT LOAD ACTIVE SUB DESCRIPTION
codex-main.service loaded active running Codex (main)
```
To rebuild the image:
```sh
> sudo systemctl start build-codex-main
```
To check full build logs use:
```sh
journalctl -u build-codex-main.service
```
# Requirements
Due to being part of Status infra this role assumes availability of certain things:
* The `iptables-persistent` package (firewall rules are saved to `/etc/iptables/rules.v4`)

55
defaults/main.yml Normal file
View File

@ -0,0 +1,55 @@
---
# Service naming and on-disk layout; everything is derived from the branch name.
codex_service_name: 'codex-{{ codex_repo_branch }}'
codex_service_path: '/data/{{ codex_service_name }}'
codex_data_folder: '{{ codex_service_path }}/data'
codex_repo_path: '{{ codex_service_path }}/repo'
codex_logs_link: '{{ codex_service_path }}/logs'
codex_user: 'codex'
codex_group: 'staff'

# Build settings
codex_build_service_name: 'build-{{ codex_service_name }}'
codex_build_timer_enabled: true
codex_build_timer_timeout: 3600
codex_build_frequency: 'daily'
codex_build_days_kept: 3
# Half of the available vCPUs, rounded up, and never less than 1.
# The division must be parenthesised: Jinja filters bind tighter than '/',
# so without the parentheses only the literal 2 would be rounded and a
# single-vCPU host would end up with 0.5, i.e. "-j0" in build.sh.
codex_build_jobs: '{{ (ansible_processor_vcpus / 2) | round(0, "ceil") | int }}'
codex_build_log_level: 'TRACE'
codex_build_restarts_service: true
codex_build_nim_flags: >-
  -d:noSignalHandler

codex_repo_url: 'https://github.com/status-im/nim-dagger'
codex_repo_branch: 'main'

codex_log_level: 'INFO' # TRACE DEBUG INFO NOTICE WARN ERROR FATAL NONE
codex_log_format: 'json' # auto colors nocolors json none

# connectivity settings
codex_public_address: '{{ ansible_host }}'
codex_max_peers: 160
codex_discovery_port: 9000
codex_listening_port: 9000

# Firewall
codex_firewall_libp2p_open: true
codex_firewall_metrics_open: true

# Scraping of metrics done via VPN. Protected by firewall.
codex_metrics_enabled: true
codex_metrics_address: '0.0.0.0'
codex_metrics_port: 9200

codex_rest_port: 5052

# Size in MiB
codex_cache_size: 100

# Consul service definition settings
codex_consul_service_name: 'beacon-node'
codex_consul_service_file_name: '{{ codex_service_name | replace("-", "_") }}'
codex_consul_metrics_service_name: '{{ codex_consul_service_name }}-metrics'

# Which version of Nim to use for the build
# By default, it would use the one specified by the pinned nimbus-build-system
codex_nim_commit: ''

15
files/ansible_toggle.sh Normal file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Call this script to toggle whether Ansible should make changes.
# If named 'ansible_disabled.sh' all Ansible tasks are skipped.
#
# The script renames itself between '*_enabled.sh' and '*_disabled.sh'.
# Exits with status 1 if its own name matches neither suffix.
set -e

# Absolute path of this script; '-s' keeps symlinks unresolved so the name
# that was actually invoked is the one that gets renamed.
SCRIPT_PATH=$(realpath -s "${0}")

# Only the file-name suffix is rewritten. A plain ${VAR/enabled/disabled}
# substitution replaces the FIRST match anywhere in the absolute path, which
# would corrupt the destination if a parent directory contains that word.
if [[ "${SCRIPT_PATH}" == *_enabled.sh ]]; then
    mv -f "${SCRIPT_PATH}" "${SCRIPT_PATH%_enabled.sh}_disabled.sh"
    echo "Disabled automatic updates by Ansible!"
elif [[ "${SCRIPT_PATH}" == *_disabled.sh ]]; then
    mv -f "${SCRIPT_PATH}" "${SCRIPT_PATH%_disabled.sh}_enabled.sh"
    echo "Enabled automatic updates by Ansible!"
else
    echo "Expected script name to include 'disabled' or 'enabled'!" >&2
    exit 1
fi

3
handlers/main.yml Normal file
View File

@ -0,0 +1,3 @@
---
# Handler: dump the current IPv4 iptables rules to /etc/iptables/rules.v4.
# 'shell' is required (rather than 'command') because of the '>' redirection.
- name: Save iptables rules
shell: iptables-save > /etc/iptables/rules.v4

21
meta/main.yml Normal file
View File

@ -0,0 +1,21 @@
---
# Ansible Galaxy metadata and role dependencies.
galaxy_info:
  author: Jakub Sokołowski <jakub@status.im>
  description: Build and run nim codex nodes
  company: Status.im
  license: MIT
  # Quoted so YAML keeps it a string instead of a float. The tasks in this
  # role use 'block', 'include_role' and 'include_tasks'; the latter was
  # introduced in Ansible 2.4, so 1.9 could never run this role.
  min_ansible_version: '2.4'
  platforms:
    - name: Ubuntu
      versions:
        - xenial

# Roles pulled in via 'include_role' from the tasks of this role.
dependencies:
  - name: consul-service
    src: git+git@github.com:status-im/infra-role-consul-service.git
    scm: git
  - name: open-ports
    src: git+git@github.com:status-im/infra-role-open-ports.git
    scm: git
  - name: systemd-timer
    src: git+git@github.com:status-im/infra-role-systemd-timer.git
    scm: git

30
tasks/build.yml Normal file
View File

@ -0,0 +1,30 @@
---
# Checks out the source repository and registers the periodic build job.

# Clone/update the configured branch as the codex user.
# 'force: true' discards local modifications in the working copy.
- name: Clone repo branch
git:
repo: '{{ codex_repo_url }}'
version: '{{ codex_repo_branch }}'
dest: '{{ codex_repo_path }}'
force: true
update: true
become_user: '{{ codex_user }}'
# The result (codex_bin) is used below to decide whether the build should
# be started right away.
- name: Check if node binary exists
stat:
path: '{{ codex_repo_path }}/build/codex'
register: codex_bin
# Delegates to the systemd-timer role, which is expected to run
# '<service path>/build.sh' on the configured schedule.
# NOTE(review): the exact meaning of each systemd_timer_* variable is
# defined by that role, not visible here - confirm against its defaults.
- name: Create timer for build script
include_role: name=systemd-timer
vars:
systemd_timer_name: '{{ codex_build_service_name }}'
systemd_timer_user: '{{ codex_user }}'
systemd_timer_group: '{{ codex_group }}'
systemd_timer_description: 'Build {{ codex_service_name }}'
# True only when no binary has been built yet.
systemd_timer_start_on_creation: '{{ not codex_bin.stat.exists }}'
systemd_timer_ionice_class: 'idle'
systemd_timer_consul_warning: true
systemd_timer_enabled: '{{ codex_build_timer_enabled }}'
systemd_timer_frequency: '{{ codex_build_frequency }}'
systemd_timer_timeout_sec: '{{ codex_build_timer_timeout }}'
systemd_timer_work_dir: '{{ codex_service_path }}'
systemd_timer_script_path: '{{ codex_service_path }}/build.sh'

17
tasks/checks.yml Normal file
View File

@ -0,0 +1,17 @@
---
# Sets the 'codex_ansible_disabled' fact, which tasks/main.yml uses to skip
# every other task of the role.

# Used by devs who make manual changes to node setup.
# The toggle script is named 'ansible_disabled.sh' while updates are disabled.
- name: Check Ansible toggle script
stat:
path: '{{ codex_service_path }}/ansible_disabled.sh'
register: codex_ansible_toggle
- name: Check if Ansible updates are disabled
set_fact:
codex_ansible_disabled: '{{ codex_ansible_toggle.stat.exists }}'
# Using 'fail' task to make the warning red and visible.
# 'ignore_errors' keeps the play going; the failure is only cosmetic.
- name: 'WARNING: {{ codex_service_name }}'
fail:
msg: 'WARNING: Ansible changes disabled due to toggle script!'
when: codex_ansible_disabled
ignore_errors: true

18
tasks/config.yml Normal file
View File

@ -0,0 +1,18 @@
---
# Creates the node data directory and installs the helper scripts.

# Creates codex_data_folder ('<service path>/data'), owned by the codex
# user and group, with no access for others.
- name: Create service directory
file:
path: '{{ codex_data_folder }}'
state: directory
owner: '{{ codex_user }}'
group: '{{ codex_group }}'
mode: 0770
# Renders every file matched by the glob into the service directory,
# dropping the '.j2' suffix from the destination file name.
- name: Create helper scripts
template:
src: '{{ item }}'
dest: '{{ codex_service_path }}/{{ item|basename|replace(".j2", "") }}'
owner: '{{ codex_user }}'
group: '{{ codex_group }}'
mode: 0750
with_fileglob:
- 'templates/scripts/*'

24
tasks/consul.yml Normal file
View File

@ -0,0 +1,24 @@
#TODO
#---
#- name: 'Create Consul service definition'
# include_role: name=consul-service
# vars:
# consul_config_name: '{{ codex_consul_service_file_name }}'
# consul_services:
# - id: '{{ codex_service_name }}'
# name: '{{ codex_consul_service_name }}'
# port: '{{ codex_listening_port }}'
# address: '{{ codex_public_address }}'
# tags: ['{{ env }}.{{ stage }}', 'beacon', 'nimbus']
#
# - id: '{{ codex_service_name }}-metrics'
# name: '{{ codex_consul_metrics_service_name }}'
# port: '{{ codex_metrics_port }}'
# address: '{{ ansible_local.wireguard.vpn_ip }}'
# tags: ['{{ env }}.{{ stage }}', 'beacon', 'nimbus', 'metrics']
# meta:
# container: '{{ codex_service_name }}'
# checks:
# - name: '{{ codex_consul_metrics_service_name }}-health'
# type: 'http'
# http: 'http://localhost:{{ codex_metrics_port }}/health'

11
tasks/firewall.yml Normal file
View File

@ -0,0 +1,11 @@
---
# Firewall rules are managed through the open-ports role.
# NOTE(review): the semantics of 'chain', 'ipset' and the default 'state'
# come from that role and are not visible here - confirm against it.
- name: Open Libp2p ports in iptables
include_role: name=open-ports
vars:
open_ports_default_comment: '{{ codex_service_name }}'
open_ports_default_chain: 'SERVICES'
open_ports_list:
# LibP2P listening (TCP) and discovery (UDP) ports, toggled by codex_firewall_libp2p_open.
- { port: '{{ codex_listening_port }}', protocol: 'tcp', state: '{{ codex_firewall_libp2p_open | ternary("present", "absent") }}' }
- { port: '{{ codex_discovery_port }}', protocol: 'udp', state: '{{ codex_firewall_libp2p_open | ternary("present", "absent") }}' }
# Metrics port in the VPN chain, toggled by codex_firewall_metrics_open.
- { port: '{{ codex_metrics_port }}', chain: 'VPN', ipset: 'metrics.hq', state: '{{ codex_firewall_metrics_open | ternary("present", "absent") }}' }
# REST API port in the VPN chain; no explicit 'state', so the role default applies.
- { port: '{{ codex_rest_port }}', chain: 'VPN', ipset: '{{ env }}.{{ stage }}' }

8
tasks/install.yml Normal file
View File

@ -0,0 +1,8 @@
---
# Installs the system packages needed to compile the node.
# No 'state' is given, so the apt module default applies.
- name: Install build dependencies
apt:
name:
- build-essential
- libpcre3-dev
- acl # needed to run build script with ansible async/poll
- cmake # needed by nim-leopard

11
tasks/main.yml Normal file
View File

@ -0,0 +1,11 @@
---
# Role entry point.
# checks.yml sets 'codex_ansible_disabled'; when it is true the whole block
# below is skipped, so manual changes on the host are left untouched.
- include_tasks: checks.yml
- when: not codex_ansible_disabled
block:
- include_tasks: install.yml
- include_tasks: user.yml
- include_tasks: config.yml
- include_tasks: build.yml
- include_tasks: service.yml
- include_tasks: firewall.yml
# Consul registration is not enabled yet (tasks/consul.yml is still a TODO).
# - include_tasks: consul.yml

23
tasks/service.yml Normal file
View File

@ -0,0 +1,23 @@
---
# Installs the systemd unit for the node and makes sure it is running.

# Makes '<service path>/logs' a symlink to /var/log/service/<service name>.
# 'force: true' lets the link be created even if the source does not exist yet.
- name: Symlink service logs folder
file:
src: '/var/log/service/{{ codex_service_name }}'
dest: '{{ codex_logs_link }}'
state: 'link'
force: true
# The registered result decides below whether a restart is needed.
- name: Create systemd Unit file
template:
src: 'beacon-node.service.j2'
dest: '/etc/systemd/system/{{ codex_service_name }}.service'
mode: 0644
register: codex_service_definition
# Restart only when the unit file changed; otherwise just ensure it is started.
- name: Reload and restart the service
systemd:
name: '{{ codex_service_name }}.service'
enabled: true
daemon_reload: true
state: |-
{{ codex_service_definition.changed
| ternary("restarted", "started") }}

25
tasks/user.yml Normal file
View File

@ -0,0 +1,25 @@
---
# Creates the build/service user and the sudoers rules around it.

- name: Create user for codex builds
  user:
    name: '{{ codex_user }}'
    group: '{{ codex_group }}'
    shell: '/bin/bash'

# Sets 'pull.rebase = false' in the global git config of the codex user.
- name: Set disable rebase as merge strategy
  git_config:
    scope: 'global'
    name: 'pull.rebase'
    value: 'false'
  become_user: '{{ codex_user }}'

# Both sudoers files are checked with visudo before being moved into place:
# a syntactically broken file in /etc/sudoers.d can make sudo unusable.
- name: Sudoers file to let non-root users start jobs
  template:
    src: 'builds_sudoers.j2'
    dest: '/etc/sudoers.d/80-{{ codex_service_name }}-builds'
    mode: 0440
    validate: '/usr/sbin/visudo -cf %s'

- name: Sudoers file for login as codex user
  template:
    src: 'codex_sudoers.j2'
    dest: '/etc/sudoers.d/81-{{ codex_service_name }}-login'
    mode: 0440
    validate: '/usr/sbin/visudo -cf %s'

View File

@ -0,0 +1,25 @@
# systemd unit for a Codex node built from the {{ codex_repo_branch }} branch.
[Unit]
Description=Codex ({{ codex_repo_branch }})
Documentation=https://github.com/status-im/nim-dagger/
Requires=network-online.target
After=network-online.target

[Service]
User={{ codex_user }}
Group={{ codex_group }}
WorkingDirectory={{ codex_service_path }}
SyslogIdentifier={{ codex_service_name }}
SyslogFacility=local6
SyslogLevel=debug
Restart=on-failure
# Logging options that are currently disabled. They are kept here, outside
# of the continued ExecStart line, so they can never become part of it:
#   --log-format={{ codex_log_format }}
#   --log-level={{ codex_log_level }}
# The last argument must NOT end with a backslash, otherwise the
# "[Install]" header below gets appended to the command line.
ExecStart={{ codex_repo_path }}/build/codex \
    --data-dir='{{ codex_data_folder }}' \
    --listen-addrs=/ip4/{{ codex_public_address }}/tcp/{{ codex_listening_port }} \
    --max-peers={{ codex_max_peers }} \
    --api-port={{ codex_rest_port }} \
    --cache-size={{ codex_cache_size }}

[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,12 @@
# Allow non-root users start Codex builds and manage service with sudo.
# Every rule applies to members of the {{ codex_group }} group and needs no password.
# NOTE(review): '*' in sudoers arguments also matches spaces, so these
# patterns accept additional arguments before and after the unit name -
# confirm this is acceptable or tighten the patterns.
# {{ codex_service_name }}
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *status {{ codex_service_name }}*
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *start {{ codex_service_name }}*
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *stop {{ codex_service_name }}*
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *restart {{ codex_service_name }}*
# build-{{ codex_service_name }}
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *status build-{{ codex_service_name }}*
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *start build-{{ codex_service_name }}*
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *stop build-{{ codex_service_name }}*
%{{ codex_group }} ALL= NOPASSWD: /usr/bin/systemctl *restart build-{{ codex_service_name }}*

View File

@ -0,0 +1,4 @@
# Allow non-root users logging in as codex user
# First rule: members of {{ codex_group }} may run 'su {{ codex_user }}' as root without a password.
# Second rule: they may run any command as {{ codex_user }} without a password.
%{{ codex_group }} ALL= NOPASSWD: /bin/su {{ codex_user }}
%{{ codex_group }} ALL=({{ codex_user }}) NOPASSWD: ALL

View File

@ -0,0 +1,91 @@
#!/usr/bin/env bash
# vim: ft=sh
set -e
# Succeeds when the repository in the current directory has a detached HEAD,
# i.e. when the symbolic name of HEAD resolves to the literal string "HEAD"
# rather than to a branch name.
function headIsDetached() {
    [ "$(git rev-parse --abbrev-ref --symbolic-full-name HEAD)" = "HEAD" ]
}
# Succeeds when a binary for the current commit has already been built.
# Reads the global COMMIT and must be called from the repository root.
# The path has to match the name buildBinaries gives the binary
# (build/codex_${COMMIT}); checking any other name makes every run rebuild.
function binaryExists() {
    [[ -f "build/codex_${COMMIT}" ]]
}
# Brings the checked-out branch in line with its remote counterpart.
# Reads the global BRANCH. "git pull" is deliberately avoided because
# upstream history may be rewritten; the local branch is hard-reset to
# the fetched remote ref instead, discarding any local changes.
function fetchChanges() {
    local remote_ref="origin/${BRANCH}"
    git fetch
    git reset --hard "${remote_ref}"
}
# Builds the node from the current checkout and publishes it under build/.
# Reads the global COMMIT and must be called from the repository root.
# The script runs with 'set -e', so any failing step aborts the build.
function buildBinaries() {
# Control number of jobs used to lower impact on running nodes
export MAKEFLAGS="-j{{ codex_build_jobs | int }}"
# NIM_COMMIT is exported only when a specific Nim version was configured.
{% if codex_nim_commit is defined and codex_nim_commit != "" %}
export NIM_COMMIT={{ codex_nim_commit }}
{% endif %}
make update OVERRIDE=1
make libbacktrace
# NOTE(review): the configured nim flags are passed as ONE quoted argument;
# confirm this still works once more than one flag is configured.
./env.sh nim "{{ codex_build_nim_flags }}" -d:chronicles_log_level:{{ codex_build_log_level }} c dagger.nim
mkdir -p build
# Rename binaries to match commit they were built from.
# NOTE(review): assumes the compiler output is named 'dagger.out' - confirm.
mv "dagger.out" "build/codex_${COMMIT}"
# Create a symbolic link to the latest version
# (-f replace an existing link, -r relative target, -s symbolic).
ln -frs build/codex_${COMMIT} build/codex
# Delete copies that are older than N days
find build -mtime +{{ codex_build_days_kept }} -exec rm '{}' \+
}
#-------------------------------------------------------------------------------
# Main flow: verify the user, update the checkout, build if there is no
# binary for the current commit (or '--force' was given), then optionally
# restart the service.
BRANCH="{{ codex_repo_branch }}"
SERVICE="{{ codex_service_name }}.service"
SERVICE_PATH="{{ codex_service_path }}"
echo " >>> Build Start: $(date -u +"%Y-%m-%dT%H:%M:%SZ")"
cd "${SERVICE_PATH}"
# Refuse to run as anyone but the dedicated build user.
if [[ "${USER}" != "{{ codex_user }}" ]]; then
echo "Incorrect user: ${USER}" >&2
echo "Expected: {{ codex_user }}" >&2
exit 1
fi
# Build the Codex node binaries
pushd repo >/dev/null
# Detached HEAD means we're probably on a tag
if headIsDetached; then
echo " >>> Deatached HEAD, nothing to fetch."
else
echo " >>> Fetching changes..."
fetchChanges
fi
# Short commit hash; used to name the binary and to detect existing builds.
COMMIT=$(git rev-parse --short=8 HEAD)
# Pass '--force' as the first argument to rebuild even if a binary exists.
if binaryExists && [[ "$1" != "--force" ]]; then
echo " >>> Binary already built"
exit 0
else
echo " >>> Building binaries..."
buildBinaries
fi
{% if codex_build_restarts_service %}
# Avoid failure on first Ansible run due to missing service.
# '|| true' keeps 'set -e' from aborting on the non-zero exit of is-active.
if [[ $(systemctl is-active "${SERVICE}" || true) == "inactive" ]]; then
echo " !!! No service to restart!"
exit
else
echo " >>> Restarting service..."
sudo systemctl restart "${SERVICE}"
fi
{% endif %}
popd >/dev/null
echo " >>> SUCCESS"