From fcaf67941870a119d5dc4e2a2a726c80d4159ba4 Mon Sep 17 00:00:00 2001 From: burnettk Date: Sun, 14 May 2023 22:19:53 -0400 Subject: [PATCH 1/3] update gunicorn configs to add threading --- .../bin/boot_server_in_docker | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/spiffworkflow-backend/bin/boot_server_in_docker b/spiffworkflow-backend/bin/boot_server_in_docker index fb425af5..865c7de6 100755 --- a/spiffworkflow-backend/bin/boot_server_in_docker +++ b/spiffworkflow-backend/bin/boot_server_in_docker @@ -46,9 +46,9 @@ fi # HACK: if loading fixtures for acceptance tests when we do not need multiple workers # it causes issues with attempting to add duplicate data to the db -workers=3 +worker_count=4 if [[ "${SPIFFWORKFLOW_BACKEND_LOAD_FIXTURE_DATA:-}" == "true" ]]; then - workers=1 + worker_count=1 fi if [[ "${SPIFFWORKFLOW_BACKEND_RUN_DATA_SETUP:-}" != "false" ]]; then @@ -67,11 +67,31 @@ fi git init "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}" git config --global --add safe.directory "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}" +# default to 3 * 2 = 6 threads per worker +# you may want to configure threads_to_use_per_core based on whether your workload is more cpu intensive or more I/O intensive: +# cpu heavy, make it smaller +# I/O heavy, make it larger +threads_to_use_per_core=3 +num_cores_multiple_for_threads=2 + +# https://stackoverflow.com/a/55423170/6090676 +# if we had access to python (i'm not sure i want to run another python script here), +# we could do something like this (on linux) to get the number of cores available to this process and a better estimate of a +# reasonable num_cores_multiple_for_threads +# if hasattr(os, 'sched_getaffinity') +# number_of_available_cores = os.sched_getaffinity(0) + +threads_per_worker=$((threads_to_use_per_core * num_cores_multiple_for_threads)) + +# --worker-class is not strictly necessary, since setting threads will automatically set the worker class to gthread, but meh 
export IS_GUNICORN="true" # THIS MUST BE THE LAST COMMAND! exec poetry run gunicorn ${additional_args} \ --bind "0.0.0.0:$port" \ - --workers="$workers" \ + --preload \ + --worker-class "gthread" \ + --workers="$worker_count" \ + --threads "$threads_per_worker" \ --limit-request-line 8192 \ --timeout "$GUNICORN_TIMEOUT_SECONDS" \ --capture-output \ From a9629fd37918fcf9a0bc6a183a6c1cf698c82a7f Mon Sep 17 00:00:00 2001 From: burnettk Date: Tue, 16 May 2023 15:35:01 -0400 Subject: [PATCH 2/3] debug --- .../src/spiffworkflow_backend/config/default.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py b/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py index c1498e86..0c4ceeb2 100644 --- a/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py +++ b/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py @@ -57,6 +57,8 @@ SPIFFWORKFLOW_BACKEND_OPEN_ID_SERVER_URL = environ.get( SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_ID = environ.get( "SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_ID", default="spiffworkflow-backend" ) +SQLALCHEMY_POOL_SIZE=20 +SQLALCHEMY_ENGINE_OPTIONS={"pool_pre_ping": True, "pool_size": 20} SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_SECRET_KEY = environ.get( "SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_SECRET_KEY", default="JXeQExm0JhQPLumgHtIIqf52bDalHz0q", From f24282d012ea84863c5d7c80dfa824bc93abde39 Mon Sep 17 00:00:00 2001 From: burnettk Date: Tue, 16 May 2023 19:24:55 -0400 Subject: [PATCH 3/3] default to 6 threads and set db pool the same, allowing overrides --- .../bin/boot_server_in_docker | 37 +++++++++++-------- .../spiffworkflow_backend/config/__init__.py | 24 ++++++++++-- .../spiffworkflow_backend/config/default.py | 6 ++- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/spiffworkflow-backend/bin/boot_server_in_docker b/spiffworkflow-backend/bin/boot_server_in_docker index 865c7de6..a26adc73 100755 --- 
a/spiffworkflow-backend/bin/boot_server_in_docker +++ b/spiffworkflow-backend/bin/boot_server_in_docker @@ -40,8 +40,9 @@ fi additional_args="" -if [[ "${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT:-}" != "/" ]]; then - additional_args="${additional_args} -e SCRIPT_NAME=${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT}" +app_root="${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT:-}" +if [[ -n "$app_root" ]] && [[ "${app_root}" != "/" ]]; then + additional_args="${additional_args} -e SCRIPT_NAME=${app_root}" fi # HACK: if loading fixtures for acceptance tests when we do not need multiple workers @@ -67,21 +68,25 @@ fi git init "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}" git config --global --add safe.directory "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}" -# default to 3 * 2 = 6 threads per worker -# you may want to configure threads_to_use_per_core based on whether your workload is more cpu intensive or more I/O intensive: -# cpu heavy, make it smaller -# I/O heavy, make it larger -threads_to_use_per_core=3 -num_cores_multiple_for_threads=2 +if [[ -z "${SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER:-}" ]]; then + # default to 3 * 2 = 6 threads per worker + # you may want to configure threads_to_use_per_core based on whether your workload is more cpu intensive or more I/O intensive: + # cpu heavy, make it smaller + # I/O heavy, make it larger + threads_to_use_per_core=3 -# https://stackoverflow.com/a/55423170/6090676 -# if we had access to python (i'm not sure i want to run another python script here), -# we could do something like this (on linux) to get the number of cores available to this process and a better estimate of a -# reasonable num_cores_multiple_for_threads -# if hasattr(os, 'sched_getaffinity') -# number_of_available_cores = os.sched_getaffinity(0) + # just making up a number here for num_cores_multiple_for_threads + # https://stackoverflow.com/a/55423170/6090676 + # if we had access to python (i'm not sure i want to run another python script here), + # we could do 
something like this (on linux) to get the number of cores available to this process and a better estimate of a + # reasonable num_cores_multiple_for_threads + # if hasattr(os, 'sched_getaffinity') + # number_of_available_cores = len(os.sched_getaffinity(0)) + num_cores_multiple_for_threads=2 -threads_per_worker=$((threads_to_use_per_core * num_cores_multiple_for_threads)) + SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER=$((threads_to_use_per_core * num_cores_multiple_for_threads)) + export SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER +fi # --worker-class is not strictly necessary, since setting threads will automatically set the worker class to gthread, but meh export IS_GUNICORN="true" @@ -91,7 +96,7 @@ exec poetry run gunicorn ${additional_args} \ --preload \ --worker-class "gthread" \ --workers="$worker_count" \ - --threads "$threads_per_worker" \ + --threads "$SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER" \ --limit-request-line 8192 \ --timeout "$GUNICORN_TIMEOUT_SECONDS" \ --capture-output \ diff --git a/spiffworkflow-backend/src/spiffworkflow_backend/config/__init__.py b/spiffworkflow-backend/src/spiffworkflow_backend/config/__init__.py index d61fa085..55c95897 100644 --- a/spiffworkflow-backend/src/spiffworkflow_backend/config/__init__.py +++ b/spiffworkflow-backend/src/spiffworkflow_backend/config/__init__.py @@ -13,8 +13,7 @@ class ConfigurationError(Exception): """ConfigurationError.""" -def setup_database_uri(app: Flask) -> None: - """Setup_database_uri.""" +def setup_database_configs(app: Flask) -> None: if app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_URI") is None: database_name = f"spiffworkflow_backend_{app.config['ENV_IDENTIFIER']}" if app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_TYPE") == "sqlite": @@ -34,6 +33,25 @@ def setup_database_uri(app: Flask) -> None: else: app.config["SQLALCHEMY_DATABASE_URI"] = app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_URI") + # if pool size came in from the environment, it's a string, but we need an int + # if it didn't come in 
from the environment, base it on the number of threads + # note that max_overflow defaults to 10, so that will give extra buffer. + pool_size = app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE") + if pool_size is not None: + pool_size = int(pool_size) + else: + # this one doesn't come from app config and isn't documented in default.py because we don't want to give people the impression + # that setting it in flask python configs will work. on the contrary, it's used by a bash + # script that starts the backend, so it can only be set in the environment. + threads_per_worker_config = os.environ.get("SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER") + if threads_per_worker_config is not None: + pool_size = int(threads_per_worker_config) + else: + # this is a sqlalchemy default, if we don't have any better ideas + pool_size = 5 + + app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {} + app.config['SQLALCHEMY_ENGINE_OPTIONS']['pool_size'] = pool_size def load_config_file(app: Flask, env_config_module: str) -> None: """Load_config_file.""" @@ -115,7 +133,7 @@ def setup_config(app: Flask) -> None: app.config["PROCESS_UUID"] = uuid.uuid4() - setup_database_uri(app) + setup_database_configs(app) setup_logger(app) if app.config["SPIFFWORKFLOW_BACKEND_DEFAULT_USER_GROUP"] == "": diff --git a/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py b/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py index 0c4ceeb2..88a34faf 100644 --- a/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py +++ b/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py @@ -33,6 +33,10 @@ SPIFFWORKFLOW_BACKEND_BACKGROUND_SCHEDULER_USER_INPUT_REQUIRED_POLLING_INTERVAL_ default="120", ) ) + +# we only use this in one place, and it checks to see if it is None. 
+SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE = environ.get("SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE") + SPIFFWORKFLOW_BACKEND_URL_FOR_FRONTEND = environ.get( "SPIFFWORKFLOW_BACKEND_URL_FOR_FRONTEND", default="http://localhost:7001" ) @@ -57,8 +61,6 @@ SPIFFWORKFLOW_BACKEND_OPEN_ID_SERVER_URL = environ.get( SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_ID = environ.get( "SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_ID", default="spiffworkflow-backend" ) -SQLALCHEMY_POOL_SIZE=20 -SQLALCHEMY_ENGINE_OPTIONS={"pool_pre_ping": True, "pool_size": 20} SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_SECRET_KEY = environ.get( "SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_SECRET_KEY", default="JXeQExm0JhQPLumgHtIIqf52bDalHz0q",