Merge pull request #259 from sartography/feature/gunicorn-configs

update gunicorn configs to add threading
2023-05-16 23:26:51 +00:00 · 2023-05-16 23:26:51 +00:00 · df0eb1b07e
parent 77c6e80055 957af50f31
commit df0eb1b07e
3 changed files with 55 additions and 8 deletions
--- a/spiffworkflow-backend/bin/boot_server_in_docker
+++ b/spiffworkflow-backend/bin/boot_server_in_docker
@ -40,15 +40,16 @@ fi

 additional_args=""

-if [[ "${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT:-}" != "/" ]]; then
-  additional_args="${additional_args} -e SCRIPT_NAME=${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT}"
+app_root="${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT:-}"
+if [[ -n "$app_root" ]] && [[ "${app_root}" != "/" ]]; then
+  additional_args="${additional_args} -e SCRIPT_NAME=${app_root}"
 fi

 # HACK: when loading fixtures for acceptance tests we do not need multiple workers,
 # and having them causes issues with attempting to add duplicate data to the db
-workers=3
+worker_count=4
 if [[ "${SPIFFWORKFLOW_BACKEND_LOAD_FIXTURE_DATA:-}" == "true" ]]; then
-  workers=1
+  worker_count=1
 fi

 if [[ "${SPIFFWORKFLOW_BACKEND_RUN_DATA_SETUP:-}" != "false" ]]; then
@ -67,11 +68,35 @@ fi
 git init "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}"
 git config --global --add safe.directory "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}"

+if [[ -z "${SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER:-}" ]]; then
+  # default to 3 * 2 = 6 threads per worker
+  # you may want to configure threads_to_use_per_core based on whether your workload is more cpu intensive or more I/O intensive:
+  #   cpu heavy, make it smaller
+  #   I/O heavy, make it larger
+  threads_to_use_per_core=3
+
+  # just making up a number here for num_cores_multiple_for_threads
+  # https://stackoverflow.com/a/55423170/6090676
+  # if we had access to python (i'm not sure i want to run another python script here),
+  # we could do something like this (on linux) to get the number of cores available to this process and a better estimate of a
+  # reasonable num_cores_multiple_for_threads
+  # if hasattr(os, 'sched_getaffinity'):
+  #   number_of_available_cores = len(os.sched_getaffinity(0))
+  num_cores_multiple_for_threads=2
+
+  SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER=$((threads_to_use_per_core * num_cores_multiple_for_threads))
+  export SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER
+fi
+
+# --worker-class is not strictly necessary, since setting threads to more than 1 automatically switches the worker class to gthread, but we set it explicitly anyway
 export IS_GUNICORN="true"
 # THIS MUST BE THE LAST COMMAND!
 exec poetry run gunicorn ${additional_args}  \
  --bind "0.0.0.0:$port"  \
-  --workers="$workers"  \
+  --preload \
+  --worker-class "gthread" \
+  --workers="$worker_count"  \
+  --threads "$SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER" \
  --limit-request-line 8192  \
  --timeout "$GUNICORN_TIMEOUT_SECONDS"  \
  --capture-output  \
--- a/spiffworkflow-backend/src/spiffworkflow_backend/config/init.py
+++ b/spiffworkflow-backend/src/spiffworkflow_backend/config/init.py
@ -13,8 +13,7 @@ class ConfigurationError(Exception):
    """ConfigurationError."""


-def setup_database_uri(app: Flask) -> None:
-    """Setup_database_uri."""
+def setup_database_configs(app: Flask) -> None:
    if app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_URI") is None:
        database_name = f"spiffworkflow_backend_{app.config['ENV_IDENTIFIER']}"
        if app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_TYPE") == "sqlite":
@ -34,6 +33,25 @@ def setup_database_uri(app: Flask) -> None:
    else:
        app.config["SQLALCHEMY_DATABASE_URI"] = app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_URI")

+    # if pool size came in from the environment, it's a string, but we need an int
+    # if it didn't come in from the environment, base it on the number of threads
+    # note that max_overflow defaults to 10, so that will give extra buffer.
+    pool_size = app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE")
+    if pool_size is not None:
+        pool_size = int(pool_size)
+    else:
+        # this one doesn't come from app config and isn't documented in default.py because we don't want to give people the impression
+        # that setting it in flask python configs will work. on the contrary, it's used by a bash
+        # script that starts the backend, so it can only be set in the environment.
+        threads_per_worker_config = os.environ.get("SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER")
+        if threads_per_worker_config is not None:
+            pool_size = int(threads_per_worker_config)
+        else:
+            # this is a sqlalchemy default, if we don't have any better ideas
+            pool_size = 5
+
+    app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {}
+    app.config['SQLALCHEMY_ENGINE_OPTIONS']['pool_size'] = pool_size

 def load_config_file(app: Flask, env_config_module: str) -> None:
    """Load_config_file."""
@ -115,7 +133,7 @@ def setup_config(app: Flask) -> None:

    app.config["PROCESS_UUID"] = uuid.uuid4()

-    setup_database_uri(app)
+    setup_database_configs(app)
    setup_logger(app)

    if app.config["SPIFFWORKFLOW_BACKEND_DEFAULT_USER_GROUP"] == "":
--- a/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py
+++ b/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py
@ -33,6 +33,10 @@ SPIFFWORKFLOW_BACKEND_BACKGROUND_SCHEDULER_USER_INPUT_REQUIRED_POLLING_INTERVAL_
        default="120",
    )
 )
+
+# we only use this in one place, and it checks to see if it is None.
+SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE = environ.get("SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE")
+
 SPIFFWORKFLOW_BACKEND_URL_FOR_FRONTEND = environ.get(
    "SPIFFWORKFLOW_BACKEND_URL_FOR_FRONTEND", default="http://localhost:7001"
 )