default to 6 threads and set db pool the same, allowing overrides

2023-05-16 19:24:55 -04:00 · 2023-05-16 19:24:55 -04:00 · 957af50f31
parent 63999da13a
commit 957af50f31
3 changed files with 46 additions and 21 deletions
--- a/spiffworkflow-backend/bin/boot_server_in_docker
+++ b/spiffworkflow-backend/bin/boot_server_in_docker
@ -40,8 +40,9 @@ fi

 additional_args=""

-if [[ "${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT:-}" != "/" ]]; then
-  additional_args="${additional_args} -e SCRIPT_NAME=${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT}"
+app_root="${SPIFFWORKFLOW_BACKEND_APPLICATION_ROOT:-}"
+if [[ -n "$app_root" ]] && [[ "${app_root}" != "/" ]]; then
+  additional_args="${additional_args} -e SCRIPT_NAME=${app_root}"
 fi

 # HACK: if loading fixtures for acceptance tests when we do not need multiple workers
@ -67,21 +68,25 @@ fi
 git init "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}"
 git config --global --add safe.directory "${SPIFFWORKFLOW_BACKEND_BPMN_SPEC_ABSOLUTE_DIR}"

-# default to 3 * 2 = 6 threads per worker
-# you may want to configure threads_to_use_per_core based on whether your workload is more cpu intensive or more I/O intensive:
-#   cpu heavy, make it smaller
-#   I/O heavy, make it larger
-threads_to_use_per_core=3
-num_cores_multiple_for_threads=2
+if [[ -z "${SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER:-}" ]]; then
+  # default to 3 * 2 = 6 threads per worker
+  # you may want to configure threads_to_use_per_core based on whether your workload is more cpu intensive or more I/O intensive:
+  #   cpu heavy, make it smaller
+  #   I/O heavy, make it larger
+  threads_to_use_per_core=3

-# https://stackoverflow.com/a/55423170/6090676
-# if we had access to python (i'm not sure i want to run another python script here),
-# we could do something like this (on linux) to get the number of cores available to this process and a better estimate of a
-# reasonable num_cores_multiple_for_threads
-# if hasattr(os, 'sched_getaffinity')
-#   number_of_available_cores = os.sched_getaffinity(0)
+  # just making up a number here for num_cores_multiple_for_threads
+  # https://stackoverflow.com/a/55423170/6090676
+  # if we had access to python (i'm not sure i want to run another python script here),
+  # we could do something like this (on linux) to get the number of cores available to this process and a better estimate of a
+  # reasonable num_cores_multiple_for_threads
+  # if hasattr(os, 'sched_getaffinity'):
+  #   number_of_available_cores = len(os.sched_getaffinity(0))
+  num_cores_multiple_for_threads=2

-threads_per_worker=$((threads_to_use_per_core * num_cores_multiple_for_threads))
+  SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER=$((threads_to_use_per_core * num_cores_multiple_for_threads))
+  export SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER
+fi

 # --worker-class is not strictly necessary, since setting threads will automatically set the worker class to gthread, but meh
 export IS_GUNICORN="true"
@ -91,7 +96,7 @@ exec poetry run gunicorn ${additional_args}  \
  --preload \
  --worker-class "gthread" \
  --workers="$worker_count"  \
-  --threads "$threads_per_worker" \
+  --threads "$SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER" \
  --limit-request-line 8192  \
  --timeout "$GUNICORN_TIMEOUT_SECONDS"  \
  --capture-output  \
--- a/spiffworkflow-backend/src/spiffworkflow_backend/config/init.py
+++ b/spiffworkflow-backend/src/spiffworkflow_backend/config/init.py
@ -13,8 +13,7 @@ class ConfigurationError(Exception):
    """ConfigurationError."""


-def setup_database_uri(app: Flask) -> None:
-    """Setup_database_uri."""
+def setup_database_configs(app: Flask) -> None:
    if app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_URI") is None:
        database_name = f"spiffworkflow_backend_{app.config['ENV_IDENTIFIER']}"
        if app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_TYPE") == "sqlite":
@ -34,6 +33,25 @@ def setup_database_uri(app: Flask) -> None:
    else:
        app.config["SQLALCHEMY_DATABASE_URI"] = app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_URI")

+    # if pool size came in from the environment, it's a string, but we need an int
+    # if it didn't come in from the environment, base it on the number of threads
+    # note that max_overflow defaults to 10, so that will give extra buffer.
+    pool_size = app.config.get("SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE")
+    if pool_size is not None:
+        pool_size = int(pool_size)
+    else:
+        # this one doesn't come from app config and isn't documented in default.py because we don't want to give people the impression
+        # that setting it in flask python configs will work. on the contrary, it's used by a bash
+        # script that starts the backend, so it can only be set in the environment.
+        threads_per_worker_config = os.environ.get("SPIFFWORKFLOW_BACKEND_THREADS_PER_WORKER")
+        if threads_per_worker_config is not None:
+            pool_size = int(threads_per_worker_config)
+        else:
+            # this is a sqlalchemy default, if we don't have any better ideas
+            pool_size = 5
+
+    app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {}
+    app.config['SQLALCHEMY_ENGINE_OPTIONS']['pool_size'] = pool_size

 def load_config_file(app: Flask, env_config_module: str) -> None:
    """Load_config_file."""
@ -115,7 +133,7 @@ def setup_config(app: Flask) -> None:

    app.config["PROCESS_UUID"] = uuid.uuid4()

-    setup_database_uri(app)
+    setup_database_configs(app)
    setup_logger(app)

    if app.config["SPIFFWORKFLOW_BACKEND_DEFAULT_USER_GROUP"] == "":
--- a/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py
+++ b/spiffworkflow-backend/src/spiffworkflow_backend/config/default.py
@ -33,6 +33,10 @@ SPIFFWORKFLOW_BACKEND_BACKGROUND_SCHEDULER_USER_INPUT_REQUIRED_POLLING_INTERVAL_
        default="120",
    )
 )
+
+# we only use this in one place, and it checks to see if it is None.
+SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE = environ.get("SPIFFWORKFLOW_BACKEND_DATABASE_POOL_SIZE")
+
 SPIFFWORKFLOW_BACKEND_URL_FOR_FRONTEND = environ.get(
    "SPIFFWORKFLOW_BACKEND_URL_FOR_FRONTEND", default="http://localhost:7001"
 )
@ -57,8 +61,6 @@ SPIFFWORKFLOW_BACKEND_OPEN_ID_SERVER_URL = environ.get(
 SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_ID = environ.get(
    "SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_ID", default="spiffworkflow-backend"
 )
-SQLALCHEMY_POOL_SIZE=20
-SQLALCHEMY_ENGINE_OPTIONS={"pool_pre_ping": True, "pool_size": 20}
 SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_SECRET_KEY = environ.get(
    "SPIFFWORKFLOW_BACKEND_OPEN_ID_CLIENT_SECRET_KEY",
    default="JXeQExm0JhQPLumgHtIIqf52bDalHz0q",