From a929ef91a6bfa61dd785aa42c5de69ec48ee176c Mon Sep 17 00:00:00 2001 From: Jon Herron Date: Wed, 22 Feb 2023 10:39:35 -0500 Subject: [PATCH 1/7] Add cumulative task data size to script --- .../scripts/get_data_sizes.py | 37 +++++++++++++++++++ .../services/process_instance_processor.py | 22 +++++++---- 2 files changed, 52 insertions(+), 7 deletions(-) create mode 100644 src/spiffworkflow_backend/scripts/get_data_sizes.py diff --git a/src/spiffworkflow_backend/scripts/get_data_sizes.py b/src/spiffworkflow_backend/scripts/get_data_sizes.py new file mode 100644 index 00000000..1721bcdd --- /dev/null +++ b/src/spiffworkflow_backend/scripts/get_data_sizes.py @@ -0,0 +1,37 @@ +"""Get_data_sizes.""" +from typing import Any + +from spiffworkflow_backend.models.script_attributes_context import ( + ScriptAttributesContext, +) +from spiffworkflow_backend.scripts.script import Script +#from spiffworkflow_backend.servces.process_instance_processor import ProcessInstanceProcessor +from spiffworkflow_backend.services.process_instance_processor import ( + ProcessInstanceProcessor, +) + + +class GetDataSizes(Script): + """GetDataSizes.""" + + @staticmethod + def requires_privileged_permissions() -> bool: + """We have deemed this function safe to run without elevated permissions.""" + return False + + def get_description(self) -> str: + """Get_description.""" + return """Returns a dictionary of information about the size of task data and the python environment for the currently running process.""" + + def run( + self, + script_attributes_context: ScriptAttributesContext, + *_args: Any, + **kwargs: Any + ) -> Any: + """Run.""" + task = script_attributes_context.task + return { + "cumulative_task_data_size": ProcessInstanceProcessor.get_task_data_size(task.workflow), + "python_env_size": 0, + } diff --git a/src/spiffworkflow_backend/services/process_instance_processor.py b/src/spiffworkflow_backend/services/process_instance_processor.py index 5aabe5ac..62bf61ad 100644 --- a/src/spiffworkflow_backend/services/process_instance_processor.py +++ b/src/spiffworkflow_backend/services/process_instance_processor.py @@ -1602,16 +1602,24 @@ class ProcessInstanceProcessor: except WorkflowTaskException as we: raise ApiError.from_workflow_exception("task_error", str(we), we) from we - def check_task_data_size(self) -> None: - """CheckTaskDataSize.""" - tasks_to_check = self.bpmn_process_instance.get_tasks(TaskState.FINISHED_MASK) - task_data = [task.data for task in tasks_to_check] - task_data_to_check = list(filter(len, task_data)) + @classmethod + def _get_data_size(cls, data: Dict[Any, Any]) -> int: + data_to_check = list(filter(len, data)) try: - task_data_len = len(json.dumps(task_data_to_check)) + return len(json.dumps(data_to_check)) except Exception: - task_data_len = 0 + return 0 + + @classmethod + def get_task_data_size(cls, bpmn_process_instance: BpmnWorkflow) -> int: + tasks_to_check = bpmn_process_instance.get_tasks(TaskState.FINISHED_MASK) + task_data = [task.data for task in tasks_to_check] + return cls._get_data_size(task_data) + + def check_task_data_size(self) -> None: + """CheckTaskDataSize.""" + task_data_len = self.get_task_data_size(self.bpmn_process_instance) # Not sure what the number here should be but this now matches the mysql # max_allowed_packet variable on dev - 1073741824 From 60a03f03e8179c533656113374254e5a1e0df0d0 Mon Sep 17 00:00:00 2001 From: Jon Herron Date: Wed, 22 Feb 2023 10:55:09 -0500 Subject: [PATCH 2/7] Adding python env size --- .../scripts/get_data_sizes.py | 2 +- .../services/process_instance_processor.py | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/spiffworkflow_backend/scripts/get_data_sizes.py b/src/spiffworkflow_backend/scripts/get_data_sizes.py index 1721bcdd..038c6f8a 100644 --- a/src/spiffworkflow_backend/scripts/get_data_sizes.py +++ b/src/spiffworkflow_backend/scripts/get_data_sizes.py @@ -33,5 +33,5 @@ class GetDataSizes(Script): task = script_attributes_context.task return { "cumulative_task_data_size": ProcessInstanceProcessor.get_task_data_size(task.workflow), - "python_env_size": 0, + "python_env_size": ProcessInstanceProcessor.get_python_env_size(task.workflow), } diff --git a/src/spiffworkflow_backend/services/process_instance_processor.py b/src/spiffworkflow_backend/services/process_instance_processor.py index 62bf61ad..1dfea5a5 100644 --- a/src/spiffworkflow_backend/services/process_instance_processor.py +++ b/src/spiffworkflow_backend/services/process_instance_processor.py @@ -152,6 +152,11 @@ class BoxedTaskDataBasedScriptEngineEnvironment(BoxedTaskDataEnvironment): # ty super().execute(script, context, external_methods) self._last_result = context + def user_defined_state( + self, external_methods: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: + return {} + def last_result(self) -> Dict[str, Any]: return {k: v for k, v in self._last_result.items()} @@ -218,13 +223,13 @@ class NonTaskDataBasedScriptEngineEnvironment(BasePythonScriptEngineEnvironment) for key_to_drop in context_keys_to_drop: context.pop(key_to_drop) - self.state = self._user_defined_state(external_methods) + self.state = self.user_defined_state(external_methods) # the task data needs to be updated with the current state so data references can be resolved properly. # the state will be removed later once the task is completed. context.update(self.state) - def _user_defined_state( + def user_defined_state( self, external_methods: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: keys_to_filter = self.non_user_defined_keys @@ -245,7 +250,7 @@ class NonTaskDataBasedScriptEngineEnvironment(BasePythonScriptEngineEnvironment) def preserve_state(self, bpmn_process_instance: BpmnWorkflow) -> None: key = self.PYTHON_ENVIRONMENT_STATE_KEY - state = self._user_defined_state() + state = self.user_defined_state() bpmn_process_instance.data[key] = state def restore_state(self, bpmn_process_instance: BpmnWorkflow) -> None: @@ -253,7 +258,7 @@ class NonTaskDataBasedScriptEngineEnvironment(BasePythonScriptEngineEnvironment) self.state = bpmn_process_instance.data.get(key, {}) def finalize_result(self, bpmn_process_instance: BpmnWorkflow) -> None: - bpmn_process_instance.data.update(self._user_defined_state()) + bpmn_process_instance.data.update(self.user_defined_state()) def revise_state_with_task_data(self, task: SpiffTask) -> None: state_keys = set(self.state.keys()) @@ -1617,6 +1622,11 @@ class ProcessInstanceProcessor: task_data = [task.data for task in tasks_to_check] return cls._get_data_size(task_data) + @classmethod + def get_python_env_size(cls, bpmn_process_instance: BpmnWorkflow) -> int: + user_defined_state = bpmn_process_instance.script_engine.environment.user_defined_state() + return cls._get_data_size(user_defined_state) + def check_task_data_size(self) -> None: """CheckTaskDataSize.""" task_data_len = self.get_task_data_size(self.bpmn_process_instance) From 84fe64b0854a9b5c15d86cb9cf79f47354a80c79 Mon Sep 17 00:00:00 2001 From: Jon Herron Date: Wed, 22 Feb 2023 11:04:07 -0500 Subject: [PATCH 3/7] Getting ./bin/pyl to pass --- src/spiffworkflow_backend/scripts/get_data_sizes.py | 11 ++++++++--- .../services/process_instance_processor.py | 4 +++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/spiffworkflow_backend/scripts/get_data_sizes.py b/src/spiffworkflow_backend/scripts/get_data_sizes.py index 038c6f8a..2f28ffa7 100644 --- a/src/spiffworkflow_backend/scripts/get_data_sizes.py +++ b/src/spiffworkflow_backend/scripts/get_data_sizes.py @@ -5,11 +5,12 @@ from spiffworkflow_backend.models.script_attributes_context import ( ScriptAttributesContext, ) from spiffworkflow_backend.scripts.script import Script -#from spiffworkflow_backend.servces.process_instance_processor import ProcessInstanceProcessor from spiffworkflow_backend.services.process_instance_processor import ( ProcessInstanceProcessor, ) +# from spiffworkflow_backend.servces.process_instance_processor import ProcessInstanceProcessor + class GetDataSizes(Script): """GetDataSizes.""" @@ -32,6 +33,10 @@ class GetDataSizes(Script): """Run.""" task = script_attributes_context.task return { - "cumulative_task_data_size": ProcessInstanceProcessor.get_task_data_size(task.workflow), - "python_env_size": ProcessInstanceProcessor.get_python_env_size(task.workflow), + "cumulative_task_data_size": ProcessInstanceProcessor.get_task_data_size( + task.workflow + ), + "python_env_size": ProcessInstanceProcessor.get_python_env_size( + task.workflow + ), } diff --git a/src/spiffworkflow_backend/services/process_instance_processor.py b/src/spiffworkflow_backend/services/process_instance_processor.py index 1dfea5a5..0d0a641e 100644 --- a/src/spiffworkflow_backend/services/process_instance_processor.py +++ b/src/spiffworkflow_backend/services/process_instance_processor.py @@ -1624,7 +1624,9 @@ class ProcessInstanceProcessor: @classmethod def get_python_env_size(cls, bpmn_process_instance: BpmnWorkflow) -> int: - user_defined_state = bpmn_process_instance.script_engine.environment.user_defined_state() + user_defined_state = ( + bpmn_process_instance.script_engine.environment.user_defined_state() + ) return cls._get_data_size(user_defined_state) def check_task_data_size(self) -> None: From 02db5acf6e9fdfa34508a2202d3c6b26d36a54d1 Mon Sep 17 00:00:00 2001 From: Jon Herron Date: Wed, 22 Feb 2023 11:17:23 -0500 Subject: [PATCH 4/7] Unfactor to fix size calculation/type hint issues --- .../scripts/get_data_sizes.py | 15 +++++++------ .../services/process_instance_processor.py | 22 +++++++++---------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/spiffworkflow_backend/scripts/get_data_sizes.py b/src/spiffworkflow_backend/scripts/get_data_sizes.py index 2f28ffa7..94dd2fd6 100644 --- a/src/spiffworkflow_backend/scripts/get_data_sizes.py +++ b/src/spiffworkflow_backend/scripts/get_data_sizes.py @@ -22,7 +22,8 @@ class GetDataSizes(Script): def get_description(self) -> str: """Get_description.""" - return """Returns a dictionary of information about the size of task data and the python environment for the currently running process.""" + return """Returns a dictionary of information about the size of task data and + the python environment for the currently running process.""" def run( self, @@ -32,11 +33,11 @@ class GetDataSizes(Script): ) -> Any: """Run.""" task = script_attributes_context.task + cumulative_task_data_size = ProcessInstanceProcessor.get_task_data_size( + task.workflow + ) + python_env_size = ProcessInstanceProcessor.get_python_env_size(task.workflow) return { - "cumulative_task_data_size": ProcessInstanceProcessor.get_task_data_size( - task.workflow - ), - "python_env_size": ProcessInstanceProcessor.get_python_env_size( - task.workflow - ), + "cumulative_task_data_size": cumulative_task_data_size, + "python_env_size": python_env_size, } diff --git a/src/spiffworkflow_backend/services/process_instance_processor.py b/src/spiffworkflow_backend/services/process_instance_processor.py index 0d0a641e..5b728cd6 100644 --- a/src/spiffworkflow_backend/services/process_instance_processor.py +++ b/src/spiffworkflow_backend/services/process_instance_processor.py @@ -1607,27 +1607,27 @@ class ProcessInstanceProcessor: except WorkflowTaskException as we: raise ApiError.from_workflow_exception("task_error", str(we), we) from we - @classmethod - def _get_data_size(cls, data: Dict[Any, Any]) -> int: - data_to_check = list(filter(len, data)) - - try: - return len(json.dumps(data_to_check)) - except Exception: - return 0 - @classmethod def get_task_data_size(cls, bpmn_process_instance: BpmnWorkflow) -> int: tasks_to_check = bpmn_process_instance.get_tasks(TaskState.FINISHED_MASK) task_data = [task.data for task in tasks_to_check] - return cls._get_data_size(task_data) + task_data_to_check = list(filter(len, task_data)) + + try: + return len(json.dumps(task_data_to_check)) + except Exception: + return 0 @classmethod def get_python_env_size(cls, bpmn_process_instance: BpmnWorkflow) -> int: user_defined_state = ( bpmn_process_instance.script_engine.environment.user_defined_state() ) - return cls._get_data_size(user_defined_state) + + try: + return len(json.dumps(user_defined_state)) + except Exception: + return 0 def check_task_data_size(self) -> None: """CheckTaskDataSize.""" From d53d0c21c2a193a03c2cb3d12b354bb191292df8 Mon Sep 17 00:00:00 2001 From: Jon Herron Date: Wed, 22 Feb 2023 13:13:28 -0500 Subject: [PATCH 5/7] Provide info about keys in task data and python env --- .../scripts/get_data_sizes.py | 21 ++++++++++++------- .../services/process_instance_processor.py | 15 +++++++++---- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/spiffworkflow_backend/scripts/get_data_sizes.py b/src/spiffworkflow_backend/scripts/get_data_sizes.py index 94dd2fd6..553d85c5 100644 --- a/src/spiffworkflow_backend/scripts/get_data_sizes.py +++ b/src/spiffworkflow_backend/scripts/get_data_sizes.py @@ -32,12 +32,17 @@ class GetDataSizes(Script): **kwargs: Any ) -> Any: """Run.""" - task = script_attributes_context.task - cumulative_task_data_size = ProcessInstanceProcessor.get_task_data_size( - task.workflow - ) - python_env_size = ProcessInstanceProcessor.get_python_env_size(task.workflow) - return { - "cumulative_task_data_size": cumulative_task_data_size, - "python_env_size": python_env_size, + workflow = script_attributes_context.task.workflow + task_data_size = ProcessInstanceProcessor.get_task_data_size(workflow) + task_data_keys_by_task = { + t.task_spec.name: sorted(t.data.keys()) + for t in ProcessInstanceProcessor.get_tasks_with_data(workflow) + } + python_env_size = ProcessInstanceProcessor.get_python_env_size(workflow) + python_env_keys = workflow.script_engine.environment.user_defined_state().keys() + return { + "python_env_size": python_env_size, + "python_env_keys": sorted(python_env_keys), + "task_data_size": task_data_size, + "task_data_keys_by_task": task_data_keys_by_task, } diff --git a/src/spiffworkflow_backend/services/process_instance_processor.py b/src/spiffworkflow_backend/services/process_instance_processor.py index 5b728cd6..498ebadf 100644 --- a/src/spiffworkflow_backend/services/process_instance_processor.py +++ b/src/spiffworkflow_backend/services/process_instance_processor.py @@ -1607,14 +1607,21 @@ class ProcessInstanceProcessor: except WorkflowTaskException as we: raise ApiError.from_workflow_exception("task_error", str(we), we) from we + @classmethod + def get_tasks_with_data(cls, bpmn_process_instance: BpmnWorkflow) -> List[SpiffTask]: + return [ + task + for task in bpmn_process_instance.get_tasks(TaskState.FINISHED_MASK) + if len(task.data) > 0 + ] + @classmethod def get_task_data_size(cls, bpmn_process_instance: BpmnWorkflow) -> int: - tasks_to_check = bpmn_process_instance.get_tasks(TaskState.FINISHED_MASK) - task_data = [task.data for task in tasks_to_check] - task_data_to_check = list(filter(len, task_data)) + tasks_with_data = cls.get_tasks_with_data(bpmn_process_instance) + all_task_data = [task.data for task in tasks_with_data] try: - return len(json.dumps(task_data_to_check)) + return len(json.dumps(all_task_data)) except Exception: return 0 From 4ffba533104549e4cd622ddc8c8d7c0a945e4df3 Mon Sep 17 00:00:00 2001 From: Jon Herron Date: Wed, 22 Feb 2023 13:34:26 -0500 Subject: [PATCH 6/7] Getting ./bin/pyl to pass --- .../services/process_instance_processor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/spiffworkflow_backend/services/process_instance_processor.py b/src/spiffworkflow_backend/services/process_instance_processor.py index 498ebadf..86ff4490 100644 --- a/src/spiffworkflow_backend/services/process_instance_processor.py +++ b/src/spiffworkflow_backend/services/process_instance_processor.py @@ -1608,7 +1608,9 @@ class ProcessInstanceProcessor: raise ApiError.from_workflow_exception("task_error", str(we), we) from we @classmethod - def get_tasks_with_data(cls, bpmn_process_instance: BpmnWorkflow) -> List[SpiffTask]: + def get_tasks_with_data( + cls, bpmn_process_instance: BpmnWorkflow + ) -> List[SpiffTask]: return [ task for task in bpmn_process_instance.get_tasks(TaskState.FINISHED_MASK) From d0b67bb82ece4ba52068185b7eb2b441f3aa8779 Mon Sep 17 00:00:00 2001 From: Jon Herron Date: Wed, 22 Feb 2023 13:42:45 -0500 Subject: [PATCH 7/7] Cleanup --- src/spiffworkflow_backend/scripts/get_data_sizes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/spiffworkflow_backend/scripts/get_data_sizes.py b/src/spiffworkflow_backend/scripts/get_data_sizes.py index 553d85c5..ea474674 100644 --- a/src/spiffworkflow_backend/scripts/get_data_sizes.py +++ b/src/spiffworkflow_backend/scripts/get_data_sizes.py @@ -9,8 +9,6 @@ from spiffworkflow_backend.services.process_instance_processor import ( ProcessInstanceProcessor, ) -# from spiffworkflow_backend.servces.process_instance_processor import ProcessInstanceProcessor - class GetDataSizes(Script): """GetDataSizes."""