spiff-arena/spiffworkflow-backend/bin/data_migrations/run_all.py

import time

from flask import current_app
from spiffworkflow_backend import create_app
from spiffworkflow_backend.data_migrations.process_instance_file_data_migrator import ProcessInstanceFileDataMigrator
from spiffworkflow_backend.data_migrations.version_1_3 import VersionOneThree
from spiffworkflow_backend.data_migrations.version_2 import Version2
from spiffworkflow_backend.models.db import db
from spiffworkflow_backend.models.human_task import HumanTaskModel
from spiffworkflow_backend.models.process_instance import ProcessInstanceModel
from sqlalchemy import update


# simple decorator to time the func
# https://stackoverflow.com/a/11151365/6090676, thank you
def benchmark_log_func(func):
    """
    decorator to calculate the total time of a func
    """

    def st_func(*args, **kwargs):
        t1 = time.time()
        r = func(*args, **kwargs)
        t2 = time.time()
        # __qualname__, i know you use it every day. but if not, it's the function name prefixed with any qualifying class names
        current_app.logger.debug(f"Function={func.__qualname__}, Time={t2 - t1}")
        return r

    return st_func


@benchmark_log_func
def put_serializer_version_onto_numeric_track() -> None:
    old_busted_serializer_version = "1.0-spiffworkflow-backend"
    update_query = (
        update(ProcessInstanceModel)
        .where(ProcessInstanceModel.spiff_serializer_version == old_busted_serializer_version)
        .values(spiff_serializer_version="1")
    )
    db.session.execute(update_query)
    db.session.commit()


@benchmark_log_func
def remove_duplicate_human_task_rows() -> None:
    result = (
        db.session.query(HumanTaskModel.process_instance_id, HumanTaskModel.task_guid, db.func.count().label("ct"))
        .group_by(HumanTaskModel.task_guid, HumanTaskModel.process_instance_id)
        .having(db.func.count() > 1)
        .all()
    )

    # Process the result as needed
    rows_to_delete = []
    for row in result:
        human_tasks = (
            HumanTaskModel.query.filter_by(task_guid=row.task_guid).order_by(HumanTaskModel.created_at_in_seconds.desc()).all()
        )
        rows_to_delete = rows_to_delete + human_tasks[1:]
    for row in rows_to_delete:
        db.session.delete(row)
    db.session.commit()


@benchmark_log_func
def backfill_task_guid_for_human_tasks() -> None:
    update_query = (
        update(HumanTaskModel).where(HumanTaskModel.task_guid == None).values(task_guid=HumanTaskModel.task_id)  # noqa: E711
    )
    db.session.execute(update_query)
    db.session.commit()


def all_potentially_relevant_process_instances() -> list[ProcessInstanceModel]:
    return ProcessInstanceModel.query.filter(
        ProcessInstanceModel.spiff_serializer_version < Version2.version(),
        ProcessInstanceModel.status.in_(ProcessInstanceModel.non_terminal_statuses()),
    ).all()


@benchmark_log_func
def run_version_1() -> None:
    VersionOneThree().run()  # make this a class method


@benchmark_log_func
def run_version_2(process_instances: list[ProcessInstanceModel]) -> None:
    Version2.run(process_instances)


def main() -> None:
    start_time = time.time()
    app = create_app()
    end_time = time.time()

    with app.app_context():
        current_app.logger.debug(f"data_migrations/run_all::create_app took {end_time - start_time} seconds")
        start_time = time.time()
        put_serializer_version_onto_numeric_track()
        remove_duplicate_human_task_rows()
        backfill_task_guid_for_human_tasks()
        process_instances = all_potentially_relevant_process_instances()
        potentially_relevant_instance_count = len(process_instances)
        current_app.logger.debug(f"Found potentially relevant process_instances: {potentially_relevant_instance_count}")
        if potentially_relevant_instance_count > 0:
            run_version_1()
            # this will run while using the new per instance on demand data migration framework
            # run_version_2(process_instances)
        if app.config["SPIFFWORKFLOW_BACKEND_PROCESS_INSTANCE_FILE_DATA_FILESYSTEM_PATH"] is not None:
            ProcessInstanceFileDataMigrator.migrate_from_database_to_filesystem()

        end_time = time.time()
        current_app.logger.debug(
            f"done running data migrations in ./bin/data_migrations/run_all.py. took {end_time - start_time} seconds"
        )


if __name__ == "__main__":
    main()
Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00			`import time`

			`from flask import current_app`
			`from spiffworkflow_backend import create_app`
Pi file data on fs (#1403) * added basic support to store process instance file data on the file system w/ burnettk * the files need to be in a hashed directory structure w/ burnettk * fixed exception message w/ burnettk * use the same entry method to the contents of data file w/ burnettk * fixed hash method w/ burnettk * make hash method easier to read w/ burnettk * added a migrator and test for migration w/ burnettk * fix a couple things while testing --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2024-04-18 15:24:59 +00:00			`from spiffworkflow_backend.data_migrations.process_instance_file_data_migrator import ProcessInstanceFileDataMigrator`
Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00			`from spiffworkflow_backend.data_migrations.version_1_3 import VersionOneThree`
			`from spiffworkflow_backend.data_migrations.version_2 import Version2`
			`from spiffworkflow_backend.models.db import db`
Feature/reset pi go button (#978) * delete human task for task that is being reset to in a process instance w/ burnettk * added script to remove duplicate human tasks from the database w/ burnettk * test that human tasks are not duplicated during pi reset w/ burnettk --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> 2024-02-06 20:50:00 +00:00			`from spiffworkflow_backend.models.human_task import HumanTaskModel`
Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00			`from spiffworkflow_backend.models.process_instance import ProcessInstanceModel`
			`from sqlalchemy import update`


			`# simple decorator to time the func`
			`# https://stackoverflow.com/a/11151365/6090676, thank you`
			`def benchmark_log_func(func):`
			`"""`
			`decorator to calculate the total time of a func`
			`"""`

			`def st_func(args, *kwargs):`
			`t1 = time.time()`
			`r = func(args, *kwargs)`
			`t2 = time.time()`
			`# __qualname__, i know you use it every day. but if not, it's the function name prefixed with any qualifying class names`
			`current_app.logger.debug(f"Function={func.__qualname__}, Time={t2 - t1}")`
			`return r`

			`return st_func`


			`@benchmark_log_func`
			`def put_serializer_version_onto_numeric_track() -> None:`
			`old_busted_serializer_version = "1.0-spiffworkflow-backend"`
			`update_query = (`
			`update(ProcessInstanceModel)`
			`.where(ProcessInstanceModel.spiff_serializer_version == old_busted_serializer_version)`
			`.values(spiff_serializer_version="1")`
			`)`
			`db.session.execute(update_query)`
			`db.session.commit()`


Feature/reset pi go button (#978) * delete human task for task that is being reset to in a process instance w/ burnettk * added script to remove duplicate human tasks from the database w/ burnettk * test that human tasks are not duplicated during pi reset w/ burnettk --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> 2024-02-06 20:50:00 +00:00			`@benchmark_log_func`
			`def remove_duplicate_human_task_rows() -> None:`
			`result = (`
			`db.session.query(HumanTaskModel.process_instance_id, HumanTaskModel.task_guid, db.func.count().label("ct"))`
			`.group_by(HumanTaskModel.task_guid, HumanTaskModel.process_instance_id)`
			`.having(db.func.count() > 1)`
			`.all()`
			`)`

			`# Process the result as needed`
			`rows_to_delete = []`
			`for row in result:`
			`human_tasks = (`
			`HumanTaskModel.query.filter_by(task_guid=row.task_guid).order_by(HumanTaskModel.created_at_in_seconds.desc()).all()`
			`)`
			`rows_to_delete = rows_to_delete + human_tasks[1:]`
			`for row in rows_to_delete:`
			`db.session.delete(row)`
			`db.session.commit()`


cherry-picked 81b6431c585f2a428708c778349643f329dcea15 into main for task_guid backfill fix 2024-02-08 15:19:57 +00:00			`@benchmark_log_func`
			`def backfill_task_guid_for_human_tasks() -> None:`
			`update_query = (`
			`update(HumanTaskModel).where(HumanTaskModel.task_guid == None).values(task_guid=HumanTaskModel.task_id) # noqa: E711`
			`)`
			`db.session.execute(update_query)`
			`db.session.commit()`


Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00			`def all_potentially_relevant_process_instances() -> list[ProcessInstanceModel]:`
			`return ProcessInstanceModel.query.filter(`
Improvement/better serialization (#540) * using new spiffworkflow locally and the db can be recreated w/ burnettk * tests are passing w/ burnettk * added version 3 data migration for typenames on tasks and bpmn processes w/ burnettk * pyl w/ burnettk * switch SpiffWorkflow back to main --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> 2023-10-12 18:14:02 +00:00			`ProcessInstanceModel.spiff_serializer_version < Version2.version(),`
Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00			`ProcessInstanceModel.status.in_(ProcessInstanceModel.non_terminal_statuses()),`
			`).all()`


			`@benchmark_log_func`
			`def run_version_1() -> None:`
			`VersionOneThree().run() # make this a class method`


			`@benchmark_log_func`
			`def run_version_2(process_instances: list[ProcessInstanceModel]) -> None:`
			`Version2.run(process_instances)`


			`def main() -> None:`
			`start_time = time.time()`
			`app = create_app()`
			`end_time = time.time()`

			`with app.app_context():`
			`current_app.logger.debug(f"data_migrations/run_all::create_app took {end_time - start_time} seconds")`
			`start_time = time.time()`
			`put_serializer_version_onto_numeric_track()`
Feature/reset pi go button (#978) * delete human task for task that is being reset to in a process instance w/ burnettk * added script to remove duplicate human tasks from the database w/ burnettk * test that human tasks are not duplicated during pi reset w/ burnettk --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> 2024-02-06 20:50:00 +00:00			`remove_duplicate_human_task_rows()`
cherry-picked 81b6431c585f2a428708c778349643f329dcea15 into main for task_guid backfill fix 2024-02-08 15:19:57 +00:00			`backfill_task_guid_for_human_tasks()`
Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00			`process_instances = all_potentially_relevant_process_instances()`
			`potentially_relevant_instance_count = len(process_instances)`
Feature/background proc with celery (#788) * WIP: some initial test code to test out celery w/ burnettk * some cleanup for celery and added base model to put tasks waiting on timers * removed dup bpmn file * some more cleanup and added strategy to queue instructions * some minor code changes w/ burnettk * remove the unused next_task key from api calls since nobody uses it w/ burnettk essweine * added migration for future tasks and added test to make sure we are inserting into it w/ burnettk essweine * ensure future task run at time can be updated w/ burnettk * added table to queue instructions for end user in w/ burnettk * added test to ensure we are storing instructions for end users w/ burnettk * added progress page to display new instructions to user * ignore dup instructions on db insert w/ burnettk * some more updates for celery w/ burnettk * some pyl and test fixes w/ burnettk * fixed tests w/ burnettk * WIP: added in page to show instructions on pi show page w/ burnettk * pi show page is fully using not interstitial now w/ burnettk * fixed broken test w/ burnettk * moved background processing items to own module w/ burnettk * fixed apscheduler start script * updated celery task queue to handle future tasks and upgraded black and set its line-length to match ruff w/ burnettk * added support to run future tasks using countdown w/ burnettk * build image for celery branch w/ burnettk * poet does not exist in the image w/ burnettk * start blocking scheduler should always start the scheduler w/ burnettk * add init and stuff for this branch * make this work not just on my mac * send other args to only * added running status for process instance and use that on fe to go to show page and added additional identifier to locking system to isolate celery workers better w/ burnettk * fixed typing error that typeguard found, not sure why mypy did not w/ burnettk * do not check for no instructions on interstitial page for cypress tests on frontend w/ burnettk * do not queue process instances twice w/ burnettk * removed bad file w/ burnettk * queue tasks using strings to avoid circular imports when attmepting to queue w/ burnettk * only queue imminent new timer events and mock celery * some keyboard shortcut support on frontend and added ability to force run a process instance over the api w/ burnettk * some styles added for the shortcut menu w/ burnettk * pyl w/ burnettk * fixed test w/ burnettk * removed temporary celery script and added support for celery worker in run server locally w/ burnettk * cleaned up migrations w/ burnettk * created new migration to clean up old migrations --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-12-05 16:41:59 +00:00			`current_app.logger.debug(f"Found potentially relevant process_instances: {potentially_relevant_instance_count}")`
Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00			`if potentially_relevant_instance_count > 0:`
			`run_version_1()`
			`# this will run while using the new per instance on demand data migration framework`
			`# run_version_2(process_instances)`
Pi file data on fs (#1403) * added basic support to store process instance file data on the file system w/ burnettk * the files need to be in a hashed directory structure w/ burnettk * fixed exception message w/ burnettk * use the same entry method to the contents of data file w/ burnettk * fixed hash method w/ burnettk * make hash method easier to read w/ burnettk * added a migrator and test for migration w/ burnettk * fix a couple things while testing --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2024-04-18 15:24:59 +00:00			`if app.config["SPIFFWORKFLOW_BACKEND_PROCESS_INSTANCE_FILE_DATA_FILESYSTEM_PATH"] is not None:`
			`ProcessInstanceFileDataMigrator.migrate_from_database_to_filesystem()`
Hotfix/user task with timer cancel (#533) * cherry picked changes from b12af9f3bc625a6b12cfa0b8d908b378b6be9442 to pin form json files * use the class name to determine what a task type is w/ burnettk * initial thoughts to fix cancel timer issue w/ burnettk * added migration to run predict on all open instances w/ burnettk * remove debug, refactor data migrations, add benchmark_log_func * log progress of script * only process predicted tasks and their parents in the version 2 data miagration w/ burnettk * added data migrator and using that to run version 2 migrations when needed w/ burnettk * removed some unwanted code * fix issue, but tests still need updating * fix tests by returning code to closer to what it was --------- Co-authored-by: jasquat <jasquat@users.noreply.github.com> Co-authored-by: burnettk <burnettk@users.noreply.github.com> 2023-10-10 15:17:09 +00:00
			`end_time = time.time()`
			`current_app.logger.debug(`
			`f"done running data migrations in ./bin/data_migrations/run_all.py. took {end_time - start_time} seconds"`
			`)`


			`if __name__ == "__main__":`
			`main()`