From c91e81e35624fc11798a16240e227d70ea7e9981 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Mon, 27 Sep 2021 17:15:53 -0400 Subject: [PATCH 1/7] Clean up file data. Always keep at least 1 version. Do not delete if in lookup or dependency tables --- crc/services/file_service.py | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/crc/services/file_service.py b/crc/services/file_service.py index 4e9ae6d9..5e0e4bb7 100644 --- a/crc/services/file_service.py +++ b/crc/services/file_service.py @@ -545,3 +545,49 @@ class FileService(object): dmn_file = prefix + etree.tostring(root) return dmn_file + + @staticmethod + def cleanup_file_data(copies_to_keep=1): + if isinstance(copies_to_keep, int) and copies_to_keep > 0: + + deleted_models = [] + saved_models = [] + current_models = [] + + session.flush() + + workflow_spec_models = session.query(WorkflowSpecModel).all() + + for wf_spec_model in workflow_spec_models: + file_models = session.query(FileModel)\ + .filter(FileModel.workflow_spec_id == wf_spec_model.id)\ + .all() + # current_model = file_models[0] + for file_model in file_models: + file_data_models = session.query(FileDataModel)\ + .filter(FileDataModel.file_model_id == file_model.id)\ + .order_by(desc(FileDataModel.date_created))\ + .all() + current_models.append(file_data_models[:copies_to_keep]) + for fd_model in file_data_models[copies_to_keep:]: + dependencies = session.query(WorkflowSpecDependencyFile)\ + .filter(WorkflowSpecDependencyFile.file_data_id == fd_model.id)\ + .all() + if len(dependencies) > 0: + saved_models.append(fd_model) + continue + lookups = session.query(LookupFileModel)\ + .filter(LookupFileModel.file_data_model_id == fd_model.id)\ + .all() + if len(lookups) > 0: + saved_models.append(fd_model) + continue + deleted_models.append(fd_model) + # session.delete(fd_model) + + session.commit() + return current_models, saved_models, deleted_models + + else: + raise ApiError(code='bad_keep', + message='You must keep at least 1 version') From 0ee377dda3c9618d8f31240a14cde064672000b7 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Mon, 27 Sep 2021 17:16:21 -0400 Subject: [PATCH 2/7] Test for file data cleanup --- tests/files/test_file_data_cleanup.py | 145 ++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 tests/files/test_file_data_cleanup.py diff --git a/tests/files/test_file_data_cleanup.py b/tests/files/test_file_data_cleanup.py new file mode 100644 index 00000000..74a9f9a0 --- /dev/null +++ b/tests/files/test_file_data_cleanup.py @@ -0,0 +1,145 @@ +from tests.base_test import BaseTest + +from crc import session +from crc.models.file import FileModel, FileDataModel, LookupFileModel +from crc.models.workflow import WorkflowSpecModel, WorkflowSpecDependencyFile +from crc.services.file_service import FileService + +from sqlalchemy import desc + +import io +import json + + +class TestFileDataCleanup(BaseTest): + + xml_str_one = b""" + + + + + + + + + + + + """ + + xml_str_two = b""" + + + + Flow_1v0s5ht + + + # Hello + Flow_1v0s5ht + Flow_12k5ua1 + + + + Flow_12k5ua1 + + + + + + + + + + + + + + + + + + + + + + + + + + """ + + def test_file_data_cleanup(self): + """Update a file twice. Make sure we clean up the correct files""" + + self.load_example_data() + workflow = self.create_workflow('empty_workflow') + file_data_model_count = session.query(FileDataModel).count() + + # Use for comparison after cleanup + replaced_models = [] + + # Get workflow spec + workflow_spec_model = session.query(WorkflowSpecModel).first() + + # Add lookup file + data = {'file': (io.BytesIO(b'asdf'), 'lookup_1.xlsx')} + rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % workflow_spec_model.id, data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + self.assert_success(rv) + file_json = json.loads(rv.get_data(as_text=True)) + file_id = file_json['id'] + lookup_model = LookupFileModel(file_data_model_id=file_id, + workflow_spec_id=workflow_spec_model.id) + session.add(lookup_model) + session.commit() + + + # Grab first file model + file_model = session.query(FileModel)\ + .filter(FileModel.workflow_spec_id == workflow_spec_model.id)\ + .first() + + # Grab the file data model we want to replace + old_file_data_model = session.query(FileDataModel)\ + .filter(FileDataModel.file_model_id == file_model.id)\ + .order_by(desc(FileDataModel.date_created))\ + .first() + + # Update first time + replaced_models.append(old_file_data_model) + data = {'file': (io.BytesIO(self.xml_str_one), 'test_bpmn_1.bpmn')} + rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + self.assert_success(rv) + file_json = json.loads(rv.get_data(as_text=True)) + + # Grab the new file data model we want to replace + old_file_data_model = session.query(FileDataModel)\ + .filter(FileDataModel.file_model_id == file_model.id)\ + .order_by(desc(FileDataModel.date_created))\ + .first() + + # Update second time + replaced_models.append(old_file_data_model) + data = {'file': (io.BytesIO(self.xml_str_two), 'test_bpmn_1.bpmn')} + rv = self.app.put('/v1.0/file/%i/data' % file_json['id'], data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + self.assert_success(rv) + file_json = json.loads(rv.get_data(as_text=True)) + file_id = file_json['id'] + + # Add file to dependencies + wf_spec_depend_model = WorkflowSpecDependencyFile(file_data_id=file_id, + workflow_id=workflow.id) + session.add(wf_spec_depend_model) + session.commit() + + # Run the cleanup files process + current_models, saved_models, deleted_models = FileService.cleanup_file_data() + + # assert correct versions are removed + new_count = session.query(FileDataModel).count() + self.assertEqual(set(deleted_models), set(replaced_models)) + self.assertEqual(file_data_model_count, new_count) + + print('test_file_data_cleanup') From 29798f1ba64c795e0ac4d12fb94572aa1b7c9d30 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Tue, 28 Sep 2021 10:16:47 -0400 Subject: [PATCH 3/7] turned it on --- crc/services/file_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crc/services/file_service.py b/crc/services/file_service.py index 5e0e4bb7..aea6ba61 100644 --- a/crc/services/file_service.py +++ b/crc/services/file_service.py @@ -562,7 +562,7 @@ class FileService(object): file_models = session.query(FileModel)\ .filter(FileModel.workflow_spec_id == wf_spec_model.id)\ .all() - # current_model = file_models[0] + for file_model in file_models: file_data_models = session.query(FileDataModel)\ .filter(FileDataModel.file_model_id == file_model.id)\ @@ -583,7 +583,7 @@ class FileService(object): saved_models.append(fd_model) continue deleted_models.append(fd_model) - # session.delete(fd_model) + session.delete(fd_model) session.commit() return current_models, saved_models, deleted_models From 767a90faba12ef1ba3327e85ff8005d48bf7532e Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Tue, 28 Sep 2021 10:20:45 -0400 Subject: [PATCH 4/7] Removed debug print statement --- tests/files/test_file_data_cleanup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/files/test_file_data_cleanup.py b/tests/files/test_file_data_cleanup.py index 74a9f9a0..76df91eb 100644 --- a/tests/files/test_file_data_cleanup.py +++ b/tests/files/test_file_data_cleanup.py @@ -93,7 +93,6 @@ class TestFileDataCleanup(BaseTest): session.add(lookup_model) session.commit() - # Grab first file model file_model = session.query(FileModel)\ .filter(FileModel.workflow_spec_id == workflow_spec_model.id)\ @@ -141,5 +140,3 @@ class TestFileDataCleanup(BaseTest): new_count = session.query(FileDataModel).count() self.assertEqual(set(deleted_models), set(replaced_models)) self.assertEqual(file_data_model_count, new_count) - - print('test_file_data_cleanup') From 89b8be075585f330a5806e5239574cff399a400f Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Tue, 28 Sep 2021 10:25:13 -0400 Subject: [PATCH 5/7] Take saved files into account during assertion --- tests/files/test_file_data_cleanup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/files/test_file_data_cleanup.py b/tests/files/test_file_data_cleanup.py index 76df91eb..5222339c 100644 --- a/tests/files/test_file_data_cleanup.py +++ b/tests/files/test_file_data_cleanup.py @@ -138,5 +138,5 @@ class TestFileDataCleanup(BaseTest): # assert correct versions are removed new_count = session.query(FileDataModel).count() - self.assertEqual(set(deleted_models), set(replaced_models)) + self.assertEqual(set(deleted_models).union(set(saved_models)), set(replaced_models)) self.assertEqual(file_data_model_count, new_count) From cc0b7853e3d3ee80f00e5f1e2ea67d869de10091 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Tue, 28 Sep 2021 13:12:16 -0400 Subject: [PATCH 6/7] reordered the setup and tests --- tests/files/test_file_data_cleanup.py | 93 ++++++++++++++------------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/tests/files/test_file_data_cleanup.py b/tests/files/test_file_data_cleanup.py index 5222339c..96110d09 100644 --- a/tests/files/test_file_data_cleanup.py +++ b/tests/files/test_file_data_cleanup.py @@ -78,8 +78,44 @@ class TestFileDataCleanup(BaseTest): # Use for comparison after cleanup replaced_models = [] - # Get workflow spec - workflow_spec_model = session.query(WorkflowSpecModel).first() + # Get `empty_workflow` workflow spec + workflow_spec_model = session.query(WorkflowSpecModel)\ + .filter(WorkflowSpecModel.name == 'empty_workflow')\ + .first() + + # Get file model for empty_workflow spec + file_model = session.query(FileModel)\ + .filter(FileModel.workflow_spec_id == workflow_spec_model.id)\ + .first() + + # Grab the file data model for empty_workflow file_model + original_file_data_model = session.query(FileDataModel)\ + .filter(FileDataModel.file_model_id == file_model.id)\ + .order_by(desc(FileDataModel.date_created))\ + .first() + + # Add file to dependencies + # It should not get deleted + wf_spec_depend_model = WorkflowSpecDependencyFile(file_data_id=original_file_data_model.id, + workflow_id=workflow.id) + session.add(wf_spec_depend_model) + session.commit() + + # Update first time + replaced_models.append(original_file_data_model) + data = {'file': (io.BytesIO(self.xml_str_one), file_model.name)} + rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + self.assert_success(rv) + file_json_first = json.loads(rv.get_data(as_text=True)) + + # Update second time + # replaced_models.append(old_file_data_model) + data = {'file': (io.BytesIO(self.xml_str_two), file_model.name)} + rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + self.assert_success(rv) + file_json_second = json.loads(rv.get_data(as_text=True)) # Add lookup file data = {'file': (io.BytesIO(b'asdf'), 'lookup_1.xlsx')} @@ -87,56 +123,27 @@ class TestFileDataCleanup(BaseTest): content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_success(rv) file_json = json.loads(rv.get_data(as_text=True)) - file_id = file_json['id'] - lookup_model = LookupFileModel(file_data_model_id=file_id, + lookup_file_id = file_json['id'] + lookup_data_model = session.query(FileDataModel).filter(FileDataModel.file_model_id == lookup_file_id).first() + lookup_model = LookupFileModel(file_data_model_id=lookup_data_model.id, workflow_spec_id=workflow_spec_model.id) session.add(lookup_model) session.commit() - # Grab first file model - file_model = session.query(FileModel)\ - .filter(FileModel.workflow_spec_id == workflow_spec_model.id)\ - .first() - - # Grab the file data model we want to replace - old_file_data_model = session.query(FileDataModel)\ - .filter(FileDataModel.file_model_id == file_model.id)\ - .order_by(desc(FileDataModel.date_created))\ - .first() - - # Update first time - replaced_models.append(old_file_data_model) - data = {'file': (io.BytesIO(self.xml_str_one), 'test_bpmn_1.bpmn')} - rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True, + # Update lookup file + data = {'file': (io.BytesIO(b'1234'), 'lookup_1.xlsx')} + rv = self.app.put('/v1.0/file/%i/data' % lookup_file_id, data=data, follow_redirects=True, content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_success(rv) - file_json = json.loads(rv.get_data(as_text=True)) - - # Grab the new file data model we want to replace - old_file_data_model = session.query(FileDataModel)\ - .filter(FileDataModel.file_model_id == file_model.id)\ - .order_by(desc(FileDataModel.date_created))\ - .first() - - # Update second time - replaced_models.append(old_file_data_model) - data = {'file': (io.BytesIO(self.xml_str_two), 'test_bpmn_1.bpmn')} - rv = self.app.put('/v1.0/file/%i/data' % file_json['id'], data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assert_success(rv) - file_json = json.loads(rv.get_data(as_text=True)) - file_id = file_json['id'] - - # Add file to dependencies - wf_spec_depend_model = WorkflowSpecDependencyFile(file_data_id=file_id, - workflow_id=workflow.id) - session.add(wf_spec_depend_model) - session.commit() # Run the cleanup files process current_models, saved_models, deleted_models = FileService.cleanup_file_data() # assert correct versions are removed new_count = session.query(FileDataModel).count() - self.assertEqual(set(deleted_models).union(set(saved_models)), set(replaced_models)) - self.assertEqual(file_data_model_count, new_count) + self.assertEqual(8, new_count) + self.assertEqual(4, len(current_models)) + self.assertEqual(2, len(saved_models)) + self.assertEqual(1, len(deleted_models)) + + print('test_file_data_cleanup') From f44b9836ceb95c229d9832879fdaacbbd81fd609 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Thu, 30 Sep 2021 14:11:33 -0400 Subject: [PATCH 7/7] Add file cleanup to the scheduler --- crc/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crc/__init__.py b/crc/__init__.py index 31fb8ceb..e18d09e7 100644 --- a/crc/__init__.py +++ b/crc/__init__.py @@ -46,6 +46,7 @@ ma = Marshmallow(app) from crc import models from crc import api from crc.api import admin +from crc.services.file_service import FileService from crc.services.workflow_service import WorkflowService connexion_app.add_api('api.yml', base_path='/v1.0') @@ -56,6 +57,7 @@ def process_waiting_tasks(): WorkflowService.do_waiting() scheduler.add_job(process_waiting_tasks,'interval',minutes=1) +scheduler.add_job(FileService.cleanup_file_data, 'interval', minutes=1440) # once a day scheduler.start()