@staticmethod
def cleanup_file_data(copies_to_keep=1):
    """Delete old, unreferenced FileDataModel versions of workflow spec files.

    For every FileModel attached to a WorkflowSpecModel, its FileDataModel
    rows are ordered newest-first by ``date_created``. The newest
    ``copies_to_keep`` rows are always kept. An older row is kept as well
    when a WorkflowSpecDependencyFile or a LookupFileModel row points at
    it; otherwise it is deleted.

    Args:
        copies_to_keep: Number of most recent versions to keep per file.
            Must be an ``int`` greater than zero.

    Returns:
        A ``(current_models, saved_models, deleted_models)`` tuple.
        ``current_models`` contains one list per file (the newest versions
        that were kept). ``saved_models`` and ``deleted_models`` are flat
        lists of the older versions that were kept because they are still
        referenced, and of the versions that were deleted.

    Raises:
        ApiError: code ``bad_keep`` when ``copies_to_keep`` is not a
            positive int.
    """
    # Validate first so the real work is not nested inside an if/else.
    if not (isinstance(copies_to_keep, int) and copies_to_keep > 0):
        raise ApiError(code='bad_keep',
                       message='You must keep at least 1 version')

    deleted_models = []
    saved_models = []
    current_models = []

    def is_referenced(file_data_id):
        """Return True when a dependency or lookup row points at this version."""
        # Only existence matters, so fetch at most one row per check
        # instead of loading every matching row.
        dependency = session.query(WorkflowSpecDependencyFile)\
            .filter(WorkflowSpecDependencyFile.file_data_id == file_data_id)\
            .first()
        if dependency is not None:
            return True
        lookup = session.query(LookupFileModel)\
            .filter(LookupFileModel.file_data_model_id == file_data_id)\
            .first()
        return lookup is not None

    try:
        session.flush()

        for wf_spec_model in session.query(WorkflowSpecModel).all():
            file_models = session.query(FileModel)\
                .filter(FileModel.workflow_spec_id == wf_spec_model.id)\
                .all()

            for file_model in file_models:
                # Newest first, so the slice boundary splits "keep" from
                # "candidate for deletion".
                file_data_models = session.query(FileDataModel)\
                    .filter(FileDataModel.file_model_id == file_model.id)\
                    .order_by(desc(FileDataModel.date_created))\
                    .all()
                current_models.append(file_data_models[:copies_to_keep])

                for fd_model in file_data_models[copies_to_keep:]:
                    if is_referenced(fd_model.id):
                        saved_models.append(fd_model)
                    else:
                        deleted_models.append(fd_model)
                        session.delete(fd_model)

        session.commit()
    except Exception:
        # This runs as an unattended scheduled job: leave the session
        # usable for the next run instead of stuck in a failed transaction.
        session.rollback()
        raise

    return current_models, saved_models, deleted_models
def test_file_data_cleanup(self):
    """Update a file twice. Make sure we clean up the correct files"""

    def send_file(send, url, content, name):
        """Send `content` as a multipart file upload and require success."""
        rv = send(url, data={'file': (io.BytesIO(content), name)}, follow_redirects=True,
                  content_type='multipart/form-data', headers=self.logged_in_headers())
        self.assert_success(rv)
        return rv

    self.load_example_data()
    workflow = self.create_workflow('empty_workflow')

    # Get `empty_workflow` workflow spec
    workflow_spec_model = session.query(WorkflowSpecModel)\
        .filter(WorkflowSpecModel.name == 'empty_workflow')\
        .first()

    # Get file model for empty_workflow spec
    file_model = session.query(FileModel)\
        .filter(FileModel.workflow_spec_id == workflow_spec_model.id)\
        .first()

    # Grab the file data model for empty_workflow file_model
    original_file_data_model = session.query(FileDataModel)\
        .filter(FileDataModel.file_model_id == file_model.id)\
        .order_by(desc(FileDataModel.date_created))\
        .first()

    # Add file to dependencies
    # It should not get deleted
    wf_spec_depend_model = WorkflowSpecDependencyFile(file_data_id=original_file_data_model.id,
                                                      workflow_id=workflow.id)
    session.add(wf_spec_depend_model)
    session.commit()

    # Update the workflow file twice.
    # NOTE(review): presumably each update stores a new FileDataModel
    # version, so the first update becomes the only old, unreferenced one.
    file_data_url = '/v1.0/file/%i/data' % file_model.id
    send_file(self.app.put, file_data_url, self.xml_str_one, file_model.name)
    send_file(self.app.put, file_data_url, self.xml_str_two, file_model.name)

    # Add lookup file
    rv = send_file(self.app.post, '/v1.0/file?workflow_spec_id=%s' % workflow_spec_model.id,
                   b'asdf', 'lookup_1.xlsx')
    file_json = json.loads(rv.get_data(as_text=True))
    lookup_file_id = file_json['id']
    lookup_data_model = session.query(FileDataModel).filter(FileDataModel.file_model_id == lookup_file_id).first()

    # Reference the original lookup data so cleanup has to keep it
    lookup_model = LookupFileModel(file_data_model_id=lookup_data_model.id,
                                   workflow_spec_id=workflow_spec_model.id)
    session.add(lookup_model)
    session.commit()

    # Update lookup file
    send_file(self.app.put, '/v1.0/file/%i/data' % lookup_file_id, b'1234', 'lookup_1.xlsx')

    # Run the cleanup files process
    current_models, saved_models, deleted_models = FileService.cleanup_file_data()

    # assert correct versions are removed
    # NOTE(review): the totals 8 and 4 depend on what load_example_data()
    # and create_workflow() insert -- confirm if the fixtures change.
    new_count = session.query(FileDataModel).count()
    self.assertEqual(8, new_count)
    self.assertEqual(4, len(current_models))
    # Saved: the original workflow file data (dependency) and the original
    # lookup file data (lookup reference).
    self.assertEqual(2, len(saved_models))
    # Deleted: the first update of the workflow file.
    self.assertEqual(1, len(deleted_models))