diff --git a/crc/__init__.py b/crc/__init__.py
index dcb6cde8..7fde7de0 100644
--- a/crc/__init__.py
+++ b/crc/__init__.py
@@ -47,6 +47,7 @@ ma = Marshmallow(app)
from crc import models
from crc import api
from crc.api import admin
+from crc.services.file_service import FileService
from crc.services.workflow_service import WorkflowService
connexion_app.add_api('api.yml', base_path='/v1.0')
@@ -57,6 +58,7 @@ def process_waiting_tasks():
WorkflowService.do_waiting()
scheduler.add_job(process_waiting_tasks,'interval',minutes=1)
+scheduler.add_job(FileService.cleanup_file_data, 'interval', days=1)  # once a day
scheduler.start()
diff --git a/crc/services/file_service.py b/crc/services/file_service.py
index 4e9ae6d9..aea6ba61 100644
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@@ -545,3 +545,59 @@ class FileService(object):
dmn_file = prefix + etree.tostring(root)
return dmn_file
+
+    @staticmethod
+    def cleanup_file_data(copies_to_keep=1):
+        """Delete superseded FileDataModel versions of workflow spec files.
+
+        For each file of each workflow spec, the `copies_to_keep` newest
+        versions (by date_created) are kept. Older versions are deleted unless
+        a WorkflowSpecDependencyFile or LookupFileModel row still points at them.
+
+        Returns a tuple (current_models, saved_models, deleted_models):
+        current_models holds one list of kept versions per file; saved_models
+        and deleted_models are flat lists of the older versions that were
+        spared and deleted respectively.
+
+        Raises ApiError (code 'bad_keep') unless copies_to_keep is an int > 0.
+        """
+        if not (isinstance(copies_to_keep, int) and copies_to_keep > 0):
+            raise ApiError(code='bad_keep',
+                           message='You must keep at least 1 version')
+
+        deleted_models = []
+        saved_models = []
+        current_models = []
+
+        session.flush()
+        try:
+            for wf_spec_model in session.query(WorkflowSpecModel).all():
+                file_models = session.query(FileModel)\
+                    .filter(FileModel.workflow_spec_id == wf_spec_model.id)\
+                    .all()
+                for file_model in file_models:
+                    file_data_models = session.query(FileDataModel)\
+                        .filter(FileDataModel.file_model_id == file_model.id)\
+                        .order_by(desc(FileDataModel.date_created))\
+                        .all()
+                    current_models.append(file_data_models[:copies_to_keep])
+                    for fd_model in file_data_models[copies_to_keep:]:
+                        # Existence checks only, so fetch at most one row each.
+                        in_use = session.query(WorkflowSpecDependencyFile)\
+                            .filter(WorkflowSpecDependencyFile.file_data_id == fd_model.id)\
+                            .first() is not None
+                        if not in_use:
+                            in_use = session.query(LookupFileModel)\
+                                .filter(LookupFileModel.file_data_model_id == fd_model.id)\
+                                .first() is not None
+                        if in_use:
+                            saved_models.append(fd_model)
+                        else:
+                            deleted_models.append(fd_model)
+                            session.delete(fd_model)
+            session.commit()
+        except Exception:
+            # Roll back so the module-level session stays usable for later callers.
+            session.rollback()
+            raise
+        return current_models, saved_models, deleted_models
diff --git a/tests/files/test_file_data_cleanup.py b/tests/files/test_file_data_cleanup.py
new file mode 100644
index 00000000..96110d09
--- /dev/null
+++ b/tests/files/test_file_data_cleanup.py
@@ -0,0 +1,149 @@
+from tests.base_test import BaseTest
+
+from crc import session
+from crc.models.file import FileModel, FileDataModel, LookupFileModel
+from crc.models.workflow import WorkflowSpecModel, WorkflowSpecDependencyFile
+from crc.services.file_service import FileService
+
+from sqlalchemy import desc
+
+import io
+import json
+
+
+class TestFileDataCleanup(BaseTest):
+
+ xml_str_one = b"""
+
+
+
+
+
+
+
+
+
+
+
+ """
+
+ xml_str_two = b"""
+
+
+
+ Flow_1v0s5ht
+
+
+ # Hello
+ Flow_1v0s5ht
+ Flow_12k5ua1
+
+
+
+ Flow_12k5ua1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ """
+
+    def test_file_data_cleanup(self):
+        """Update a file twice. Make sure we clean up the correct files"""
+
+        self.load_example_data()
+        workflow = self.create_workflow('empty_workflow')
+
+        # Get `empty_workflow` workflow spec
+        workflow_spec_model = session.query(WorkflowSpecModel)\
+            .filter(WorkflowSpecModel.name == 'empty_workflow')\
+            .first()
+
+        # Get file model for empty_workflow spec
+        file_model = session.query(FileModel)\
+            .filter(FileModel.workflow_spec_id == workflow_spec_model.id)\
+            .first()
+
+        # Grab the file data model for empty_workflow file_model
+        original_file_data_model = session.query(FileDataModel)\
+            .filter(FileDataModel.file_model_id == file_model.id)\
+            .order_by(desc(FileDataModel.date_created))\
+            .first()
+        # Keep the id for the assertions below; later commits may expire the instance.
+        original_data_id = original_file_data_model.id
+
+        # Add file to dependencies
+        # It should not get deleted
+        wf_spec_depend_model = WorkflowSpecDependencyFile(file_data_id=original_data_id,
+                                                          workflow_id=workflow.id)
+        session.add(wf_spec_depend_model)
+        session.commit()
+
+        # Update first time
+        # Nothing references this version, so it is the one cleanup should delete
+        data = {'file': (io.BytesIO(self.xml_str_one), file_model.name)}
+        rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True,
+                          content_type='multipart/form-data', headers=self.logged_in_headers())
+        self.assert_success(rv)
+
+        # Update second time
+        # This becomes the current version, which is always kept
+        data = {'file': (io.BytesIO(self.xml_str_two), file_model.name)}
+        rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True,
+                          content_type='multipart/form-data', headers=self.logged_in_headers())
+        self.assert_success(rv)
+
+        # Add lookup file
+        data = {'file': (io.BytesIO(b'asdf'), 'lookup_1.xlsx')}
+        rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % workflow_spec_model.id, data=data, follow_redirects=True,
+                           content_type='multipart/form-data', headers=self.logged_in_headers())
+        self.assert_success(rv)
+        file_json = json.loads(rv.get_data(as_text=True))
+        lookup_file_id = file_json['id']
+        lookup_data_model = session.query(FileDataModel).filter(FileDataModel.file_model_id == lookup_file_id).first()
+        lookup_data_id = lookup_data_model.id
+        lookup_model = LookupFileModel(file_data_model_id=lookup_data_id,
+                                       workflow_spec_id=workflow_spec_model.id)
+        session.add(lookup_model)
+        session.commit()
+
+        # Update lookup file
+        data = {'file': (io.BytesIO(b'1234'), 'lookup_1.xlsx')}
+        rv = self.app.put('/v1.0/file/%i/data' % lookup_file_id, data=data, follow_redirects=True,
+                          content_type='multipart/form-data', headers=self.logged_in_headers())
+        self.assert_success(rv)
+
+        # Run the cleanup files process
+        current_models, saved_models, deleted_models = FileService.cleanup_file_data()
+
+        # assert correct versions are removed
+        new_count = session.query(FileDataModel).count()
+        self.assertEqual(8, new_count)
+        self.assertEqual(4, len(current_models))
+        self.assertEqual(2, len(saved_models))
+        self.assertEqual(1, len(deleted_models))
+
+        # The superseded versions that survive must be exactly the two that are
+        # still referenced: one by the dependency, one by the lookup
+        self.assertEqual({original_data_id, lookup_data_id},
+                         {model.id for model in saved_models})