diff --git a/crc/scripts/file_data_get.py b/crc/scripts/file_data_get.py index 5ce6eb14..2396ea59 100644 --- a/crc/scripts/file_data_get.py +++ b/crc/scripts/file_data_get.py @@ -1,5 +1,3 @@ -from flask import g - from crc.api.common import ApiError from crc.services.data_store_service import DataStoreBase from crc.scripts.script import Script @@ -7,32 +5,35 @@ from crc.scripts.script import Script class FileDataGet(Script, DataStoreBase): def get_description(self): - return """Gets user data from the data store - takes only two keyword arguments arguments: 'file_id' and 'key' """ + return """Gets user data from the data store - takes two keyword arguments: 'file_id' and 'key' """ def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs): - if self.validate_kw_args(**kwargs): - myargs = [kwargs['key']] - return True + self.validate_kw_args(**kwargs) + my_args = [kwargs['key']] + if 'default' in kwargs.keys(): + my_args.append(kwargs['default']) + result = self.get_validate_common('file_data_get', None, None, kwargs['file_id'], *my_args) + return result - def validate_kw_args(self,**kwargs): - if kwargs.get('key',None) is None: + @staticmethod + def validate_kw_args(**kwargs): + if kwargs.get('key', None) is None: raise ApiError(code="missing_argument", - message=f"The 'file_data_get' script requires a keyword argument of 'key'") + message=f"The 'file_data_get' script requires a keyword argument of 'key'") - if kwargs.get('file_id',None) is None: + if kwargs.get('file_id', None) is None: raise ApiError(code="missing_argument", - message=f"The 'file_data_get' script requires a keyword argument of 'file_id'") + message=f"The 'file_data_get' script requires a keyword argument of 'file_id'") return True - def do_task(self, task, study_id, workflow_id, *args, **kwargs): - if self.validate_kw_args(**kwargs): - myargs = [kwargs['key']] + self.validate_kw_args(**kwargs) + my_args = [kwargs['key']] if 'default' in kwargs.keys(): - 
myargs.append(kwargs['default']) + my_args.append(kwargs['default']) return self.get_data_common(None, None, 'file_data_get', kwargs['file_id'], - *myargs) + *my_args) diff --git a/crc/scripts/file_data_set.py b/crc/scripts/file_data_set.py index 4b2ce399..b3a2b8f3 100644 --- a/crc/scripts/file_data_set.py +++ b/crc/scripts/file_data_set.py @@ -1,5 +1,3 @@ -from flask import g - from crc.api.common import ApiError from crc.services.data_store_service import DataStoreBase from crc.scripts.script import Script @@ -9,16 +7,23 @@ from crc.services.user_file_service import UserFileService class FileDataSet(Script, DataStoreBase): def get_description(self): - return """Sets data the data store - takes three keyword arguments arguments: 'file_id' and 'key' and 'value'""" + return """Sets data in the data store - takes three keyword arguments: 'file_id', 'key' and 'value'""" def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs): - if self.validate_kw_args(**kwargs): - myargs = [kwargs['key'],kwargs['value']] - fileid = kwargs['file_id'] - del(kwargs['file_id']) - return True + self.validate_kw_args(**kwargs) + my_args = [kwargs['key'], kwargs['value']] + file_id = kwargs['file_id'] + result = self.set_validate_common(task.id, + study_id, + workflow_id, + 'file_data_set', + None, + file_id, + *my_args) + return result - def validate_kw_args(self, **kwargs): + @staticmethod + def validate_kw_args(**kwargs): if kwargs.get('key', None) is None: raise ApiError(code="missing_argument", message=f"The 'file_data_get' script requires a keyword argument of 'key'") @@ -32,28 +37,26 @@ class FileDataSet(Script, DataStoreBase): if kwargs['key'] == 'irb_code' and not DocumentService.is_allowed_document(kwargs.get('value')): raise ApiError("invalid_form_field_key", "When setting an irb_code, the form field id must match a known document in the " - "irb_docunents.xslx reference file. 
This code is not found in that file '%s'" % + "irb_documents.xlsx reference file. This code is not found in that file '%s'" % kwargs.get('value')) return True - def do_task(self, task, study_id, workflow_id, *args, **kwargs): - if self.validate_kw_args(**kwargs): - myargs = [kwargs['key'],kwargs['value']] + self.validate_kw_args(**kwargs) + my_args = [kwargs['key'], kwargs['value']] try: fileid = int(kwargs['file_id']) - except: + except Exception: raise ApiError("invalid_file_id", - "Attempting to update DataStore for an invalid fileid '%s'" % kwargs['file_id']) + "Attempting to update DataStore for an invalid file_id '%s'" % kwargs['file_id']) del(kwargs['file_id']) if kwargs['key'] == 'irb_code': irb_doc_code = kwargs['value'] UserFileService.update_irb_code(fileid, irb_doc_code) - return self.set_data_common(task.id, None, None, @@ -61,6 +64,5 @@ class FileDataSet(Script, DataStoreBase): None, 'file_data_set', fileid, - *myargs, + *my_args, **kwargs) - diff --git a/crc/scripts/study_data_get.py b/crc/scripts/study_data_get.py index 085caa5f..a4803964 100644 --- a/crc/scripts/study_data_get.py +++ b/crc/scripts/study_data_get.py @@ -2,12 +2,12 @@ from crc.services.data_store_service import DataStoreBase from crc.scripts.script import Script -class StudyDataGet(Script,DataStoreBase): +class StudyDataGet(Script, DataStoreBase): def get_description(self): return """Gets study data from the data store.""" def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs): - return self.do_task(task, study_id, workflow_id, *args, **kwargs) + return self.get_validate_common('study_data_get', study_id, None, None, *args) def do_task(self, task, study_id, workflow_id, *args, **kwargs): return self.get_data_common(study_id, @@ -15,4 +15,3 @@ class StudyDataGet(Script,DataStoreBase): 'study_data_get', None, *args) - diff --git a/crc/scripts/study_data_set.py b/crc/scripts/study_data_set.py index 2d0c2e6b..81647dac 100644 --- a/crc/scripts/study_data_set.py +++ 
b/crc/scripts/study_data_set.py @@ -2,17 +2,19 @@ from crc.services.data_store_service import DataStoreBase from crc.scripts.script import Script -class StudyDataSet(Script,DataStoreBase): +class StudyDataSet(Script, DataStoreBase): def get_description(self): return """Sets study data from the data store. Takes two positional arguments key and value""" def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs): - self.set_validate_common(study_id, - workflow_id, - None, - 'study_data_set', - None, - *args) + result = self.set_validate_common(task.id, + study_id, + workflow_id, + 'study_data_set', + None, + None, + *args) + return result def do_task(self, task, study_id, workflow_id, *args, **kwargs): return self.set_data_common(task.id, @@ -24,9 +26,3 @@ class StudyDataSet(Script,DataStoreBase): None, *args, **kwargs) - - - - - - diff --git a/crc/scripts/user_data_get.py b/crc/scripts/user_data_get.py index 103475b1..4e7572fc 100644 --- a/crc/scripts/user_data_get.py +++ b/crc/scripts/user_data_get.py @@ -9,7 +9,7 @@ class UserDataGet(Script, DataStoreBase): return """Gets user data from the data store - takes only one argument 'key' """ def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs): - self.do_task(task, study_id, workflow_id, *args, **kwargs) + return self.get_validate_common('user_data_get', None, g.user.uid, None, *args) def do_task(self, task, study_id, workflow_id, *args, **kwargs): return self.get_data_common(None, diff --git a/crc/scripts/user_data_set.py b/crc/scripts/user_data_set.py index 9baf77fd..a58d05dd 100644 --- a/crc/scripts/user_data_set.py +++ b/crc/scripts/user_data_set.py @@ -4,17 +4,18 @@ from crc.services.data_store_service import DataStoreBase from crc.scripts.script import Script -class UserDataSet(Script,DataStoreBase): +class UserDataSet(Script, DataStoreBase): def get_description(self): return """Sets user data to the data store these are positional arguments key and value. 
- example: user_data_set('mykey','myvalue') + example: user_data_set('my_key','my_value') """ def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs): - self.set_validate_common(None, + self.set_validate_common(task.id, + study_id, workflow_id, - g.user.uid, 'user_data_set', + g.user.uid, None, *args) @@ -28,6 +29,3 @@ class UserDataSet(Script,DataStoreBase): None, *args, **kwargs) - - - diff --git a/crc/services/data_store_service.py b/crc/services/data_store_service.py index 2f337705..4d8a7305 100644 --- a/crc/services/data_store_service.py +++ b/crc/services/data_store_service.py @@ -2,45 +2,58 @@ from crc import session from crc.api.common import ApiError from crc.models.data_store import DataStoreModel from crc.models.workflow import WorkflowModel -from datetime import datetime + +from flask import g class DataStoreBase(object): - def overwritten(self, value, prev_value): - if prev_value is None: - overwritten = False - else: - if prev_value == value: - overwritten = False - else: - overwritten = True - return overwritten - - - def set_validate_common(self, study_id, workflow_id, user_id, script_name, file_id, *args): + def set_validate_common(self, task_id, study_id, workflow_id, script_name, user_id, file_id, *args): self.check_args_2(args, script_name) - workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() - self.get_prev_value(study_id=study_id, user_id=user_id, file_id=file_id, key=args[0]) + if script_name == 'study_data_set': + record = {'task_id': task_id, 'study_id': study_id, 'workflow_id': workflow_id, args[0]: args[1]} + elif script_name == 'file_data_set': + record = {'task_id': task_id, 'study_id': study_id, 'workflow_id': workflow_id, 'file_id': file_id, args[0]: args[1]} + elif script_name == 'user_data_set': + record = {'task_id': task_id, 'study_id': study_id, 'workflow_id': workflow_id, 'user_id': user_id, args[0]: args[1]} + g.validation_data_store.append(record) + return record - def 
check_args(self, args, maxlen=1, script_name='study_data_get'): + def get_validate_common(self, script_name, study_id=None, user_id=None, file_id=None, *args): + # This method uses a temporary validation_data_store that is only available for the current validation request. + # This allows us to set data_store values during validation that don't affect the real data_store. + # For data_store `gets`, we first look in the temporary validation_data_store. + # If we don't find an entry in validation_data_store, we look in the real data_store. + if script_name == 'study_data_get': + # If it's in the validation data store, return it + for record in g.validation_data_store: + if 'study_id' in record and record['study_id'] == study_id and args[0] in record: + return record[args[0]] + # If not in validation_data_store, look for in the actual data_store + return self.get_data_common(study_id, user_id, 'study_data_get', file_id, *args) + elif script_name == 'file_data_get': + for record in g.validation_data_store: + if 'file_id' in record and record['file_id'] == file_id and args[0] in record: + return record[args[0]] + return self.get_data_common(study_id, user_id, 'file_data_get', file_id, *args) + elif script_name == 'user_data_get': + for record in g.validation_data_store: + if 'user_id' in record and record['user_id'] == user_id and args[0] in record: + return record[args[0]] + return self.get_data_common(study_id, user_id, 'user_data_get', file_id, *args) + + @staticmethod + def check_args(args, maxlen=1, script_name='study_data_get'): if len(args) < 1 or len(args) > maxlen: raise ApiError(code="missing_argument", message=f"The {script_name} script takes either one or two arguments, " f"starting with the key and an optional default") - def check_args_2(self, args, script_name='study_data_set'): + @staticmethod + def check_args_2(args, script_name='study_data_set'): if len(args) != 2: raise ApiError(code="missing_argument", - message=f"The {script_name} script takes two 
arguments, starting with the key and a " - "value for the key") - - def get_prev_value(self, study_id, user_id, key, file_id): - study = session.query(DataStoreModel).filter_by(study_id=study_id, - user_id=user_id, - file_id=file_id, - key=key).first() - return study + message=f"The {script_name} script takes two arguments, key and value, in that order.") def set_data_common(self, task_spec, @@ -54,47 +67,38 @@ class DataStoreBase(object): **kwargs): self.check_args_2(args, script_name=script_name) - study = self.get_prev_value(study_id=study_id, - user_id=user_id, - file_id=file_id, - key=args[0]) if workflow_spec_id is None and workflow_id is not None: workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() workflow_spec_id = workflow.workflow_spec_id - if study is not None: - prev_value = study.value - else: - prev_value = None - study = DataStoreModel(key=args[0], value=args[1], - study_id=study_id, - task_spec=task_spec, - user_id=user_id, # Make this available to any User - file_id=file_id, - workflow_id=workflow_id, - spec_id=workflow_spec_id) - study.value = args[1] - study.last_updated = datetime.utcnow() - overwritten = self.overwritten(study.value, prev_value) - session.add(study) + dsm = DataStoreModel(key=args[0], + value=args[1], + study_id=study_id, + task_spec=task_spec, + user_id=user_id, # Make this available to any User + file_id=file_id, + workflow_id=workflow_id, + spec_id=workflow_spec_id) + session.add(dsm) session.commit() - return {'new_value': study.value, - 'old_value': prev_value, - 'overwritten': overwritten} + + return dsm.value def get_data_common(self, study_id, user_id, script_name, file_id=None, *args): self.check_args(args, 2, script_name) - study = session.query(DataStoreModel).filter_by(study_id=study_id, - user_id=user_id, - file_id=file_id, - key=args[ - 0]).first() - if study: - return study.value + record = session.query(DataStoreModel).filter_by(study_id=study_id, + user_id=user_id, + 
file_id=file_id, + key=args[0]).first() + if record: + return record.value else: - return args[1] + # This is a possible default value passed in from the data_store get methods + if len(args) == 2: + return args[1] - def get_multi_common(self, study_id, user_id, file_id=None): - study = session.query(DataStoreModel).filter_by(study_id=study_id, - user_id=user_id, - file_id=file_id) - return study + @staticmethod + def get_multi_common(study_id, user_id, file_id=None): + results = session.query(DataStoreModel).filter_by(study_id=study_id, + user_id=user_id, + file_id=file_id) + return results diff --git a/crc/services/workflow_service.py b/crc/services/workflow_service.py index 6fe4248c..13ce9f11 100755 --- a/crc/services/workflow_service.py +++ b/crc/services/workflow_service.py @@ -41,6 +41,8 @@ from crc.services.user_service import UserService from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_spec_service import WorkflowSpecService +from flask import g + class WorkflowService(object): TASK_ACTION_COMPLETE = "COMPLETE" @@ -73,7 +75,7 @@ class WorkflowService(object): db.session.add(UserModel(uid="test")) db.session.commit() user = db.session.query(UserModel).filter_by(uid="test").first() - if validate_study_id: + if validate_study_id is not None: study = db.session.query(StudyModel).filter_by(id=validate_study_id).first() else: study = db.session.query(StudyModel).filter_by(user_uid=user.uid).first() @@ -175,6 +177,8 @@ class WorkflowService(object): spec, only completing the required fields, rather than everything. 
""" + g.validation_data_store = [] + workflow_model = WorkflowService.make_test_workflow(spec_id, validate_study_id) try: processor = WorkflowProcessor(workflow_model, validate_only=True) diff --git a/tests/data/data_store_validation/data_store_validation.bpmn b/tests/data/data_store_validation/data_store_validation.bpmn new file mode 100644 index 00000000..ca4783e2 --- /dev/null +++ b/tests/data/data_store_validation/data_store_validation.bpmn @@ -0,0 +1,81 @@ + + + + + Flow_0nstzm8 + + + + Flow_0nstzm8 + Flow_08r3ga0 + study_data_set('study_data_key', 'study_data_value') +file_data_set(file_id=1, key='file_data_key', value='file_data_value') +user_data_set('user_data_key', 'user_data_value') + + + + Flow_08r3ga0 + Flow_02l0u2v + +previous_study_data_value = study_data_get('previous_study_data_key') +previous_file_data_value = file_data_get(file_id=1, key='previous_file_data_key') +previous_user_data_value = user_data_get('previous_user_data_key') + +study_data_value = study_data_get('study_data_key') +file_data_value = file_data_get(file_id=1, key='file_data_key') +user_data_value = user_data_get('user_data_key') + +bad_study_data_value = study_data_get('bad_study_data_key', 'bad_study_data_value') +bad_file_data_value = file_data_get(file_id=1, key='bad_file_data_key', default='bad_file_data_value') +bad_user_data_value = user_data_get('bad_user_data_key', 'bad_user_data_value') + + + + + ## Data +# Study +{{ study_data_value }} + Flow_02l0u2v + Flow_047lt2r + + + Flow_047lt2r + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/workflow/test_workflow_data_store_validation.py b/tests/workflow/test_workflow_data_store_validation.py new file mode 100644 index 00000000..81f17c44 --- /dev/null +++ b/tests/workflow/test_workflow_data_store_validation.py @@ -0,0 +1,94 @@ +from tests.base_test import BaseTest +from crc import session +from crc.models.data_store import DataStoreModel +from crc.models.file import FileModel, 
FileType +from crc.models.study import StudyModel +from crc.models.user import UserModel +from crc.services.workflow_service import WorkflowService +from flask import g + + +class TestDataStoreValidation(BaseTest): + + @staticmethod + def add_test_file(): + file_model = FileModel( + name='my_test_file', + type=FileType.pdf, + content_type='application/pdf' + ) + session.add(file_model) + session.commit() + file_id = session.query(FileModel.id).filter(FileModel.name == 'my_test_file').scalar() + return file_id + + @staticmethod + def add_previous_data_stores(user, study, spec_model, file_id): + dsm = DataStoreModel( + key='previous_study_data_key', + workflow_id=None, + study_id=study.id, + task_spec=None, + spec_id=spec_model.id, + user_id=None, + file_id=None, + value='previous_study_data_value' + + ) + session.add(dsm) + dsm = DataStoreModel( + key='previous_user_data_key', + workflow_id=None, + study_id=None, + task_spec=None, + spec_id=spec_model.id, + user_id=user.uid, + file_id=None, + value='previous_user_data_value' + ) + session.add(dsm) + dsm = DataStoreModel( + key='previous_file_data_key', + workflow_id=None, + study_id=None, + task_spec=None, + spec_id=spec_model.id, + user_id=None, + file_id=file_id, + value='previous_file_data_value' + ) + session.add(dsm) + + session.commit() + + def test_data_store_validation(self): + # add_studies also adds test users + self.add_studies() + user = session.query(UserModel).first() + g.user = user + study = session.query(StudyModel).first() + spec_model = self.load_test_spec('data_store_validation') + file_id = self.add_test_file() + self.add_previous_data_stores(user, study, spec_model, file_id) + + result = WorkflowService.test_spec(spec_model.id, validate_study_id=study.id) + self.assertIn('previous_study_data_value', result) + self.assertEqual('previous_study_data_value', result['previous_study_data_value']) + self.assertIn('previous_file_data_value', result) + self.assertEqual('previous_file_data_value', 
result['previous_file_data_value']) + self.assertIn('previous_user_data_value', result) + self.assertEqual('previous_user_data_value', result['previous_user_data_value']) + + self.assertIn('study_data_value', result) + self.assertEqual('study_data_value', result['study_data_value']) + self.assertIn('file_data_value', result) + self.assertEqual('file_data_value', result['file_data_value']) + self.assertIn('user_data_value', result) + self.assertEqual('user_data_value', result['user_data_value']) + + self.assertIn('bad_study_data_value', result) + self.assertEqual('bad_study_data_value', result['bad_study_data_value']) + self.assertIn('bad_file_data_value', result) + self.assertEqual('bad_file_data_value', result['bad_file_data_value']) + self.assertIn('bad_user_data_value', result) + self.assertEqual('bad_user_data_value', result['bad_user_data_value'])