From cd7f67ab48b7bec858f5d5eb6a34be556f4d029c Mon Sep 17 00:00:00 2001
From: Dan Funk
Date: Thu, 28 May 2020 08:27:26 -0400
Subject: [PATCH 1/7] A major refactor of how we search and store files, as
 there were a lot of confusing bits in here.

From an API point of view you can do the following (and only the following):

/files?workflow_spec_id=x
* You can find all files associated with a workflow_spec_id, and add a file
  with a workflow_spec_id.

/files?workflow_id=x
* You can find all files associated with a workflow_id, and add a file that
  is directly associated with the workflow.

/files?workflow_id=x&form_field_key=y
* You can find all files associated with a form element on a running
  workflow, and add a new file. Note: you can add multiple files for the
  same form_field_key IF they have different file names. If a file with the
  same name already exists, the original file is archived, and the new file
  takes its place.

(A usage sketch of these endpoints appears after this patch.)

The study endpoints always return a list of the file metadata associated
with the study. Removed /study-files, but there is an endpoint called
/study/all that returns all the studies in the system, and does include
their files.

On a deeper level, the File model no longer contains:
- study_id
- task_id
- form_field_key

Instead, if the file is associated with a workflow, then that is the one
way it is connected to the study, and we use this relationship to find
files for a study. A file is never associated with a task_id, as these
change when the workflow is reloaded. The form_field_key must match the
irb_doc_code, so when requesting files for a form field, we just look up
the irb_doc_code.
---
 crc/api.yml                      | 28 ++++-------
 crc/api/file.py                  | 38 ++++++++-------
 crc/api/study.py                 | 12 ++---
 crc/models/file.py               |  3 --
 crc/models/study.py              | 18 +------
 crc/scripts/complete_template.py | 13 ++----
 crc/services/file_service.py     | 79 ++++++++++----------
 crc/services/study_service.py    | 18 +++++---
 tests/data/docx/docx.bpmn        | 30 ++++------
 tests/test_approvals_service.py  | 43 +++++----------
 tests/test_file_service.py       | 52 +++++++----------
 tests/test_files_api.py          |  5 +-
 tests/test_study_service.py      | 41 ++++++++++++-----
 tests/test_workflow_processor.py |  4 +-
 14 files changed, 164 insertions(+), 220 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index 2160eef2..c061307d 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -82,7 +82,7 @@ paths:
   # /v1.0/study
   /study:
     get:
-      operationId: crc.api.study.all_studies
+      operationId: crc.api.study.user_studies
       summary: Provides a list of studies related to the current user.
tags: - Studies @@ -109,11 +109,13 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/Study" - /study-files: + type: array + items: + $ref: "#/components/schemas/Study" + /study/all: get: - operationId: crc.api.study.all_studies_and_files - summary: Provides a list of studies with submitted files + operationId: crc.api.study.all_studies + summary: Provides a list of studies tags: - Studies responses: @@ -122,7 +124,9 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/Study" + type: array + items: + $ref: "#/components/schemas/Study" /study/{study_id}: parameters: - name: study_id @@ -353,24 +357,12 @@ paths: description: The unique id of a workflow specification schema: type: string - - name: study_id - in: query - required: false - description: The unique id of a study - schema: - type: integer - name: workflow_id in: query required: false description: The unique id of a workflow instance schema: type: integer - - name: task_id - in: query - required: false - description: The unique id of a workflow task - schema: - type: string - name: form_field_key in: query required: false diff --git a/crc/api/file.py b/crc/api/file.py index 706df77e..733dcd34 100644 --- a/crc/api/file.py +++ b/crc/api/file.py @@ -5,18 +5,20 @@ from flask import send_file from crc import session from crc.api.common import ApiError -from crc.models.file import FileModelSchema, FileModel, FileDataModel +from crc.models.file import FileModelSchema, FileModel from crc.models.workflow import WorkflowSpecModel from crc.services.file_service import FileService -def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None): - if all(v is None for v in [workflow_spec_id, study_id, workflow_id, task_id, form_field_key]): +def get_files(workflow_spec_id=None, workflow_id=None, form_field_key=None): + if all(v is None for v in [workflow_spec_id, workflow_id, form_field_key]): raise ApiError('missing_parameter', - 'Please specify at least one of workflow_spec_id, study_id, ' - 'workflow_id, and task_id for this file in the HTTP parameters') + 'Please specify either a workflow_spec_id or a ' + 'workflow_id with an optional form_field_key') - results = FileService.get_files(workflow_spec_id, study_id, workflow_id, task_id, form_field_key) + results = FileService.get_files(workflow_spec_id=workflow_spec_id, + workflow_id=workflow_id, + irb_doc_code=form_field_key) return FileModelSchema(many=True).dump(results) @@ -25,25 +27,21 @@ def get_reference_files(): return FileModelSchema(many=True).dump(results) -def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None): - all_none = all(v is None for v in [workflow_spec_id, study_id, workflow_id, task_id, form_field_key]) - missing_some = (workflow_spec_id is None) and (None in [study_id, workflow_id, form_field_key]) - if all_none or missing_some: - raise ApiError('missing_parameter', - 'Please specify either a workflow_spec_id or all 3 of study_id, ' - 'workflow_id, and field_id for this file in the HTTP parameters') - if 'file' not in connexion.request.files: - raise ApiError('invalid_file', - 'Expected a file named "file" in the multipart form request') - +def add_file(workflow_spec_id=None, workflow_id=None, form_field_key=None): file = connexion.request.files['file'] - if workflow_spec_id: + if workflow_id: + if form_field_key is None: + raise ApiError('invalid_workflow_file', + 'When adding a workflow related file, you must specify a 
form_field_key') + file_model = FileService.add_workflow_file(workflow_id=workflow_id, irb_doc_code=form_field_key, + name=file.filename, content_type=file.content_type, + binary_data=file.stream.read()) + elif workflow_spec_id: workflow_spec = session.query(WorkflowSpecModel).filter_by(id=workflow_spec_id).first() file_model = FileService.add_workflow_spec_file(workflow_spec, file.filename, file.content_type, file.stream.read()) else: - file_model = FileService.add_form_field_file(study_id, workflow_id, task_id, form_field_key, file.filename, - file.content_type, file.stream.read()) + raise ApiError("invalid_file", "You must supply either a workflow spec id or a workflow_id and form_field_key.") return FileModelSchema().dump(file_model) diff --git a/crc/api/study.py b/crc/api/study.py index 34ca4a3e..423f6fe2 100644 --- a/crc/api/study.py +++ b/crc/api/study.py @@ -6,7 +6,7 @@ from sqlalchemy.exc import IntegrityError from crc import session from crc.api.common import ApiError, ApiErrorSchema from crc.models.protocol_builder import ProtocolBuilderStatus -from crc.models.study import StudySchema, StudyFilesSchema, StudyModel, Study +from crc.models.study import StudySchema, StudyModel, Study from crc.services.study_service import StudyService @@ -65,7 +65,7 @@ def delete_study(study_id): raise ApiError(code="study_integrity_error", message=message) -def all_studies(): +def user_studies(): """Returns all the studies associated with the current user. """ StudyService.synch_with_protocol_builder_if_enabled(g.user) studies = StudyService.get_studies_for_user(g.user) @@ -73,8 +73,8 @@ def all_studies(): return results -def all_studies_and_files(): - """Returns all studies with submitted files""" - studies = StudyService.get_studies_with_files() - results = StudyFilesSchema(many=True).dump(studies) +def all_studies(): + """Returns all studies (regardless of user) with submitted files""" + studies = StudyService.get_all_studies_with_files() + results = StudySchema(many=True).dump(studies) return results diff --git a/crc/models/file.py b/crc/models/file.py index a96583b8..c351d2a7 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -78,10 +78,7 @@ class FileModel(db.Model): primary_process_id = db.Column(db.String, nullable=True) # An id in the xml of BPMN documents, critical for primary BPMN. workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True) workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True) - study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True) - task_id = db.Column(db.String, nullable=True) irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the irb_documents.xlsx reference file. 
- form_field_key = db.Column(db.String, nullable=True) latest_version = db.Column(db.Integer, default=0) diff --git a/crc/models/study.py b/crc/models/study.py index 2af9aeb7..283196e2 100644 --- a/crc/models/study.py +++ b/crc/models/study.py @@ -40,10 +40,6 @@ class StudyModel(db.Model): if self.on_hold: self.protocol_builder_status = ProtocolBuilderStatus.HOLD - def files(self): - _files = FileModel.query.filter_by(workflow_id=self.workflow[0].id) - return _files - class WorkflowMetadata(object): def __init__(self, id, name, display_name, description, spec_version, category_id, state: WorkflowState, status: WorkflowStatus, @@ -122,7 +118,7 @@ class Study(object): self.ind_number = ind_number self.categories = categories self.warnings = [] - + self.files = [] @classmethod def from_model(cls, study_model: StudyModel): @@ -153,6 +149,7 @@ class StudySchema(ma.Schema): hsr_number = fields.String(allow_none=True) sponsor = fields.String(allow_none=True) ind_number = fields.String(allow_none=True) + files = fields.List(fields.Nested(SimpleFileSchema), dump_only=True) class Meta: model = Study @@ -165,14 +162,3 @@ class StudySchema(ma.Schema): """Can load the basic study data for updates to the database, but categories are write only""" return Study(**data) - -class StudyFilesSchema(ma.Schema): - - files = fields.Method('_files') - - class Meta: - model = Study - additional = ["id", "title", "last_updated", "primary_investigator_id"] - - def _files(self, obj): - return [file.name for file in obj.files()] diff --git a/crc/scripts/complete_template.py b/crc/scripts/complete_template.py index 46daba6b..2acc1c4a 100644 --- a/crc/scripts/complete_template.py +++ b/crc/scripts/complete_template.py @@ -36,14 +36,11 @@ Takes two arguments: final_document_stream = self.process_template(task, study_id, workflow, *args, **kwargs) file_name = args[0] irb_doc_code = args[1] - FileService.add_task_file(study_id=study_id, - workflow_id=workflow_id, - workflow_spec_id=workflow.workflow_spec_id, - task_id=task.id, - name=file_name, - content_type=CONTENT_TYPES['docx'], - binary_data=final_document_stream.read(), - irb_doc_code=irb_doc_code) + FileService.add_workflow_file(workflow_id=workflow_id, + name=file_name, + content_type=CONTENT_TYPES['docx'], + binary_data=final_document_stream.read(), + irb_doc_code=irb_doc_code) def process_template(self, task, study_id, workflow=None, *args, **kwargs): """Entry point, mostly worried about wiring it all up.""" diff --git a/crc/services/file_service.py b/crc/services/file_service.py index 4e249675..493f33ec 100644 --- a/crc/services/file_service.py +++ b/crc/services/file_service.py @@ -10,7 +10,7 @@ from pandas import ExcelFile from crc import session from crc.api.common import ApiError from crc.models.file import FileType, FileDataModel, FileModel, LookupFileModel, LookupDataModel -from crc.models.workflow import WorkflowSpecModel +from crc.models.workflow import WorkflowSpecModel, WorkflowModel from crc.services.workflow_processor import WorkflowProcessor @@ -40,31 +40,27 @@ class FileService(object): return code in df['code'].values @staticmethod - def add_form_field_file(study_id, workflow_id, task_id, form_field_key, name, content_type, binary_data): - """Create a new file and associate it with a user task form field within a workflow. 
-        Please note that the form_field_key MUST be a known file in the irb_documents.xslx reference document."""
-        if not FileService.is_allowed_document(form_field_key):
+    def add_workflow_file(workflow_id, irb_doc_code, name, content_type, binary_data):
+        """Create a new file and associate it with the workflow.
+        Please note that the irb_doc_code MUST be a known file in the irb_documents.xlsx reference document."""
+        if not FileService.is_allowed_document(irb_doc_code):
             raise ApiError("invalid_form_field_key",
                            "When uploading files, the form field id must match a known document in the "
-                           "irb_docunents.xslx reference file. This code is not found in that file '%s'" % form_field_key)
+                           "irb_documents.xlsx reference file. This code is not found in that file '%s'" % irb_doc_code)

         """Assure this is unique to the workflow, task, and document code AND the name,
            because we will allow users to upload multiple files for the same form field
            in some cases."""
         file_model = session.query(FileModel)\
             .filter(FileModel.workflow_id == workflow_id)\
-            .filter(FileModel.task_id == str(task_id))\
             .filter(FileModel.name == name)\
-            .filter(FileModel.irb_doc_code == form_field_key).first()
+            .filter(FileModel.irb_doc_code == irb_doc_code).first()

         if not file_model:
             file_model = FileModel(
-                study_id=study_id,
                 workflow_id=workflow_id,
-                task_id=task_id,
                 name=name,
-                form_field_key=form_field_key,
-                irb_doc_code=form_field_key
+                irb_doc_code=irb_doc_code
             )
         return FileService.update_file(file_model, binary_data, content_type)

@@ -85,28 +81,6 @@ class FileService(object):
             df = df.set_index(index_column)
             return json.loads(df.to_json(orient='index'))

-    @staticmethod
-    def add_task_file(study_id, workflow_id, workflow_spec_id, task_id, name, content_type, binary_data,
-                      irb_doc_code=None):
-
-        """Assure this is unique to the workflow, task, and document code.
Disregard name.""" - file_model = session.query(FileModel)\ - .filter(FileModel.workflow_id == workflow_id)\ - .filter(FileModel.task_id == str(task_id))\ - .filter(FileModel.irb_doc_code == irb_doc_code).first() - - if not file_model: - """Create a new file and associate it with an executing task within a workflow.""" - file_model = FileModel( - study_id=study_id, - workflow_id=workflow_id, - workflow_spec_id=workflow_spec_id, - task_id=task_id, - name=name, - irb_doc_code=irb_doc_code - ) - return FileService.update_file(file_model, binary_data, content_type) - @staticmethod def get_workflow_files(workflow_id): """Returns all the file models associated with a running workflow.""" @@ -179,32 +153,29 @@ class FileService(object): return file_model @staticmethod - def get_files(workflow_spec_id=None, - study_id=None, workflow_id=None, task_id=None, form_field_key=None, + def get_files_for_study(study_id, irb_doc_code=None): + query = session.query(FileModel).\ + join(WorkflowModel).\ + filter(WorkflowModel.study_id == study_id) + if irb_doc_code: + query = query.filter(FileModel.irb_doc_code == irb_doc_code) + return query.all() + + @staticmethod + def get_files(workflow_spec_id=None, workflow_id=None, name=None, is_reference=False, irb_doc_code=None): query = session.query(FileModel).filter_by(is_reference=is_reference) if workflow_spec_id: query = query.filter_by(workflow_spec_id=workflow_spec_id) - if all(v is None for v in [study_id, workflow_id, task_id, form_field_key]): - query = query.filter_by( - study_id=None, - workflow_id=None, - task_id=None, - form_field_key=None, - ) - else: - if study_id: - query = query.filter_by(study_id=study_id) - if workflow_id: - query = query.filter_by(workflow_id=workflow_id) - if task_id: - query = query.filter_by(task_id=str(task_id)) - if form_field_key: - query = query.filter_by(form_field_key=form_field_key) - if name: - query = query.filter_by(name=name) + elif workflow_id: + query = query.filter_by(workflow_id=workflow_id) if irb_doc_code: query = query.filter_by(irb_doc_code=irb_doc_code) + elif is_reference: + query = query.filter_by(is_reference=True) + + if name: + query = query.filter_by(name=name) results = query.all() return results diff --git a/crc/services/study_service.py b/crc/services/study_service.py index db5c2b72..68f760e1 100644 --- a/crc/services/study_service.py +++ b/crc/services/study_service.py @@ -33,10 +33,15 @@ class StudyService(object): return studies @staticmethod - def get_studies_with_files(): + def get_all_studies_with_files(): """Returns a list of all studies""" db_studies = session.query(StudyModel).all() - return db_studies + studies = [] + for s in db_studies: + study = Study.from_model(s) + study.files = FileService.get_files_for_study(study.id) + studies.append(study) + return studies @staticmethod def get_study(study_id, study_model: StudyModel = None): @@ -48,6 +53,7 @@ class StudyService(object): study = Study.from_model(study_model) study.categories = StudyService.get_categories() workflow_metas = StudyService.__get_workflow_metas(study_id) + study.files = FileService.get_files_for_study(study.id) # Calling this line repeatedly is very very slow. It creates the # master spec and runs it. 
@@ -150,17 +156,15 @@ class StudyService(object):
             doc['display_name'] = ' / '.join(name_list)

             # For each file, get associated workflow status
-            doc_files = FileService.get_files(study_id=study_id, irb_doc_code=code)
+            doc_files = FileService.get_files_for_study(study_id=study_id, irb_doc_code=code)
             doc['count'] = len(doc_files)
             doc['files'] = []
             for file in doc_files:
                 doc['files'].append({'file_id': file.id,
-                                     'task_id': file.task_id,
-                                     'workflow_id': file.workflow_id,
-                                     'workflow_spec_id': file.workflow_spec_id})
+                                     'workflow_id': file.workflow_id})

                 # update the document status to match the status of the workflow it is in.
-                if not 'status' in doc or doc['status'] is None:
+                if 'status' not in doc or doc['status'] is None:
                     workflow: WorkflowModel = session.query(WorkflowModel).filter_by(id=file.workflow_id).first()
                     doc['status'] = workflow.status.value

diff --git a/tests/data/docx/docx.bpmn b/tests/data/docx/docx.bpmn
index 1c9d766a..a95feb07 100644
--- a/tests/data/docx/docx.bpmn
+++ b/tests/data/docx/docx.bpmn
@@ -1,5 +1,5 @@
 [BPMN XML markup lost in extraction: this hunk changes the opening
  bpmn:definitions element of the document.]
     SequenceFlow_0637d8i
@@ -27,7 +27,7 @@
     SequenceFlow_1i7hk1a
     SequenceFlow_11c35oq
-    CompleteTemplate Letter.docx AncillaryDocument.CoCApplication
+    CompleteTemplate Letter.docx AD_CoCApp
     SequenceFlow_11c35oq
@@ -36,30 +36,30 @@
 [BPMN XML markup lost in extraction: the remaining hunk only rearranges the
  bpmndi diagram layout elements.]
diff --git a/tests/test_approvals_service.py b/tests/test_approvals_service.py
index dcc557e9..0ae37941 100644
--- a/tests/test_approvals_service.py
+++ b/tests/test_approvals_service.py
@@ -37,11 +37,9 @@ class TestApprovalsService(BaseTest):
         ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r")

         irb_code_1 = "UVACompl_PRCAppr"  # The first file referenced in pb required docs.
-        FileService.add_task_file(study_id=workflow.study_id, workflow_id=workflow.id,
-                                  workflow_spec_id=workflow.workflow_spec_id,
-                                  task_id=task.id,
-                                  name="anything.png", content_type="text",
-                                  binary_data=b'5678', irb_doc_code=irb_code_1)
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'5678', irb_doc_code=irb_code_1)

         ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r")
         self.assertEquals(2, db.session.query(ApprovalModel).count())
@@ -59,22 +57,16 @@ class TestApprovalsService(BaseTest):

         irb_code_1 = "UVACompl_PRCAppr"  # The first file referenced in pb required docs.
         irb_code_2 = "NonUVAIRB_AssuranceForm"  # The second file in above.

         # Add a task file to the workflow.
-        FileService.add_task_file(study_id=workflow.study_id, workflow_id=workflow.id,
-                                  workflow_spec_id=workflow.workflow_spec_id,
-                                  task_id=task.id,
-                                  name="anything.png", content_type="text",
-                                  binary_data=b'5678', irb_doc_code=irb_code_1)
-        # Add a two form field files with the same irb_code, but
-        FileService.add_form_field_file(study_id=workflow.study_id, workflow_id=workflow.id,
-                                        task_id=task.id,
-                                        form_field_key=irb_code_2,
-                                        name="anything.png", content_type="text",
-                                        binary_data=b'1234')
-        FileService.add_form_field_file(study_id=workflow.study_id, workflow_id=workflow.id,
-                                        form_field_key=irb_code_2,
-                                        task_id=task.id,
-                                        name="another_anything.png", content_type="text",
-                                        binary_data=b'5678')
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'5678', irb_doc_code=irb_code_1)
+        # Add two form field files with the same irb_code, but different names
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'1234', irb_doc_code=irb_code_2)
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="another_anything.png", content_type="text",
+                                      binary_data=b'5678', irb_doc_code=irb_code_2)

         # Workflow hash should look like id[1]-id[1]-id[1]
@@ -85,10 +77,9 @@ class TestApprovalsService(BaseTest):

         # Replace last file
         # should now be id[1]-id[1]-id[2]
-        FileService.add_form_field_file(study_id=workflow.study_id, workflow_id=workflow.id,
-                                        form_field_key=irb_code_2,
-                                        task_id=task.id,
-                                        name="another_anything.png", content_type="text",
-                                        binary_data=b'9999')
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      irb_doc_code=irb_code_2,
+                                      name="another_anything.png", content_type="text",
+                                      binary_data=b'9999')

         self.assertRegexpMatches(ApprovalService._generate_workflow_hash(latest_files), "\d+\[1\]-\d+\[1\]-\d+\[2\]")

diff --git a/tests/test_file_service.py b/tests/test_file_service.py
index 29026117..0b5ae5cf 100644
--- a/tests/test_file_service.py
+++ b/tests/test_file_service.py
@@ -13,17 +13,13 @@ class TestFileService(BaseTest):
         processor = WorkflowProcessor(workflow)
         task = processor.next_task()
         irb_code = "UVACompl_PRCAppr"  # The first file referenced in pb required docs.
-        FileService.add_task_file(study_id=workflow.study_id, workflow_id=workflow.id,
-                                  workflow_spec_id=workflow.workflow_spec_id,
-                                  task_id=task.id,
-                                  name="anything.png", content_type="text",
-                                  binary_data=b'1234', irb_doc_code=irb_code)
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'1234', irb_doc_code=irb_code)
         # Add the file again with different data
-        FileService.add_task_file(study_id=workflow.study_id, workflow_id=workflow.id,
-                                  workflow_spec_id=workflow.workflow_spec_id,
-                                  task_id=task.id,
-                                  name="anything.png", content_type="text",
-                                  binary_data=b'5678', irb_doc_code=irb_code)
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'5678', irb_doc_code=irb_code)

         file_models = FileService.get_workflow_files(workflow_id=workflow.id)
         self.assertEquals(1, len(file_models))
@@ -36,17 +32,15 @@ class TestFileService(BaseTest):
         processor = WorkflowProcessor(workflow)
         task = processor.next_task()
         irb_code = "UVACompl_PRCAppr"  # The first file referenced in pb required docs.
- FileService.add_form_field_file(study_id=workflow.study_id, workflow_id=workflow.id, - task_id=task.id, - form_field_key=irb_code, - name="anything.png", content_type="text", - binary_data=b'1234') + FileService.add_workflow_file(workflow_id=workflow.id, + irb_doc_code=irb_code, + name="anything.png", content_type="text", + binary_data=b'1234') # Add the file again with different data - FileService.add_form_field_file(study_id=workflow.study_id, workflow_id=workflow.id, - form_field_key=irb_code, - task_id=task.id, - name="anything.png", content_type="text", - binary_data=b'5678') + FileService.add_workflow_file(workflow_id=workflow.id, + irb_doc_code=irb_code, + name="anything.png", content_type="text", + binary_data=b'5678') file_models = FileService.get_workflow_files(workflow_id=workflow.id) self.assertEquals(1, len(file_models)) @@ -59,17 +53,15 @@ class TestFileService(BaseTest): processor = WorkflowProcessor(workflow) task = processor.next_task() irb_code = "UVACompl_PRCAppr" # The first file referenced in pb required docs. - FileService.add_form_field_file(study_id=workflow.study_id, workflow_id=workflow.id, - task_id=task.id, - form_field_key=irb_code, - name="anything.png", content_type="text", - binary_data=b'1234') + FileService.add_workflow_file(workflow_id=workflow.id, + irb_doc_code=irb_code, + name="anything.png", content_type="text", + binary_data=b'1234') # Add the file again with different data - FileService.add_form_field_file(study_id=workflow.study_id, workflow_id=workflow.id, - form_field_key=irb_code, - task_id=task.id, - name="a_different_thing.png", content_type="text", - binary_data=b'5678') + FileService.add_workflow_file(workflow_id=workflow.id, + irb_doc_code=irb_code, + name="a_different_thing.png", content_type="text", + binary_data=b'5678') file_models = FileService.get_workflow_files(workflow_id=workflow.id) self.assertEquals(2, len(file_models)) self.assertEquals(1, file_models[0].latest_version) diff --git a/tests/test_files_api.py b/tests/test_files_api.py index d7e31f80..79f8dbf2 100644 --- a/tests/test_files_api.py +++ b/tests/test_files_api.py @@ -1,7 +1,7 @@ import io import json -from datetime import datetime -from unittest.mock import patch + +from tests.base_test import BaseTest from crc import session from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel @@ -9,7 +9,6 @@ from crc.models.workflow import WorkflowSpecModel from crc.services.file_service import FileService from crc.services.workflow_processor import WorkflowProcessor from example_data import ExampleDataLoader -from tests.base_test import BaseTest class TestFilesApi(BaseTest): diff --git a/tests/test_study_service.py b/tests/test_study_service.py index d7e522da..aa77cc24 100644 --- a/tests/test_study_service.py +++ b/tests/test_study_service.py @@ -2,6 +2,8 @@ import json from datetime import datetime from unittest.mock import patch +from tests.base_test import BaseTest + from crc import db, app from crc.models.protocol_builder import ProtocolBuilderStatus from crc.models.study import StudyModel @@ -12,7 +14,6 @@ from crc.services.file_service import FileService from crc.services.study_service import StudyService from crc.services.workflow_processor import WorkflowProcessor from example_data import ExampleDataLoader -from tests.base_test import BaseTest class TestStudyService(BaseTest): @@ -143,11 +144,9 @@ class TestStudyService(BaseTest): # Add a document to the study with the correct code. 
workflow = self.create_workflow('docx') irb_code = "UVACompl_PRCAppr" # The first file referenced in pb required docs. - FileService.add_task_file(study_id=workflow.study_id, workflow_id=workflow.id, - workflow_spec_id=workflow.workflow_spec_id, - task_id="fakingthisout", - name="anything.png", content_type="text", - binary_data=b'1234', irb_doc_code=irb_code) + FileService.add_workflow_file(workflow_id=workflow.id, + name="anything.png", content_type="text", + binary_data=b'1234', irb_doc_code=irb_code) docs = StudyService().get_documents_status(workflow.study_id) self.assertIsNotNone(docs) @@ -156,13 +155,31 @@ class TestStudyService(BaseTest): self.assertIsNotNone(docs["UVACompl_PRCAppr"]['files'][0]) self.assertIsNotNone(docs["UVACompl_PRCAppr"]['files'][0]['file_id']) self.assertEquals(workflow.id, docs["UVACompl_PRCAppr"]['files'][0]['workflow_id']) - self.assertEquals(workflow.workflow_spec_id, docs["UVACompl_PRCAppr"]['files'][0]['workflow_spec_id']) - # 'file_id': 123, - # 'task_id': 'abcdef14236890', - # 'workflow_id': 456, - # 'workflow_spec_id': 'irb_api_details', - # 'status': 'complete', + def test_get_all_studies(self): + user = self.create_user_with_study_and_workflow() + + # Add a document to the study with the correct code. + workflow1 = self.create_workflow('docx') + workflow2 = self.create_workflow('empty_workflow') + + # Add files to both workflows. + FileService.add_workflow_file(workflow_id=workflow1.id, + name="anything.png", content_type="text", + binary_data=b'1234', irb_doc_code="UVACompl_PRCAppr" ) + FileService.add_workflow_file(workflow_id=workflow1.id, + name="anything.png", content_type="text", + binary_data=b'1234', irb_doc_code="AD_Consent_Model") + FileService.add_workflow_file(workflow_id=workflow2.id, + name="anything.png", content_type="text", + binary_data=b'1234', irb_doc_code="UVACompl_PRCAppr" ) + + studies = StudyService().get_all_studies_with_files() + self.assertEquals(1, len(studies)) + self.assertEquals(3, len(studies[0].files)) + + + @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') # mock_docs def test_get_personnel(self, mock_docs): diff --git a/tests/test_workflow_processor.py b/tests/test_workflow_processor.py index e19bbc87..fe182e28 100644 --- a/tests/test_workflow_processor.py +++ b/tests/test_workflow_processor.py @@ -223,10 +223,10 @@ class TestWorkflowProcessor(BaseTest): self._populate_form_with_random_data(task) processor.complete_task(task) - files = session.query(FileModel).filter_by(study_id=study.id, workflow_id=processor.get_workflow_id()).all() + files = session.query(FileModel).filter_by(workflow_id=processor.get_workflow_id()).all() self.assertEqual(0, len(files)) processor.do_engine_steps() - files = session.query(FileModel).filter_by(study_id=study.id, workflow_id=processor.get_workflow_id()).all() + files = session.query(FileModel).filter_by(workflow_id=processor.get_workflow_id()).all() self.assertEqual(1, len(files), "The task should create a new file.") file_data = session.query(FileDataModel).filter(FileDataModel.file_model_id == files[0].id).first() self.assertIsNotNone(file_data.data) From 8f41dfa95f286daf14c0f70de3675d4223a9350e Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Thu, 28 May 2020 10:31:20 -0400 Subject: [PATCH 2/7] forgot the migration. 
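As promised above, here is a usage sketch of the /files endpoints described
in the first patch. It is illustrative only: the base URL, the workflow ids,
the document code, and the file name are assumptions, and authentication is
omitted entirely.

    import requests

    BASE = "http://localhost:5000/v1.0"  # assumed local dev server

    # All files attached to a workflow specification.
    spec_files = requests.get(BASE + "/files",
                              params={"workflow_spec_id": "docx"}).json()

    # All files attached to a running workflow.
    workflow_files = requests.get(BASE + "/files",
                                  params={"workflow_id": 42}).json()

    # Add a file to a form field on a running workflow. The form_field_key
    # must match an irb_doc_code in the irb_documents.xlsx reference file;
    # posting the same file name again archives the old file and replaces it.
    with open("consent.pdf", "rb") as fh:
        response = requests.post(BASE + "/files",
                                 params={"workflow_id": 42,
                                         "form_field_key": "UVACompl_PRCAppr"},
                                 files={"file": ("consent.pdf", fh, "application/pdf")})
    response.raise_for_status()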
---
 migrations/versions/23c62c933848_.py | 34 ++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 migrations/versions/23c62c933848_.py

diff --git a/migrations/versions/23c62c933848_.py b/migrations/versions/23c62c933848_.py
new file mode 100644
index 00000000..616808a9
--- /dev/null
+++ b/migrations/versions/23c62c933848_.py
@@ -0,0 +1,34 @@
+"""empty message
+
+Revision ID: 23c62c933848
+Revises: 9b43e725f39c
+Create Date: 2020-05-28 10:30:49.409760
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '23c62c933848'
+down_revision = '9b43e725f39c'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_constraint('file_study_id_fkey', 'file', type_='foreignkey')
+    op.drop_column('file', 'task_id')
+    op.drop_column('file', 'study_id')
+    op.drop_column('file', 'form_field_key')
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('file', sa.Column('form_field_key', sa.VARCHAR(), autoincrement=False, nullable=True))
+    op.add_column('file', sa.Column('study_id', sa.INTEGER(), autoincrement=False, nullable=True))
+    op.add_column('file', sa.Column('task_id', sa.VARCHAR(), autoincrement=False, nullable=True))
+    op.create_foreign_key('file_study_id_fkey', 'file', 'study', ['study_id'], ['id'])
+    # ### end Alembic commands ###

From dba41f4759a5ac7c44185762990f4c87fa43d90e Mon Sep 17 00:00:00 2001
From: Dan Funk
Date: Thu, 28 May 2020 20:03:50 -0400
Subject: [PATCH 3/7] Ludicrously stupid launch into a refactor of the way all
 files work in the system, at a time when I crave sleep and peace above all
 other things.

Added a File class that we wrap around the FileModel, so the api endpoints
don't change, but File no longer holds references to versions or dates of
the file_data model; we figure this out based on a clean database structure.

The ApprovalFile is directly related to the file_data_model, so there is no
chance that a reviewer would review the incorrect version of a file.

Noticed that our FileType enum spelled "bpmn" as "bpmm"; hope this doesn't
screw someone up.

Workflows are directly related to the data_models that make up the workflow
spec they need, so the files should always be there. There are no more
hashes, and thus no more hash errors where we can't find the files to
rebuild the workflow.

Not much to report here, other than I broke every single test in the system
at one point. So I'm super concerned about this, and will be testing it a
lot before creating the pull request.
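Before the diff, a sketch of the idea at the center of this refactor: an
approval request is now pinned to the exact file_data records it covered,
so "did anything change since the last request?" reduces to comparing two
lists of file_data ids. The helper below is illustrative only, a
restatement of the logic that lives in ApprovalService.add_approval in the
diff; it is not part of the patch.

    def files_changed_since_last_approval(latest_approval, workflow_data_files):
        """Compare the file_data ids pinned to the previous approval request
        against the ids of the workflow's current data files."""
        request_ids = sorted(f.file_data_id for f in latest_approval.approval_files)
        current_ids = sorted(d.id for d in workflow_data_files)
        return request_ids != current_ids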
---
 crc/api/file.py                       |  33 ++--
 crc/api/workflow.py                   |   4 +-
 crc/models/approval.py                |  17 +--
 crc/models/file.py                    |  42 ++++-
 crc/models/study.py                   |   2 +-
 crc/models/workflow.py                |  17 ++-
 crc/scripts/complete_template.py      |  14 +-
 crc/services/approval_service.py      |  38 ++---
 crc/services/file_service.py          | 101 +++++++---
 crc/services/study_service.py         |   2 +
 crc/services/workflow_processor.py    | 211 ++++++++++--------
 crc/services/workflow_service.py      |   8 +-
 tests/test_approvals_service.py       |  54 ++-----
 tests/test_file_service.py            |  13 +-
 tests/test_files_api.py               |  22 ++-
 tests/test_request_approval_script.py |   6 +-
 tests/test_study_service.py           |   1 -
 tests/test_tasks_api.py               |   1 -
 tests/test_workflow_processor.py      |  12 +-
 19 files changed, 329 insertions(+), 269 deletions(-)

diff --git a/crc/api/file.py b/crc/api/file.py
index 733dcd34..07ced388 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -1,30 +1,39 @@
 import io
+from typing import List

 import connexion
 from flask import send_file

 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileModelSchema, FileModel
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService


+def to_file_api(file_model):
+    """Converts a FileModel object to something we can return via the api"""
+    return File.from_models(file_model, FileService.get_file_data(file_model.id))
+
+
 def get_files(workflow_spec_id=None, workflow_id=None, form_field_key=None):
     if all(v is None for v in [workflow_spec_id, workflow_id, form_field_key]):
         raise ApiError('missing_parameter',
                        'Please specify either a workflow_spec_id or a '
                        'workflow_id with an optional form_field_key')

-    results = FileService.get_files(workflow_spec_id=workflow_spec_id,
-                                    workflow_id=workflow_id,
-                                    irb_doc_code=form_field_key)
-    return FileModelSchema(many=True).dump(results)
+    file_models = FileService.get_files(workflow_spec_id=workflow_spec_id,
+                                        workflow_id=workflow_id,
+                                        irb_doc_code=form_field_key)
+
+    files = (to_file_api(model) for model in file_models)
+    return FileSchema(many=True).dump(files)


 def get_reference_files():
     results = FileService.get_files(is_reference=True)
-    return FileModelSchema(many=True).dump(results)
+    files = (to_file_api(model) for model in results)
+    return FileSchema(many=True).dump(files)


 def add_file(workflow_spec_id=None, workflow_id=None, form_field_key=None):
@@ -43,7 +52,7 @@ def add_file(workflow_spec_id=None, workflow_id=None, form_field_key=None):
     else:
         raise ApiError("invalid_file", "You must supply either a workflow spec id or a workflow_id and form_field_key.")

-    return FileModelSchema().dump(file_model)
+    return FileSchema().dump(to_file_api(file_model))


 def get_reference_file(name):
@@ -78,7 +87,7 @@ def set_reference_file(name):
         file_model = file_models[0]
         FileService.update_file(file_models[0], file.stream.read(), file.content_type)

-    return FileModelSchema().dump(file_model)
+    return FileSchema().dump(to_file_api(file_model))


 def update_file_data(file_id):
@@ -87,7 +96,7 @@ def update_file_data(file_id):
     if file_model is None:
         raise ApiError('no_such_file', 'The file id you provided does not exist')
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
-    return FileModelSchema().dump(file_model)
+    return FileSchema().dump(to_file_api(file_model))


 def get_file_data(file_id, version=None):
@@ -99,7 +108,7 @@ def get_file_data(file_id, version=None):
attachment_filename=file_data.file_model.name, mimetype=file_data.file_model.content_type, cache_timeout=-1, # Don't cache these files on the browser. - last_modified=file_data.last_updated + last_modified=file_data.date_created ) @@ -107,7 +116,7 @@ def get_file_info(file_id): file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first() if file_model is None: raise ApiError('no_such_file', 'The file id you provided does not exist', status_code=404) - return FileModelSchema().dump(file_model) + return FileSchema().dump(to_file_api(file_model)) def update_file_info(file_id, body): @@ -122,7 +131,7 @@ def update_file_info(file_id, body): file_model = FileModelSchema().load(body, session=session) session.add(file_model) session.commit() - return FileModelSchema().dump(file_model) + return FileSchema().dump(to_file_api(file_model)) def delete_file(file_id): diff --git a/crc/api/workflow.py b/crc/api/workflow.py index f67cc12f..9154652a 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -118,8 +118,8 @@ def __get_workflow_api_model(processor: WorkflowProcessor, next_task = None): next_task=None, navigation=navigation, workflow_spec_id=processor.workflow_spec_id, - spec_version=processor.get_spec_version(), - is_latest_spec=processor.get_spec_version() == processor.get_latest_version_string(processor.workflow_spec_id), + spec_version=processor.get_version_string(), + is_latest_spec=processor.is_latest_spec, total_tasks=processor.workflow_model.total_tasks, completed_tasks=processor.workflow_model.completed_tasks, last_updated=processor.workflow_model.last_updated diff --git a/crc/models/approval.py b/crc/models/approval.py index 8591f3df..7fad2a43 100644 --- a/crc/models/approval.py +++ b/crc/models/approval.py @@ -4,7 +4,7 @@ from marshmallow import INCLUDE from sqlalchemy import func from crc import db, ma -from crc.models.file import FileModel +from crc.models.file import FileModel, FileDataModel from crc.models.study import StudyModel from crc.models.workflow import WorkflowModel @@ -17,13 +17,11 @@ class ApprovalStatus(enum.Enum): class ApprovalFile(db.Model): - id = db.Column(db.Integer, primary_key=True) - file_id = db.Column(db.Integer, db.ForeignKey(FileModel.id), nullable=False) - approval_id = db.Column(db.Integer, db.ForeignKey("approval.id"), nullable=False) - file_version = db.Column(db.Integer, nullable=False) + file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True) + approval_id = db.Column(db.Integer, db.ForeignKey("approval.id"), primary_key=True) approval = db.relationship("ApprovalModel") - file = db.relationship(FileModel) + file_data = db.relationship(FileDataModel) class ApprovalModel(db.Model): @@ -38,9 +36,9 @@ class ApprovalModel(db.Model): message = db.Column(db.String) date_created = db.Column(db.DateTime(timezone=True), default=func.now()) version = db.Column(db.Integer) # Incremented integer, so 1,2,3 as requests are made. - workflow_hash = db.Column(db.String) # A hash of the workflow at the moment the approval was created. 
- - approval_files = db.relationship(ApprovalFile, back_populates="approval") + approval_files = db.relationship(ApprovalFile, back_populates="approval", + cascade="all, delete, delete-orphan", + order_by=ApprovalFile.file_data_id) class Approval(object): @@ -57,7 +55,6 @@ class Approval(object): instance.message = model.message instance.date_created = model.date_created instance.version = model.version - instance.workflow_hash = model.workflow_hash instance.title = '' if model.study: instance.title = model.study.title diff --git a/crc/models/file.py b/crc/models/file.py index c351d2a7..e779f52a 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -1,6 +1,7 @@ import enum from typing import cast +from marshmallow import INCLUDE, EXCLUDE from marshmallow_enum import EnumField from marshmallow_sqlalchemy import SQLAlchemyAutoSchema from sqlalchemy import func, Index @@ -10,7 +11,7 @@ from crc import db, ma class FileType(enum.Enum): - bpmn = "bpmm" + bpmn = "bpmn" csv = 'csv' dmn = "dmn" doc = "doc" @@ -55,15 +56,16 @@ CONTENT_TYPES = { "zip": "application/zip" } + class FileDataModel(db.Model): __tablename__ = 'file_data' id = db.Column(db.Integer, primary_key=True) md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False) data = db.Column(db.LargeBinary) version = db.Column(db.Integer, default=0) - last_updated = db.Column(db.DateTime(timezone=True), default=func.now()) + date_created = db.Column(db.DateTime(timezone=True), default=func.now()) file_model_id = db.Column(db.Integer, db.ForeignKey('file.id')) - file_model = db.relationship("FileModel") + file_model = db.relationship("FileModel", foreign_keys=[file_model_id]) class FileModel(db.Model): @@ -79,9 +81,30 @@ class FileModel(db.Model): workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True) workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True) irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the irb_documents.xlsx reference file. 
-    latest_version = db.Column(db.Integer, default=0)
+
+class File(object):
+    @classmethod
+    def from_models(cls, model: FileModel, data_model: FileDataModel):
+        instance = cls()
+        instance.id = model.id
+        instance.name = model.name
+        instance.is_status = model.is_status
+        instance.is_reference = model.is_reference
+        instance.content_type = model.content_type
+        instance.primary = model.primary
+        instance.primary_process_id = model.primary_process_id
+        instance.workflow_spec_id = model.workflow_spec_id
+        instance.workflow_id = model.workflow_id
+        instance.irb_doc_code = model.irb_doc_code
+        instance.type = model.type
+        if data_model:
+            instance.last_modified = data_model.date_created
+            instance.latest_version = data_model.version
+        else:
+            instance.last_modified = None
+            instance.latest_version = None
+        return instance

 class FileModelSchema(SQLAlchemyAutoSchema):
     class Meta:
@@ -89,6 +112,17 @@ class FileModelSchema(SQLAlchemyAutoSchema):
         load_instance = True
         include_relationships = True
         include_fk = True  # Includes foreign keys
+        unknown = EXCLUDE
     type = EnumField(FileType)
+
+
+class FileSchema(ma.Schema):
+    class Meta:
+        model = File
+        fields = ["id", "name", "is_status", "is_reference", "content_type",
+                  "primary", "primary_process_id", "workflow_spec_id", "workflow_id",
+                  "irb_doc_code", "last_modified", "latest_version", "type"]
+        unknown = INCLUDE
+    type = EnumField(FileType)

diff --git a/crc/models/study.py b/crc/models/study.py
index 283196e2..38bd2f3b 100644
--- a/crc/models/study.py
+++ b/crc/models/study.py
@@ -64,7 +64,7 @@ class WorkflowMetadata(object):
             name=workflow.workflow_spec.name,
             display_name=workflow.workflow_spec.display_name,
             description=workflow.workflow_spec.description,
-            spec_version=workflow.spec_version,
+            spec_version=workflow.spec_version(),
             category_id=workflow.workflow_spec.category_id,
             state=WorkflowState.optional,
             status=workflow.status,
diff --git a/crc/models/workflow.py b/crc/models/workflow.py
index 9029ac6b..718dfccf 100644
--- a/crc/models/workflow.py
+++ b/crc/models/workflow.py
@@ -5,6 +5,7 @@ from marshmallow import EXCLUDE
 from marshmallow_sqlalchemy import SQLAlchemyAutoSchema

 from crc import db
+from crc.models.file import FileModel, FileDataModel


 class WorkflowSpecCategoryModel(db.Model):
@@ -67,6 +68,14 @@ class WorkflowStatus(enum.Enum):
     complete = "complete"


+class WorkflowSpecDependencyFile(db.Model):
+    """Connects a workflow to the version of the specification files it depends on to execute"""
+    file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True)
+    workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True)
+
+    file_data = db.relationship(FileDataModel)
+
+
 class WorkflowModel(db.Model):
     __tablename__ = 'workflow'
     id = db.Column(db.Integer, primary_key=True)
@@ -76,7 +85,13 @@ class WorkflowModel(db.Model):
     study = db.relationship("StudyModel", backref='workflow')
     workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'))
     workflow_spec = db.relationship("WorkflowSpecModel")
-    spec_version = db.Column(db.String)
     total_tasks = db.Column(db.Integer, default=0)
     completed_tasks = db.Column(db.Integer, default=0)
     last_updated = db.Column(db.DateTime)
+    # Order By is important for generating hashes on reviews.
+    dependencies = db.relationship(WorkflowSpecDependencyFile, cascade="all, delete, delete-orphan",
+                                   order_by="WorkflowSpecDependencyFile.file_data_id")
+
+    def spec_version(self):
+        dep_ids = list(dep.file_data_id for dep in self.dependencies)
+        return "-".join(str(dep_id) for dep_id in dep_ids)
diff --git a/crc/scripts/complete_template.py b/crc/scripts/complete_template.py
index 2acc1c4a..59f63158 100644
--- a/crc/scripts/complete_template.py
+++ b/crc/scripts/complete_template.py
@@ -59,13 +59,13 @@ Takes two arguments:

         file_data_model = None
         if workflow is not None:
-            # Get the workflow's latest files
-            joined_file_data_models = WorkflowProcessor\
-                .get_file_models_for_version(workflow.workflow_spec_id, workflow.spec_version)
-
-            for joined_file_data in joined_file_data_models:
-                if joined_file_data.file_model.name == file_name:
-                    file_data_model = session.query(FileDataModel).filter_by(id=joined_file_data.id).first()
+            # Get the workflow specification file with the given name.
+            file_data_models = FileService.get_spec_data_files(
+                workflow_spec_id=workflow.workflow_spec_id,
+                workflow_id=workflow.id)
+            for file_data in file_data_models:
+                if file_data.file_model.name == file_name:
+                    file_data_model = file_data

         if workflow is None or file_data_model is None:
             file_data_model = FileService.get_workflow_file_data(task.workflow, file_name)
diff --git a/crc/services/approval_service.py b/crc/services/approval_service.py
index f247ed09..8a13e6c2 100644
--- a/crc/services/approval_service.py
+++ b/crc/services/approval_service.py
@@ -3,8 +3,10 @@ from datetime import datetime
 from sqlalchemy import desc

 from crc import db, session
+from crc.api.common import ApiError
 from crc.models.approval import ApprovalModel, ApprovalStatus, ApprovalFile
+from crc.models.workflow import WorkflowModel
 from crc.services.file_service import FileService


@@ -51,15 +53,22 @@ class ApprovalService(object):

         # Construct a hash of the latest files to see if things have changed since
         # the last approval.
-        latest_files = FileService.get_workflow_files(workflow_id)
-        current_workflow_hash = ApprovalService._generate_workflow_hash(latest_files)
+        workflow = db.session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
+        workflow_data_files = FileService.get_workflow_data_files(workflow_id)
+        current_data_file_ids = list(data_file.id for data_file in workflow_data_files)
+
+        if len(current_data_file_ids) == 0:
+            raise ApiError("invalid_workflow_approval", "You can't create an approval for a workflow that has "
+                                                        "no files to approve in it.")

         # If an existing approval request exists and no changes were made, do nothing.
         # If there is an existing approval request for a previous version of the workflow
         # then add a new request, and cancel any waiting/pending requests.
         if latest_approval_request:
-            # We could just compare the ApprovalFile lists here and do away with this hash.
-            if latest_approval_request.workflow_hash == current_workflow_hash:
+            request_file_ids = list(file.file_data_id for file in latest_approval_request.approval_files)
+            current_data_file_ids.sort()
+            request_file_ids.sort()
+            if current_data_file_ids == request_file_ids:
                 return  # This approval already exists.
else: latest_approval_request.status = ApprovalStatus.CANCELED.value @@ -71,27 +80,18 @@ class ApprovalService(object): model = ApprovalModel(study_id=study_id, workflow_id=workflow_id, approver_uid=approver_uid, status=ApprovalStatus.WAITING.value, message="", date_created=datetime.now(), - version=version, workflow_hash=current_workflow_hash) - approval_files = ApprovalService._create_approval_files(latest_files, model) + version=version) + approval_files = ApprovalService._create_approval_files(workflow_data_files, model) db.session.add(model) db.session.add_all(approval_files) db.session.commit() @staticmethod - def _create_approval_files(files, approval): + def _create_approval_files(workflow_data_files, approval): """Currently based exclusively on the status of files associated with a workflow.""" file_approval_models = [] - for file in files: - file_approval_models.append(ApprovalFile(file_id=file.id, - approval=approval, - file_version=file.latest_version)) + for file_data in workflow_data_files: + file_approval_models.append(ApprovalFile(file_data_id=file_data.id, + approval=approval)) return file_approval_models - @staticmethod - def _generate_workflow_hash(files): - """Currently based exclusively on the status of files associated with a workflow.""" - version_array = [] - for file in files: - version_array.append(str(file.id) + "[" + str(file.latest_version) + "]") - full_version = "-".join(version_array) - return full_version diff --git a/crc/services/file_service.py b/crc/services/file_service.py index 493f33ec..13466a0e 100644 --- a/crc/services/file_service.py +++ b/crc/services/file_service.py @@ -5,13 +5,14 @@ from datetime import datetime from uuid import UUID from xml.etree import ElementTree +from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException from pandas import ExcelFile +from sqlalchemy import desc from crc import session from crc.api.common import ApiError from crc.models.file import FileType, FileDataModel, FileModel, LookupFileModel, LookupDataModel -from crc.models.workflow import WorkflowSpecModel, WorkflowModel -from crc.services.workflow_processor import WorkflowProcessor +from crc.models.workflow import WorkflowSpecModel, WorkflowModel, WorkflowSpecDependencyFile class FileService(object): @@ -111,12 +112,12 @@ class FileService(object): def update_file(file_model, binary_data, content_type): session.flush() # Assure the database is up-to-date before running this. - file_data_model = session.query(FileDataModel). \ - filter_by(file_model_id=file_model.id, - version=file_model.latest_version - ).with_for_update().first() + latest_data_model = session.query(FileDataModel). \ + filter(FileDataModel.file_model_id == file_model.id).\ + order_by(desc(FileDataModel.date_created)).first() + md5_checksum = UUID(hashlib.md5(binary_data).hexdigest()) - if (file_data_model is not None) and (md5_checksum == file_data_model.md5_hash): + if (latest_data_model is not None) and (md5_checksum == latest_data_model.md5_hash): # This file does not need to be updated, it's the same file. return file_model @@ -130,28 +131,50 @@ class FileService(object): file_model.type = FileType[file_extension] file_model.content_type = content_type - if file_data_model is None: + if latest_data_model is None: version = 1 else: - version = file_data_model.version + 1 + version = latest_data_model.version + 1 # If this is a BPMN, extract the process id. 
         if file_model.type == FileType.bpmn:
             bpmn: ElementTree.Element = ElementTree.fromstring(binary_data)
-            file_model.primary_process_id = WorkflowProcessor.get_process_id(bpmn)
+            file_model.primary_process_id = FileService.get_process_id(bpmn)

-        file_model.latest_version = version
         new_file_data_model = FileDataModel(
             data=binary_data, file_model_id=file_model.id, file_model=file_model,
-            version=version, md5_hash=md5_checksum, last_updated=datetime.now()
+            version=version, md5_hash=md5_checksum, date_created=datetime.now()
         )
-
         session.add_all([file_model, new_file_data_model])
         session.commit()
         session.flush()  # Assure the id is set on the model before returning it.
         return file_model

+    @staticmethod
+    def get_process_id(et_root: ElementTree.Element):
+        process_elements = []
+        for child in et_root:
+            if child.tag.endswith('process') and child.attrib.get('isExecutable', False):
+                process_elements.append(child)
+
+        if len(process_elements) == 0:
+            raise ValidationException('No executable process tag found')
+
+        # There are multiple executable process elements
+        if len(process_elements) > 1:
+
+            # Look for the element that has the startEvent in it
+            for e in process_elements:
+                this_element: ElementTree.Element = e
+                for child_element in list(this_element):
+                    if child_element.tag.endswith('startEvent'):
+                        return this_element.attrib['id']
+
+            raise ValidationException('No start event found in %s' % et_root.attrib['id'])
+
+        return process_elements[0].attrib['id']
+
     @staticmethod
     def get_files_for_study(study_id, irb_doc_code=None):
         query = session.query(FileModel).\
             join(WorkflowModel).\
             filter(WorkflowModel.study_id == study_id)
@@ -176,23 +199,51 @@ class FileService(object):

         if name:
             query = query.filter_by(name=name)
+
+        query = query.order_by(FileModel.id)
+
         results = query.all()
         return results

     @staticmethod
-    def get_file_data(file_id, file_model=None, version=None):
+    def get_spec_data_files(workflow_spec_id, workflow_id=None):
+        """Returns all the FileDataModels related to a workflow specification.
+        If a workflow is specified, returns the version of the spec related
+        to that workflow; otherwise, returns the latest files."""
+        if workflow_id:
+            files = session.query(FileDataModel) \
+                .join(WorkflowSpecDependencyFile) \
+                .filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
+                .order_by(FileDataModel.id).all()
+            return files
+        else:
+            """Returns all the latest files related to a workflow specification"""
+            file_models = FileService.get_files(workflow_spec_id=workflow_spec_id)
+            latest_data_files = []
+            for file_model in file_models:
+                latest_data_files.append(FileService.get_file_data(file_model.id))
+            return latest_data_files

-        """Returns the file_data that is associated with the file model id, if an actual file_model
-        is provided, uses that rather than looking it up again."""
-        if file_model is None:
-            file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
-        if version is None:
-            version = file_model.latest_version
-        return session.query(FileDataModel) \
-            .filter(FileDataModel.file_model_id == file_id) \
-            .filter(FileDataModel.version == version) \
-            .first()
+    @staticmethod
+    def get_workflow_data_files(workflow_id=None):
+        """Returns all the FileDataModels related to a running workflow -
+        So these are the latest data files that were uploaded or generated
+        that go along with this workflow.
+        Not related to the spec in any way"""
+        file_models = FileService.get_files(workflow_id=workflow_id)
+        latest_data_files = []
+        for file_model in file_models:
+            latest_data_files.append(FileService.get_file_data(file_model.id))
+        return latest_data_files
+
+    @staticmethod
+    def get_file_data(file_id: int, version: int = None):
+        """Returns the file data with the given version, or the latest file, if version isn't provided."""
+        query = session.query(FileDataModel) \
+            .filter(FileDataModel.file_model_id == file_id)
+        if version:
+            query = query.filter(FileDataModel.version == version)
+        else:
+            query = query.order_by(desc(FileDataModel.date_created))
+        return query.first()

     @staticmethod
     def get_reference_file_data(file_name):
@@ -201,7 +252,7 @@ class FileService(object):
             filter(FileModel.name == file_name).first()
         if not file_model:
             raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name)
-        return FileService.get_file_data(file_model.id, file_model)
+        return FileService.get_file_data(file_model.id)

     @staticmethod
     def get_workflow_file_data(workflow, file_name):
diff --git a/crc/services/study_service.py b/crc/services/study_service.py
index 68f760e1..98a8d15a 100644
--- a/crc/services/study_service.py
+++ b/crc/services/study_service.py
@@ -78,6 +78,8 @@ class StudyService(object):
     def delete_workflow(workflow):
         for file in session.query(FileModel).filter_by(workflow_id=workflow.id).all():
             FileService.delete_file(file.id)
+        for dep in workflow.dependencies:
+            session.delete(dep)

         session.query(TaskEventModel).filter_by(workflow_id=workflow.id).delete()
         session.query(WorkflowModel).filter_by(id=workflow.id).delete()
diff --git a/crc/services/workflow_processor.py b/crc/services/workflow_processor.py
index d4e7c08e..0834cac1 100644
--- a/crc/services/workflow_processor.py
+++ b/crc/services/workflow_processor.py
@@ -1,8 +1,7 @@
-import random
 import re
-import string
 import xml.etree.ElementTree as ElementTree
 from datetime import datetime
+from typing import List

 from SpiffWorkflow import Task as SpiffTask, WorkflowException
 from SpiffWorkflow.bpmn.BpmnScriptEngine import BpmnScriptEngine
@@ -13,14 +12,15 @@ from SpiffWorkflow.bpmn.workflow import BpmnWorkflow
 from SpiffWorkflow.camunda.parser.CamundaParser import CamundaParser
 from SpiffWorkflow.dmn.parser.BpmnDmnParser import BpmnDmnParser
 from SpiffWorkflow.exceptions import WorkflowTaskExecException
-from SpiffWorkflow.operators import Operator
 from SpiffWorkflow.specs import WorkflowSpec
+from sqlalchemy import desc

 from crc import session
 from crc.api.common import ApiError
 from crc.models.file import FileDataModel, FileModel, FileType
-from crc.models.workflow import WorkflowStatus, WorkflowModel
+from crc.models.workflow import WorkflowStatus, WorkflowModel, WorkflowSpecDependencyFile
 from crc.scripts.script import Script
+from crc.services.file_service import FileService


 class CustomBpmnScriptEngine(BpmnScriptEngine):
@@ -48,7 +48,7 @@ class CustomBpmnScriptEngine(BpmnScriptEngine):
         mod = __import__(module_name, fromlist=[class_name])
         klass = getattr(mod, class_name)
         study_id = task.workflow.data[WorkflowProcessor.STUDY_ID_KEY]
-        if(WorkflowProcessor.WORKFLOW_ID_KEY in task.workflow.data):
+        if WorkflowProcessor.WORKFLOW_ID_KEY in task.workflow.data:
             workflow_id = task.workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY]
         else:
             workflow_id = None
@@ -75,7 +75,7 @@ class CustomBpmnScriptEngine(BpmnScriptEngine):
         Evaluate the given expression, within the context of the given task and
         return the result.
""" - exp,valid = self.validateExpression(expression) + exp, valid = self.validateExpression(expression) return self._eval(exp, **task.data) @staticmethod @@ -108,18 +108,22 @@ class WorkflowProcessor(object): If neither flag is set, it will use the same version of the specification that was used to originally create the workflow model. """ self.workflow_model = workflow_model - orig_version = workflow_model.spec_version - if soft_reset or workflow_model.spec_version is None: - self.workflow_model.spec_version = WorkflowProcessor.get_latest_version_string( - workflow_model.workflow_spec_id) - spec = self.get_spec(workflow_model.workflow_spec_id, workflow_model.spec_version) + if soft_reset or len(workflow_model.dependencies) == 0: + self.spec_data_files = FileService.get_spec_data_files( + workflow_spec_id=workflow_model.workflow_spec_id) + else: + self.spec_data_files = FileService.get_spec_data_files( + workflow_spec_id=workflow_model.workflow_spec_id, + workflow_id=workflow_model.id) + + spec = self.get_spec(self.spec_data_files, workflow_model.workflow_spec_id) self.workflow_spec_id = workflow_model.workflow_spec_id try: self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec) self.bpmn_workflow.script_engine = self._script_engine - if not self.WORKFLOW_ID_KEY in self.bpmn_workflow.data: + if self.WORKFLOW_ID_KEY not in self.bpmn_workflow.data: if not workflow_model.id: session.add(workflow_model) session.commit() @@ -132,22 +136,26 @@ class WorkflowProcessor(object): self.save() except KeyError as ke: - if soft_reset: - # Undo the soft-reset. - workflow_model.spec_version = orig_version raise ApiError(code="unexpected_workflow_structure", message="Failed to deserialize workflow" " '%s' version %s, due to a mis-placed or missing task '%s'" % - (self.workflow_spec_id, workflow_model.spec_version, str(ke)) + - " This is very likely due to a soft reset where there was a structural change.") + (self.workflow_spec_id, self.get_version_string(), str(ke)) + + " This is very likely due to a soft reset where there was a structural change.") if hard_reset: # Now that the spec is loaded, get the data and rebuild the bpmn with the new details - workflow_model.spec_version = self.hard_reset() + self.hard_reset() workflow_model.bpmn_workflow_json = WorkflowProcessor._serializer.serialize_workflow(self.bpmn_workflow) self.save() + if soft_reset: + self.save() + + # set whether this is the latest spec file. 
+ if self.spec_data_files == FileService.get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id): + self.is_latest_spec = True + else: + self.is_latest_spec = False def __get_bpmn_workflow(self, workflow_model: WorkflowModel, spec: WorkflowSpec): - if workflow_model.bpmn_workflow_json: bpmn_workflow = self._serializer.deserialize_workflow(workflow_model.bpmn_workflow_json, workflow_spec=spec) else: @@ -159,44 +167,32 @@ class WorkflowProcessor(object): def save(self): """Saves the current state of this processor to the database """ - workflow_model = self.workflow_model - workflow_model.bpmn_workflow_json = self.serialize() + self.workflow_model.bpmn_workflow_json = self.serialize() complete_states = [SpiffTask.CANCELLED, SpiffTask.COMPLETED] tasks = list(self.get_all_user_tasks()) - workflow_model.status = self.get_status() - workflow_model.total_tasks = len(tasks) - workflow_model.completed_tasks = sum(1 for t in tasks if t.state in complete_states) - workflow_model.last_updated = datetime.now() - session.add(workflow_model) + self.workflow_model.status = self.get_status() + self.workflow_model.total_tasks = len(tasks) + self.workflow_model.completed_tasks = sum(1 for t in tasks if t.state in complete_states) + self.workflow_model.last_updated = datetime.now() + self.update_dependencies(self.spec_data_files) + session.add(self.workflow_model) session.commit() - @staticmethod - def run_master_spec(spec_model, study): - """Executes a BPMN specification for the given study, without recording any information to the database - Useful for running the master specification, which should not persist. """ - version = WorkflowProcessor.get_latest_version_string(spec_model.id) - spec = WorkflowProcessor.get_spec(spec_model.id, version) - try: - bpmn_workflow = BpmnWorkflow(spec, script_engine=WorkflowProcessor._script_engine) - bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = study.id - bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = False - bpmn_workflow.do_engine_steps() - except WorkflowException as we: - raise ApiError.from_task_spec("error_running_master_spec", str(we), we.sender) - - if not bpmn_workflow.is_completed(): - raise ApiError("master_spec_not_automatic", - "The master spec should only contain fully automated tasks, it failed to complete.") - - return bpmn_workflow.last_task.data + def get_version_string(self): + # this could potentially become expensive to load all the data in the data models. + # in which case we might consider using a deferred loader for the actual data, but + # trying not to pre-optimize. 
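+        # As a worked example of the format documented in
+        # __get_version_string_for_data_models below: a primary BPMN on its 2nd
+        # version (file data id 12) plus one supporting file on its 1st version
+        # (file data id 45) comes out as "v2.1 (12.45)".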
+ file_data_models = FileService.get_spec_data_files(self.workflow_model.workflow_spec_id, + self.workflow_model.id) + return WorkflowProcessor.__get_version_string_for_data_models(file_data_models) @staticmethod - def get_parser(): - parser = MyCustomParser() - return parser + def get_latest_version_string_for_spec(spec_id): + file_data_models = FileService.get_spec_data_files(spec_id) + return WorkflowProcessor.__get_version_string_for_data_models(file_data_models) @staticmethod - def get_latest_version_string(workflow_spec_id): + def __get_version_string_for_data_models(file_data_models): """Version is in the format v[VERSION] (FILE_ID_LIST) For example, a single bpmn file with only one version would be v1 (12) Where 12 is the id of the file data model that is used to create the @@ -205,10 +201,6 @@ class WorkflowProcessor(object): a Spec that includes a BPMN, DMN, an a Word file all on the first version would be v1.1.1 (12.45.21)""" - # this could potentially become expensive to load all the data in the data models. - # in which case we might consider using a deferred loader for the actual data, but - # trying not to pre-optimize. - file_data_models = WorkflowProcessor.__get_latest_file_models(workflow_spec_id) major_version = 0 # The version of the primary file. minor_version = [] # The versions of the minor files if any. file_ids = [] @@ -224,60 +216,72 @@ class WorkflowProcessor(object): full_version = "v%s (%s)" % (version, files) return full_version - @staticmethod - def get_file_models_for_version(workflow_spec_id, version): - file_id_strings = re.findall('\((.*)\)', version)[0].split(".") - file_ids = [int(i) for i in file_id_strings] - files = session.query(FileDataModel)\ - .join(FileModel) \ - .filter(FileModel.workflow_spec_id == workflow_spec_id)\ - .filter(FileDataModel.id.in_(file_ids)).all() - if len(files) != len(file_ids): - raise ApiError("invalid_version", - "The version '%s' of workflow specification '%s' is invalid. " % - (version, workflow_spec_id) + - " Unable to locate the correct files to recreate it.") - return files + + + def update_dependencies(self, spec_data_files): + existing_dependencies = FileService.get_spec_data_files( + workflow_spec_id=self.workflow_model.workflow_spec_id, + workflow_id=self.workflow_model.id) + + # Don't save the dependencies if they haven't changed. + if existing_dependencies == spec_data_files: + return + + # Remove all existing dependencies, and replace them. + self.workflow_model.dependencies = [] + for file_data in spec_data_files: + self.workflow_model.dependencies.append(WorkflowSpecDependencyFile(file_data_id=file_data.id)) @staticmethod - def __get_latest_file_models(workflow_spec_id): - """Returns all the latest files related to a workflow specification""" - return session.query(FileDataModel) \ - .join(FileModel) \ - .filter(FileModel.workflow_spec_id == workflow_spec_id)\ - .filter(FileDataModel.version == FileModel.latest_version)\ - .order_by(FileModel.id)\ - .all() + def run_master_spec(spec_model, study): + """Executes a BPMN specification for the given study, without recording any information to the database + Useful for running the master specification, which should not persist. 
""" + spec_data_files = FileService.get_spec_data_files(spec_model.id) + spec = WorkflowProcessor.get_spec(spec_data_files, spec_model.id) + try: + bpmn_workflow = BpmnWorkflow(spec, script_engine=WorkflowProcessor._script_engine) + bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = study.id + bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = False + bpmn_workflow.do_engine_steps() + except WorkflowException as we: + raise ApiError.from_task_spec("error_running_master_spec", str(we), we.sender) + + if not bpmn_workflow.is_completed(): + raise ApiError("master_spec_not_automatic", + "The master spec should only contain fully automated tasks, it failed to complete.") + + return bpmn_workflow.last_task.data @staticmethod - def get_spec(workflow_spec_id, version=None): - """Returns the requested version of the specification, - or the latest version if none is specified.""" + def get_parser(): + parser = MyCustomParser() + return parser + + @staticmethod + def get_spec(file_data_models: List[FileDataModel], workflow_spec_id): + """Returns a SpiffWorkflow specification for the given workflow spec, + using the files provided. The Workflow_spec_id is only used to generate + better error messages.""" parser = WorkflowProcessor.get_parser() process_id = None - if version is None: - file_data_models = WorkflowProcessor.__get_latest_file_models(workflow_spec_id) - else: - file_data_models = WorkflowProcessor.get_file_models_for_version(workflow_spec_id, version) - for file_data in file_data_models: if file_data.file_model.type == FileType.bpmn: bpmn: ElementTree.Element = ElementTree.fromstring(file_data.data) if file_data.file_model.primary: - process_id = WorkflowProcessor.get_process_id(bpmn) + process_id = FileService.get_process_id(bpmn) parser.add_bpmn_xml(bpmn, filename=file_data.file_model.name) elif file_data.file_model.type == FileType.dmn: dmn: ElementTree.Element = ElementTree.fromstring(file_data.data) parser.add_dmn_xml(dmn, filename=file_data.file_model.name) if process_id is None: - raise(ApiError(code="no_primary_bpmn_error", - message="There is no primary BPMN model defined for workflow %s" % workflow_spec_id)) + raise (ApiError(code="no_primary_bpmn_error", + message="There is no primary BPMN model defined for workflow %s" % workflow_spec_id)) try: spec = parser.get_spec(process_id) except ValidationException as ve: raise ApiError(code="workflow_validation_error", - message="Failed to parse Workflow Specification '%s' %s." % (workflow_spec_id, version) + + message="Failed to parse Workflow Specification '%s'" % workflow_spec_id + "Error is %s" % str(ve), file_name=ve.filename, task_id=ve.id, @@ -301,8 +305,8 @@ class WorkflowProcessor(object): Returns the new version. 
""" - version = WorkflowProcessor.get_latest_version_string(self.workflow_spec_id) - spec = WorkflowProcessor.get_spec(self.workflow_spec_id) # Force latest version by NOT specifying version + self.spec_data_files = FileService.get_spec_data_files(workflow_spec_id=self.workflow_spec_id) + spec = WorkflowProcessor.get_spec(self.spec_data_files, self.workflow_spec_id) # spec = WorkflowProcessor.get_spec(self.workflow_spec_id, version) bpmn_workflow = BpmnWorkflow(spec, script_engine=self._script_engine) bpmn_workflow.data = self.bpmn_workflow.data @@ -310,14 +314,10 @@ class WorkflowProcessor(object): task.data = self.bpmn_workflow.last_task.data bpmn_workflow.do_engine_steps() self.bpmn_workflow = bpmn_workflow - return version def get_status(self): return self.status_of(self.bpmn_workflow) - def get_spec_version(self): - return self.workflow_model.spec_version - def do_engine_steps(self): try: self.bpmn_workflow.do_engine_steps() @@ -398,32 +398,7 @@ class WorkflowProcessor(object): return [t for t in all_tasks if not self.bpmn_workflow._is_engine_task(t.task_spec) and t.state in [t.COMPLETED, t.CANCELLED]] - @staticmethod - def get_process_id(et_root: ElementTree.Element): - process_elements = [] - for child in et_root: - if child.tag.endswith('process') and child.attrib.get('isExecutable', False): - process_elements.append(child) - - if len(process_elements) == 0: - raise ValidationException('No executable process tag found') - - # There are multiple root elements - if len(process_elements) > 1: - - # Look for the element that has the startEvent in it - for e in process_elements: - this_element: ElementTree.Element = e - for child_element in list(this_element): - if child_element.tag.endswith('startEvent'): - return this_element.attrib['id'] - - raise ValidationException('No start event found in %s' % et_root.attrib['id']) - - return process_elements[0].attrib['id'] - def get_nav_item(self, task): for nav_item in self.bpmn_workflow.get_nav_list(): if nav_item['task_id'] == task.id: return nav_item - diff --git a/crc/services/workflow_service.py b/crc/services/workflow_service.py index 1f6cfe8f..eb488537 100644 --- a/crc/services/workflow_service.py +++ b/crc/services/workflow_service.py @@ -40,8 +40,10 @@ class WorkflowService(object): def test_spec(cls, spec_id): """Runs a spec through it's paces to see if it results in any errors. 
Not fool-proof, but a good sanity check.""" - version = WorkflowProcessor.get_latest_version_string(spec_id) - spec = WorkflowProcessor.get_spec(spec_id, version) + + spec = WorkflowProcessor.get_spec( + file_data_models=FileService.get_spec_data_files(workflow_spec_id=spec_id), + workflow_spec_id=spec_id) bpmn_workflow = BpmnWorkflow(spec, script_engine=CustomBpmnScriptEngine()) bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = 1 bpmn_workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY] = spec_id @@ -269,7 +271,7 @@ class WorkflowService(object): user_uid=g.user.uid, workflow_id=workflow_model.id, workflow_spec_id=workflow_model.workflow_spec_id, - spec_version=workflow_model.spec_version, + spec_version=processor.get_version_string(), action=action, task_id=task.id, task_name=task.name, diff --git a/tests/test_approvals_service.py b/tests/test_approvals_service.py index 0ae37941..1ec6db75 100644 --- a/tests/test_approvals_service.py +++ b/tests/test_approvals_service.py @@ -9,7 +9,12 @@ from crc.services.workflow_processor import WorkflowProcessor class TestApprovalsService(BaseTest): def test_create_approval_record(self): + self.create_reference_document() workflow = self.create_workflow("empty_workflow") + FileService.add_workflow_file(workflow_id=workflow.id, + name="anything.png", content_type="text", + binary_data=b'5678', irb_doc_code="UVACompl_PRCAppr" ) + ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r") self.assertEquals(1, db.session.query(ApprovalModel).count()) model = db.session.query(ApprovalModel).first() @@ -17,10 +22,14 @@ class TestApprovalsService(BaseTest): self.assertEquals(workflow.id, model.workflow_id) self.assertEquals("dhf8r", model.approver_uid) self.assertEquals(1, model.version) - self.assertIsNotNone(model.workflow_hash) def test_new_requests_dont_add_if_approval_exists_for_current_workflow(self): + self.create_reference_document() workflow = self.create_workflow("empty_workflow") + FileService.add_workflow_file(workflow_id=workflow.id, + name="anything.png", content_type="text", + binary_data=b'5678', irb_doc_code="UVACompl_PRCAppr" ) + ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r") ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r") self.assertEquals(1, db.session.query(ApprovalModel).count()) @@ -31,15 +40,15 @@ class TestApprovalsService(BaseTest): self.load_example_data() self.create_reference_document() workflow = self.create_workflow('empty_workflow') - processor = WorkflowProcessor(workflow) - task = processor.next_task() + FileService.add_workflow_file(workflow_id=workflow.id, + name="anything.png", content_type="text", + binary_data=b'5678', irb_doc_code="AD_CoCAppr") ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r") - irb_code_1 = "UVACompl_PRCAppr" # The first file referenced in pb required docs. 
FileService.add_workflow_file(workflow_id=workflow.id, name="anything.png", content_type="text", - binary_data=b'5678', irb_doc_code=irb_code_1) + binary_data=b'5678', irb_doc_code="UVACompl_PRCAppr") ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r") self.assertEquals(2, db.session.query(ApprovalModel).count()) @@ -48,38 +57,3 @@ class TestApprovalsService(BaseTest): self.assertEquals(2, models[1].version) - def test_generate_workflow_hash_and_version(self): - self.load_example_data() - self.create_reference_document() - workflow = self.create_workflow('empty_workflow') - processor = WorkflowProcessor(workflow) - task = processor.next_task() - irb_code_1 = "UVACompl_PRCAppr" # The first file referenced in pb required docs. - irb_code_2 = "NonUVAIRB_AssuranceForm" # The second file in above. - # Add a task file to the workflow. - FileService.add_workflow_file(workflow_id=workflow.id, - name="anything.png", content_type="text", - binary_data=b'5678', irb_doc_code=irb_code_1) - # Add a two form field files with the same irb_code, but different names - FileService.add_workflow_file(workflow_id=workflow.id, - name="anything.png", content_type="text", - binary_data=b'1234', irb_doc_code=irb_code_2) - FileService.add_workflow_file(workflow_id=workflow.id, - name="another_anything.png", content_type="text", - binary_data=b'5678', irb_doc_code=irb_code_2) - - - # Workflow hash should look be id[1]-id[1]-id[1] - # Sould be three files, each with a version of 1. - # where id is the file id, which we don't know, thus the regex. - latest_files = FileService.get_workflow_files(workflow.id) - self.assertRegexpMatches(ApprovalService._generate_workflow_hash(latest_files), "\d+\[1\]-\d+\[1\]-\d+\[1\]") - - # Replace last file - # should now be id[1]-id[1]-id[2] - FileService.add_workflow_file(workflow_id=workflow.id, - irb_doc_code=irb_code_2, - name="another_anything.png", content_type="text", - binary_data=b'9999') - self.assertRegexpMatches(ApprovalService._generate_workflow_hash(latest_files), "\d+\[1\]-\d+\[1\]-\d+\[2\]") - diff --git a/tests/test_file_service.py b/tests/test_file_service.py index 0b5ae5cf..705fef95 100644 --- a/tests/test_file_service.py +++ b/tests/test_file_service.py @@ -23,7 +23,11 @@ class TestFileService(BaseTest): file_models = FileService.get_workflow_files(workflow_id=workflow.id) self.assertEquals(1, len(file_models)) - self.assertEquals(2, file_models[0].latest_version) + + file_data = FileService.get_workflow_data_files(workflow_id=workflow.id) + self.assertEquals(1, len(file_data)) + self.assertEquals(2, file_data[0].version) + def test_add_file_from_form_increments_version_and_replaces_on_subsequent_add_with_same_name(self): self.load_example_data() @@ -44,7 +48,10 @@ class TestFileService(BaseTest): file_models = FileService.get_workflow_files(workflow_id=workflow.id) self.assertEquals(1, len(file_models)) - self.assertEquals(2, file_models[0].latest_version) + + file_data = FileService.get_workflow_data_files(workflow_id=workflow.id) + self.assertEquals(1, len(file_data)) + self.assertEquals(2, file_data[0].version) def test_add_file_from_form_allows_multiple_files_with_different_names(self): self.load_example_data() @@ -64,5 +71,3 @@ class TestFileService(BaseTest): binary_data=b'5678') file_models = FileService.get_workflow_files(workflow_id=workflow.id) self.assertEquals(2, len(file_models)) - self.assertEquals(1, file_models[0].latest_version) - self.assertEquals(1, file_models[1].latest_version) \ No newline 
at end of file
diff --git a/tests/test_files_api.py b/tests/test_files_api.py
index 79f8dbf2..ecce309c 100644
--- a/tests/test_files_api.py
+++ b/tests/test_files_api.py
@@ -4,7 +4,7 @@ import json
 from tests.base_test import BaseTest

 from crc import session
-from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel
+from crc.models.file import FileModel, FileType, FileSchema, FileModelSchema
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService
 from crc.services.workflow_processor import WorkflowProcessor
@@ -165,16 +165,16 @@ class TestFilesApi(BaseTest):
                           content_type='multipart/form-data', headers=self.logged_in_headers())
         self.assert_success(rv)
         self.assertIsNotNone(rv.get_data())
-        json_data = json.loads(rv.get_data(as_text=True))
-        file = FileModelSchema().load(json_data, session=session)
-        self.assertEqual(2, file.latest_version)
-        self.assertEqual(FileType.bpmn, file.type)
-        self.assertEqual("application/octet-stream", file.content_type)
+        file_json = json.loads(rv.get_data(as_text=True))
+        self.assertEqual(2, file_json['latest_version'])
+        self.assertEqual(FileType.bpmn.value, file_json['type'])
+        self.assertEqual("application/octet-stream", file_json['content_type'])
         self.assertEqual(spec.id, file_json['workflow_spec_id'])

         # Assure it is updated in the database and properly persisted.
         file_model = session.query(FileModel).filter(FileModel.id == file_json['id']).first()
-        self.assertEqual(2, file_model.latest_version)
+        file_data = FileService.get_file_data(file_model.id)
+        self.assertEqual(2, file_data.version)

         rv = self.app.get('/v1.0/file/%i/data' % file_json['id'], headers=self.logged_in_headers())
         self.assert_success(rv)
@@ -191,15 +191,13 @@ class TestFilesApi(BaseTest):
                           content_type='multipart/form-data', headers=self.logged_in_headers())
         self.assertIsNotNone(rv.get_data())
         json_data = json.loads(rv.get_data(as_text=True))
-        file = FileModelSchema().load(json_data, session=session)
-        self.assertEqual(1, file.latest_version)
+        self.assertEqual(1, json_data['latest_version'])
         data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn'
-        rv = self.app.put('/v1.0/file/%i/data' % file.id, data=data, follow_redirects=True,
+        rv = self.app.put('/v1.0/file/%i/data' % json_data['id'], data=data, follow_redirects=True,
                           content_type='multipart/form-data', headers=self.logged_in_headers())
         self.assertIsNotNone(rv.get_data())
         json_data = json.loads(rv.get_data(as_text=True))
-        file = FileModelSchema().load(json_data, session=session)
-        self.assertEqual(1, file.latest_version)
+        self.assertEqual(1, json_data['latest_version'])

     def test_get_file(self):
         self.load_example_data()
diff --git a/tests/test_request_approval_script.py b/tests/test_request_approval_script.py
index 142da5c5..2f4ab49e 100644
--- a/tests/test_request_approval_script.py
+++ b/tests/test_request_approval_script.py
@@ -1,3 +1,4 @@
+from crc.services.file_service import FileService
 from tests.base_test import BaseTest

 from crc.scripts.request_approval import RequestApproval
@@ -17,7 +18,10 @@ class TestRequestApprovalScript(BaseTest):
         processor = WorkflowProcessor(workflow)
         task = processor.next_task()
         task.data = {"study": {"approval1": "dhf8r", 'approval2':'lb3dp'}}
-
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      irb_doc_code="UVACompl_PRCAppr",
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'1234')
         script = RequestApproval()
         script.do_task(task, workflow.study_id, workflow.id, "study.approval1", "study.approval2")
         self.assertEquals(2,
db.session.query(ApprovalModel).count()) diff --git a/tests/test_study_service.py b/tests/test_study_service.py index aa77cc24..52babbb8 100644 --- a/tests/test_study_service.py +++ b/tests/test_study_service.py @@ -74,7 +74,6 @@ class TestStudyService(BaseTest): # workflow should not be started, and it should have 0 completed tasks, and 0 total tasks. self.assertEqual(WorkflowStatus.not_started, workflow.status) - self.assertEqual(None, workflow.spec_version) self.assertEqual(0, workflow.total_tasks) self.assertEqual(0, workflow.completed_tasks) diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index 176c2278..d95990b5 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -182,7 +182,6 @@ class TestTasksApi(BaseTest): self.assertEquals("Task 2b", nav[5]['title']) self.assertEquals("Task 3", nav[6]['title']) - def test_document_added_to_workflow_shows_up_in_file_list(self): self.load_example_data() self.create_reference_document() diff --git a/tests/test_workflow_processor.py b/tests/test_workflow_processor.py index fe182e28..36d23755 100644 --- a/tests/test_workflow_processor.py +++ b/tests/test_workflow_processor.py @@ -254,12 +254,12 @@ class TestWorkflowProcessor(BaseTest): study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("decision_table") processor = self.get_processor(study, workflow_spec_model) - self.assertTrue(processor.get_spec_version().startswith('v1.1')) + self.assertTrue(processor.get_version_string().startswith('v1.1')) file_service = FileService() file_service.add_workflow_spec_file(workflow_spec_model, "new_file.txt", "txt", b'blahblah') processor = self.get_processor(study, workflow_spec_model) - self.assertTrue(processor.get_spec_version().startswith('v1.1.1')) + self.assertTrue(processor.get_version_string().startswith('v1.1.1')) file_path = os.path.join(app.root_path, '..', 'tests', 'data', 'docx', 'docx.bpmn') file = open(file_path, "rb") @@ -268,7 +268,7 @@ class TestWorkflowProcessor(BaseTest): file_model = db.session.query(FileModel).filter(FileModel.name == "decision_table.bpmn").first() file_service.update_file(file_model, data, "txt") processor = self.get_processor(study, workflow_spec_model) - self.assertTrue(processor.get_spec_version().startswith('v2.1.1')) + self.assertTrue(processor.get_version_string().startswith('v2.1.1')) def test_restart_workflow(self): self.load_example_data() @@ -339,7 +339,7 @@ class TestWorkflowProcessor(BaseTest): # Assure that creating a new processor doesn't cause any issues, and maintains the spec version. processor.workflow_model.bpmn_workflow_json = processor.serialize() processor2 = WorkflowProcessor(processor.workflow_model) - self.assertTrue(processor2.get_spec_version().startswith("v1 ")) # Still at version 1. + self.assertFalse(processor2.is_latest_spec) # Still at version 1. # Do a hard reset, which should bring us back to the beginning, but retain the data. 
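        # (Per the hard_reset docstring above, this recreates the workflow
        # against the latest spec files while carrying forward the data from
        # the last completed task.)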
processor3 = WorkflowProcessor(processor.workflow_model, hard_reset=True) @@ -349,10 +349,6 @@ class TestWorkflowProcessor(BaseTest): self.assertEqual("New Step", processor3.next_task().task_spec.description) self.assertEqual("blue", processor3.next_task().data["color"]) - def test_get_latest_spec_version(self): - workflow_spec_model = self.load_test_spec("two_forms") - version = WorkflowProcessor.get_latest_version_string("two_forms") - self.assertTrue(version.startswith("v1 ")) @patch('crc.services.protocol_builder.ProtocolBuilderService.get_studies') @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') From 22bdb6c760733c624e547d464010ea0da1f47d77 Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Thu, 28 May 2020 20:13:33 -0400 Subject: [PATCH 4/7] This migration will clear out any approvals in the database. --- migrations/versions/bec71f7dc652_.py | 63 ++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 migrations/versions/bec71f7dc652_.py diff --git a/migrations/versions/bec71f7dc652_.py b/migrations/versions/bec71f7dc652_.py new file mode 100644 index 00000000..34872c90 --- /dev/null +++ b/migrations/versions/bec71f7dc652_.py @@ -0,0 +1,63 @@ +"""empty message + +Revision ID: bec71f7dc652 +Revises: 23c62c933848 +Create Date: 2020-05-28 20:08:45.891406 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'bec71f7dc652' +down_revision = '23c62c933848' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + + op.create_table('workflow_spec_dependency_file', + sa.Column('file_data_id', sa.Integer(), nullable=False), + sa.Column('workflow_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['file_data_id'], ['file_data.id'], ), + sa.ForeignKeyConstraint(['workflow_id'], ['workflow.id'], ), + sa.PrimaryKeyConstraint('file_data_id', 'workflow_id') + ) + op.drop_column('approval', 'workflow_hash') + op.execute( + """ + delete from approval_file; + delete from approval; + """ + ) + op.add_column('approval_file', sa.Column('file_data_id', sa.Integer(), nullable=False)) + op.drop_constraint('approval_file_file_id_fkey', 'approval_file', type_='foreignkey') + op.create_foreign_key(None, 'approval_file', 'file_data', ['file_data_id'], ['id']) + op.drop_column('approval_file', 'id') + op.drop_column('approval_file', 'file_version') + op.drop_column('approval_file', 'file_id') + op.drop_column('file', 'latest_version') + op.add_column('file_data', sa.Column('date_created', sa.DateTime(timezone=True), nullable=True)) + op.drop_column('file_data', 'last_updated') + op.drop_column('workflow', 'spec_version') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column('workflow', sa.Column('spec_version', sa.VARCHAR(), autoincrement=False, nullable=True)) + op.add_column('file_data', sa.Column('last_updated', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=True)) + op.drop_column('file_data', 'date_created') + op.add_column('file', sa.Column('latest_version', sa.INTEGER(), autoincrement=False, nullable=True)) + op.add_column('approval_file', sa.Column('file_id', sa.INTEGER(), autoincrement=False, nullable=False)) + op.add_column('approval_file', sa.Column('file_version', sa.INTEGER(), autoincrement=False, nullable=False)) + op.add_column('approval_file', sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False)) + op.drop_constraint(None, 'approval_file', type_='foreignkey') + op.create_foreign_key('approval_file_file_id_fkey', 'approval_file', 'file', ['file_id'], ['id']) + op.drop_column('approval_file', 'file_data_id') + op.add_column('approval', sa.Column('workflow_hash', sa.VARCHAR(), autoincrement=False, nullable=True)) + op.drop_table('workflow_spec_dependency_file') + # ### end Alembic commands ### From 11413838a77000f088b790c946df0d337d7c19d2 Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Fri, 29 May 2020 01:39:39 -0400 Subject: [PATCH 5/7] Faster lookup fields. We were parsing the spec each time to get details about how to search. We're just grabbing the workflow id and task id now and building that straight into the full text search index for faster lookups. Should be peppy. Another speed improvement - data in the FileDataModel is deferred, and not queried until it is specifically used, as the new data structures need to use this model frequently. --- crc/api.yml | 9 +- crc/api/workflow.py | 21 +-- crc/models/file.py | 22 ++- crc/services/file_service.py | 19 ++- crc/services/lookup_service.py | 148 +++++++++++------- crc/services/workflow_processor.py | 13 +- crc/services/workflow_service.py | 32 ++-- migrations/versions/5064b72284b7_.py | 36 +++++ .../enum_options_with_search/sponsors.xls | Bin 108544 -> 109056 bytes .../sponsors_modified.xls | Bin 0 -> 108032 bytes tests/test_lookup_service.py | 97 ++++++++---- tests/test_tasks_api.py | 8 +- tests/test_workflow_service.py | 34 +--- 13 files changed, 257 insertions(+), 182 deletions(-) create mode 100644 migrations/versions/5064b72284b7_.py create mode 100644 tests/data/enum_options_with_search/sponsors_modified.xls diff --git a/crc/api.yml b/crc/api.yml index c061307d..edc3861b 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -672,7 +672,7 @@ paths: application/json: schema: $ref: "#/components/schemas/Workflow" - /workflow/{workflow_id}/task/{task_id}/lookup/{field_id}: + /workflow/{workflow_id}/lookup/{field_id}: parameters: - name: workflow_id in: path @@ -681,13 +681,6 @@ paths: schema: type: integer format: int32 - - name: task_id - in: path - required: true - description: The id of the task - schema: - type: string - format: uuid - name: field_id in: path required: true diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 9154652a..efcccc26 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -219,26 +219,13 @@ def delete_workflow_spec_category(cat_id): session.commit() -def lookup(workflow_id, task_id, field_id, query, limit): +def lookup(workflow_id, field_id, query, limit): """ given a field in a task, attempts to find the lookup table or function associated with that field and runs a full-text query against it to locate the values and labels that would be returned to a type-ahead box. 
+ Tries to be fast, but first runs will be very slow. """ - workflow_model = session.query(WorkflowModel).filter_by(id=workflow_id).first() - if not workflow_model: - raise ApiError("unknown_workflow", "No workflow found with id: %i" % workflow_id) - processor = WorkflowProcessor(workflow_model) - task_id = uuid.UUID(task_id) - spiff_task = processor.bpmn_workflow.get_task(task_id) - if not spiff_task: - raise ApiError("unknown_task", "No task with %s found in workflow: %i" % (task_id, workflow_id)) - field = None - for f in spiff_task.task_spec.form.fields: - if f.id == field_id: - field = f - if not field: - raise ApiError("unknown_field", "No field named %s in task %s" % (task_id, spiff_task.task_spec.name)) - - lookup_data = LookupService.lookup(spiff_task, field, query, limit) + workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() + lookup_data = LookupService.lookup(workflow, field_id, query, limit) return LookupDataSchema(many=True).dump(lookup_data) \ No newline at end of file diff --git a/crc/models/file.py b/crc/models/file.py index e779f52a..184979e6 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -6,6 +6,7 @@ from marshmallow_enum import EnumField from marshmallow_sqlalchemy import SQLAlchemyAutoSchema from sqlalchemy import func, Index from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import deferred from crc import db, ma @@ -61,7 +62,7 @@ class FileDataModel(db.Model): __tablename__ = 'file_data' id = db.Column(db.Integer, primary_key=True) md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False) - data = db.Column(db.LargeBinary) + data = deferred(db.Column(db.LargeBinary)) # Don't load it unless you have to. version = db.Column(db.Integer, default=0) date_created = db.Column(db.DateTime(timezone=True), default=func.now()) file_model_id = db.Column(db.Integer, db.ForeignKey('file.id')) @@ -127,25 +128,22 @@ class FileSchema(ma.Schema): class LookupFileModel(db.Model): - """Takes the content of a file (like a xlsx, or csv file) and creates a key/value - store that can be used for lookups and searches. This table contains the metadata, - so we know the version of the file that was used, and what key column, and value column - were used to generate this lookup table. ie, the same xls file might have multiple - lookup file models, if different keys and labels are used - or someone decides to - make a change. We need to handle full text search over the label and value columns, - and not every column, because we don't know how much information will be in there. """ + """Gives us a quick way to tell what kind of lookup is set on a form field. + Connected to the file data model, so that if a new version of the same file is + created, we can update the listing.""" + #fixme: What happens if they change the file associated with a lookup field? __tablename__ = 'lookup_file' id = db.Column(db.Integer, primary_key=True) - label_column = db.Column(db.String) - value_column = db.Column(db.String) + workflow_spec_id = db.Column(db.String) + field_id = db.Column(db.String) + is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup. 
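+    # (A lookup is now keyed by workflow_spec_id + field_id rather than by
+    # file and column names; file_data_model_id below pins the exact file
+    # version so a stale lookup table can be detected and rebuilt.)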
     file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
-
+    dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")

 class LookupDataModel(db.Model):
     __tablename__ = 'lookup_data'
     id = db.Column(db.Integer, primary_key=True)
     lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id'))
-    lookup_file_model = db.relationship(LookupFileModel)
     value = db.Column(db.String)
     label = db.Column(db.String)
     # In the future, we might allow adding an additional "search" column if we want to search things not in label.
diff --git a/crc/services/file_service.py b/crc/services/file_service.py
index 3585b047..beb22831 100644
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@@ -204,22 +204,27 @@ class FileService(object):
         return results

     @staticmethod
-    def get_spec_data_files(workflow_spec_id, workflow_id=None):
+    def get_spec_data_files(workflow_spec_id, workflow_id=None, name=None):
         """Returns all the FileDataModels related to a workflow specification.
         If a workflow is specified, returns the version of the spec related
         to that workflow, otherwise, returns the latest files."""
         if workflow_id:
-            files = session.query(FileDataModel) \
-                .join(WorkflowSpecDependencyFile) \
-                .filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
-                .order_by(FileDataModel.id).all()
-            return files
+            query = session.query(FileDataModel) \
+                .join(WorkflowSpecDependencyFile) \
+                .filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
+                .order_by(FileDataModel.id)
+            if name:
+                query = query.join(FileModel).filter(FileModel.name == name)
+            return query.all()
         else:
             """Returns all the latest files related to a workflow specification"""
             file_models = FileService.get_files(workflow_spec_id=workflow_spec_id)
             latest_data_files = []
             for file_model in file_models:
-                latest_data_files.append(FileService.get_file_data(file_model.id))
+                if name and file_model.name == name:
+                    latest_data_files.append(FileService.get_file_data(file_model.id))
+                elif not name:
+                    latest_data_files.append(FileService.get_file_data(file_model.id))
             return latest_data_files

     @staticmethod
diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py
index 43e17f3c..95902fe0 100644
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@@ -1,4 +1,5 @@
 import logging
+import re

 from pandas import ExcelFile
 from sqlalchemy import func, desc
@@ -8,8 +9,11 @@ from crc import db
 from crc.api.common import ApiError
 from crc.models.api_models import Task
 from crc.models.file import FileDataModel, LookupFileModel, LookupDataModel
+from crc.models.workflow import WorkflowModel, WorkflowSpecDependencyFile
 from crc.services.file_service import FileService
 from crc.services.ldap_service import LdapService
+from crc.services.workflow_processor import WorkflowProcessor
+

 class TSRank(GenericFunction):
     package = 'full_text'
@@ -31,33 +35,56 @@ class LookupService(object):
     """

     @staticmethod
-    def lookup(spiff_task, field, query, limit):
-        """Executes the lookup for the given field."""
-        if field.type != Task.FIELD_TYPE_AUTO_COMPLETE:
-            raise ApiError.from_task("invalid_field_type",
-                                     "Field '%s' must be an autocomplete field to use lookups." % field.label,
-                                     task=spiff_task)
-
-        # If this field has an associated options file, then do the lookup against that field.
-        if field.has_property(Task.PROP_OPTIONS_FILE):
-            lookup_table = LookupService.get_lookup_table(spiff_task, field)
-            return LookupService._run_lookup_query(lookup_table, query, limit)
-        # If this is a ldap lookup, use the ldap service to provide the fields to return.
-        elif field.has_property(Task.PROP_LDAP_LOOKUP):
-            return LookupService._run_ldap_query(query, limit)
-        else:
-            raise ApiError.from_task("unknown_lookup_option",
-                                     "Lookup supports using spreadsheet options or ldap options, and neither was"
-                                     "provided.")
+    def get_lookup_model(spiff_task, field):
+        workflow_id = spiff_task.workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY]
+        workflow = db.session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
+        return LookupService.__get_lookup_model(workflow, field.id)

     @staticmethod
-    def get_lookup_table(spiff_task, field):
-        """ Checks to see if the options are provided in a separate lookup table associated with the
+    def __get_lookup_model(workflow, field_id):
+        lookup_model = db.session.query(LookupFileModel) \
+            .filter(LookupFileModel.workflow_spec_id == workflow.workflow_spec_id) \
+            .filter(LookupFileModel.field_id == field_id).first()
+
+        # one more quick query, to see if the lookup file is still related to this workflow.
+        # if not, we need to rebuild the lookup table.
+        is_current = False
+        if lookup_model:
+            is_current = db.session.query(WorkflowSpecDependencyFile). \
+                filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count() > 0
+
+        if not is_current:
+            if lookup_model:
+                db.session.delete(lookup_model)
+            # Very very very expensive, but we don't know we need it till we do.
+            lookup_model = LookupService.create_lookup_model(workflow, field_id)
+
+        return lookup_model
+
+    @staticmethod
+    def lookup(workflow, field_id, query, limit):
+        lookup_model = LookupService.__get_lookup_model(workflow, field_id)
+
+        if lookup_model.is_ldap:
+            return LookupService._run_ldap_query(query, limit)
+        else:
+            return LookupService._run_lookup_query(lookup_model, query, limit)
+
+    @staticmethod
+    def create_lookup_model(workflow_model, field_id):
+        """
+        This is all really expensive, but should happen just once (per file change).
+        Checks to see if the options are provided in a separate lookup table associated with the
         workflow, and if so, assures that data exists in the database, and return
         a model that can be used to locate that data.

         Returns:  an array of LookupData, suitable for returning to the api.
""" + processor = WorkflowProcessor(workflow_model) # VERY expensive, Ludicrous for lookup / type ahead + spiff_task, field = processor.find_task_and_field_by_field_id(field_id) + if field.has_property(Task.PROP_OPTIONS_FILE): if not field.has_property(Task.PROP_OPTIONS_VALUE_COLUMN) or \ not field.has_property(Task.PROP_OPTIONS_LABEL_COL): @@ -72,52 +99,67 @@ class LookupService(object): file_name = field.get_property(Task.PROP_OPTIONS_FILE) value_column = field.get_property(Task.PROP_OPTIONS_VALUE_COLUMN) label_column = field.get_property(Task.PROP_OPTIONS_LABEL_COL) - data_model = FileService.get_workflow_file_data(spiff_task.workflow, file_name) - lookup_model = LookupService.get_lookup_table_from_data_model(data_model, value_column, label_column) - return lookup_model + latest_files = FileService.get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id, + workflow_id=workflow_model.id, + name=file_name) + if len(latest_files) < 1: + raise ApiError("missing_file", "Unable to locate the lookup data file '%s'" % file_name) + else: + data_model = latest_files[0] + + lookup_model = LookupService.build_lookup_table(data_model, value_column, label_column, + workflow_model.workflow_spec_id, field_id) + + elif field.has_property(Task.PROP_LDAP_LOOKUP): + lookup_model = LookupFileModel(workflow_spec_id=workflow_model.workflow_spec_id, + field_id=field_id, + is_ldap=True) + else: + raise ApiError("unknown_lookup_option", + "Lookup supports using spreadsheet options or ldap options, and neither " + "was provided.") + db.session.add(lookup_model) + db.session.commit() + return lookup_model @staticmethod - def get_lookup_table_from_data_model(data_model: FileDataModel, value_column, label_column): + def build_lookup_table(data_model: FileDataModel, value_column, label_column, workflow_spec_id, field_id): """ In some cases the lookup table can be very large. This method will add all values to the database in a way that can be searched and returned via an api call - rather than sending the full set of options along with the form. It will only open the file and process the options if something has changed. """ + xls = ExcelFile(data_model.data) + df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet. + if value_column not in df: + raise ApiError("invalid_emum", + "The file %s does not contain a column named % s" % (data_model.file_model.name, + value_column)) + if label_column not in df: + raise ApiError("invalid_emum", + "The file %s does not contain a column named % s" % (data_model.file_model.name, + label_column)) - lookup_model = db.session.query(LookupFileModel) \ - .filter(LookupFileModel.file_data_model_id == data_model.id) \ - .filter(LookupFileModel.value_column == value_column) \ - .filter(LookupFileModel.label_column == label_column).first() - - if not lookup_model: - xls = ExcelFile(data_model.data) - df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet. 
-            if value_column not in df:
-                raise ApiError("invalid_emum",
-                               "The file %s does not contain a column named % s" % (data_model.file_model.name,
-                                                                                    value_column))
-            if label_column not in df:
-                raise ApiError("invalid_emum",
-                               "The file %s does not contain a column named % s" % (data_model.file_model.name,
-                                                                                    label_column))
-
-            lookup_model = LookupFileModel(label_column=label_column, value_column=value_column,
-                                           file_data_model_id=data_model.id)
-
-            db.session.add(lookup_model)
-            for index, row in df.iterrows():
-                lookup_data = LookupDataModel(lookup_file_model=lookup_model,
-                                              value=row[value_column],
-                                              label=row[label_column],
-                                              data=row.to_json())
-                db.session.add(lookup_data)
-            db.session.commit()
+        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
+                                       field_id=field_id,
+                                       file_data_model_id=data_model.id,
+                                       is_ldap=False)
+        db.session.add(lookup_model)
+        for index, row in df.iterrows():
+            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
+                                          value=row[value_column],
+                                          label=row[label_column],
+                                          data=row.to_json())
+            db.session.add(lookup_data)
+        db.session.commit()
         return lookup_model

     @staticmethod
     def _run_lookup_query(lookup_file_model, query, limit):
         db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
+        query = re.sub('[^A-Za-z0-9 ]+', '', query)
         query = query.strip()
         if len(query) > 0:
             if ' ' in query:
diff --git a/crc/services/workflow_processor.py b/crc/services/workflow_processor.py
index 0834cac1..3cfaa056 100644
--- a/crc/services/workflow_processor.py
+++ b/crc/services/workflow_processor.py
@@ -100,7 +100,7 @@ class WorkflowProcessor(object):
     STUDY_ID_KEY = "study_id"
     VALIDATION_PROCESS_KEY = "validate_only"

-    def __init__(self, workflow_model: WorkflowModel, soft_reset=False, hard_reset=False):
+    def __init__(self, workflow_model: WorkflowModel, soft_reset=False, hard_reset=False, validate_only=False):
         """Create a Workflow Processor based on the serialized information available in the workflow model.
         If soft_reset is set to true, it will try to use the latest version of the workflow specification.
             If hard_reset is set to true, it will create a new Workflow, but embed the data from the last
@@ -121,6 +121,7 @@ class WorkflowProcessor(object):
         self.workflow_spec_id = workflow_model.workflow_spec_id
         try:
             self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec)
+            self.bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only
             self.bpmn_workflow.script_engine = self._script_engine

             if self.WORKFLOW_ID_KEY not in self.bpmn_workflow.data:
@@ -402,3 +403,13 @@ class WorkflowProcessor(object):
         for nav_item in self.bpmn_workflow.get_nav_list():
             if nav_item['task_id'] == task.id:
                 return nav_item
+
+    def find_task_and_field_by_field_id(self, field_id):
+        """Tracks down a form field by name in the workflow spec,
+        only looks at ready tasks.
Returns a tuple of the task, and form"""
+        for spiff_task in self.bpmn_workflow.get_tasks(SpiffTask.READY):
+            if hasattr(spiff_task.task_spec, "form"):
+                for field in spiff_task.task_spec.form.fields:
+                    if field.id == field_id:
+                        return spiff_task, field
+        raise ApiError("invalid_field", "Unable to find a ready task with field: %s" % field_id)
diff --git a/crc/services/workflow_service.py b/crc/services/workflow_service.py
index eb488537..452b3327 100644
--- a/crc/services/workflow_service.py
+++ b/crc/services/workflow_service.py
@@ -18,6 +18,7 @@ from crc.api.common import ApiError
 from crc.models.api_models import Task, MultiInstanceType
 from crc.models.file import LookupDataModel
 from crc.models.stats import TaskEventModel
+from crc.models.workflow import WorkflowModel, WorkflowStatus
 from crc.services.file_service import FileService
 from crc.services.lookup_service import LookupService
 from crc.services.workflow_processor import WorkflowProcessor, CustomBpmnScriptEngine
@@ -41,18 +42,20 @@ class WorkflowService(object):
         """Runs a spec through its paces to see if it results in any errors.
           Not fool-proof, but a good sanity check."""
-        spec = WorkflowProcessor.get_spec(
-            file_data_models=FileService.get_spec_data_files(workflow_spec_id=spec_id),
-            workflow_spec_id=spec_id)
-        bpmn_workflow = BpmnWorkflow(spec, script_engine=CustomBpmnScriptEngine())
-        bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = 1
-        bpmn_workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY] = spec_id
-        bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = True
+        workflow_model = WorkflowModel(status=WorkflowStatus.not_started,
+                                       workflow_spec_id=spec_id,
+                                       last_updated=datetime.now(),
+                                       study_id=1)
+        try:
+            processor = WorkflowProcessor(workflow_model, validate_only=True)
+        except WorkflowException as we:
+            raise ApiError.from_task_spec("workflow_execution_exception", str(we),
+                                          we.sender)

-        while not bpmn_workflow.is_completed():
+        while not processor.bpmn_workflow.is_completed():
             try:
-                bpmn_workflow.do_engine_steps()
-                tasks = bpmn_workflow.get_tasks(SpiffTask.READY)
+                processor.bpmn_workflow.do_engine_steps()
+                tasks = processor.bpmn_workflow.get_tasks(SpiffTask.READY)
                 for task in tasks:
                     task_api = WorkflowService.spiff_task_to_api_task(
                         task,
@@ -60,8 +63,10 @@ class WorkflowService(object):
                     WorkflowService.populate_form_with_random_data(task, task_api)
                     task.complete()
             except WorkflowException as we:
+                db.session.delete(workflow_model)
                 raise ApiError.from_task_spec("workflow_execution_exception", str(we),
                                               we.sender)
+        db.session.delete(workflow_model)

     @staticmethod
     def populate_form_with_random_data(task, task_api):
@@ -84,7 +89,7 @@ class WorkflowService(object):
                                       " with no options" % field.id,
                                       task)
             elif field.type == "autocomplete":
-                lookup_model = LookupService.get_lookup_table(task, field)
+                lookup_model = LookupService.get_lookup_model(task, field)
                 if field.has_property(Task.PROP_LDAP_LOOKUP):
                     form_data[field.id] = {
                         "label": "dhf8r",
@@ -250,12 +255,12 @@ class WorkflowService(object):

     @staticmethod
     def process_options(spiff_task, field):
-        lookup_model = LookupService.get_lookup_table(spiff_task, field)
         # If this is an auto-complete field, do not populate options, a lookup will happen later.
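        # (For those fields the options are fetched on demand through the
        # /workflow/{workflow_id}/lookup/{field_id} endpoint defined in api.yml.)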
         if field.type == Task.FIELD_TYPE_AUTO_COMPLETE:
             pass
-        else:
+        elif field.has_property(Task.PROP_OPTIONS_FILE):
+            lookup_model = LookupService.get_lookup_model(spiff_task, field)
             data = db.session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_model).all()
             if not hasattr(field, 'options'):
                 field.options = []
@@ -286,3 +291,4 @@ class WorkflowService(object):
         )
         db.session.add(task_event)
         db.session.commit()
+
diff --git a/migrations/versions/5064b72284b7_.py b/migrations/versions/5064b72284b7_.py
new file mode 100644
index 00000000..00309e84
--- /dev/null
+++ b/migrations/versions/5064b72284b7_.py
@@ -0,0 +1,36 @@
+"""empty message
+
+Revision ID: 5064b72284b7
+Revises: bec71f7dc652
+Create Date: 2020-05-28 23:54:45.623361
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '5064b72284b7'
+down_revision = 'bec71f7dc652'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lookup_file', sa.Column('field_id', sa.String(), nullable=True))
+    op.add_column('lookup_file', sa.Column('is_ldap', sa.Boolean(), nullable=True))
+    op.add_column('lookup_file', sa.Column('workflow_spec_id', sa.String(), nullable=True))
+    op.drop_column('lookup_file', 'value_column')
+    op.drop_column('lookup_file', 'label_column')
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lookup_file', sa.Column('label_column', sa.VARCHAR(), autoincrement=False, nullable=True))
+    op.add_column('lookup_file', sa.Column('value_column', sa.VARCHAR(), autoincrement=False, nullable=True))
+    op.drop_column('lookup_file', 'workflow_spec_id')
+    op.drop_column('lookup_file', 'is_ldap')
+    op.drop_column('lookup_file', 'field_id')
+    # ### end Alembic commands ###
diff --git a/tests/data/enum_options_with_search/sponsors.xls b/tests/data/enum_options_with_search/sponsors.xls
index 8c977446b446586a4d05a0658d44ea4fe252fe05..d697bb67bb6e647f2ddf4f861ada04a5f3c56753 100644
Binary files a/tests/data/enum_options_with_search/sponsors.xls and b/tests/data/enum_options_with_search/sponsors.xls differ
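Taken together, the lookup changes in this patch reduce a type-ahead request to two steps: resolve the field's LookupFileModel (rebuilding it only when its backing file has gone stale), then run a sanitized full-text query over the cached LookupDataModel rows. A minimal sketch of that query, assuming a Postgres to_tsquery over the label column; the helper name, the 'simple' text-search configuration, and the prefix-match strategy are illustrative assumptions, not the exact service code:

import re

from sqlalchemy import func

from crc.models.file import LookupDataModel


def typeahead(lookup_file_model, term, limit=20):
    """Return LookupDataModel rows whose label matches the partial `term`.
    (A sketch of the flow in _run_lookup_query; details are assumptions.)"""
    db_query = LookupDataModel.query.filter(
        LookupDataModel.lookup_file_model == lookup_file_model)
    # Strip punctuation first, as _run_lookup_query does above, so stray
    # characters cannot break the tsquery syntax.
    term = re.sub('[^A-Za-z0-9 ]+', '', term).strip()
    if term:
        # Prefix-match every word ("hea dis" -> "hea:* & dis:*") so partial
        # typing still finds "Heart Disease".
        ts_query = func.to_tsquery('simple', ' & '.join(w + ':*' for w in term.split()))
        db_query = db_query.filter(
            func.to_tsvector('simple', LookupDataModel.label).op('@@')(ts_query))
    return db_query.limit(limit).all()

Because the parsed spreadsheet rows are cached in lookup_data, only the first request after a file change pays the cost of re-reading the xls; later keystrokes are a single query against the full-text index, ranked via the TSRank helper above.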
z4!Z-k61EDq8ny>=30ut#8z z!nVM+!nVP-!ybd}fISX-0=5&j3-%=JDcIAnXJETw&%*Y=_QLkT_QRfoJr8>U_9E;h z*vqg3u!FEyV25CbVMk!E!j8g@!Cr&C4toRkChR!u1ne!?+pu?F@50`LorJv)`vCSK z>=f)Y>?7F6uuov0!ajq24*LT3CG0EM*RXG3-@?9weGfYWI}7^(_9N^Z>^$rz*w3(E zU>9J&!hVBYgk6GNhFyWhGUELoEd0u^(Eau2gp)zZfz^f8gVl#M zfHj0Qf;EOUfi;CSgEfb>fL#Y`32Oyw4Qm5y3%efH4%Qyl0oD=L3Dz0b1=baI1MEgv zH`q?N-3FTkn+%%*n+lr-n+}@+n+cl*n+=--n+ux8auhTR9dANBz3 zLD)mEhhdMv9))dzZG~-vZHGMu+W~tV_5^GvY!~cF*i*2lVb8#J!=8ohf$fFugYAbs z2YVj&0_;WDOR$$=2Ve(bufPt$4#SSXUWFZn9fQ3FdmZ)$>`mBl*a_HMu(x6Fz}|(u z2RjLSANB$4L)a`T~Ju&-g?z`lij2m2m&26h(q1MElG zIoNsFPq3e1zrZfQeue!8y9m1ky9~Pmi)H%zd%Z-k#IPi=q_AYLXb%1q*b%J$de{coM%X>DdtsYkn_>6C z?uR`9dl2>z>|xj=ut#BAU|V6^VB2Aj!FIqNhdlw?3EKsG6804AY1lKc-LPk2dtiHE z`(XQF&%vIDy#RX=_7d!6*a6r<*ekF@u*0w;uvcM6VaH&v!Cr^G0ecg69CiZs7VK@< zJFs_Q@4-&O-iLhv`w(^tb{h5(>|@v`uuoy1!9It50s9j673^!+H?VJE-@(3zoq?T& z{Q&zBb`EwP_7m)9*e|dPuwP-n!7jos!7jtDz+##4{tuQImIRg*mJF60mI9U%mI{^{ zmIjsmJgO6Rsa?UD+ntDD-0_F zD+((HD-J6GD+wzFD-A0HD+?3Gs|u?Is}8FHs|mXXRtr`eb}g(9 ztS+n`tUjy(tRbustTC(!tSPJ+tU0U&>^fLWSSwg-SQ}Vd*!8e>u=cPHu#T`!u+Fe9 zu&%HhU^l|L!ES^|82um@ld!XAP>40{CjC~OOCD{LEVJM1yo4%p+cCty2ayI@bko`O9Mdj_@}_AG1< zY%gpdY(MNd*z>R#U@yX6g1rno06PeK1$GE_7##RqZ^DklPQc!R zy$yQ@_Acx_*h$#?un%A#!cM_X!#;w24EqH3DeN=Y=ddqeU&6kEeGU5t_ATr?*!QqA zu(PlqU_Zjn!Op{eg8dBp1$F`UE9^JeMc5_SW!M#1EDPTM!4kugz>>m}!IHyLz*53e z!BWH0z|z9f!P3Jrz%s%z!7{_Lz_P-!!Lq}0z;eQJ!E(d$!1BWK!Scfjz~W#9VTE9Y zVMSm?VZ~s@VI^QCVWnWDVP#-tVdY@uVHIE%VU=K&VO3yNVbx&OVKrbiVb{QF!D_>< zh1G%8h1G-Ahc$pTgf)UShBbjTg*AgUhqZuR2Wtsy1#1m!18WPr9@Y-l9@YWY5!MOT z8P)~X6?OycMp!r4O|b5;9n*o~%n+2NpQFZ1MfyONl}$Q7HdjN*qdwLuqp2)bW}<7X1#q zXk4Q3UyfLkFfLKz+GsPb@lm4UTo>iyB>i{=-N zjuwzCzT$wCIbs8%CH~QchIb`&QR(+K{^yAQ8U9z@15sPL9KO>i(HS9X08!J3%F#C; z4gaV6@4E*Qg>N`XIGCd?%$PE|&ctH-;`^~C(Rr!WpgyBUj2_W{OkCp$efkZJt69Bl zhhDw=4ISDqzL_)_)W3g?->d)U`l0>(zR>^NvF})zb`Yk zJl!vcLb+bZpIyE1VQQVdxkjfyktq6nZbhbp!>XQ$R`dU}`)Au9t!LunM~<&6mnY+j zRbl(fT-eemSh57xEm(BDiWU?lx_17#jsMI;mH+Afo9uyhu@T|FF|oK<=@sE5}ZRcvsVMwiyB$zI)|l?iVcbNj$e-j@>;F%6|L*>e?y3GyEjyae|DWam EFB Date: Fri, 29 May 2020 04:42:48 -0400 Subject: [PATCH 6/7] re-working the way the redirects function, so we pass arguments as a get parameter. Just trying to get rid of the weird lag on production. I noticed the validation sometimes looks ahead for files, so looking at all the tasks now, not just the ready tasks for the lookup field. Ran into an issue with validation where a workflow model was required, so I create one and delete it. Another refactor for another day. --- crc/api/user.py | 8 ++++-- crc/services/workflow_processor.py | 12 ++++----- crc/services/workflow_service.py | 41 ++++++++++++++++++++++++------ 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/crc/api/user.py b/crc/api/user.py index 01459526..afa2e894 100644 --- a/crc/api/user.py +++ b/crc/api/user.py @@ -1,6 +1,7 @@ import json import connexion +import flask from flask import redirect, g, request from crc import app, db @@ -109,8 +110,11 @@ def _handle_login(user_info: LdapUserInfo, redirect_url=app.config['FRONTEND_AUT # Return the frontend auth callback URL, with auth token appended. 
auth_token = user.encode_auth_token().decode() if redirect_url is not None: - app.logger.info("SSO_LOGIN: REDIRECTING TO: " + redirect_url) - return redirect('%s/%s' % (redirect_url, auth_token)) + if redirect_url.find("http://") != 0 and redirect_url.find("https://") != 0: + redirect_url = "http://" + redirect_url + url = '%s?token=%s' % (redirect_url, auth_token) + app.logger.info("SSO_LOGIN: REDIRECTING TO: " + url) + return flask.redirect(url, code=302) else: app.logger.info("SSO_LOGIN: NO REDIRECT, JUST RETURNING AUTH TOKEN.") return auth_token diff --git a/crc/services/workflow_processor.py b/crc/services/workflow_processor.py index 3cfaa056..d032b94a 100644 --- a/crc/services/workflow_processor.py +++ b/crc/services/workflow_processor.py @@ -120,8 +120,7 @@ class WorkflowProcessor(object): spec = self.get_spec(self.spec_data_files, workflow_model.workflow_spec_id) self.workflow_spec_id = workflow_model.workflow_spec_id try: - self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec) - self.bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only + self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec, validate_only) self.bpmn_workflow.script_engine = self._script_engine if self.WORKFLOW_ID_KEY not in self.bpmn_workflow.data: @@ -156,13 +155,13 @@ class WorkflowProcessor(object): else: self.is_latest_spec = False - def __get_bpmn_workflow(self, workflow_model: WorkflowModel, spec: WorkflowSpec): + def __get_bpmn_workflow(self, workflow_model: WorkflowModel, spec: WorkflowSpec, validate_only=False): if workflow_model.bpmn_workflow_json: bpmn_workflow = self._serializer.deserialize_workflow(workflow_model.bpmn_workflow_json, workflow_spec=spec) else: bpmn_workflow = BpmnWorkflow(spec, script_engine=self._script_engine) bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = workflow_model.study_id - bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = False + bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only bpmn_workflow.do_engine_steps() return bpmn_workflow @@ -407,9 +406,10 @@ class WorkflowProcessor(object): def find_task_and_field_by_field_id(self, field_id): """Tracks down a form field by name in the workflow spec, only looks at ready tasks. 
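To make the new redirect behavior concrete, here is the URL construction pulled out as a stand-alone sketch; the helper name and example hostname are made up, but the logic is lifted directly from _handle_login above:

    # Stand-alone restatement of the URL handling in _handle_login:
    # prepend a scheme when one is missing, and hand the token over as a
    # query parameter instead of a path segment.
    def build_redirect_url(redirect_url, auth_token):
        if redirect_url.find("http://") != 0 and redirect_url.find("https://") != 0:
            redirect_url = "http://" + redirect_url
        return '%s?token=%s' % (redirect_url, auth_token)

    assert build_redirect_url("app.example.com/session", "abc123") == \
        "http://app.example.com/session?token=abc123"
    assert build_redirect_url("https://app.example.com/session", "abc123") == \
        "https://app.example.com/session?token=abc123"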
diff --git a/crc/services/workflow_processor.py b/crc/services/workflow_processor.py
index 3cfaa056..d032b94a 100644
--- a/crc/services/workflow_processor.py
+++ b/crc/services/workflow_processor.py
@@ -120,8 +120,7 @@ class WorkflowProcessor(object):
         spec = self.get_spec(self.spec_data_files, workflow_model.workflow_spec_id)
         self.workflow_spec_id = workflow_model.workflow_spec_id
         try:
-            self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec)
-            self.bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only
+            self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec, validate_only)
             self.bpmn_workflow.script_engine = self._script_engine

             if self.WORKFLOW_ID_KEY not in self.bpmn_workflow.data:
@@ -156,13 +155,13 @@ class WorkflowProcessor(object):
         else:
             self.is_latest_spec = False

-    def __get_bpmn_workflow(self, workflow_model: WorkflowModel, spec: WorkflowSpec):
+    def __get_bpmn_workflow(self, workflow_model: WorkflowModel, spec: WorkflowSpec, validate_only=False):
         if workflow_model.bpmn_workflow_json:
             bpmn_workflow = self._serializer.deserialize_workflow(workflow_model.bpmn_workflow_json, workflow_spec=spec)
         else:
             bpmn_workflow = BpmnWorkflow(spec, script_engine=self._script_engine)
             bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = workflow_model.study_id
-            bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = False
+            bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only
         bpmn_workflow.do_engine_steps()
         return bpmn_workflow

@@ -407,9 +406,10 @@ class WorkflowProcessor(object):
     def find_task_and_field_by_field_id(self, field_id):
-        """Tracks down a form field by name in the workflow spec, only looks at ready tasks.
-           Returns a tuple of the task, and form"""
-        for spiff_task in self.bpmn_workflow.get_tasks(SpiffTask.READY):
+        """Tracks down a form field by name in the workflow spec, looking at all tasks, as
+           validation may check a field before its task is READY. Returns the task and field."""
+        for spiff_task in self.bpmn_workflow.get_tasks():
             if hasattr(spiff_task.task_spec, "form"):
                 for field in spiff_task.task_spec.form.fields:
                     if field.id == field_id:
                         return spiff_task, field
-        raise ApiError("invalid_field", "Unable to find a ready task with field: %s" % field_id)
+        raise ApiError("invalid_field",
+                       "Unable to find a task in the workflow with a lookup field called: %s" % field_id)
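The behavioral point of that last hunk, restated outside the class: get_tasks() with no filter walks every task in the workflow, so a form field can be found even when its task is not yet READY. The function below is illustrative only, not part of the patch:

    # Illustrative only: the same scan find_task_and_field_by_field_id now
    # performs, written as a free function. No READY filter is applied, so
    # fields on future tasks are visible to validation.
    def first_task_with_field(bpmn_workflow, field_id):
        for spiff_task in bpmn_workflow.get_tasks():
            form = getattr(spiff_task.task_spec, "form", None)
            if form is not None:
                for field in form.fields:
                    if field.id == field_id:
                        return spiff_task, field
        return None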
diff --git a/crc/services/workflow_service.py b/crc/services/workflow_service.py
index 452b3327..c6cb8638 100644
--- a/crc/services/workflow_service.py
+++ b/crc/services/workflow_service.py
@@ -17,10 +17,14 @@ from crc import db, app
 from crc.api.common import ApiError
 from crc.models.api_models import Task, MultiInstanceType
 from crc.models.file import LookupDataModel
+from crc.models.protocol_builder import ProtocolBuilderStatus
 from crc.models.stats import TaskEventModel
+from crc.models.study import StudyModel
+from crc.models.user import UserModel
 from crc.models.workflow import WorkflowModel, WorkflowStatus
 from crc.services.file_service import FileService
 from crc.services.lookup_service import LookupService
+from crc.services.study_service import StudyService
 from crc.services.workflow_processor import WorkflowProcessor, CustomBpmnScriptEngine
@@ -37,18 +41,39 @@ class WorkflowService(object):
     But for now, this contains tools for converting spiff-workflow models into our
     own API models with additional information and capabilities."""

-    @classmethod
-    def test_spec(cls, spec_id):
-        """Runs a spec through it's paces to see if it results in any errors. Not fool-proof, but a good
-        sanity check."""
-
+    @staticmethod
+    def make_test_workflow(spec_id):
+        user = db.session.query(UserModel).filter_by(uid="test").first()
+        if not user:
+            db.session.add(UserModel(uid="test"))
+        study = db.session.query(StudyModel).filter_by(user_uid="test").first()
+        if not study:
+            db.session.add(StudyModel(user_uid="test", title="test"))
+        db.session.commit()
         workflow_model = WorkflowModel(status=WorkflowStatus.not_started,
                                        workflow_spec_id=spec_id,
                                        last_updated=datetime.now(),
-                                       study_id=1)
+                                       study=study)
+        return workflow_model
+
+    @staticmethod
+    def delete_test_data():
+        for study in db.session.query(StudyModel).filter(StudyModel.user_uid == "test"):
+            StudyService.delete_study(study.id)
+            db.session.commit()
+        db.session.query(UserModel).filter_by(uid="test").delete()
+
+    @staticmethod
+    def test_spec(spec_id):
+        """Runs a spec through its paces to see if it results in any errors. Not fool-proof, but a good
+        sanity check."""
+        workflow_model = WorkflowService.make_test_workflow(spec_id)
+
         try:
             processor = WorkflowProcessor(workflow_model, validate_only=True)
         except WorkflowException as we:
+            WorkflowService.delete_test_data()
             raise ApiError.from_task_spec("workflow_execution_exception", str(we),
                                           we.sender)
@@ -63,10 +88,10 @@ class WorkflowService(object):
             WorkflowService.populate_form_with_random_data(task, task_api)
             task.complete()
         except WorkflowException as we:
-            db.session.delete(workflow_model)
+            WorkflowService.delete_test_data()
             raise ApiError.from_task_spec("workflow_execution_exception", str(we),
                                           we.sender)
-        db.session.delete(workflow_model)
+        WorkflowService.delete_test_data()

     @staticmethod
     def populate_form_with_random_data(task, task_api):

From 895d2f88527994fc324bcdf7be723d6d2b074ce8 Mon Sep 17 00:00:00 2001
From: Dan Funk
Date: Fri, 29 May 2020 05:04:18 -0400
Subject: [PATCH 7/7] A slight change to the way the approval model is
 connected.

---
 crc/models/approval.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crc/models/approval.py b/crc/models/approval.py
index ff46673b..f7aa2e06 100644
--- a/crc/models/approval.py
+++ b/crc/models/approval.py
@@ -87,9 +87,9 @@ class Approval(object):
         instance.associated_files = []
         for approval_file in model.approval_files:
             associated_file = {}
-            associated_file['id'] = approval_file.file.id
-            associated_file['name'] = approval_file.file.name
-            associated_file['content_type'] = approval_file.file.content_type
+            associated_file['id'] = approval_file.file_data.file_model.id
+            associated_file['name'] = approval_file.file_data.file_model.name
+            associated_file['content_type'] = approval_file.file_data.file_model.content_type
             instance.associated_files.append(associated_file)

         return instance
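The practical effect of this last change: an approval's files are now reached through a specific stored file version (file_data) rather than the live file record. A hedged sketch of consuming the new relationship; the function is illustrative, but the attribute chain is exactly the one used in from_model above:

    # Illustrative reader for the new relationship: each ApprovalFile points
    # at a frozen FileDataModel version, and the file's metadata hangs off
    # of file_data.file_model, just as in Approval.from_model above.
    def associated_file_summaries(approval_model):
        return [{'id': af.file_data.file_model.id,
                 'name': af.file_data.file_model.name,
                 'content_type': af.file_data.file_model.content_type}
                for af in approval_model.approval_files]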