diff --git a/.gitignore b/.gitignore index 92974e1a..62b22954 100755 --- a/.gitignore +++ b/.gitignore @@ -238,3 +238,6 @@ postgres/var/ .coverage coverage.xml .~lock.* + +# Specification files +SPECS \ No newline at end of file diff --git a/config/default.py b/config/default.py index 4a35f40c..22ab87f1 100644 --- a/config/default.py +++ b/config/default.py @@ -88,3 +88,6 @@ MAIL_USE_SSL = environ.get('MAIL_USE_SSL', default=False) MAIL_USE_TLS = environ.get('MAIL_USE_TLS', default=False) MAIL_USERNAME = environ.get('MAIL_USERNAME', default='') MAIL_PASSWORD = environ.get('MAIL_PASSWORD', default='') + +# Local file path +SYNC_FILE_ROOT = './SPECS' \ No newline at end of file diff --git a/config/testing.py b/config/testing.py index c922fe3d..4f9f0d16 100644 --- a/config/testing.py +++ b/config/testing.py @@ -30,3 +30,5 @@ print('TESTING = ', TESTING) #Use the mock ldap. LDAP_URL = 'mock' + +SYNC_FILE_ROOT = 'tests/test_sync_files' diff --git a/crc/__init__.py b/crc/__init__.py index ebd7e0a5..3a1f6858 100644 --- a/crc/__init__.py +++ b/crc/__init__.py @@ -66,7 +66,7 @@ def process_waiting_tasks(): @app.before_first_request def init_scheduler(): scheduler.add_job(process_waiting_tasks, 'interval', minutes=1) - scheduler.add_job(FileService.cleanup_file_data, 'interval', minutes=1440) # once a day + # scheduler.add_job(FileService.cleanup_file_data, 'interval', minutes=1440) # once a day scheduler.start() @@ -106,6 +106,15 @@ print('TESTING = ', app.config['TESTING']) print('TEST_UID = ', app.config['TEST_UID']) print('ADMIN_UIDS = ', app.config['ADMIN_UIDS']) + +@app.cli.command() +def load_files_from_filesystem(): + """Load file data into the database.""" + from crc.services.temp_migration_service import FromFilesystemService + location = app.config['SYNC_FILE_ROOT'] + FromFilesystemService().update_file_metadata_from_filesystem(location) + + @app.cli.command() def load_example_data(): """Load example data into the database.""" diff --git a/crc/api.yml b/crc/api.yml index e1c7fe33..dca1a498 100755 --- a/crc/api.yml +++ b/crc/api.yml @@ -30,6 +30,7 @@ paths: responses: '304': description: Redirection to the hosted frontend with an auth_token header. 
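The `load_files_from_filesystem` CLI command registered above delegates to `FromFilesystemService`, whose diff is not part of this section. A minimal sketch of what such a loader might do, assuming the `<name>.json` metadata-sidecar convention that `SpecFileService` uses later in this change (the walk order and the upsert step are assumptions, not the service's actual code):

```python
import json
import os


def update_file_metadata_from_filesystem(location):
    """Walk SYNC_FILE_ROOT, pairing each spec file with its .json sidecar."""
    for root, _dirs, files in os.walk(location):
        for file_name in files:
            if file_name.endswith('.json'):
                continue  # metadata sidecar; read alongside its data file below
            file_path = os.path.join(root, file_name)
            json_path = f'{file_path}.json'
            if not os.path.exists(json_path):
                continue  # no metadata, nothing to import
            with open(json_path) as j_handle:
                metadata = json.load(j_handle)  # serialized FileModel fields
            with open(file_path, 'rb') as f_handle:
                data = f_handle.read()
            # The real service would upsert a FileModel row from `metadata`
            # here; this sketch just reports what it found.
            print(metadata.get('name'), len(data))
```

Invoked as `flask load_files_from_filesystem`, it reads `SYNC_FILE_ROOT` from config, which the changes above point at `./SPECS` by default and `tests/test_sync_files` under test.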
+ /user: parameters: - name: admin_impersonate_uid @@ -50,6 +51,7 @@ paths: application/json: schema: $ref: "#/components/schemas/User" + /list_users: get: operationId: crc.api.user.get_all_users @@ -160,6 +162,8 @@ paths: type: array items: $ref: "#/components/schemas/Study" + + /workflow_sync/pullall: get: operationId: crc.api.workflow_sync.sync_all_changed_workflows @@ -188,9 +192,6 @@ paths: type: string example : ['top_level_workflow','3b495037-f7d4-4509-bf58-cee41c0c6b0e'] - - - /workflow_sync/diff: get: operationId: crc.api.workflow_sync.get_changed_workflows @@ -240,7 +241,6 @@ paths: schema: $ref: "#/components/schemas/WorkflowSpec" - /workflow_sync/{workflow_spec_id}/files: get: operationId: crc.api.workflow_sync.get_workflow_spec_files @@ -300,7 +300,6 @@ paths: type : string example : ["data_security_plan.dmn",'some_other_file.xml'] - /workflow_sync/{workflow_spec_id}/files/diff: get: operationId: crc.api.workflow_sync.get_changed_files @@ -334,7 +333,6 @@ paths: items: $ref: "#/components/schemas/WorkflowSpecFilesDiff" - /workflow_sync/all: get: operationId: crc.api.workflow_sync.get_all_spec_state @@ -523,7 +521,6 @@ paths: schema: $ref: "#/components/schemas/WorkflowSpec" - /workflow-specification/{spec_id}/library/{library_id}: parameters: - name: spec_id @@ -565,7 +562,6 @@ paths: schema: $ref: "#/components/schemas/WorkflowSpec" - /workflow-specification/{spec_id}: parameters: - name: spec_id @@ -803,14 +799,9 @@ paths: type: array items: $ref: "#/components/schemas/WorkflowSpecCategory" + /file: parameters: - - name: workflow_spec_id - in: query - required: false - description: The unique id of a workflow specification - schema: - type: string - name: workflow_id in: query required: false @@ -1027,12 +1018,13 @@ paths: application/json: schema: $ref: "#/components/schemas/File" + /reference_file: get: - operationId: crc.api.file.get_reference_files + operationId: crc.api.reference_file.get_reference_files summary: Provides a list of existing reference files that are available in the system. tags: - - Files + - Reference Files responses: '200': description: An array of file descriptions (not the file content) @@ -1043,10 +1035,12 @@ paths: items: $ref: "#/components/schemas/File" post: - operationId: crc.api.file.add_reference_file + operationId: crc.api.reference_file.add_reference_file + security: + - auth_admin: [ 'secret' ] summary: Add a new reference file. tags: - - Files + - Reference Files requestBody: content: multipart/form-data: @@ -1072,13 +1066,13 @@ paths: schema: type: string get: - operationId: crc.api.file.get_reference_file - summary: Reference files are called by name rather than by id. + operationId: crc.api.reference_file.get_reference_file_info + summary: Returns the file info for a reference file tags: - - Files + - Reference Files responses: '200': - description: Returns the actual file + description: Returns the info for a reference file content: application/octet-stream: schema: @@ -1086,12 +1080,107 @@ paths: format: binary example: '' put: - operationId: crc.api.file.set_reference_file + operationId: crc.api.reference_file.update_reference_file_info security: - auth_admin: ['secret'] - summary: Update the contents of a named reference file. + summary: Update the file_info of a named reference file. 
tags: - - Files + - Reference Files + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/File" + responses: + '200': + description: File info updated successfully + content: + application/json: + schema: + $ref: "#/components/schemas/File" + delete: + operationId: crc.api.reference_file.delete_reference_file + summary: Remove an existing reference file. + tags: + - Reference Files + responses: + '204': + description: The reference file was removed. + /reference_file/{name}/data: + parameters: + - name: name + in: path + required: true + description: The special name of the reference file. + schema: + type: string + get: + operationId: crc.api.reference_file.get_reference_file_data + summary: Returns only the reference file content + tags: + - Reference Files + responses: + '200': + description: Returns the actual reference file + content: + application/octet-stream: + schema: + type: string + format: binary + put: + operationId: crc.api.reference_file.update_reference_file_data + security: + - auth_admin: ['secret'] + summary: Update the contents of a reference file + tags: + - Reference Files + requestBody: + content: + multipart/form-data: + schema: + x-body-name: file + type: object + properties: + file: + type: string + format: binary + required: + - file + responses: + '200': + description: Returns the updated file model + content: + application/json: + schema: + $ref: "#/components/schemas/File" + + /spec_file: + parameters: + - name: workflow_spec_id + in: query + required: true + description: The unique id of a workflow specification + schema: + type: string + get: + operationId: crc.api.spec_file.get_spec_files + summary: Provide a list of workflow spec files for the given workflow_spec_id. IMPORTANT: only includes metadata, not the file content. + tags: + - Spec Files + responses: + '200': + description: An array of file descriptions (not the file content) + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/File" + post: + operationId: crc.api.spec_file.add_spec_file + summary: Add a new workflow spec file + tags: + - Spec Files requestBody: content: multipart/form-data: @@ -1103,23 +1192,103 @@ paths: schema: format: binary responses: '200': - description: Returns the actual file + description: Metadata about the uploaded file, but not the file content. + content: + application/json: + schema: + $ref: "#/components/schemas/File" + /spec_file/{file_id}: + parameters: + - name: file_id + in: path + required: true + description: The id of the spec file + schema: + type: integer + get: + operationId: crc.api.spec_file.get_spec_file_info + summary: Returns metadata about the file + tags: + - Spec Files + responses: + '200': + description: Returns the file information requested. + content: + application/json: + schema: + $ref: "#/components/schemas/File" + put: + operationId: crc.api.spec_file.update_spec_file_info + summary: Update existing spec file with the given parameters. + tags: + - Spec Files + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/File" + responses: + '200': + description: File info updated successfully. + content: + application/json: + schema: + $ref: "#/components/schemas/File" + delete: + operationId: crc.api.spec_file.delete_spec_file + summary: Removes an existing workflow spec file. + tags: + - Spec Files + responses: + '204': + description: The file was removed. 
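The reorganized routes above split metadata (`/spec_file`, `/spec_file/{file_id}`) from raw content (`/spec_file/{file_id}/data`, defined next). A hedged client-side sketch of the resulting call pattern; the base URL, token, and spec id are placeholder values, not ones taken from this repo:

```python
import requests

BASE = 'http://localhost:5000/v1.0'            # assumed local dev server
HEADERS = {'Authorization': 'Bearer <token>'}  # placeholder auth token

# List file metadata for a workflow spec (no file content is returned).
files = requests.get(f'{BASE}/spec_file',
                     params={'workflow_spec_id': 'top_level_workflow'},
                     headers=HEADERS).json()

# Upload a new spec file as multipart form data; the response is metadata only.
with open('my_workflow.bpmn', 'rb') as f:
    new_file = requests.post(f'{BASE}/spec_file',
                             params={'workflow_spec_id': 'top_level_workflow'},
                             files={'file': f},
                             headers=HEADERS).json()

# Fetch the raw bytes separately, via the dedicated /data route.
content = requests.get(f"{BASE}/spec_file/{new_file['id']}/data",
                       headers=HEADERS).content
```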
+ /spec_file/{file_id}/data: + parameters: + - name: file_id + in: path + required: true + description: The id of the requested file + schema: + type: integer + get: + operationId: crc.api.spec_file.get_spec_file_data + summary: Returns only the spec file content + tags: + - Spec Files + responses: + '200': + description: Returns the actual spec file content: application/octet-stream: schema: type: string format: binary - example: '' - + put: + operationId: crc.api.spec_file.update_spec_file_data + summary: Update the contents of a spec file + tags: + - Spec Files + requestBody: + content: + multipart/form-data: + schema: + x-body-name: file + type: object + properties: + file: + type: string + format: binary + required: + - file + responses: + '200': + description: Returns the updated file model + content: + application/json: + schema: + $ref: "#/components/schemas/File" /dmn_from_ss: -# parameters: -# - name: workflow_spec_id -# in: query -# required: true -# description: The unique id of a workflow specification -# schema: -# type: string post: operationId: crc.api.file.dmn_from_ss summary: Create a DMN table from a spreadsheet @@ -1537,6 +1706,7 @@ paths: text/plain: schema: type: string + /datastore: post: operationId: crc.api.data_store.add_datastore @@ -1555,7 +1725,6 @@ paths: application/json: schema: $ref: "#/components/schemas/DataStore" - /datastore/{id}: parameters: - name: id @@ -1609,8 +1778,6 @@ paths: application/json: schema: $ref: "#/components/schemas/DataStore" - - /datastore/study/{study_id}: parameters: - name: study_id @@ -1674,6 +1841,7 @@ paths: application/json: schema: $ref: "#/components/schemas/DataStore" + components: securitySchemes: jwt: diff --git a/crc/api/file.py b/crc/api/file.py index 8e4f4595..12e3cb9c 100644 --- a/crc/api/file.py +++ b/crc/api/file.py @@ -1,6 +1,5 @@ import io from datetime import datetime -from typing import List import connexion from flask import send_file @@ -8,41 +7,41 @@ from flask import send_file from crc import session from crc.api.common import ApiError from crc.api.user import verify_token -from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel, FileType -from crc.models.workflow import WorkflowSpecModel +from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel from crc.services.document_service import DocumentService from crc.services.file_service import FileService +from crc.services.reference_file_service import ReferenceFileService +from crc.services.spec_file_service import SpecFileService + def to_file_api(file_model): """Converts a FileModel object to something we can return via the api""" - return File.from_models(file_model, FileService.get_file_data(file_model.id), + if file_model.workflow_spec_id is not None: + file_data_model = SpecFileService().get_spec_file_data(file_model.id) + elif file_model.is_reference: + file_data_model = ReferenceFileService().get_reference_file_data(file_model.name) + else: + file_data_model = FileService.get_file_data(file_model.id) + return File.from_models(file_model, file_data_model, DocumentService.get_dictionary()) -def get_files(workflow_spec_id=None, workflow_id=None, form_field_key=None,study_id=None): - if all(v is None for v in [workflow_spec_id, workflow_id, form_field_key,study_id]): +def get_files(workflow_id=None, form_field_key=None, study_id=None): + if workflow_id is None: raise ApiError('missing_parameter', - 'Please specify either a workflow_spec_id or a ' - 'workflow_id with an optional 
form_field_key') + 'Please specify a workflow_id with an optional form_field_key') if study_id is not None: file_models = FileService.get_files_for_study(study_id=study_id, irb_doc_code=form_field_key) else: - file_models = FileService.get_files(workflow_spec_id=workflow_spec_id, - workflow_id=workflow_id, - irb_doc_code=form_field_key) + file_models = FileService.get_files(workflow_id=workflow_id, + irb_doc_code=form_field_key) files = (to_file_api(model) for model in file_models) return FileSchema(many=True).dump(files) -def get_reference_files(): - results = FileService.get_files(is_reference=True) - files = (to_file_api(model) for model in results) - return FileSchema(many=True).dump(files) - - -def add_file(workflow_spec_id=None, workflow_id=None, task_spec_name=None, form_field_key=None): +def add_file(workflow_id=None, task_spec_name=None, form_field_key=None): file = connexion.request.files['file'] if workflow_id: if form_field_key is None: @@ -55,65 +54,12 @@ def add_file(workflow_spec_id=None, workflow_id=None, task_spec_name=None, form_ task_spec_name=task_spec_name, name=file.filename, content_type=file.content_type, binary_data=file.stream.read()) - elif workflow_spec_id: - # check if we have a primary already - have_primary = FileModel.query.filter(FileModel.workflow_spec_id==workflow_spec_id, FileModel.type==FileType.bpmn, FileModel.primary==True).all() - # set this to primary if we don't already have one - if not have_primary: - primary = True - else: - primary = False - workflow_spec = session.query(WorkflowSpecModel).filter_by(id=workflow_spec_id).first() - file_model = FileService.add_workflow_spec_file(workflow_spec, file.filename, file.content_type, - file.stream.read(), primary=primary) else: raise ApiError("invalid_file", "You must supply either a workflow spec id or a workflow_id and form_field_key.") return FileSchema().dump(to_file_api(file_model)) -def get_reference_file(name): - file_data = FileService.get_reference_file_data(name) - return send_file( - io.BytesIO(file_data.data), - attachment_filename=file_data.file_model.name, - mimetype=file_data.file_model.content_type, - cache_timeout=-1 # Don't cache these files on the browser. - ) - - -def set_reference_file(name): - """Uses the file service to manage reference-files. 
They will be used in script tasks to compute values.""" - if 'file' not in connexion.request.files: - raise ApiError('invalid_file', - 'Expected a file named "file" in the multipart form request', status_code=400) - - file = connexion.request.files['file'] - - name_extension = FileService.get_extension(name) - file_extension = FileService.get_extension(file.filename) - if name_extension != file_extension: - raise ApiError('invalid_file_type', - "The file you uploaded has an extension '%s', but it should have an extension of '%s' " % - (file_extension, name_extension)) - - file_models = FileService.get_files(name=name, is_reference=True) - if len(file_models) == 0: - file_model = FileService.add_reference_file(name, file.content_type, file.stream.read()) - else: - file_model = file_models[0] - FileService.update_file(file_models[0], file.stream.read(), file.content_type) - - return FileSchema().dump(to_file_api(file_model)) - - -def add_reference_file(): - file = connexion.request.files['file'] - file_model = FileService.add_reference_file(name=file.filename, content_type=file.content_type, - binary_data=file.stream.read()) - return FileSchema().dump(to_file_api(file_model)) - - def update_file_data(file_id): file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first() file = connexion.request.files['file'] @@ -122,36 +68,48 @@ def update_file_data(file_id): file_model = FileService.update_file(file_model, file.stream.read(), file.content_type) return FileSchema().dump(to_file_api(file_model)) + def get_file_data_by_hash(md5_hash): filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first() - return get_file_data(filedatamodel.file_model_id,version=filedatamodel.version) + return get_file_data(filedatamodel.file_model_id, version=filedatamodel.version) + def get_file_data(file_id, version=None): - file_data = FileService.get_file_data(file_id, version) - if file_data is None: - raise ApiError('no_such_file', f'The file id you provided ({file_id}) does not exist') - return send_file( - io.BytesIO(file_data.data), - attachment_filename=file_data.file_model.name, - mimetype=file_data.file_model.content_type, - cache_timeout=-1, # Don't cache these files on the browser. - last_modified=file_data.date_created - ) + file_model = session.query(FileModel).filter(FileModel.id==file_id).first() + if file_model is not None: + file_data_model = FileService.get_file_data(file_id, version) + if file_data_model is not None: + return send_file( + io.BytesIO(file_data_model.data), + attachment_filename=file_model.name, + mimetype=file_model.content_type, + cache_timeout=-1 # Don't cache these files on the browser. 
+ ) + else: + raise ApiError('missing_data_model', f'The data model for file ({file_id}) does not exist') + else: + raise ApiError('missing_file_model', f'The file id you provided ({file_id}) does not exist') def get_file_data_link(file_id, auth_token, version=None): if not verify_token(auth_token): raise ApiError('not_authenticated', 'You need to include an authorization token in the URL with this') - file_data = FileService.get_file_data(file_id, version) + file_model = session.query(FileModel).filter(FileModel.id==file_id).first() + if file_model.workflow_spec_id is not None: + file_data = SpecFileService().get_spec_file_data(file_id) + elif file_model.is_reference: + file_data = ReferenceFileService().get_reference_file_data(file_model.name) + else: + file_data = FileService.get_file_data(file_id, version) if file_data is None: raise ApiError('no_such_file', f'The file id you provided ({file_id}) does not exist') return send_file( io.BytesIO(file_data.data), - attachment_filename=file_data.file_model.name, - mimetype=file_data.file_model.content_type, + attachment_filename=file_model.name, + mimetype=file_model.content_type, cache_timeout=-1, # Don't cache these files on the browser. last_modified=file_data.date_created, - as_attachment = True + as_attachment=True ) diff --git a/crc/api/reference_file.py b/crc/api/reference_file.py new file mode 100644 index 00000000..43cf2e96 --- /dev/null +++ b/crc/api/reference_file.py @@ -0,0 +1,94 @@ +from crc import session +from crc.api.common import ApiError +from crc.api.file import to_file_api +from crc.models.file import FileModel, FileSchema, CONTENT_TYPES +from crc.services.file_service import FileService +from crc.services.reference_file_service import ReferenceFileService + +from flask import send_file + +import io +import connexion + + +def get_reference_files(): + """Gets a list of all reference files""" + results = ReferenceFileService.get_reference_files() + files = (to_file_api(model) for model in results) + return FileSchema(many=True).dump(files) + + +def get_reference_file_data(name): + file_extension = FileService.get_extension(name) + content_type = CONTENT_TYPES[file_extension] + file_data = ReferenceFileService().get_reference_file_data(name) + return send_file( + io.BytesIO(file_data.data), + attachment_filename=name, + mimetype=content_type, + cache_timeout=-1 # Don't cache these files on the browser. + ) + + +def get_reference_file_info(name): + """Return metadata for a reference file""" + file_model = session.query(FileModel).\ + filter_by(name=name).with_for_update().\ + filter_by(archived=False).with_for_update().\ + first() + if file_model is None: + # TODO: Should this be 204 or 404? + raise ApiError('no_such_file', f'The reference file name you provided ({name}) does not exist', status_code=404) + return FileSchema().dump(to_file_api(file_model)) + + +def update_reference_file_data(name): + """Uses the file service to manage reference-files. 
They will be used in script tasks to compute values.""" + if 'file' not in connexion.request.files: + raise ApiError('invalid_file', + 'Expected a file named "file" in the multipart form request', status_code=400) + + file = connexion.request.files['file'] + + name_extension = FileService.get_extension(name) + file_extension = FileService.get_extension(file.filename) + if name_extension != file_extension: + raise ApiError('invalid_file_type', + "The file you uploaded has an extension '%s', but it should have an extension of '%s' " % + (file_extension, name_extension)) + + file_model = session.query(FileModel).filter(FileModel.name==name).first() + if not file_model: + raise ApiError(code='file_does_not_exist', + message=f"The reference file {name} does not exist.") + else: + ReferenceFileService().update_reference_file(file_model, file.stream.read()) + + return FileSchema().dump(to_file_api(file_model)) + + +# TODO: do we need a test for this? +def update_reference_file_info(name, body): + if name is None: + raise ApiError(code='missing_parameter', + message='Please provide a reference file name') + file_model = session.query(FileModel).filter(FileModel.name==name).first() + if file_model is None: + raise ApiError(code='no_such_file', + message=f"No reference file was found with name: {name}") + new_file_model = ReferenceFileService.update_reference_file_info(file_model, body) + return FileSchema().dump(to_file_api(new_file_model)) + + +def add_reference_file(): + file = connexion.request.files['file'] + file_model = ReferenceFileService.add_reference_file(name=file.filename, + content_type=file.content_type, + binary_data=file.stream.read()) + return FileSchema().dump(to_file_api(file_model)) + + +def delete_reference_file(name): + ReferenceFileService().delete_reference_file(name) + + diff --git a/crc/api/spec_file.py b/crc/api/spec_file.py new file mode 100644 index 00000000..673826ee --- /dev/null +++ b/crc/api/spec_file.py @@ -0,0 +1,97 @@ +from crc import session +from crc.api.common import ApiError +from crc.api.file import to_file_api, get_file_info +from crc.models.file import FileModel, FileSchema, FileType +from crc.models.workflow import WorkflowSpecModel +from crc.services.spec_file_service import SpecFileService + +from flask import send_file + +import io +import connexion + + +def get_spec_files(workflow_spec_id, include_libraries=False): + if workflow_spec_id is None: + raise ApiError(code='missing_spec_id', + message='Please specify the workflow_spec_id.') + file_models = SpecFileService.get_spec_files(workflow_spec_id=workflow_spec_id, + include_libraries=include_libraries) + files = [to_file_api(model) for model in file_models] + return FileSchema(many=True).dump(files) + + +def add_spec_file(workflow_spec_id): + if workflow_spec_id: + file = connexion.request.files['file'] + # check if we have a primary already + have_primary = FileModel.query.filter(FileModel.workflow_spec_id==workflow_spec_id, FileModel.type==FileType.bpmn, FileModel.primary==True).all() + # set this to primary if we don't already have one + if not have_primary: + primary = True + else: + primary = False + workflow_spec = session.query(WorkflowSpecModel).filter_by(id=workflow_spec_id).first() + file_model = SpecFileService.add_workflow_spec_file(workflow_spec, file.filename, file.content_type, + file.stream.read(), primary=primary) + return FileSchema().dump(to_file_api(file_model)) + else: + raise ApiError(code='missing_workflow_spec_id', + message="You must include a workflow_spec_id") + + +def 
update_spec_file_data(file_id): + file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first() + if file_model is None: + raise ApiError('no_such_file', f'The file id you provided ({file_id}) does not exist') + if file_model.workflow_spec_id is None: + raise ApiError(code='no_spec_id', + message=f'There is no workflow_spec_id for file {file_id}.') + workflow_spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id==file_model.workflow_spec_id).first() + if workflow_spec_model is None: + raise ApiError(code='missing_spec', + message=f'The workflow spec for id {file_model.workflow_spec_id} does not exist.') + + file = connexion.request.files['file'] + SpecFileService().update_spec_file_data(workflow_spec_model, file_model.name, file.stream.read()) + return FileSchema().dump(to_file_api(file_model)) + + +def get_spec_file_data(file_id): + file_model = session.query(FileModel).filter(FileModel.id==file_id).first() + if file_model is not None: + file_data_model = SpecFileService().get_spec_file_data(file_id) + if file_data_model is not None: + return send_file( + io.BytesIO(file_data_model.data), + attachment_filename=file_model.name, + mimetype=file_model.content_type, + cache_timeout=-1 # Don't cache these files on the browser. + ) + else: + raise ApiError(code='missing_data_model', + message=f'The data model for file {file_id} does not exist.') + else: + raise ApiError(code='missing_file_model', + message=f'The file model for file_id {file_id} does not exist.') + + +def get_spec_file_info(file_id): + return get_file_info(file_id) + + +def update_spec_file_info(file_id, body): + if file_id is None: + raise ApiError('no_such_file', 'Please provide a valid File ID.') + file_model = session.query(FileModel).filter(FileModel.id==file_id).first() + if file_model is None: + raise ApiError('unknown_file_model', f'The file_model "{file_id}" is not recognized.') + + new_file_model = SpecFileService().update_spec_file_info(file_model, body) + return FileSchema().dump(to_file_api(new_file_model)) + + +def delete_spec_file(file_id): + SpecFileService.delete_spec_file(file_id) + + diff --git a/crc/models/api_models.py b/crc/models/api_models.py index 38e44b21..55eb5289 100644 --- a/crc/models/api_models.py +++ b/crc/models/api_models.py @@ -212,15 +212,13 @@ class DocumentDirectory(object): class WorkflowApi(object): def __init__(self, id, status, next_task, navigation, - spec_version, is_latest_spec, workflow_spec_id, total_tasks, completed_tasks, + workflow_spec_id, total_tasks, completed_tasks, last_updated, is_review, title, study_id): self.id = id self.status = status self.next_task = next_task # The next task that requires user input. 
self.navigation = navigation self.workflow_spec_id = workflow_spec_id - self.spec_version = spec_version - self.is_latest_spec = is_latest_spec self.total_tasks = total_tasks self.completed_tasks = completed_tasks self.last_updated = last_updated @@ -232,7 +230,7 @@ class WorkflowApiSchema(ma.Schema): class Meta: model = WorkflowApi fields = ["id", "status", "next_task", "navigation", - "workflow_spec_id", "spec_version", "is_latest_spec", "total_tasks", "completed_tasks", + "workflow_spec_id", "total_tasks", "completed_tasks", "last_updated", "is_review", "title", "study_id"] unknown = INCLUDE @@ -243,7 +241,7 @@ class WorkflowApiSchema(ma.Schema): @marshmallow.post_load def make_workflow(self, data, **kwargs): keys = ['id', 'status', 'next_task', 'navigation', - 'workflow_spec_id', 'spec_version', 'is_latest_spec', "total_tasks", "completed_tasks", + 'workflow_spec_id', "total_tasks", "completed_tasks", "last_updated", "is_review", "title", "study_id"] filtered_fields = {key: data[key] for key in keys} filtered_fields['next_task'] = TaskSchema().make_task(data['next_task']) diff --git a/crc/models/file.py b/crc/models/file.py index 0ccdbddd..9a6d29b6 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -1,7 +1,6 @@ import enum import urllib -import connexion import flask from flask import url_for from marshmallow import INCLUDE, EXCLUDE, Schema @@ -12,7 +11,7 @@ from sqlalchemy import func, Index from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import deferred, relationship -from crc import db, ma, app +from crc import db, ma from crc.models.data_store import DataStoreModel @@ -100,7 +99,7 @@ class FileModel(db.Model): class File(object): @classmethod - def from_models(cls, model: FileModel, data_model: FileDataModel, doc_dictionary): + def from_models(cls, model: FileModel, data_model, doc_dictionary): instance = cls() instance.id = model.id instance.name = model.name @@ -175,9 +174,11 @@ class LookupFileModel(db.Model): task_spec_id = db.Column(db.String) field_id = db.Column(db.String) is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup. 
- file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id')) + file_model_id = db.Column(db.Integer, db.ForeignKey('file.id')) + last_updated = db.Column(db.DateTime(timezone=True)) dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan") + file_model = db.relationship("FileModel") class LookupDataModel(db.Model): diff --git a/crc/models/study.py b/crc/models/study.py index 5848dfc6..6b1a90d2 100644 --- a/crc/models/study.py +++ b/crc/models/study.py @@ -140,7 +140,6 @@ class WorkflowMetadata(object): id=workflow.id, display_name=workflow.workflow_spec.display_name, description=workflow.workflow_spec.description, - spec_version=workflow.spec_version(), category_id=workflow.workflow_spec.category_id, category_display_name=workflow.workflow_spec.category.display_name, state=WorkflowState.optional, diff --git a/crc/models/workflow.py b/crc/models/workflow.py index 861ba579..672a517a 100644 --- a/crc/models/workflow.py +++ b/crc/models/workflow.py @@ -89,15 +89,6 @@ class WorkflowStatus(enum.Enum): erroring = "erroring" -class WorkflowSpecDependencyFile(db.Model): - """Connects to a workflow to test the version of the specification files it depends on to execute""" - file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True) - workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True) - - file_data = db.relationship(FileDataModel) - - - class WorkflowLibraryModelSchema(SQLAlchemyAutoSchema): class Meta: model = WorkflowLibraryModel @@ -106,6 +97,7 @@ class WorkflowLibraryModelSchema(SQLAlchemyAutoSchema): library = marshmallow.fields.Nested('WorkflowSpecModelSchema') + class WorkflowModel(db.Model): __tablename__ = 'workflow' id = db.Column(db.Integer, primary_key=True) @@ -119,10 +111,3 @@ class WorkflowModel(db.Model): completed_tasks = db.Column(db.Integer, default=0) last_updated = db.Column(db.DateTime(timezone=True), server_default=func.now()) user_id = db.Column(db.String, default=None) - # Order By is important or generating hashes on reviews. - dependencies = db.relationship(WorkflowSpecDependencyFile, cascade="all, delete, delete-orphan", - order_by="WorkflowSpecDependencyFile.file_data_id") - - def spec_version(self): - dep_ids = list(dep.file_data_id for dep in self.dependencies) - return "-".join(str(dep_ids)) diff --git a/crc/scripts/complete_template.py b/crc/scripts/complete_template.py index f545c7c6..eda5c7b4 100644 --- a/crc/scripts/complete_template.py +++ b/crc/scripts/complete_template.py @@ -10,6 +10,7 @@ from crc.models.workflow import WorkflowModel from crc.scripts.script import Script from crc.services.file_service import FileService from crc.services.jinja_service import JinjaService +from crc.services.spec_file_service import SpecFileService from crc.services.workflow_processor import WorkflowProcessor @@ -56,18 +57,17 @@ Takes two arguments: raise ApiError(code="invalid_argument", message="The given task does not match the given study.") - file_data_model = None + file_data = None if workflow is not None: # Get the workflow specification file with the given name. 
- file_data_models = FileService.get_spec_data_files( - workflow_spec_id=workflow.workflow_spec_id, - workflow_id=workflow.id, - name=file_name) - if len(file_data_models) > 0: - file_data_model = file_data_models[0] + file_models = SpecFileService().get_spec_files( + workflow_spec_id=workflow.workflow_spec_id, file_name=file_name) + if len(file_models) > 0: + file_model = file_models[0] else: raise ApiError(code="invalid_argument", message="Unable to locate a file with the given name.") + file_data = SpecFileService().get_spec_file_data(file_model.id).data # Get images from file/files fields if len(args) == 3: @@ -76,7 +76,7 @@ Takes two arguments: image_file_data = None try: - return JinjaService().make_template(BytesIO(file_data_model.data), task.data, image_file_data) + return JinjaService().make_template(BytesIO(file_data), task.data, image_file_data) except ApiError as ae: # In some cases we want to provide a very specific error, that does not get obscured when going # through the python expression engine. We can do that by throwing a WorkflowTaskExecException, diff --git a/crc/services/document_service.py b/crc/services/document_service.py index c7437eb9..74ebee2c 100644 --- a/crc/services/document_service.py +++ b/crc/services/document_service.py @@ -1,6 +1,7 @@ +from crc import session from crc.api.common import ApiError from crc.models.api_models import DocumentDirectory -from crc.services.file_service import FileService +from crc.models.file import FileModel from crc.services.lookup_service import LookupService @@ -37,8 +38,11 @@ class DocumentService(object): @staticmethod def get_dictionary(): """Returns a dictionary of document details keyed on the doc_code.""" - file_data = FileService.get_reference_file_data(DocumentService.DOCUMENT_LIST) - lookup_model = LookupService.get_lookup_model_for_file_data(file_data, 'code', 'description') + file_id = session.query(FileModel.id). \ + filter(FileModel.name == DocumentService.DOCUMENT_LIST). \ + filter(FileModel.is_reference == True). 
\ + scalar() + lookup_model = LookupService.get_lookup_model_for_file_data(file_id, DocumentService.DOCUMENT_LIST, 'code', 'description') doc_dict = {} for lookup_data in lookup_model.dependencies: doc_dict[lookup_data.value] = lookup_data.data diff --git a/crc/services/file_service.py b/crc/services/file_service.py index 6a148b68..e3a4248f 100644 --- a/crc/services/file_service.py +++ b/crc/services/file_service.py @@ -1,8 +1,6 @@ import hashlib import io -import json import os -from datetime import datetime import random import string @@ -11,8 +9,6 @@ from github import Github, GithubObject, UnknownObjectException from uuid import UUID from lxml import etree -from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException -from lxml.etree import XMLSyntaxError from sqlalchemy import desc from sqlalchemy.exc import IntegrityError @@ -20,7 +16,7 @@ from crc import session, app from crc.api.common import ApiError from crc.models.data_store import DataStoreModel from crc.models.file import FileType, FileDataModel, FileModel, LookupFileModel, LookupDataModel -from crc.models.workflow import WorkflowSpecModel, WorkflowModel, WorkflowSpecDependencyFile, WorkflowLibraryModel +from crc.models.workflow import WorkflowModel from crc.services.cache_service import cache from crc.services.user_service import UserService import re @@ -41,31 +37,6 @@ def camel_to_snake(camel): class FileService(object): - @staticmethod - def add_workflow_spec_file(workflow_spec: WorkflowSpecModel, - name, content_type, binary_data, primary=False, is_status=False): - """Create a new file and associate it with a workflow spec.""" - file_model = session.query(FileModel)\ - .filter(FileModel.workflow_spec_id == workflow_spec.id)\ - .filter(FileModel.name == name).first() - - if file_model: - if not file_model.archived: - # Raise ApiError if the file already exists and is not archived - raise ApiError(code="duplicate_file", - message='If you want to replace the file, use the update mechanism.') - else: - file_model = FileModel( - workflow_spec_id=workflow_spec.id, - name=name, - primary=primary, - is_status=is_status, - ) - - return FileService.update_file(file_model, binary_data, content_type) - - - @staticmethod @cache def is_workflow_review(workflow_spec_id): @@ -113,20 +84,6 @@ class FileService(object): filter(FileModel.archived == False).\ order_by(FileModel.id).all() - @staticmethod - def add_reference_file(name, content_type, binary_data): - """Create a file with the given name, but not associated with a spec or workflow. - Only one file with the given reference name can exist.""" - file_model = session.query(FileModel). \ - filter(FileModel.is_reference == True). \ - filter(FileModel.name == name).first() - if not file_model: - file_model = FileModel( - name=name, - is_reference=True - ) - return FileService.update_file(file_model, binary_data, content_type) - @staticmethod def get_extension(file_name): basename, file_extension = os.path.splitext(file_name) @@ -167,15 +124,6 @@ class FileService(object): else: version = latest_data_model.version + 1 - # If this is a BPMN, extract the process id. 
- if file_model.type == FileType.bpmn: - try: - bpmn: etree.Element = etree.fromstring(binary_data) - file_model.primary_process_id = FileService.get_process_id(bpmn) - file_model.is_review = FileService.has_swimlane(bpmn) - except XMLSyntaxError as xse: - raise ApiError("invalid_xml", "Failed to parse xml: " + str(xse), file_name=file_model.name) - try: user_uid = UserService.current_user().uid except ApiError as ae: @@ -204,30 +152,6 @@ class FileService(object): retval = True return retval - @staticmethod - def get_process_id(et_root: etree.Element): - process_elements = [] - for child in et_root: - if child.tag.endswith('process') and child.attrib.get('isExecutable', False): - process_elements.append(child) - - if len(process_elements) == 0: - raise ValidationException('No executable process tag found') - - # There are multiple root elements - if len(process_elements) > 1: - - # Look for the element that has the startEvent in it - for e in process_elements: - this_element: etree.Element = e - for child_element in list(this_element): - if child_element.tag.endswith('startEvent'): - return this_element.attrib['id'] - - raise ValidationException('No start event found in %s' % et_root.attrib['id']) - - return process_elements[0].attrib['id'] - @staticmethod def get_files_for_study(study_id, irb_doc_code=None): query = session.query(FileModel).\ @@ -239,59 +163,20 @@ class FileService(object): return query.all() @staticmethod - def get_files(workflow_spec_id=None, workflow_id=None, - name=None, is_reference=False, irb_doc_code=None, include_libraries=False): - query = session.query(FileModel).filter_by(is_reference=is_reference) - if workflow_spec_id: - if include_libraries: - libraries = session.query(WorkflowLibraryModel).filter( - WorkflowLibraryModel.workflow_spec_id==workflow_spec_id).all() - library_workflow_specs = [x.library_spec_id for x in libraries] - library_workflow_specs.append(workflow_spec_id) - query = query.filter(FileModel.workflow_spec_id.in_(library_workflow_specs)) - else: - query = query.filter(FileModel.workflow_spec_id == workflow_spec_id) - - elif workflow_id: - query = query.filter_by(workflow_id=workflow_id) + def get_files(workflow_id=None, name=None, irb_doc_code=None): + query = session.query(FileModel) + if workflow_id is not None: + query = query.filter_by(workflow_id=workflow_id) if irb_doc_code: query = query.filter_by(irb_doc_code=irb_doc_code) - elif is_reference: - query = query.filter_by(is_reference=True) - if name: - query = query.filter_by(name=name) - - query = query.filter(FileModel.archived == False) - - query = query.order_by(FileModel.id) - - results = query.all() - return results - - @staticmethod - def get_spec_data_files(workflow_spec_id, workflow_id=None, name=None, include_libraries=False): - """Returns all the FileDataModels related to a workflow specification. 
- If a workflow is specified, returns the version of the spec related - to that workflow, otherwise, returns the lastest files.""" - if workflow_id: - query = session.query(FileDataModel) \ - .join(WorkflowSpecDependencyFile) \ - .filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \ - .order_by(FileDataModel.id) if name: - query = query.join(FileModel).filter(FileModel.name == name) - return query.all() - else: - """Returns all the latest files related to a workflow specification""" - file_models = FileService.get_files(workflow_spec_id=workflow_spec_id,include_libraries=include_libraries) - latest_data_files = [] - for file_model in file_models: - if name and file_model.name == name: - latest_data_files.append(FileService.get_file_data(file_model.id)) - elif not name: - latest_data_files.append(FileService.get_file_data(file_model.id)) - return latest_data_files + query = query.filter_by(name=name) + + query = query.filter(FileModel.archived == False) + query = query.order_by(FileModel.id) + + results = query.all() + return results @staticmethod def get_workflow_data_files(workflow_id=None): @@ -315,60 +200,13 @@ class FileService(object): query = query.order_by(desc(FileDataModel.date_created)) return query.first() - @staticmethod - def get_reference_file_data(file_name): - file_model = session.query(FileModel). \ - filter(FileModel.is_reference == True). \ - filter(FileModel.name == file_name).first() - if not file_model: - raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name) - return FileService.get_file_data(file_model.id) - - @staticmethod - def get_workflow_file_data(workflow, file_name): - """This method should be deleted, find where it is used, and remove this method. - Given a SPIFF Workflow Model, tracks down a file with the given name in the database and returns its data""" - workflow_spec_model = FileService.find_spec_model_in_db(workflow) - - if workflow_spec_model is None: - raise ApiError(code="unknown_workflow", - message="Something is wrong. I can't find the workflow you are using.") - - file_data_model = session.query(FileDataModel) \ - .join(FileModel) \ - .filter(FileModel.name == file_name) \ - .filter(FileModel.workflow_spec_id == workflow_spec_model.id).first() - - if file_data_model is None: - raise ApiError(code="file_missing", - message="Can not find a file called '%s' within workflow specification '%s'" - % (file_name, workflow_spec_model.id)) - - return file_data_model - - @staticmethod - def find_spec_model_in_db(workflow): - """ Search for the workflow """ - # When the workflow spec model is created, we record the primary process id, - # then we can look it up. As there is the potential for sub-workflows, we - # may need to travel up to locate the primary process. - spec = workflow.spec - workflow_model = session.query(WorkflowSpecModel).join(FileModel). 
\ - filter(FileModel.primary_process_id == spec.name).first() - if workflow_model is None and workflow != workflow.outer_workflow: - return FileService.find_spec_model_in_db(workflow.outer_workflow) - - return workflow_model - @staticmethod def delete_file(file_id): try: - data_models = session.query(FileDataModel).filter_by(file_model_id=file_id).all() - for dm in data_models: - lookup_files = session.query(LookupFileModel).filter_by(file_data_model_id=dm.id).all() - for lf in lookup_files: - session.query(LookupDataModel).filter_by(lookup_file_model_id=lf.id).delete() - session.query(LookupFileModel).filter_by(id=lf.id).delete() + lookup_files = session.query(LookupFileModel).filter_by(file_model_id=file_id).all() + for lf in lookup_files: + session.query(LookupDataModel).filter_by(lookup_file_model_id=lf.id).delete() + session.query(LookupFileModel).filter_by(id=lf.id).delete() session.query(FileDataModel).filter_by(file_model_id=file_id).delete() session.query(DataStoreModel).filter_by(file_id=file_id).delete() session.query(FileModel).filter_by(id=file_id).delete() @@ -547,49 +385,3 @@ class FileService(object): dmn_file = prefix + etree.tostring(root) return dmn_file - - @staticmethod - def cleanup_file_data(copies_to_keep=1): - if isinstance(copies_to_keep, int) and copies_to_keep > 0: - - deleted_models = [] - saved_models = [] - current_models = [] - - session.flush() - - workflow_spec_models = session.query(WorkflowSpecModel).all() - - for wf_spec_model in workflow_spec_models: - file_models = session.query(FileModel)\ - .filter(FileModel.workflow_spec_id == wf_spec_model.id)\ - .all() - - for file_model in file_models: - file_data_models = session.query(FileDataModel)\ - .filter(FileDataModel.file_model_id == file_model.id)\ - .order_by(desc(FileDataModel.date_created))\ - .all() - current_models.append(file_data_models[:copies_to_keep]) - for fd_model in file_data_models[copies_to_keep:]: - dependencies = session.query(WorkflowSpecDependencyFile)\ - .filter(WorkflowSpecDependencyFile.file_data_id == fd_model.id)\ - .all() - if len(dependencies) > 0: - saved_models.append(fd_model) - continue - lookups = session.query(LookupFileModel)\ - .filter(LookupFileModel.file_data_model_id == fd_model.id)\ - .all() - if len(lookups) > 0: - saved_models.append(fd_model) - continue - deleted_models.append(fd_model) - session.delete(fd_model) - - session.commit() - return current_models, saved_models, deleted_models - - else: - raise ApiError(code='bad_keep', - message='You must keep at least 1 version') diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index 7b85b42c..772c160a 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -4,7 +4,6 @@ from collections import OrderedDict from zipfile import BadZipFile import pandas as pd -import numpy from pandas import ExcelFile from pandas._libs.missing import NA from sqlalchemy import desc @@ -13,10 +12,11 @@ from sqlalchemy.sql.functions import GenericFunction from crc import db from crc.api.common import ApiError from crc.models.api_models import Task -from crc.models.file import FileModel, FileDataModel, LookupFileModel, LookupDataModel +from crc.models.file import LookupFileModel, LookupDataModel from crc.models.ldap import LdapSchema -from crc.models.workflow import WorkflowModel, WorkflowSpecDependencyFile -from crc.services.file_service import FileService +from crc.models.workflow import WorkflowModel +from crc.services.spec_file_service import SpecFileService +from 
crc.services.reference_file_service import ReferenceFileService from crc.services.ldap_service import LdapService from crc.services.workflow_processor import WorkflowProcessor @@ -50,11 +50,12 @@ class LookupService(object): return LookupService.__get_lookup_model(workflow, spiff_task.task_spec.name, field.id) @staticmethod - def get_lookup_model_for_file_data(file_data: FileDataModel, value_column, label_column): - lookup_model = db.session.query(LookupFileModel).filter(LookupFileModel.file_data_model_id == file_data.id).first() + def get_lookup_model_for_file_data(file_id, file_name, value_column, label_column): + file_data = ReferenceFileService().get_reference_file_data(file_name) + lookup_model = db.session.query(LookupFileModel).filter(LookupFileModel.file_model_id == file_id).first() if not lookup_model: logging.warning("!!!! Making a very expensive call to update the lookup model.") - lookup_model = LookupService.build_lookup_table(file_data, value_column, label_column) + lookup_model = LookupService.build_lookup_table(file_id, file_name, file_data.data, value_column, label_column) return lookup_model @staticmethod @@ -65,17 +66,15 @@ class LookupService(object): .filter(LookupFileModel.task_spec_id == task_spec_id) \ .order_by(desc(LookupFileModel.id)).first() - # one more quick query, to see if the lookup file is still related to this workflow. - # if not, we need to rebuild the lookup table. + # The above may return a model; if it does, it might still be out of date. + # We need to check the file date to ensure we have the most recent file. is_current = False if lookup_model: if lookup_model.is_ldap: # LDAP is always current is_current = True else: - is_current = db.session.query(WorkflowSpecDependencyFile). \ - filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).\ - filter(WorkflowSpecDependencyFile.workflow_id == workflow.id).count() - + current_date = SpecFileService().last_modified(lookup_model.file_model.id) + is_current = current_date == lookup_model.last_updated if not is_current: # Very very very expensive, but we won't know we need this till we do. 
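`SpecFileService.last_modified` does not appear in the hunks shown in this section, so the staleness check above is easiest to read against a sketch of what it presumably does: map the file model to its on-disk path and return the filesystem mtime, mirroring how `ReferenceFileService.get_reference_file_data` uses `os.path.getmtime` below. Treat the body as an assumption:

```python
import datetime
import os


def last_modified(file_path):
    """Return the on-disk modification time as a datetime."""
    mtime = os.path.getmtime(file_path)  # seconds since the epoch
    return datetime.datetime.fromtimestamp(mtime)

# The lookup model is considered current only when it was built from the
# file's latest write:
#   is_current = last_modified(path) == lookup_model.last_updated
```

This replaces the old `WorkflowSpecDependencyFile` join: instead of pinning a workflow to specific file-data versions, the cached lookup table is rebuilt whenever the file on disk is newer than the model's `last_updated` stamp.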
@@ -131,15 +130,16 @@ class LookupService(object): file_name = field.get_property(Task.FIELD_PROP_SPREADSHEET_NAME) value_column = field.get_property(Task.FIELD_PROP_VALUE_COLUMN) label_column = field.get_property(Task.FIELD_PROP_LABEL_COLUMN) - latest_files = FileService.get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id, - workflow_id=workflow_model.id, - name=file_name) + latest_files = SpecFileService().get_spec_files(workflow_spec_id=workflow_model.workflow_spec_id, + file_name=file_name) if len(latest_files) < 1: raise ApiError("invalid_enum", "Unable to locate the lookup data file '%s'" % file_name) else: - data_model = latest_files[0] + file = latest_files[0] - lookup_model = LookupService.build_lookup_table(data_model, value_column, label_column, + file_data = SpecFileService().get_spec_file_data(file.id).data + + lookup_model = LookupService.build_lookup_table(file.id, file_name, file_data, value_column, label_column, workflow_model.workflow_spec_id, task_spec_id, field_id) # Use the results of an LDAP request to populate enum field options @@ -158,19 +158,19 @@ class LookupService(object): return lookup_model @staticmethod - def build_lookup_table(data_model: FileDataModel, value_column, label_column, + def build_lookup_table(file_id, file_name, file_data, value_column, label_column, workflow_spec_id=None, task_spec_id=None, field_id=None): """ In some cases the lookup table can be very large. This method will add all values to the database in a way that can be searched and returned via an api call - rather than sending the full set of options along with the form. It will only open the file and process the options if something has changed. """ try: - xlsx = ExcelFile(data_model.data, engine='openpyxl') + xlsx = ExcelFile(file_data, engine='openpyxl') # Pandas--or at least openpyxl--cannot read old xls files. # The error comes back as zipfile.BadZipFile because xlsx files are zipped xml files except BadZipFile: raise ApiError(code='excel_error', - message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})') + message=f"Error opening excel file {file_name}. You may have an older .xls spreadsheet. (file_model_id: {file_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})") df = xlsx.parse(xlsx.sheet_names[0]) # Currently we only look at the first sheet. df = df.convert_dtypes() df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns. 
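The spreadsheet handling above is standard pandas/openpyxl usage and can be exercised standalone; `sponsors.xlsx` is a placeholder file name. openpyxl reads only `.xlsx` (zipped XML), so a legacy `.xls` surfaces as `zipfile.BadZipFile`, which is why `build_lookup_table` catches that exception:

```python
from zipfile import BadZipFile

from pandas import ExcelFile

try:
    xlsx = ExcelFile('sponsors.xlsx', engine='openpyxl')  # placeholder file
except BadZipFile:
    raise ValueError('Older .xls spreadsheets are not supported; re-save as .xlsx')

df = xlsx.parse(xlsx.sheet_names[0])                  # first sheet only
df = df.convert_dtypes()                              # nullable dtypes, keeps NA
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]  # drop unnamed columns
print(df.columns.tolist())
```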
@@ -179,17 +179,17 @@ class LookupService(object): if value_column not in df: raise ApiError("invalid_enum", - "The file %s does not contain a column named % s" % (data_model.file_model.name, + "The file %s does not contain a column named % s" % (file_name, value_column)) if label_column not in df: raise ApiError("invalid_enum", - "The file %s does not contain a column named % s" % (data_model.file_model.name, + "The file %s does not contain a column named % s" % (file_name, label_column)) lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id, field_id=field_id, task_spec_id=task_spec_id, - file_data_model_id=data_model.id, + file_model_id=file_id, is_ldap=False) db.session.add(lookup_model) diff --git a/crc/services/reference_file_service.py b/crc/services/reference_file_service.py new file mode 100644 index 00000000..096b2b75 --- /dev/null +++ b/crc/services/reference_file_service.py @@ -0,0 +1,138 @@ +import datetime +import hashlib +import os + +from crc import app, session +from crc.api.common import ApiError +from crc.models.file import FileModel, FileModelSchema, FileDataModel +from crc.services.file_service import FileService, FileType +from crc.services.spec_file_service import SpecFileService + +from uuid import UUID +from sqlalchemy.exc import IntegrityError + + +class ReferenceFileService(object): + + @staticmethod + def get_reference_file_path(file_name): + sync_file_root = SpecFileService().get_sync_file_root() + file_path = os.path.join(sync_file_root, 'Reference', file_name) + return file_path + + @staticmethod + def add_reference_file(name, content_type, binary_data): + """Create a file with the given name, but not associated with a spec or workflow. + Only one file with the given reference name can exist.""" + file_model = session.query(FileModel). \ + filter(FileModel.is_reference == True). \ + filter(FileModel.name == name).first() + if not file_model: + file_extension = FileService.get_extension(name) + file_type = FileType[file_extension].value + + file_model = FileModel( + name=name, + is_reference=True, + type=file_type, + content_type=content_type + ) + session.add(file_model) + session.commit() + else: + raise ApiError(code='file_already_exists', + message=f"The reference file {name} already exists.") + return ReferenceFileService().update_reference_file(file_model, binary_data) + + def update_reference_file(self, file_model, binary_data): + self.write_reference_file_to_system(file_model, binary_data) + print('update_reference_file') + return file_model + + # TODO: need a test for this? 
+ def update_reference_file_info(self, old_file_model, body): + file_data = self.get_reference_file_data(old_file_model.name) + + self.delete_reference_file_data(old_file_model.name) + self.delete_reference_file_info(old_file_model.name) + + new_file_model = FileModelSchema().load(body, session=session) + new_file_path = self.get_reference_file_path(new_file_model.name) + self.write_reference_file_data_to_system(new_file_model.name, file_data.data) + self.write_reference_file_info_to_system(new_file_path, new_file_model) + return new_file_model + + def get_reference_file_data(self, file_name): + file_model = session.query(FileModel).filter(FileModel.name == file_name).filter( + FileModel.is_reference == True).first() + if file_model is not None: + file_path = self.get_reference_file_path(file_model.name) + if os.path.exists(file_path): + mtime = os.path.getmtime(file_path) + with open(file_path, 'rb') as f_open: + reference_file_data = f_open.read() + size = len(reference_file_data) + md5_checksum = UUID(hashlib.md5(reference_file_data).hexdigest()) + + reference_file_data_model = FileDataModel(data=reference_file_data, + md5_hash=md5_checksum, + size=size, + date_created=datetime.datetime.fromtimestamp(mtime), + file_model_id=file_model.id + ) + return reference_file_data_model + else: + raise ApiError('file_not_found', + f"There was no file in the location: {file_path}") + else: + raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name) + + def write_reference_file_to_system(self, file_model, file_data): + file_path = self.write_reference_file_data_to_system(file_model.name, file_data) + self.write_reference_file_info_to_system(file_path, file_model) + + def write_reference_file_data_to_system(self, file_name, file_data): + file_path = self.get_reference_file_path(file_name) + os.makedirs(os.path.dirname(file_path), exist_ok=True) + with open(file_path, 'wb') as f_handle: + f_handle.write(file_data) + # SpecFileService.write_file_data_to_system(file_path, file_data) + return file_path + + + @staticmethod + def write_reference_file_info_to_system(file_path, file_model): + SpecFileService.write_file_info_to_system(file_path, file_model) + + @staticmethod + def get_reference_files(): + reference_files = session.query(FileModel). \ + filter_by(is_reference=True). \ + filter(FileModel.archived == False). \ + all() + return reference_files + + def delete_reference_file_data(self, file_name): + file_path = self.get_reference_file_path(file_name) + json_file_path = f'{file_path}.json' + os.remove(file_path) + os.remove(json_file_path) + + @staticmethod + def delete_reference_file_info(file_name): + file_model = session.query(FileModel).filter(FileModel.name==file_name).first() + try: + session.delete(file_model) + session.commit() + except IntegrityError as ie: + session.rollback() + file_model = session.query(FileModel).filter(FileModel.name==file_name).first() + file_model.archived = True + session.commit() + app.logger.info("Failed to delete file: %s, so archiving it instead. 
+
+    def delete_reference_file(self, file_name):
+        """This should remove the record in the file table, and both files on the filesystem."""
+        self.delete_reference_file_data(file_name)
+        self.delete_reference_file_info(file_name)
diff --git a/crc/services/spec_file_service.py b/crc/services/spec_file_service.py
new file mode 100644
index 00000000..ebb60131
--- /dev/null
+++ b/crc/services/spec_file_service.py
@@ -0,0 +1,363 @@
+import hashlib
+import json
+import datetime
+import os
+
+from crc import app, session
+from crc.api.common import ApiError
+from crc.models.file import FileModel, FileModelSchema, FileDataModel
+from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel, WorkflowLibraryModel
+from crc.services.file_service import FileService, FileType
+
+from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException
+
+from lxml import etree
+from sqlalchemy.exc import IntegrityError
+from uuid import UUID
+
+
+class SpecFileService(object):
+
+    """We store spec files on the file system. This allows us to take advantage of Git for
+    syncing and versioning.
+
+    We keep a record in the File table, but do not have a record in the FileData table.
+
+    For syncing purposes, we keep a copy of the File table info in a json file.
+
+    This means there are three pieces we have to maintain: the File table record, the file
+    on the file system, and the json info file on the file system.
+
+    The files are stored in a directory whose path is determined by the category and spec names.
+    """
+
+    #
+    # Shared Methods
+    #
+    @staticmethod
+    def get_sync_file_root():
+        dir_name = app.config['SYNC_FILE_ROOT']
+        app_root = app.root_path
+        return os.path.join(app_root, '..', dir_name)
+
+    @staticmethod
+    def get_path_from_spec_file_model(spec_file_model):
+        workflow_spec_model = session.query(WorkflowSpecModel).filter(
+            WorkflowSpecModel.id == spec_file_model.workflow_spec_id).first()
+        category_name = SpecFileService.get_spec_file_category_name(workflow_spec_model)
+        if category_name is not None:
+            sync_file_root = SpecFileService.get_sync_file_root()
+            file_path = os.path.join(sync_file_root,
+                                     category_name,
+                                     workflow_spec_model.display_name,
+                                     spec_file_model.name)
+            return file_path
+
+    @staticmethod
+    def write_file_data_to_system(file_path, file_data):
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
+        with open(file_path, 'wb') as f_handle:
+            f_handle.write(file_data)
+
+    @staticmethod
+    def write_file_info_to_system(file_path, file_model):
+        json_file_path = f'{file_path}.json'
+        latest_file_model = session.query(FileModel).filter(FileModel.id == file_model.id).first()
+        file_schema = FileModelSchema().dumps(latest_file_model)
+        with open(json_file_path, 'w') as j_handle:
+            j_handle.write(file_schema)
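+    # Illustrative example of the mirrored info file (a sketch; the exact field set
+    # comes from FileModelSchema): for a spec file 'random_fact.bpmn', a sibling
+    # 'random_fact.bpmn.json' might hold
+    #     {"id": 42, "name": "random_fact.bpmn", "type": "bpmn", "primary": true, ...}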
+    #
+    # Workflow Spec Methods
+    #
+    @staticmethod
+    def add_workflow_spec_file(workflow_spec: WorkflowSpecModel,
+                               name, content_type, binary_data, primary=False, is_status=False):
+        """Create a new file and associate it with a workflow spec.
+        Three steps: create the file model, write the file data to the filesystem,
+        and write the file info to the filesystem."""
+        file_model = session.query(FileModel)\
+            .filter(FileModel.workflow_spec_id == workflow_spec.id)\
+            .filter(FileModel.name == name).first()
+
+        if file_model:
+            if not file_model.archived:
+                # Raise ApiError if the file already exists and is not archived
+                raise ApiError(code="duplicate_file",
+                               message=f'The file {name} already exists. If you want to replace it, '
+                                       f'use the update mechanism.')
+        else:
+            file_model = FileModel(
+                workflow_spec_id=workflow_spec.id,
+                name=name,
+                primary=primary,
+                is_status=is_status,
+            )
+
+        file_model = SpecFileService.update_workflow_spec_file_model(workflow_spec, file_model, binary_data, content_type)
+        file_path = SpecFileService().write_spec_file_data_to_system(workflow_spec, file_model.name, binary_data)
+        SpecFileService().write_spec_file_info_to_system(file_path, file_model)
+
+        return file_model
+
+    def update_workflow_spec_file(self, workflow_spec_model, file_model, file_data, content_type):
+        self.update_workflow_spec_file_model(workflow_spec_model, file_model, file_data, content_type)
+        file_path = self.update_spec_file_data(workflow_spec_model, file_model.name, file_data)
+        self.write_spec_file_info_to_system(file_path, file_model)
+
+    @staticmethod
+    def update_workflow_spec_file_model(workflow_spec: WorkflowSpecModel, file_model: FileModel, binary_data, content_type):
+        # Verify the extension
+        file_extension = FileService.get_extension(file_model.name)
+        if file_extension not in FileType._member_names_:
+            raise ApiError('unknown_extension',
+                           'The file you provided does not have an accepted extension: ' +
+                           file_extension, status_code=404)
+        else:
+            file_model.type = FileType[file_extension]
+            file_model.content_type = content_type
+            file_model.archived = False  # Unarchive the file if it is archived.
+
+        # If this is a BPMN, extract the process id.
+        if file_model.type == FileType.bpmn:
+            try:
+                bpmn: etree.Element = etree.fromstring(binary_data)
+                file_model.primary_process_id = SpecFileService.get_process_id(bpmn)
+                file_model.is_review = FileService.has_swimlane(bpmn)
+            except etree.XMLSyntaxError as xse:
+                raise ApiError("invalid_xml", "Failed to parse xml: " + str(xse), file_name=file_model.name)
+
+        session.add(file_model)
+        session.commit()
+
+        return file_model
+
+    @staticmethod
+    def update_spec_file_data(workflow_spec, file_name, binary_data):
+        file_path = SpecFileService().write_spec_file_data_to_system(workflow_spec, file_name, binary_data)
+        return file_path
+
+    def update_spec_file_info(self, old_file_model, body):
+        file_data = self.get_spec_file_data(old_file_model.id)
+
+        old_file_path = self.get_path_from_spec_file_model(old_file_model)
+        self.delete_spec_file_data(old_file_path)
+        self.delete_spec_file_info(old_file_path)
+
+        new_file_model = FileModelSchema().load(body, session=session)
+        new_file_path = self.get_path_from_spec_file_model(new_file_model)
+        self.write_file_data_to_system(new_file_path, file_data.data)
+        self.write_file_info_to_system(new_file_path, new_file_model)
+        return new_file_model
+
+    @staticmethod
+    def delete_spec_file_data(file_path):
+        os.remove(file_path)
+
+    @staticmethod
+    def delete_spec_file_info(file_path):
+        json_file_path = f'{file_path}.json'
+        os.remove(json_file_path)
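+    # Unlike ReferenceFileService, the delete helpers above take a full file path
+    # (callers already hold one from get_path_from_spec_file_model); the data file
+    # and its '<name>.json' sidecar are removed together so the two on-disk pieces
+    # never drift apart.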
+    # Placeholder. Not sure if we need this.
+    # Might do this work in delete_spec_file
+    def delete_spec_file_model(self):
+        pass
+
+    @staticmethod
+    def delete_spec_file(file_id):
+        """This should remove the record in the file table, and both files on the filesystem."""
+        sync_file_root = SpecFileService.get_sync_file_root()
+        file_model = session.query(FileModel).filter(FileModel.id==file_id).first()
+        workflow_spec_id = file_model.workflow_spec_id
+        workflow_spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id==workflow_spec_id).first()
+        category_name = SpecFileService.get_spec_file_category_name(workflow_spec_model)
+        file_model_name = file_model.name
+        spec_directory_path = os.path.join(sync_file_root,
+                                           category_name,
+                                           workflow_spec_model.display_name)
+        file_path = os.path.join(spec_directory_path,
+                                 file_model_name)
+        json_file_path = os.path.join(spec_directory_path,
+                                      f'{file_model_name}.json')
+
+        try:
+            os.remove(file_path)
+            os.remove(json_file_path)
+            session.delete(file_model)
+            session.commit()
+        except IntegrityError as ie:
+            session.rollback()
+            file_model = session.query(FileModel).filter_by(id=file_id).first()
+            file_model.archived = True
+            session.commit()
+            app.logger.info("Failed to delete file %i, so archiving it instead. Due to %s" % (file_id, str(ie)))
+
+    def write_spec_file_data_to_system(self, workflow_spec_model, file_name, file_data):
+        if workflow_spec_model is not None:
+            category_name = self.get_spec_file_category_name(workflow_spec_model)
+            if category_name is not None:
+                sync_file_root = self.get_sync_file_root()
+                file_path = os.path.join(sync_file_root,
+                                         category_name,
+                                         workflow_spec_model.display_name,
+                                         file_name)
+                self.write_file_data_to_system(file_path, file_data)
+                return file_path
+
+    def write_spec_file_info_to_system(self, file_path, file_model):
+        self.write_file_info_to_system(file_path, file_model)
+
+    def write_spec_file_to_system(self, workflow_spec_model, file_model, file_data):
+        file_path = self.write_spec_file_data_to_system(workflow_spec_model, file_model.name, file_data)
+        self.write_spec_file_info_to_system(file_path, file_model)
+
+    @staticmethod
+    def get_spec_file_category_name(spec_model):
+        category_name = None
+        if hasattr(spec_model, 'category_id') and spec_model.category_id is not None:
+            category_model = session.query(WorkflowSpecCategoryModel).\
+                filter(WorkflowSpecCategoryModel.id == spec_model.category_id).\
+                first()
+            category_name = category_model.display_name
+
+        elif spec_model.is_master_spec:
+            category_name = 'Master Specification'
+
+        elif spec_model.library:
+            category_name = 'Library Specs'
+
+        elif spec_model.standalone:
+            category_name = 'Standalone'
+
+        return category_name
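+    # Resulting directory layout (a sketch, assuming the default SYNC_FILE_ROOT of
+    # './SPECS'): categorized specs are written under
+    #     ./SPECS/<category display_name>/<spec display_name>/<file name>
+    # while master, library, and standalone specs land under 'Master Specification',
+    # 'Library Specs', and 'Standalone' respectively.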
+    def get_path(self, file_id: int):
+        # Returns the path on the file system for the given File id
+
+        # Assure we have a file.
+        file_model = session.query(FileModel).filter(FileModel.id==file_id).first()
+        if not file_model:
+            raise ApiError(code='model_not_found',
+                           message=f'No model found for file with file_id: {file_id}')
+
+        # Assure we have a spec.
+        spec_model = session.query(WorkflowSpecModel).filter(
+            WorkflowSpecModel.id == file_model.workflow_spec_id).first()
+        if not spec_model:
+            raise ApiError(code='spec_not_found',
+                           message=f'No spec found for file with file_id: '
+                                   f'{file_model.id}, and spec_id: {file_model.workflow_spec_id}')
+
+        # Calculate the path.
+        sync_file_root = self.get_sync_file_root()
+        category_name = self.get_spec_file_category_name(spec_model)
+        return os.path.join(sync_file_root, category_name, spec_model.display_name, file_model.name)
+
+    def last_modified(self, file_id: int):
+        path = self.get_path(file_id)
+        return self.__last_modified(path)
+
+    def __last_modified(self, file_path: str):
+        # Returns the last modified date of the given file.
+        timestamp = os.path.getmtime(file_path)
+        return datetime.datetime.fromtimestamp(timestamp)
+
+    def get_spec_file_data(self, file_id: int):
+        file_path = self.get_path(file_id)
+        with open(file_path, 'rb') as f_handle:
+            spec_file_data = f_handle.read()
+            size = len(spec_file_data)
+            md5_checksum = UUID(hashlib.md5(spec_file_data).hexdigest())
+            last_modified = self.__last_modified(file_path)
+            file_data_model = FileDataModel(data=spec_file_data,
+                                            md5_hash=md5_checksum,
+                                            size=size,
+                                            date_created=last_modified,
+                                            file_model_id=file_id)
+        return file_data_model
+
+    @staticmethod
+    def get_process_id(et_root: etree.Element):
+        process_elements = []
+        for child in et_root:
+            if child.tag.endswith('process') and child.attrib.get('isExecutable', False):
+                process_elements.append(child)
+
+        if len(process_elements) == 0:
+            raise ValidationException('No executable process tag found')
+
+        # There are multiple executable process elements
+        if len(process_elements) > 1:
+
+            # Look for the element that has the startEvent in it
+            for e in process_elements:
+                this_element: etree.Element = e
+                for child_element in list(this_element):
+                    if child_element.tag.endswith('startEvent'):
+                        return this_element.attrib['id']
+
+            raise ValidationException('No start event found in %s' % et_root.attrib['id'])
+
+        return process_elements[0].attrib['id']
+
+    @staticmethod
+    def get_spec_files(workflow_spec_id, file_name=None, include_libraries=False):
+        if include_libraries:
+            libraries = session.query(WorkflowLibraryModel).filter(
+                WorkflowLibraryModel.workflow_spec_id==workflow_spec_id).all()
+            library_workflow_specs = [x.library_spec_id for x in libraries]
+            library_workflow_specs.append(workflow_spec_id)
+            query = session.query(FileModel).filter(FileModel.workflow_spec_id.in_(library_workflow_specs))
+        else:
+            query = session.query(FileModel).filter(FileModel.workflow_spec_id == workflow_spec_id)
+
+        if file_name:
+            query = query.filter(FileModel.name == file_name)
+
+        query = query.filter(FileModel.archived == False)
+        query = query.order_by(FileModel.id)
+
+        results = query.all()
+        return results
+
+    @staticmethod
+    def get_workflow_file_data(workflow, file_name):
+        """This method should be deleted; find where it is used and remove it.
+        Given a SPIFF workflow, tracks down a file with the given name and returns its data."""
+        workflow_spec_model = SpecFileService.find_spec_model_in_db(workflow)
+
+        if workflow_spec_model is None:
+            raise ApiError(code="unknown_workflow",
+                           message="Something is wrong.
I can't find the workflow you are using.") + file_id = session.query(FileModel.id).filter(FileModel.workflow_spec_id==workflow_spec_model.id).filter(FileModel.name==file_name).scalar() + file_data_model = SpecFileService().get_spec_file_data(file_id) + + if file_data_model is None: + raise ApiError(code="file_missing", + message="Can not find a file called '%s' within workflow specification '%s'" + % (file_name, workflow_spec_model.id)) + + return file_data_model + + @staticmethod + def find_spec_model_in_db(workflow): + """ Search for the workflow """ + # When the workflow spec model is created, we record the primary process id, + # then we can look it up. As there is the potential for sub-workflows, we + # may need to travel up to locate the primary process. + spec = workflow.spec + workflow_model = session.query(WorkflowSpecModel).join(FileModel). \ + filter(FileModel.primary_process_id == spec.name).first() + if workflow_model is None and workflow != workflow.outer_workflow: + return SpecFileService.find_spec_model_in_db(workflow.outer_workflow) + + return workflow_model diff --git a/crc/services/study_service.py b/crc/services/study_service.py index 4f720548..cde04367 100755 --- a/crc/services/study_service.py +++ b/crc/services/study_service.py @@ -20,7 +20,7 @@ from crc.models.study import StudyModel, Study, StudyStatus, Category, WorkflowM from crc.models.task_event import TaskEventModel from crc.models.task_log import TaskLogModel from crc.models.workflow import WorkflowSpecCategoryModel, WorkflowModel, WorkflowSpecModel, WorkflowState, \ - WorkflowStatus, WorkflowSpecDependencyFile + WorkflowStatus from crc.services.document_service import DocumentService from crc.services.file_service import FileService from crc.services.ldap_service import LdapService @@ -236,7 +236,6 @@ class StudyService(object): return session.query(TaskEventModel).filter_by(workflow_id=workflow.id).delete() - session.query(WorkflowSpecDependencyFile).filter_by(workflow_id=workflow_id).delete(synchronize_session='fetch') session.query(FileModel).filter_by(workflow_id=workflow_id).update({'archived': True, 'workflow_id': None}) session.delete(workflow) @@ -311,8 +310,11 @@ class StudyService(object): @staticmethod def get_investigator_dictionary(): """Returns a dictionary of document details keyed on the doc_code.""" - file_data = FileService.get_reference_file_data(StudyService.INVESTIGATOR_LIST) - lookup_model = LookupService.get_lookup_model_for_file_data(file_data, 'code', 'label') + file_id = session.query(FileModel.id). \ + filter(FileModel.name == StudyService.INVESTIGATOR_LIST). \ + filter(FileModel.is_reference == True). 
\ + scalar() + lookup_model = LookupService.get_lookup_model_for_file_data(file_id, StudyService.INVESTIGATOR_LIST, 'code', 'label') doc_dict = {} for lookup_data in lookup_model.dependencies: doc_dict[lookup_data.value] = lookup_data.data diff --git a/crc/services/workflow_processor.py b/crc/services/workflow_processor.py index 34bc1db5..80fd6d7e 100644 --- a/crc/services/workflow_processor.py +++ b/crc/services/workflow_processor.py @@ -1,13 +1,10 @@ -import re +from typing import List from SpiffWorkflow.bpmn.PythonScriptEngine import PythonScriptEngine -from SpiffWorkflow.bpmn.specs.UserTask import UserTask from SpiffWorkflow.serializer.exceptions import MissingSpecError from SpiffWorkflow.util.metrics import timeit, firsttime, sincetime from lxml import etree -import shlex from datetime import datetime -from typing import List from SpiffWorkflow import Task as SpiffTask, WorkflowException, Task from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException @@ -19,16 +16,16 @@ from SpiffWorkflow.dmn.parser.BpmnDmnParser import BpmnDmnParser from SpiffWorkflow.exceptions import WorkflowTaskExecException from SpiffWorkflow.specs import WorkflowSpec -import crc -from crc import session, app +from crc import session from crc.api.common import ApiError -from crc.models.file import FileDataModel, FileModel, FileType +from crc.models.file import FileModel, FileType from crc.models.task_event import TaskEventModel from crc.models.user import UserModelSchema -from crc.models.workflow import WorkflowStatus, WorkflowModel, WorkflowSpecDependencyFile +from crc.models.workflow import WorkflowStatus, WorkflowModel from crc.scripts.script import Script from crc.services.file_service import FileService from crc import app +from crc.services.spec_file_service import SpecFileService from crc.services.user_service import UserService @@ -107,15 +104,11 @@ class WorkflowProcessor(object): self.workflow_model = workflow_model - if workflow_model.bpmn_workflow_json is None: # The workflow was never started. - self.spec_data_files = FileService.get_spec_data_files( - workflow_spec_id=workflow_model.workflow_spec_id,include_libraries=True) - spec = self.get_spec(self.spec_data_files, workflow_model.workflow_spec_id) - else: - self.spec_data_files = FileService.get_spec_data_files( - workflow_spec_id=workflow_model.workflow_spec_id, - workflow_id=workflow_model.id) - spec = None + spec = None + if workflow_model.bpmn_workflow_json is None: + self.spec_files = SpecFileService().get_spec_files( + workflow_spec_id=workflow_model.workflow_spec_id, include_libraries=True) + spec = self.get_spec(self.spec_files, workflow_model.workflow_spec_id) self.workflow_spec_id = workflow_model.workflow_spec_id @@ -146,14 +139,8 @@ class WorkflowProcessor(object): except MissingSpecError as ke: raise ApiError(code="unexpected_workflow_structure", message="Failed to deserialize workflow" - " '%s' version %s, due to a mis-placed or missing task '%s'" % - (self.workflow_spec_id, self.get_version_string(), str(ke))) - - # set whether this is the latest spec file. 
- if self.spec_data_files == FileService.get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id): - self.is_latest_spec = True - else: - self.is_latest_spec = False + " '%s' due to a mis-placed or missing task '%s'" % + (self.workflow_spec_id, str(ke))) @staticmethod def reset(workflow_model, clear_data=False, delete_files=False): @@ -191,10 +178,6 @@ class WorkflowProcessor(object): bpmn_workflow = BpmnWorkflow(spec, script_engine=self._script_engine) bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = workflow_model.study_id bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only -# try: -# bpmn_workflow.do_engine_steps() -# except WorkflowException as we: -# raise ApiError.from_task_spec("error_loading_workflow", str(we), we.sender) return bpmn_workflow def save(self): @@ -206,71 +189,18 @@ class WorkflowProcessor(object): self.workflow_model.total_tasks = len(tasks) self.workflow_model.completed_tasks = sum(1 for t in tasks if t.state in complete_states) self.workflow_model.last_updated = datetime.utcnow() - self.update_dependencies(self.spec_data_files) session.add(self.workflow_model) session.commit() - def get_version_string(self): - # this could potentially become expensive to load all the data in the data models. - # in which case we might consider using a deferred loader for the actual data, but - # trying not to pre-optimize. - file_data_models = FileService.get_spec_data_files(self.workflow_model.workflow_spec_id, - self.workflow_model.id) - return WorkflowProcessor.__get_version_string_for_data_models(file_data_models) - - @staticmethod - def get_latest_version_string_for_spec(spec_id): - file_data_models = FileService.get_spec_data_files(spec_id) - return WorkflowProcessor.__get_version_string_for_data_models(file_data_models) - - @staticmethod - def __get_version_string_for_data_models(file_data_models): - """Version is in the format v[VERSION] (FILE_ID_LIST) - For example, a single bpmn file with only one version would be - v1 (12) Where 12 is the id of the file data model that is used to create the - specification. If multiple files exist, they are added on in - dot notation to both the version number and the file list. So - a Spec that includes a BPMN, DMN, an a Word file all on the first - version would be v1.1.1 (12.45.21)""" - - major_version = 0 # The version of the primary file. - minor_version = [] # The versions of the minor files if any. - file_ids = [] - for file_data in file_data_models: - file_ids.append(file_data.id) - if file_data.file_model.primary: - major_version = file_data.version - else: - minor_version.append(file_data.version) - minor_version.insert(0, major_version) # Add major version to beginning. - version = ".".join(str(x) for x in minor_version) - files = ".".join(str(x) for x in file_ids) - full_version = "v%s (%s)" % (version, files) - return full_version - - def update_dependencies(self, spec_data_files): - existing_dependencies = FileService.get_spec_data_files( - workflow_spec_id=self.workflow_model.workflow_spec_id, - workflow_id=self.workflow_model.id) - - # Don't save the dependencies if they haven't changed. - if existing_dependencies == spec_data_files: - return - - # Remove all existing dependencies, and replace them. 
- self.workflow_model.dependencies = [] - for file_data in spec_data_files: - self.workflow_model.dependencies.append(WorkflowSpecDependencyFile(file_data_id=file_data.id)) - @staticmethod @timeit def run_master_spec(spec_model, study): """Executes a BPMN specification for the given study, without recording any information to the database Useful for running the master specification, which should not persist. """ lasttime = firsttime() - spec_data_files = FileService.get_spec_data_files(spec_model.id) + spec_files = SpecFileService().get_spec_files(spec_model.id, include_libraries=True) lasttime = sincetime('load Files', lasttime) - spec = WorkflowProcessor.get_spec(spec_data_files, spec_model.id) + spec = WorkflowProcessor.get_spec(spec_files, spec_model.id) lasttime = sincetime('get spec', lasttime) try: bpmn_workflow = BpmnWorkflow(spec, script_engine=WorkflowProcessor._script_engine) @@ -294,22 +224,23 @@ class WorkflowProcessor(object): return parser @staticmethod - def get_spec(file_data_models: List[FileDataModel], workflow_spec_id): + def get_spec(files: List[FileModel], workflow_spec_id): """Returns a SpiffWorkflow specification for the given workflow spec, using the files provided. The Workflow_spec_id is only used to generate better error messages.""" parser = WorkflowProcessor.get_parser() process_id = None - for file_data in file_data_models: - if file_data.file_model.type == FileType.bpmn: - bpmn: etree.Element = etree.fromstring(file_data.data) - if file_data.file_model.primary and file_data.file_model.workflow_spec_id == workflow_spec_id: - process_id = FileService.get_process_id(bpmn) - parser.add_bpmn_xml(bpmn, filename=file_data.file_model.name) - elif file_data.file_model.type == FileType.dmn: - dmn: etree.Element = etree.fromstring(file_data.data) - parser.add_dmn_xml(dmn, filename=file_data.file_model.name) + for file in files: + data = SpecFileService().get_spec_file_data(file.id).data + if file.type == FileType.bpmn: + bpmn: etree.Element = etree.fromstring(data) + if file.primary and file.workflow_spec_id == workflow_spec_id: + process_id = SpecFileService.get_process_id(bpmn) + parser.add_bpmn_xml(bpmn, filename=file.name) + elif file.type == FileType.dmn: + dmn: etree.Element = etree.fromstring(data) + parser.add_dmn_xml(dmn, filename=file.name) if process_id is None: raise (ApiError(code="no_primary_bpmn_error", message="There is no primary BPMN model defined for workflow %s" % workflow_spec_id)) @@ -337,19 +268,6 @@ class WorkflowProcessor(object): else: return WorkflowStatus.waiting - # def hard_reset(self): - # """Recreate this workflow. This will be useful when a workflow specification changes. 
- # """ - # self.spec_data_files = FileService.get_spec_data_files(workflow_spec_id=self.workflow_spec_id) - # new_spec = WorkflowProcessor.get_spec(self.spec_data_files, self.workflow_spec_id) - # new_bpmn_workflow = BpmnWorkflow(new_spec, script_engine=self._script_engine) - # new_bpmn_workflow.data = self.bpmn_workflow.data - # try: - # new_bpmn_workflow.do_engine_steps() - # except WorkflowException as we: - # raise ApiError.from_task_spec("hard_reset_engine_steps_error", str(we), we.sender) - # self.bpmn_workflow = new_bpmn_workflow - def get_status(self): return self.status_of(self.bpmn_workflow) diff --git a/crc/services/workflow_service.py b/crc/services/workflow_service.py index d500c2ab..6df7cc14 100755 --- a/crc/services/workflow_service.py +++ b/crc/services/workflow_service.py @@ -35,6 +35,7 @@ from crc.services.document_service import DocumentService from crc.services.file_service import FileService from crc.services.jinja_service import JinjaService from crc.services.lookup_service import LookupService +from crc.services.spec_file_service import SpecFileService from crc.services.study_service import StudyService from crc.services.user_service import UserService from crc.services.workflow_processor import WorkflowProcessor @@ -576,8 +577,6 @@ class WorkflowService(object): next_task=None, navigation=navigation, workflow_spec_id=processor.workflow_spec_id, - spec_version=processor.get_version_string(), - is_latest_spec=processor.is_latest_spec, total_tasks=len(navigation), completed_tasks=processor.workflow_model.completed_tasks, last_updated=processor.workflow_model.last_updated, @@ -764,7 +763,7 @@ class WorkflowService(object): try: doc_file_name = spiff_task.task_spec.name + ".md" - data_model = FileService.get_workflow_file_data(spiff_task.workflow, doc_file_name) + data_model = SpecFileService.get_workflow_file_data(spiff_task.workflow, doc_file_name) raw_doc = data_model.data.decode("utf-8") except ApiError: raw_doc = documentation @@ -914,7 +913,6 @@ class WorkflowService(object): user_uid=user_uid, workflow_id=processor.workflow_model.id, workflow_spec_id=processor.workflow_model.workflow_spec_id, - spec_version=processor.get_version_string(), action=action, task_id=task.id, task_name=task.name, diff --git a/example_data.py b/example_data.py index cb8a0aa5..d9d92aa0 100644 --- a/example_data.py +++ b/example_data.py @@ -1,5 +1,4 @@ import glob -import glob import os from crc import app, db, session @@ -9,6 +8,8 @@ from crc.models.user import UserModel from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel from crc.services.document_service import DocumentService from crc.services.file_service import FileService +from crc.services.reference_file_service import ReferenceFileService +from crc.services.spec_file_service import SpecFileService from crc.services.study_service import StudyService @@ -187,7 +188,7 @@ class ExampleDataLoader: def load_rrt(self): file_path = os.path.join(app.root_path, 'static', 'reference', 'rrt_documents.xlsx') file = open(file_path, "rb") - FileService.add_reference_file(FileService.DOCUMENT_LIST, + ReferenceFileService.add_reference_file(FileService.DOCUMENT_LIST, binary_data=file.read(), content_type=CONTENT_TYPES['xls']) file.close() @@ -276,7 +277,7 @@ class ExampleDataLoader: file = open(file_path, 'rb') data = file.read() content_type = CONTENT_TYPES[file_extension[1:]] - file_service.add_workflow_spec_file(workflow_spec=spec, name=filename, content_type=content_type, + 
SpecFileService.add_workflow_spec_file(workflow_spec=spec, name=filename, content_type=content_type,
                                                        binary_data=data, primary=is_primary, is_status=is_status)
             except IsADirectoryError as de:
                 # Ignore sub directories
@@ -289,16 +290,16 @@ class ExampleDataLoader:
     def load_reference_documents(self):
         file_path = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx')
         file = open(file_path, "rb")
-        FileService.add_reference_file(DocumentService.DOCUMENT_LIST,
+        ReferenceFileService.add_reference_file(DocumentService.DOCUMENT_LIST,
                                        binary_data=file.read(),
-                                       content_type=CONTENT_TYPES['xls'])
+                                       content_type=CONTENT_TYPES['xlsx'])
         file.close()

         file_path = os.path.join(app.root_path, 'static', 'reference', 'investigators.xlsx')
         file = open(file_path, "rb")
-        FileService.add_reference_file(StudyService.INVESTIGATOR_LIST,
+        ReferenceFileService.add_reference_file(StudyService.INVESTIGATOR_LIST,
                                        binary_data=file.read(),
-                                       content_type=CONTENT_TYPES['xls'])
+                                       content_type=CONTENT_TYPES['xlsx'])
         file.close()

     def load_default_user(self):
diff --git a/migrations/versions/65b5ed6ae05b_remove_slashes_from_name_values.py b/migrations/versions/65b5ed6ae05b_remove_slashes_from_name_values.py
new file mode 100644
index 00000000..db0532f6
--- /dev/null
+++ b/migrations/versions/65b5ed6ae05b_remove_slashes_from_name_values.py
@@ -0,0 +1,29 @@
+"""Remove slashes from name values
+
+Revision ID: 65b5ed6ae05b
+Revises: 1fb36d682c7f
+Create Date: 2021-12-17 11:16:52.165479
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '65b5ed6ae05b'
+down_revision = '1fb36d682c7f'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.execute("UPDATE file SET name = REPLACE(name, '/', '-')")
+    op.execute("UPDATE workflow_spec SET display_name = REPLACE(display_name, '/', '-')")
+    op.execute("UPDATE workflow_spec_category SET display_name = REPLACE(display_name, '/', '-')")
+
+
+def downgrade():
+    # There are already valid uses of '-' in these tables.
+    # We probably don't want to change all of them to '/'
+    # So, we pass here. No downgrade.
+    pass
diff --git a/migrations/versions/7225d990740e_move_files_to_filesystem.py b/migrations/versions/7225d990740e_move_files_to_filesystem.py
new file mode 100644
index 00000000..32d05900
--- /dev/null
+++ b/migrations/versions/7225d990740e_move_files_to_filesystem.py
@@ -0,0 +1,323 @@
+"""Move files to filesystem
+
+Revision ID: 7225d990740e
+Revises: 65b5ed6ae05b
+Create Date: 2021-12-14 10:52:50.785342
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+from crc import app, session
+from crc.models.file import FileModel, FileModelSchema, FileDataModel, LookupFileModel, CONTENT_TYPES
+from crc.models.workflow import WorkflowSpecModel, WorkflowSpecModelSchema, WorkflowSpecCategoryModel, WorkflowSpecCategoryModelSchema
+from crc.services.file_service import FileService
+from crc.services.spec_file_service import SpecFileService
+from crc.services.reference_file_service import ReferenceFileService
+from crc.services.workflow_service import WorkflowService
+# from crc.services.temp_migration_service import FromFilesystemService, ToFilesystemService
+
+from shutil import rmtree
+import json
+import os
+
+# revision identifiers, used by Alembic.
+revision = '7225d990740e' +down_revision = '65b5ed6ae05b' +branch_labels = None +depends_on = None + + +class FromFilesystemService(object): + + @staticmethod + def process_directory(directory): + files = [] + directories = [] + directory_items = os.scandir(directory) + for item in directory_items: + if item.is_dir(): + directories.append(item) + elif item.is_file(): + files.append(item) + + return files, directories + + @staticmethod + def process_workflow_spec(json_file, directory): + file_path = os.path.join(directory, json_file) + + with open(file_path, 'r') as f_open: + data = f_open.read() + data_obj = json.loads(data) + workflow_spec_model = session.query(WorkflowSpecModel).\ + filter(WorkflowSpecModel.id == data_obj['id']).\ + first() + if not workflow_spec_model: + category_id = None + if data_obj['category'] is not None: + category_id = session.query(WorkflowSpecCategoryModel.id).filter( + WorkflowSpecCategoryModel.display_name == data_obj['category']['display_name']).scalar() + workflow_spec_model = WorkflowSpecModel(id=data_obj['id'], + display_name=data_obj['display_name'], + description=data_obj['description'], + is_master_spec=data_obj['is_master_spec'], + category_id=category_id, + display_order=data_obj['display_order'], + standalone=data_obj['standalone'], + library=data_obj['library']) + session.add(workflow_spec_model) + session.commit() + + return workflow_spec_model + + @staticmethod + def process_workflow_spec_file(json_file, spec_directory): + file_path = os.path.join(spec_directory, json_file) + + with open(file_path, 'r') as json_handle: + data = json_handle.read() + data_obj = json.loads(data) + spec_file_name = '.'.join(json_file.name.split('.')[:-1]) + spec_file_path = os.path.join(spec_directory, spec_file_name) + + with open(spec_file_path, 'rb') as spec_handle: + # workflow_spec_name = spec_directory.split('/')[-1] + # workflow_spec = session.query(WorkflowSpecModel).filter( + # WorkflowSpecModel.display_name == workflow_spec_name).first() + + workflow_spec_file_model = session.query(FileModel).\ + filter(FileModel.workflow_spec_id == data_obj['workflow_spec_id']).\ + filter(FileModel.name == spec_file_name).\ + first() + if workflow_spec_file_model: + # update workflow_spec_file_model + FileService.update_file(workflow_spec_file_model, spec_handle.read(), CONTENT_TYPES[spec_file_name.split('.')[-1]]) + else: + # create new model + workflow_spec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id==data_obj['workflow_spec_id']).first() + workflow_spec_file_model = FileService.add_workflow_spec_file(workflow_spec, + name=spec_file_name, + content_type=CONTENT_TYPES[spec_file_name.split('.')[-1]], + binary_data=spec_handle.read()) + + print(f'process_workflow_spec_file: data_obj: {data_obj}') + return workflow_spec_file_model + + @staticmethod + def process_category(json_file, root): + print(f'process_category: json_file: {json_file}') + file_path = os.path.join(root, json_file) + + with open(file_path, 'r') as f_open: + data = f_open.read() + data_obj = json.loads(data) + category = session.query(WorkflowSpecCategoryModel).filter( + WorkflowSpecCategoryModel.display_name == data_obj['display_name']).first() + if not category: + category = WorkflowSpecCategoryModel(display_name=data_obj['display_name'], + display_order=data_obj['display_order'], + admin=data_obj['admin']) + session.add(category) + else: + category.display_order = data_obj['display_order'] + category.admin = data_obj['admin'] + # print(data) + print(f'process_category: category: 
{category}') + + session.commit() + return category + + def process_workflow_spec_directory(self, spec_directory): + print(f'process_workflow_spec_directory: {spec_directory}') + files, directories = self.process_directory(spec_directory) + + for file in files: + if file.name.endswith('.json'): + file_model = self.process_workflow_spec_file(file, spec_directory) + + def process_category_directory(self, category_directory): + print(f'process_category_directory: {category_directory}') + files, directories = self.process_directory(category_directory) + + for file in files: + if file.name.endswith('.json'): + workflow_spec = self.process_workflow_spec(file, category_directory) + + for workflow_spec_directory in directories: + directory_path = os.path.join(category_directory, workflow_spec_directory) + self.process_workflow_spec_directory(directory_path) + + def process_root_directory(self, root_directory): + + files, directories = self.process_directory(root_directory) + for file in files: + if file.name.endswith('.json'): + category_model = self.process_category(file, root_directory) + WorkflowService.cleanup_workflow_spec_category_display_order() + + for directory in directories: + directory_path = os.path.join(root_directory, directory) + self.process_category_directory(directory_path) + + def update_file_metadata_from_filesystem(self, root_directory): + self.process_root_directory(root_directory) + + +class ToFilesystemService(object): + + @staticmethod + def process_category(location, category): + # Make sure a directory exists for the category + # Add a json file dumped from the category model + category_path = os.path.join(location, category.display_name) + os.makedirs(os.path.dirname(category_path), exist_ok=True) + json_file_name = f'{category.display_name}.json' + json_file_path = os.path.join(location, json_file_name) + category_model_schema = WorkflowSpecCategoryModelSchema().dumps(category) + with open(json_file_path, 'w') as j_handle: + j_handle.write(category_model_schema) + + @staticmethod + def process_workflow_spec(location, workflow_spec, category_name_string): + # Make sure a directory exists for the workflow spec + # Add a json file dumped from the workflow spec model + workflow_spec_path = os.path.join(location, category_name_string, workflow_spec.display_name) + os.makedirs(os.path.dirname(workflow_spec_path), exist_ok=True) + json_file_name = f'{workflow_spec.display_name}.json' + json_file_path = os.path.join(location, category_name_string, json_file_name) + workflow_spec_schema = WorkflowSpecModelSchema().dumps(workflow_spec) + with open(json_file_path, 'w') as j_handle: + j_handle.write(workflow_spec_schema) + + @staticmethod + def process_workflow_spec_file(session, workflow_spec_file, workflow_spec_file_path): + # workflow_spec_file_path = os.path.join + os.makedirs(os.path.dirname(workflow_spec_file_path), exist_ok=True) + + file_data_model = session.query(FileDataModel). \ + filter(FileDataModel.file_model_id == workflow_spec_file.id). \ + order_by(sa.desc(FileDataModel.version)). 
\
+            first()
+        with open(workflow_spec_file_path, 'wb') as f_handle:
+            f_handle.write(file_data_model.data)
+
+        json_file_path = f'{workflow_spec_file_path}.json'
+        workflow_spec_file_model = session.query(FileModel).filter(FileModel.id==file_data_model.file_model_id).first()
+        workflow_spec_file_schema = FileModelSchema().dumps(workflow_spec_file_model)
+        with open(json_file_path, 'w') as j_handle:
+            j_handle.write(workflow_spec_file_schema)
+
+    def write_file_to_system(self, session, file_model, location):
+
+        category_name = None
+
+        if file_model.workflow_spec_id is not None:
+            # we have a workflow spec file
+            workflow_spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == file_model.workflow_spec_id).first()
+            if workflow_spec_model:
+
+                if workflow_spec_model.category_id is not None:
+                    category_model = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.id == workflow_spec_model.category_id).first()
+                    self.process_category(location, category_model)
+                    category_name = category_model.display_name
+
+                elif workflow_spec_model.is_master_spec:
+                    category_name = 'Master Specification'
+
+                elif workflow_spec_model.library:
+                    category_name = 'Library Specs'
+
+                elif workflow_spec_model.standalone:
+                    category_name = 'Standalone'
+
+                if category_name is not None:
+                    # Only process if we have a workflow_spec_model and category_name
+                    self.process_workflow_spec(location, workflow_spec_model, category_name)
+
+                    file_path = os.path.join(location,
+                                             category_name,
+                                             workflow_spec_model.display_name,
+                                             file_model.name)
+                    self.process_workflow_spec_file(session, file_model, file_path)
+
+        elif file_model.is_reference:
+            # we have a reference file
+            category_name = 'Reference'
+            file_path = os.path.join(location,
+                                     category_name,
+                                     file_model.name)
+
+            self.process_workflow_spec_file(session, file_model, file_path)
+
+
+def upgrade():
+    bind = op.get_bind()
+    session = sa.orm.Session(bind=bind)
+
+    op.drop_table('workflow_spec_dependency_file')
+    op.add_column('lookup_file', sa.Column('file_model_id', sa.Integer(), nullable=True))
+    op.add_column('lookup_file', sa.Column('last_updated', sa.DateTime(), nullable=True))
+    op.create_foreign_key(None, 'lookup_file', 'file', ['file_model_id'], ['id'])
+
+    processed_files = []
+    location = SpecFileService.get_sync_file_root()
+    if os.path.exists(location):
+        rmtree(location)
+    # Process workflow spec files
+    files = session.query(FileModel).filter(FileModel.workflow_spec_id.isnot(None)).all()
+    for file in files:
+        if file.archived is not True:
+            ToFilesystemService().write_file_to_system(session, file, location)
+            processed_files.append(file.id)
+
+    # Process reference files
+    # get_reference_files only returns files where archived is False
+    reference_files = ReferenceFileService.get_reference_files()
+    for reference_file in reference_files:
+        ToFilesystemService().write_file_to_system(session, reference_file, location)
+        processed_files.append(reference_file.id)
+
+    session.flush()
+    lookups = session.query(LookupFileModel).all()
+    for lookup in lookups:
+        session.delete(lookup)
+    session.commit()
+    for file_id in processed_files:
+        processed_data_models = session.query(FileDataModel).filter(FileDataModel.file_model_id==file_id).all()
+        for processed_data_model in processed_data_models:
+            session.delete(processed_data_model)
+        session.commit()
+        print(f'upgrade: in
processed files: file_id: {file_id}') + print('upgrade: done: ') + + +def downgrade(): + + # TODO: This is a work in progress, and depends on what we do in upgrade() + op.add_column('lookup_file', sa.Column('file_data_model_id', sa.Integer(), nullable=True)) + op.create_foreign_key(None, 'lookup_file', 'file', ['file_data_model_id'], ['id']) + op.drop_constraint('lookup_file_file_model_id_key', 'lookup_file', type_='foreignkey') + op.drop_column('lookup_file', 'file_model_id') + + op.create_table('workflow_spec_dependency_file', + sa.Column('file_data_id', sa.Integer(), nullable=False), + sa.Column('workflow_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['file_data_id'], ['file_data.id'], ), + sa.ForeignKeyConstraint(['workflow_id'], ['workflow.id'], ), + sa.PrimaryKeyConstraint('file_data_id', 'workflow_id') + ) + + location = SpecFileService.get_sync_file_root() + FromFilesystemService().update_file_metadata_from_filesystem(location) + + print('downgrade: ') diff --git a/tests/base_test.py b/tests/base_test.py index 1c472fd3..252eb450 100644 --- a/tests/base_test.py +++ b/tests/base_test.py @@ -9,18 +9,19 @@ import json import unittest import urllib.parse import datetime +import shutil from flask import g from crc import app, db, session from crc.models.api_models import WorkflowApiSchema, MultiInstanceType -from crc.models.file import FileModel, FileDataModel, CONTENT_TYPES +from crc.models.file import FileModel, CONTENT_TYPES from crc.models.task_event import TaskEventModel from crc.models.study import StudyModel, StudyStatus, ProgressStatus -from crc.models.ldap import LdapModel from crc.models.user import UserModel from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel from crc.services.ldap_service import LdapService -from crc.services.file_service import FileService +from crc.services.reference_file_service import ReferenceFileService +from crc.services.spec_file_service import SpecFileService from crc.services.study_service import StudyService from crc.services.user_service import UserService from crc.services.workflow_service import WorkflowService @@ -82,6 +83,7 @@ class BaseTest(unittest.TestCase): @classmethod def setUpClass(cls): + cls.clear_test_sync_files() app.config.from_object('config.testing') cls.ctx = app.test_request_context() cls.app = app.test_client() @@ -92,7 +94,6 @@ class BaseTest(unittest.TestCase): def tearDownClass(cls): cls.ctx.pop() db.drop_all() - pass def setUp(self): pass @@ -101,6 +102,13 @@ class BaseTest(unittest.TestCase): ExampleDataLoader.clean_db() self.logout() self.auths = {} + self.clear_test_sync_files() + + @staticmethod + def clear_test_sync_files(): + sync_file_root = SpecFileService().get_sync_file_root() + if os.path.exists(sync_file_root): + shutil.rmtree(sync_file_root) def logged_in_headers(self, user=None, redirect_url='http://some/frontend/url'): if user is None: @@ -172,7 +180,8 @@ class BaseTest(unittest.TestCase): self.assertIsNotNone(files) self.assertGreater(len(files), 0) for file in files: - file_data = session.query(FileDataModel).filter_by(file_model_id=file.id).all() + # file_data = session.query(FileDataModel).filter_by(file_model_id=file.id).all() + file_data = SpecFileService().get_spec_file_data(file.id).data self.assertIsNotNone(file_data) self.assertGreater(len(file_data), 0) @@ -246,14 +255,14 @@ class BaseTest(unittest.TestCase): def replace_file(self, name, file_path): """Replaces a stored file with the given name with the contents of the file at the given path.""" - 
file_service = FileService() file = open(file_path, "rb") data = file.read() file_model = session.query(FileModel).filter(FileModel.name == name).first() + workflow_spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id==file_model.workflow_spec_id).first() noise, file_extension = os.path.splitext(file_path) content_type = CONTENT_TYPES[file_extension[1:]] - file_service.update_file(file_model, data, content_type) + SpecFileService().update_spec_file_data(workflow_spec_model, file_model.name, data) def create_user(self, uid="dhf8r", email="daniel.h.funk@gmail.com", display_name="Hoopy Frood"): user = session.query(UserModel).filter(UserModel.uid == uid).first() @@ -290,11 +299,10 @@ class BaseTest(unittest.TestCase): def create_reference_document(self): file_path = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx') - file = open(file_path, "rb") - FileService.add_reference_file(DocumentService.DOCUMENT_LIST, - binary_data=file.read(), - content_type=CONTENT_TYPES['xlsx']) - file.close() + with open(file_path, "rb") as file: + ReferenceFileService.add_reference_file(DocumentService.DOCUMENT_LIST, + content_type=CONTENT_TYPES['xlsx'], + binary_data=file.read()) def get_workflow_common(self, url, user): rv = self.app.get(url, @@ -379,7 +387,6 @@ class BaseTest(unittest.TestCase): self.assertEqual(user_uid, event.user_uid) self.assertEqual(workflow.id, event.workflow_id) self.assertEqual(workflow.workflow_spec_id, event.workflow_spec_id) - self.assertEqual(workflow.spec_version, event.spec_version) self.assertEqual(WorkflowService.TASK_ACTION_COMPLETE, event.action) self.assertEqual(task_in.id, task_id) self.assertEqual(task_in.name, event.task_name) @@ -416,4 +423,3 @@ class BaseTest(unittest.TestCase): """Returns a bytesIO object of a well formed BPMN xml file with some string content of your choosing.""" minimal_dbpm = "%s" return (minimal_dbpm % content).encode() - diff --git a/tests/files/test_file_data_cleanup.py b/tests/files/test_file_data_cleanup.py deleted file mode 100644 index 04b4ac82..00000000 --- a/tests/files/test_file_data_cleanup.py +++ /dev/null @@ -1,149 +0,0 @@ -from tests.base_test import BaseTest - -from crc import session -from crc.models.file import FileModel, FileDataModel, LookupFileModel -from crc.models.workflow import WorkflowSpecModel, WorkflowSpecDependencyFile -from crc.services.file_service import FileService - -from sqlalchemy import desc - -import io -import json - - -class TestFileDataCleanup(BaseTest): - - xml_str_one = b""" - - - - - - - - - - - - """ - - xml_str_two = b""" - - - - Flow_1v0s5ht - - - # Hello - Flow_1v0s5ht - Flow_12k5ua1 - - - - Flow_12k5ua1 - - - - - - - - - - - - - - - - - - - - - - - - - - """ - - def test_file_data_cleanup(self): - """Update a file twice. 
Make sure we clean up the correct files""" - - self.load_example_data() - workflow = self.create_workflow('empty_workflow') - file_data_model_count = session.query(FileDataModel).count() - - # Use for comparison after cleanup - replaced_models = [] - - # Get `empty_workflow` workflow spec - workflow_spec_model = session.query(WorkflowSpecModel)\ - .filter(WorkflowSpecModel.id == 'empty_workflow')\ - .first() - - # Get file model for empty_workflow spec - file_model = session.query(FileModel)\ - .filter(FileModel.workflow_spec_id == workflow_spec_model.id)\ - .first() - - # Grab the file data model for empty_workflow file_model - original_file_data_model = session.query(FileDataModel)\ - .filter(FileDataModel.file_model_id == file_model.id)\ - .order_by(desc(FileDataModel.date_created))\ - .first() - - # Add file to dependencies - # It should not get deleted - wf_spec_depend_model = WorkflowSpecDependencyFile(file_data_id=original_file_data_model.id, - workflow_id=workflow.id) - session.add(wf_spec_depend_model) - session.commit() - - # Update first time - replaced_models.append(original_file_data_model) - data = {'file': (io.BytesIO(self.xml_str_one), file_model.name)} - rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assert_success(rv) - file_json_first = json.loads(rv.get_data(as_text=True)) - - # Update second time - # replaced_models.append(old_file_data_model) - data = {'file': (io.BytesIO(self.xml_str_two), file_model.name)} - rv = self.app.put('/v1.0/file/%i/data' % file_model.id, data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assert_success(rv) - file_json_second = json.loads(rv.get_data(as_text=True)) - - # Add lookup file - data = {'file': (io.BytesIO(b'asdf'), 'lookup_1.xlsx')} - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % workflow_spec_model.id, data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assert_success(rv) - file_json = json.loads(rv.get_data(as_text=True)) - lookup_file_id = file_json['id'] - lookup_data_model = session.query(FileDataModel).filter(FileDataModel.file_model_id == lookup_file_id).first() - lookup_model = LookupFileModel(file_data_model_id=lookup_data_model.id, - workflow_spec_id=workflow_spec_model.id) - session.add(lookup_model) - session.commit() - - # Update lookup file - data = {'file': (io.BytesIO(b'1234'), 'lookup_1.xlsx')} - rv = self.app.put('/v1.0/file/%i/data' % lookup_file_id, data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assert_success(rv) - - # Run the cleanup files process - current_models, saved_models, deleted_models = FileService.cleanup_file_data() - - # assert correct versions are removed - new_count = session.query(FileDataModel).count() - self.assertEqual(8, new_count) - self.assertEqual(4, len(current_models)) - self.assertEqual(2, len(saved_models)) - self.assertEqual(1, len(deleted_models)) - - print('test_file_data_cleanup') diff --git a/tests/files/test_file_service.py b/tests/files/test_file_service.py index 0a00386c..f8b5d992 100644 --- a/tests/files/test_file_service.py +++ b/tests/files/test_file_service.py @@ -1,11 +1,14 @@ from github import UnknownObjectException -from sqlalchemy import desc +from sqlalchemy import desc, column from tests.base_test import BaseTest from unittest.mock import patch, Mock 
-from crc import db -from crc.models.file import FileDataModel +from crc import db, session +from crc.api.common import ApiError +from crc.models.file import FileModel, FileDataModel, CONTENT_TYPES +from crc.models.workflow import WorkflowModel, WorkflowSpecModel from crc.services.file_service import FileService +from crc.services.spec_file_service import SpecFileService from crc.services.workflow_processor import WorkflowProcessor @@ -53,7 +56,6 @@ class TestFileService(BaseTest): def test_add_file_from_task_increments_version_and_replaces_on_subsequent_add(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -78,7 +80,6 @@ class TestFileService(BaseTest): def test_add_file_from_form_increments_version_and_replaces_on_subsequent_add_with_same_name(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -97,7 +98,6 @@ class TestFileService(BaseTest): def test_replace_archive_file_unarchives_the_file_and_updates(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -137,7 +137,6 @@ class TestFileService(BaseTest): def test_add_file_from_form_allows_multiple_files_with_different_names(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -161,7 +160,6 @@ class TestFileService(BaseTest): mock_github.return_value = FakeGithub() self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -185,7 +183,6 @@ class TestFileService(BaseTest): mock_github.return_value = FakeGithubCreates() self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -204,7 +201,6 @@ class TestFileService(BaseTest): mock_github.return_value = FakeGithub() self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -225,3 +221,43 @@ class TestFileService(BaseTest): branches = FileService.get_repo_branches() self.assertIsInstance(branches, list) + + def test_add_workflow_spec_file(self): + + self.load_example_data() + spec = db.session.query(WorkflowSpecModel).first() + + file_data = b"abcdef" + file_name = 'random_fact.svg' + content_type = CONTENT_TYPES[file_name[-3:]] + + # This creates a file on the filesystem + file_model = SpecFileService().add_workflow_spec_file(spec, file_name, content_type, file_data) + + # This reads from a file on the filesystem + spec_file_data = SpecFileService().get_spec_file_data(file_model.id).data + + self.assertEqual(file_data, spec_file_data) + + def test_delete_workflow_spec_file(self): + self.load_example_data() + file_model = session.query(FileModel).filter(column('workflow_spec_id').isnot(None)).first() + file_data_before = SpecFileService().get_spec_file_data(file_model.id).data + self.assertGreater(len(file_data_before), 0) + + SpecFileService().delete_spec_file(file_model.id) + 
+ with self.assertRaises(ApiError) as ae: + SpecFileService().get_spec_file_data(file_model.id) + + self.assertIn('No model found for file with file_id', ae.exception.message) + print('test_delete_workflow_spec_file') + + def test_get_spec_files(self): + self.load_example_data() + spec = session.query(WorkflowSpecModel.id).first() + spec_files = SpecFileService().get_spec_files(spec.id) + workflow = session.query(WorkflowModel).first() + processor = WorkflowProcessor(workflow) + self.assertIsInstance(processor, WorkflowProcessor) + print('test_get_spec_files') diff --git a/tests/files/test_files_api.py b/tests/files/test_files_api.py index 43bd3d0c..f4793a6a 100644 --- a/tests/files/test_files_api.py +++ b/tests/files/test_files_api.py @@ -5,15 +5,16 @@ import os from tests.base_test import BaseTest from crc import session, db, app -from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel +from crc.models.file import FileModel, FileType, FileModelSchema from crc.models.workflow import WorkflowSpecModel from crc.services.file_service import FileService +from crc.services.spec_file_service import SpecFileService from crc.services.workflow_processor import WorkflowProcessor from crc.models.data_store import DataStoreModel from crc.services.document_service import DocumentService from example_data import ExampleDataLoader -from sqlalchemy import desc +from sqlalchemy import column class TestFilesApi(BaseTest): @@ -22,7 +23,7 @@ class TestFilesApi(BaseTest): self.load_example_data(use_crc_data=True) spec_id = 'core_info' spec = session.query(WorkflowSpecModel).filter_by(id=spec_id).first() - rv = self.app.get('/v1.0/file?workflow_spec_id=%s' % spec_id, + rv = self.app.get('/v1.0/spec_file?workflow_spec_id=%s' % spec_id, follow_redirects=True, content_type="application/json", headers=self.logged_in_headers()) self.assert_success(rv) @@ -35,23 +36,21 @@ class TestFilesApi(BaseTest): def test_list_multiple_files_for_workflow_spec(self): self.load_example_data() spec = self.load_test_spec("random_fact") - svgFile = FileModel(name="test.svg", type=FileType.svg, - primary=False, workflow_spec_id=spec.id) - session.add(svgFile) - session.flush() - rv = self.app.get('/v1.0/file?workflow_spec_id=%s' % spec.id, + data = {'file': (io.BytesIO(b"abcdef"), 'test.svg')} + self.app.post('/v1.0/spec_file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + rv = self.app.get('/v1.0/spec_file?workflow_spec_id=%s' % spec.id, follow_redirects=True, content_type="application/json", headers=self.logged_in_headers()) self.assert_success(rv) json_data = json.loads(rv.get_data(as_text=True)) self.assertEqual(3, len(json_data)) - - def test_create_file(self): + def test_create_spec_file(self): self.load_example_data() spec = session.query(WorkflowSpecModel).first() data = {'file': (io.BytesIO(b"abcdef"), 'random_fact.svg')} - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, + rv = self.app.post('/v1.0/spec_file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_success(rv) @@ -87,7 +86,6 @@ class TestFilesApi(BaseTest): def test_archive_file_no_longer_shows_up(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) processor.do_engine_steps() @@ -115,13 +113,14 @@ class 
TestFilesApi(BaseTest): self.assert_success(rv) self.assertEqual(0, len(json.loads(rv.get_data(as_text=True)))) - def test_set_reference_file(self): + def test_update_reference_file_data(self): + self.load_example_data() file_name = "documents.xlsx" filepath = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx') with open(filepath, 'rb') as myfile: file_data = myfile.read() data = {'file': (io.BytesIO(file_data), file_name)} - rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True, + rv = self.app.put('/v1.0/reference_file/%s/data' % file_name, data=data, follow_redirects=True, content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_success(rv) self.assertIsNotNone(rv.get_data()) @@ -130,28 +129,42 @@ class TestFilesApi(BaseTest): self.assertEqual(FileType.xlsx, file.type) self.assertTrue(file.is_reference) self.assertEqual("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", file.content_type) - self.assertEqual('dhf8r', json_data['user_uid']) + # self.assertEqual('dhf8r', json_data['user_uid']) def test_set_reference_file_bad_extension(self): file_name = DocumentService.DOCUMENT_LIST data = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.ppt")} - rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True, + rv = self.app.put('/v1.0/reference_file/%s/data' % file_name, data=data, follow_redirects=True, content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_failure(rv, error_code="invalid_file_type") - def test_get_reference_file(self): + def test_get_reference_file_data(self): + ExampleDataLoader().load_reference_documents() file_name = "irb_document_types.xls" filepath = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx') - with open(filepath, 'rb') as myfile: - file_data = myfile.read() + with open(filepath, 'rb') as f_open: + file_data = f_open.read() data = {'file': (io.BytesIO(file_data), file_name)} - rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - rv = self.app.get('/v1.0/reference_file/%s' % file_name, headers=self.logged_in_headers()) + self.app.post('/v1.0/reference_file', data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + rv = self.app.get('/v1.0/reference_file/%s/data' % file_name, headers=self.logged_in_headers()) self.assert_success(rv) data_out = rv.get_data() self.assertEqual(file_data, data_out) + def test_get_reference_file_info(self): + self.load_example_data() + reference_file_model = session.query(FileModel).filter(FileModel.is_reference==True).first() + name = reference_file_model.name + rv = self.app.get('/v1.0/reference_file/%s' % name, headers=self.logged_in_headers()) + self.assert_success(rv) + self.assertIsNotNone(rv.get_data()) + json_data = json.loads(rv.get_data(as_text=True)) + + self.assertEqual(reference_file_model.name, json_data['name']) + self.assertEqual(reference_file_model.type.value, json_data['type']) + self.assertEqual(reference_file_model.id, json_data['id']) + def test_add_reference_file(self): ExampleDataLoader().load_reference_documents() @@ -167,6 +180,18 @@ class TestFilesApi(BaseTest): self.assertFalse(file.primary) self.assertEqual(True, file.is_reference) + def test_delete_reference_file(self): + ExampleDataLoader().load_reference_documents() + reference_file = 
session.query(FileModel).filter(FileModel.is_reference == True).first() + rv = self.app.get('/v1.0/reference_file/%s' % reference_file.name, headers=self.logged_in_headers()) + self.assert_success(rv) + self.app.delete('/v1.0/reference_file/%s' % reference_file.name, headers=self.logged_in_headers()) + db.session.flush() + rv = self.app.get('/v1.0/reference_file/%s' % reference_file.name, headers=self.logged_in_headers()) + self.assertEqual(404, rv.status_code) + self.assertIsNotNone(rv.get_data()) + json_data = json.loads(rv.get_data(as_text=True)) + self.assertIn('The reference file name you provided', json_data['message']) def test_list_reference_files(self): ExampleDataLoader.clean_db() @@ -176,7 +201,7 @@ class TestFilesApi(BaseTest): with open(filepath, 'rb') as myfile: file_data = myfile.read() data = {'file': (io.BytesIO(file_data), file_name)} - rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True, + rv = self.app.post('/v1.0/reference_file', data=data, follow_redirects=True, content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_success(rv) rv = self.app.get('/v1.0/reference_file', @@ -191,21 +216,29 @@ class TestFilesApi(BaseTest): def test_update_file_info(self): self.load_example_data() - self.create_reference_document() - file: FileModel = session.query(FileModel).filter(FileModel.is_reference==False).first() - file.name = "silly_new_name.bpmn" + file: FileModel = session.query(FileModel).filter(column('workflow_spec_id').isnot(None)).first() + file_model = FileModel(id=file.id, + name="silly_new_name.bpmn", + type=file.type, + content_type=file.content_type, + is_reference=file.is_reference, + primary=file.primary, + primary_process_id=file.primary_process_id, + workflow_id=file.workflow_id, + workflow_spec_id=file.workflow_spec_id, + archived=file.archived) + # file.name = "silly_new_name.bpmn" - rv = self.app.put('/v1.0/file/%i' % file.id, + rv = self.app.put('/v1.0/spec_file/%i' % file.id, content_type="application/json", - data=json.dumps(FileModelSchema().dump(file)), headers=self.logged_in_headers()) + data=json.dumps(FileModelSchema().dump(file_model)), headers=self.logged_in_headers()) self.assert_success(rv) db_file = session.query(FileModel).filter_by(id=file.id).first() self.assertIsNotNone(db_file) - self.assertEqual(file.name, db_file.name) + self.assertEqual("silly_new_name.bpmn", db_file.name) def test_load_valid_url_for_files(self): self.load_example_data() - self.create_reference_document() file: FileModel = session.query(FileModel).filter(FileModel.is_reference == False).first() rv = self.app.get('/v1.0/file/%i' % file.id, content_type="application/json", headers=self.logged_in_headers()) self.assert_success(rv) @@ -220,55 +253,44 @@ class TestFilesApi(BaseTest): spec = session.query(WorkflowSpecModel).first() data = {} data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn' - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - file_json = json.loads(rv.get_data(as_text=True)) - self.assertEqual(80, file_json['size']) + rv_1 = self.app.post('/v1.0/spec_file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + file_json_1 = json.loads(rv_1.get_data(as_text=True)) + self.assertEqual(80, file_json_1['size']) + + file_id = file_json_1['id'] + rv_2 = 
self.app.get('/v1.0/spec_file/%i/data' % file_id, headers=self.logged_in_headers()) + self.assert_success(rv_2) + rv_data_2 = rv_2.get_data() + self.assertIsNotNone(rv_data_2) + self.assertEqual(self.minimal_bpmn("abcdef"), rv_data_2) data['file'] = io.BytesIO(self.minimal_bpmn("efghijk")), 'my_new_file.bpmn' - rv = self.app.put('/v1.0/file/%i/data' % file_json['id'], data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assert_success(rv) - self.assertIsNotNone(rv.get_data()) - file_json = json.loads(rv.get_data(as_text=True)) - self.assertEqual(2, file_json['latest_version']) - self.assertEqual(FileType.bpmn.value, file_json['type']) - self.assertEqual("application/octet-stream", file_json['content_type']) - self.assertEqual(spec.id, file_json['workflow_spec_id']) + rv_3 = self.app.put('/v1.0/spec_file/%i/data' % file_id, data=data, follow_redirects=True, + content_type='multipart/form-data', headers=self.logged_in_headers()) + self.assert_success(rv_3) + self.assertIsNotNone(rv_3.get_data()) + file_json_3 = json.loads(rv_3.get_data(as_text=True)) + self.assertEqual(FileType.bpmn.value, file_json_3['type']) + self.assertEqual("application/octet-stream", file_json_3['content_type']) + self.assertEqual(spec.id, file_json_3['workflow_spec_id']) # Assure it is updated in the database and properly persisted. - file_model = session.query(FileModel).filter(FileModel.id == file_json['id']).first() - file_data = FileService.get_file_data(file_model.id) - self.assertEqual(2, file_data.version) + file_model = session.query(FileModel).filter(FileModel.id == file_id).first() + file_data = SpecFileService().get_spec_file_data(file_model.id) + self.assertEqual(81, len(file_data.data)) - rv = self.app.get('/v1.0/file/%i/data' % file_json['id'], headers=self.logged_in_headers()) - self.assert_success(rv) - data = rv.get_data() + rv_4 = self.app.get('/v1.0/spec_file/%i/data' % file_id, headers=self.logged_in_headers()) + self.assert_success(rv_4) + data = rv_4.get_data() self.assertIsNotNone(data) self.assertEqual(self.minimal_bpmn("efghijk"), data) - def test_update_with_same_exact_data_does_not_increment_version(self): - self.load_example_data() - spec = session.query(WorkflowSpecModel).first() - data = {} - data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn' - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assertIsNotNone(rv.get_data()) - json_data = json.loads(rv.get_data(as_text=True)) - self.assertEqual(1, json_data['latest_version']) - data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn' - rv = self.app.put('/v1.0/file/%i/data' % json_data['id'], data=data, follow_redirects=True, - content_type='multipart/form-data', headers=self.logged_in_headers()) - self.assertIsNotNone(rv.get_data()) - json_data = json.loads(rv.get_data(as_text=True)) - self.assertEqual(1, json_data['latest_version']) - def test_get_file(self): self.load_example_data() spec = session.query(WorkflowSpecModel).first() file = session.query(FileModel).filter_by(workflow_spec_id=spec.id).first() - rv = self.app.get('/v1.0/file/%i/data' % file.id, headers=self.logged_in_headers()) + rv = self.app.get('/v1.0/spec_file/%i/data' % file.id, headers=self.logged_in_headers()) self.assert_success(rv) self.assertEqual("text/xml; charset=utf-8", rv.content_type) self.assertTrue(rv.content_length > 1) @@ -337,16 
+359,16 @@ class TestFilesApi(BaseTest): self.assertEqual('Ancillary Document', json_data['document']['category1']) self.assertEqual('Study Team', json_data['document']['who_uploads?']) - def test_delete_file(self): + def test_delete_spec_file(self): self.load_example_data() spec = session.query(WorkflowSpecModel).first() file = session.query(FileModel).filter_by(workflow_spec_id=spec.id).first() file_id = file.id - rv = self.app.get('/v1.0/file/%i' % file.id, headers=self.logged_in_headers()) + rv = self.app.get('/v1.0/spec_file/%i' % file.id, headers=self.logged_in_headers()) self.assert_success(rv) - rv = self.app.delete('/v1.0/file/%i' % file.id, headers=self.logged_in_headers()) + self.app.delete('/v1.0/spec_file/%i' % file.id, headers=self.logged_in_headers()) db.session.flush() - rv = self.app.get('/v1.0/file/%i' % file_id, headers=self.logged_in_headers()) + rv = self.app.get('/v1.0/spec_file/%i' % file_id, headers=self.logged_in_headers()) self.assertEqual(404, rv.status_code) def test_change_primary_bpmn(self): @@ -356,7 +378,7 @@ class TestFilesApi(BaseTest): data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn' # Add a new BPMN file to the specification - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, + rv = self.app.post('/v1.0/spec_file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True, content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_success(rv) self.assertIsNotNone(rv.get_data()) @@ -367,11 +389,11 @@ class TestFilesApi(BaseTest): orig_model = session.query(FileModel). \ filter(FileModel.primary == True). \ filter(FileModel.workflow_spec_id == spec.id).first() - rv = self.app.delete('/v1.0/file?file_id=%s' % orig_model.id, headers=self.logged_in_headers()) + rv = self.app.delete('/v1.0/spec_file?file_id=%s' % orig_model.id, headers=self.logged_in_headers()) # Set that new file to be the primary BPMN, assure it has a primary_process_id file.primary = True - rv = self.app.put('/v1.0/file/%i' % file.id, + rv = self.app.put('/v1.0/spec_file/%i' % file.id, content_type="application/json", data=json.dumps(FileModelSchema().dump(file)), headers=self.logged_in_headers()) self.assert_success(rv) @@ -385,7 +407,7 @@ class TestFilesApi(BaseTest): # Add file data = {'file': (io.BytesIO(b'asdf'), 'test_file.xlsx')} - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % workflow_spec_model.id, + rv = self.app.post('/v1.0/spec_file?workflow_spec_id=%s' % workflow_spec_model.id, data=data, follow_redirects=True, content_type='multipart/form-data', @@ -401,14 +423,14 @@ class TestFilesApi(BaseTest): session.commit() # Assert we have the correct file data and the file is archived - file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model.id).first() + file_data_model = SpecFileService().get_spec_file_data(file_model.id) self.assertEqual(b'asdf', file_data_model.data) file_model = session.query(FileModel).filter_by(id=file_model.id).first() self.assertEqual(True, file_model.archived) # Upload file with same name data = {'file': (io.BytesIO(b'xyzpdq'), 'test_file.xlsx')} - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % workflow_spec_model.id, + rv = self.app.post('/v1.0/spec_file?workflow_spec_id=%s' % workflow_spec_model.id, data=data, follow_redirects=True, content_type='multipart/form-data', @@ -419,7 +441,7 @@ class TestFilesApi(BaseTest): file_id = file_json['id'] # Assert we have the correct file data and the file is *not* 
archived - file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_id).order_by(desc(FileDataModel.version)).first() + file_data_model = SpecFileService().get_spec_file_data(file_id) self.assertEqual(b'xyzpdq', file_data_model.data) file_model = session.query(FileModel).filter_by(id=file_id).first() self.assertEqual(False, file_model.archived) diff --git a/tests/ldap/test_ldap_lookup_script.py b/tests/ldap/test_ldap_lookup_script.py index e7bdf658..9e6d035c 100644 --- a/tests/ldap/test_ldap_lookup_script.py +++ b/tests/ldap/test_ldap_lookup_script.py @@ -7,14 +7,12 @@ from crc.services.workflow_service import WorkflowService from crc.models.user import UserModel from crc.services.workflow_processor import WorkflowProcessor from crc.scripts.ldap import Ldap -from crc.api.common import ApiError class TestLdapLookupScript(BaseTest): def test_get_existing_user_details(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('empty_workflow') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -35,7 +33,6 @@ class TestLdapLookupScript(BaseTest): def test_get_invalid_user_details(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('empty_workflow') processor = WorkflowProcessor(workflow) task = processor.next_task() @@ -50,7 +47,6 @@ class TestLdapLookupScript(BaseTest): def test_get_current_user_details(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('empty_workflow') processor = WorkflowProcessor(workflow) task = processor.next_task() diff --git a/tests/study/test_study_api.py b/tests/study/test_study_api.py index 0de5c6ef..023433ad 100644 --- a/tests/study/test_study_api.py +++ b/tests/study/test_study_api.py @@ -78,7 +78,6 @@ class TestStudyApi(BaseTest): # Set up the study and attach a file to it. 
self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('file_upload_form') processor = WorkflowProcessor(workflow) task = processor.next_task() diff --git a/tests/study/test_study_associate_script.py b/tests/study/test_study_associate_script.py index 80b2f02a..496ffd37 100644 --- a/tests/study/test_study_associate_script.py +++ b/tests/study/test_study_associate_script.py @@ -4,7 +4,6 @@ from unittest.mock import patch import flask from crc.api.common import ApiError -from crc.services.user_service import UserService from crc import session, app from crc.models.study import StudyModel @@ -43,7 +42,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors_associate") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -81,7 +79,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors_associate_fail") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -99,7 +96,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors_associate_switch_user") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -121,7 +117,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors_associate_switch_user") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -153,7 +148,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors_associate_switch_user") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -182,7 +176,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors_associates_delete") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) diff --git a/tests/study/test_study_data_store_script.py b/tests/study/test_study_data_store_script.py index 6f7924c5..9eebc46f 100644 --- a/tests/study/test_study_data_store_script.py +++ b/tests/study/test_study_data_store_script.py @@ -39,7 +39,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors_data_store") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) diff --git a/tests/study/test_study_details.py b/tests/study/test_study_details.py index 87c813ee..a8e736d4 100644 --- a/tests/study/test_study_details.py +++ b/tests/study/test_study_details.py @@ -18,7 +18,6 @@ class 
TestStudyDetailsScript(BaseTest): def setUp(self): self.load_example_data() - self.create_reference_document() self.study = session.query(StudyModel).first() self.workflow_spec_model = self.load_test_spec("two_forms") self.workflow_model = StudyService._create_workflow_model(self.study, self.workflow_spec_model) diff --git a/tests/study/test_study_details_documents.py b/tests/study/test_study_details_documents.py index e81de5e5..15655efc 100644 --- a/tests/study/test_study_details_documents.py +++ b/tests/study/test_study_details_documents.py @@ -34,7 +34,6 @@ class TestStudyDetailsDocumentsScript(BaseTest): mock_get.return_value.text = self.protocol_builder_response('required_docs.json') self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("two_forms") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -60,7 +59,6 @@ class TestStudyDetailsDocumentsScript(BaseTest): mock_get.return_value.text = self.protocol_builder_response('required_docs.json') self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("two_forms") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -82,7 +80,6 @@ class TestStudyDetailsDocumentsScript(BaseTest): mock_get.return_value.ok = True mock_get.return_value.text = self.protocol_builder_response('required_docs.json') self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("two_forms") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -98,7 +95,6 @@ class TestStudyDetailsDocumentsScript(BaseTest): mock_get.return_value.ok = True mock_get.return_value.text = self.protocol_builder_response('required_docs.json') self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("two_forms") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -120,7 +116,6 @@ class TestStudyDetailsDocumentsScript(BaseTest): mock_get.return_value.ok = True mock_get.return_value.text = self.protocol_builder_response('required_docs.json') self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("two_forms") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) @@ -143,7 +138,6 @@ class TestStudyDetailsDocumentsScript(BaseTest): mock_get.return_value.ok = True mock_get.return_value.text = self.protocol_builder_response('required_docs.json') self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("two_forms") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) diff --git a/tests/study/test_study_service.py b/tests/study/test_study_service.py index c094c9db..87fe84f7 100644 --- a/tests/study/test_study_service.py +++ b/tests/study/test_study_service.py @@ -90,7 +90,6 @@ class TestStudyService(BaseTest): # self.assertEqual(WorkflowStatus.user_input_required, workflow.status) self.assertTrue(workflow.total_tasks > 0) self.assertEqual(0, workflow.completed_tasks) - self.assertIsNotNone(workflow.spec_version) # Complete a task task = processor.next_task() diff --git 
a/tests/study/test_study_sponsors_script.py b/tests/study/test_study_sponsors_script.py index ab5e6f3b..0b0c4b19 100644 --- a/tests/study/test_study_sponsors_script.py +++ b/tests/study/test_study_sponsors_script.py @@ -35,7 +35,6 @@ class TestSudySponsorsScript(BaseTest): app.config['PB_ENABLED'] = True self.load_example_data() - self.create_reference_document() study = session.query(StudyModel).first() workflow_spec_model = self.load_test_spec("study_sponsors") workflow_model = StudyService._create_workflow_model(study, workflow_spec_model) diff --git a/tests/study/test_update_study_script.py b/tests/study/test_update_study_script.py index 62444dd4..b3d2132d 100644 --- a/tests/study/test_update_study_script.py +++ b/tests/study/test_update_study_script.py @@ -9,7 +9,6 @@ class TestUpdateStudyScript(BaseTest): def test_do_task(self): self.load_example_data() - self.create_reference_document() workflow = self.create_workflow('empty_workflow') processor = WorkflowProcessor(workflow) task = processor.next_task() diff --git a/tests/test_auto_set_primary_bpmn.py b/tests/test_auto_set_primary_bpmn.py index b4c22e94..2456105d 100644 --- a/tests/test_auto_set_primary_bpmn.py +++ b/tests/test_auto_set_primary_bpmn.py @@ -30,7 +30,7 @@ class TestAutoSetPrimaryBPMN(BaseTest): data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn' # Add a new BPMN file to the specification - rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % db_spec.id, data=data, follow_redirects=True, + rv = self.app.post('/v1.0/spec_file?workflow_spec_id=%s' % db_spec.id, data=data, follow_redirects=True, content_type='multipart/form-data', headers=self.logged_in_headers()) self.assert_success(rv) file_id = rv.json['id'] diff --git a/tests/test_file_datastore.py b/tests/test_file_datastore.py index 1b311e04..9db66491 100644 --- a/tests/test_file_datastore.py +++ b/tests/test_file_datastore.py @@ -14,7 +14,6 @@ class TestFileDatastore(BaseTest): def test_file_datastore_workflow(self): self.load_example_data() - self.create_reference_document() # we need to create a file with an IRB code # for this study workflow = self.create_workflow('file_data_store') diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index 1aa58613..e55b351e 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -6,7 +6,10 @@ from crc.services.file_service import FileService from crc.api.common import ApiError from crc import session, app from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel, CONTENT_TYPES +from crc.models.workflow import WorkflowSpecModel from crc.services.lookup_service import LookupService +from crc.services.reference_file_service import ReferenceFileService +from crc.services.spec_file_service import SpecFileService from crc.services.workflow_processor import WorkflowProcessor @@ -49,7 +52,13 @@ class TestLookupService(BaseTest): file_path = os.path.join(app.root_path, '..', 'tests', 'data', 'enum_options_with_search', 'sponsors_modified.xlsx') file = open(file_path, 'rb') - FileService.update_file(file_model, file.read(), CONTENT_TYPES['xlsx']) + if file_model.workflow_spec_id is not None: + workflow_spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id==file_model.workflow_spec_id).first() + SpecFileService().update_spec_file_data(workflow_spec_model, file_model.name, file.read()) + elif file_model.is_reference: + ReferenceFileService().update_reference_file(file_model, file.read()) + else: + 
FileService.update_file(file_model, file.read(), CONTENT_TYPES['xlsx']) file.close() # restart the workflow, so it can pick up the changes. @@ -182,15 +191,17 @@ class TestLookupService(BaseTest): # Using an old xls file should raise an error file_model_xls = session.query(FileModel).filter(FileModel.name == 'sponsors.xls').first() - file_data_model_xls = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xls.id).first() + file_data_xls = SpecFileService().get_spec_file_data(file_model_xls.id) + # file_data_model_xls = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xls.id).first() with self.assertRaises(ApiError) as ae: - LookupService.build_lookup_table(file_data_model_xls, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') + LookupService.build_lookup_table(file_model_xls.id, 'sponsors.xls', file_data_xls.data, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') self.assertIn('Error opening excel file', ae.exception.args[0]) # Using an xlsx file should work file_model_xlsx = session.query(FileModel).filter(FileModel.name == 'sponsors.xlsx').first() - file_data_model_xlsx = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xlsx.id).first() - lookup_model = LookupService.build_lookup_table(file_data_model_xlsx, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') + file_data_xlsx = SpecFileService().get_spec_file_data(file_model_xlsx.id) + # file_data_model_xlsx = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xlsx.id).first() + lookup_model = LookupService.build_lookup_table(file_model_xlsx.id, 'sponsors.xlsx', file_data_xlsx.data, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') self.assertEqual(28, len(lookup_model.dependencies)) self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys()) self.assertIn('CUSTOMER_NUMBER', lookup_model.dependencies[0].data.keys()) diff --git a/tests/test_multi_instance_tasks_api.py b/tests/test_multi_instance_tasks_api.py index ca5100f5..68b56c0e 100644 --- a/tests/test_multi_instance_tasks_api.py +++ b/tests/test_multi_instance_tasks_api.py @@ -4,7 +4,7 @@ from unittest.mock import patch from tests.base_test import BaseTest -from crc import session, app +from crc import app from crc.models.api_models import WorkflowApiSchema, MultiInstanceType from crc.models.workflow import WorkflowStatus from example_data import ExampleDataLoader @@ -28,15 +28,15 @@ class TestMultiinstanceTasksApi(BaseTest): workflow = self.create_workflow('multi_instance') # get the first form in the two form workflow. - workflow = self.get_workflow_api(workflow) - navigation = self.get_workflow_api(workflow).navigation + workflow_api = self.get_workflow_api(workflow) + navigation = self.get_workflow_api(workflow_api).navigation self.assertEqual(5, len(navigation)) # Start task, form_task, multi_task, end task - self.assertEqual("UserTask", workflow.next_task.type) - self.assertEqual(MultiInstanceType.sequential.value, workflow.next_task.multi_instance_type) - self.assertEqual(5, workflow.next_task.multi_instance_count) + self.assertEqual("UserTask", workflow_api.next_task.type) + self.assertEqual(MultiInstanceType.sequential.value, workflow_api.next_task.multi_instance_type) + self.assertEqual(5, workflow_api.next_task.multi_instance_count) # Assure that the names for each task are properly updated, so they aren't all the same. 
- self.assertEqual("Primary Investigator", workflow.next_task.title) + self.assertEqual("Primary Investigator", workflow_api.next_task.title) @patch('crc.services.protocol_builder.requests.get') diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index b663207a..7a9325a9 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -1,16 +1,14 @@ import json import os -import random -from unittest.mock import patch from tests.base_test import BaseTest from crc import session, app -from crc.models.api_models import WorkflowApiSchema, MultiInstanceType, TaskSchema +from crc.models.api_models import WorkflowApiSchema from crc.models.file import FileModelSchema from crc.models.workflow import WorkflowStatus from crc.models.task_event import TaskEventModel -from SpiffWorkflow.bpmn.PythonScriptEngine import Box + class TestTasksApi(BaseTest): @@ -185,34 +183,33 @@ class TestTasksApi(BaseTest): def test_load_workflow_from_outdated_spec(self): # Start the basic two_forms workflow and complete a task. workflow = self.create_workflow('two_forms') - workflow_api = self.get_workflow_api(workflow) - self.complete_form(workflow, workflow_api.next_task, {"color": "blue"}) - self.assertTrue(workflow_api.is_latest_spec) + workflow_api_1 = self.get_workflow_api(workflow) + self.complete_form(workflow, workflow_api_1.next_task, {"color": "blue"}) # Modify the specification, with a major change that alters the flow and can't be deserialized # effectively, if it uses the latest spec files. file_path = os.path.join(app.root_path, '..', 'tests', 'data', 'two_forms', 'modified', 'two_forms_struc_mod.bpmn') self.replace_file("two_forms.bpmn", file_path) - workflow_api = self.get_workflow_api(workflow) - self.assertTrue(workflow_api.spec_version.startswith("v1 ")) - self.assertFalse(workflow_api.is_latest_spec) + # This should use the original workflow spec, and just move to the next task + workflow_api_2 = self.get_workflow_api(workflow) + self.assertEqual('StepTwo', workflow_api_2.next_task.name) - workflow_api = self.restart_workflow_api(workflow_api, clear_data=True) - self.assertTrue(workflow_api.spec_version.startswith("v2 ")) - self.assertTrue(workflow_api.is_latest_spec) + workflow_api_3 = self.restart_workflow_api(workflow_api_2, clear_data=True) + # This should restart the workflow and we should be back on StepOne + self.assertEqual('StepOne', workflow_api_3.next_task.name) # Assure this hard_reset sticks (added this after a bug was found) - workflow_api = self.get_workflow_api(workflow) - self.assertTrue(workflow_api.spec_version.startswith("v2 ")) - self.assertTrue(workflow_api.is_latest_spec) + # Again, we should be on StepOne + workflow_api_4 = self.get_workflow_api(workflow) + self.assertEqual('StepOne', workflow_api_4.next_task.name) def test_reset_workflow_from_broken_spec(self): # Start the basic two_forms workflow and complete a task. 
workflow = self.create_workflow('two_forms') workflow_api = self.get_workflow_api(workflow) self.complete_form(workflow, workflow_api.next_task, {"color": "blue"}) - self.assertTrue(workflow_api.is_latest_spec) + # self.assertTrue(workflow_api.is_latest_spec) # Break the bpmn json workflow.bpmn_workflow_json = '{"something":"broken"}' @@ -227,12 +224,10 @@ class TestTasksApi(BaseTest): workflow_api = self.restart_workflow_api(workflow_api, clear_data=True) self.assertIsNotNone(workflow_api) - - def test_manual_task_with_external_documentation(self): workflow = self.create_workflow('manual_task_with_external_documentation') - # get the first form in the two form workflow. + # Complete the form in the workflow. task = self.get_workflow_api(workflow).next_task workflow_api = self.complete_form(workflow, task, {"name": "Dan"}) diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py deleted file mode 100644 index d2c71100..00000000 --- a/tests/test_workflow_sync.py +++ /dev/null @@ -1,258 +0,0 @@ -from unittest import mock -from unittest.mock import patch - -from tests.base_test import BaseTest - -from crc import db -from crc.api.workflow_sync import get_all_spec_state, \ - get_changed_workflows, \ - get_workflow_spec_files, \ - get_changed_files, \ - get_workflow_specification, \ - sync_changed_files -from crc.models.workflow import WorkflowSpecModel -from datetime import datetime -from crc.services.file_service import FileService -from crc.services.workflow_sync import WorkflowSyncService - -def get_random_fact_pos(othersys): - """ - Make sure we get the 'random_fact' workflow spec - no matter what order it is in - """ - rf2pos = 0 - for pos in range(len(othersys)): - if othersys[pos]['workflow_spec_id'] == 'random_fact': - rf2pos = pos - return rf2pos - - -def get_random_fact_2_pos(othersys): - """ - Makes sure we get the random_fact2.bpmn file no matter what order it is in - """ - rf2pos = 0 - for pos in range(len(othersys)): - if othersys[pos]['filename'] == 'random_fact2.bpmn': - rf2pos = pos - return rf2pos - - -class TestWorkflowSync(BaseTest): - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') - def test_get_no_changes(self, mock_get): - self.load_example_data() - othersys = get_all_spec_state() - mock_get.return_value = othersys - response = get_changed_workflows('localhost:0000') # not actually used due to mock - self.assertIsNotNone(response) - self.assertEqual(response,[]) - - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') - def test_remote_workflow_change(self, mock_get): - self.load_example_data() - othersys = get_all_spec_state() - rf2pos = get_random_fact_pos(othersys) - othersys[rf2pos]['date_created'] = str(datetime.utcnow()) - othersys[rf2pos]['md5_hash'] = '12345' - mock_get.return_value = othersys - response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock - self.assertIsNotNone(response) - self.assertEqual(len(response),1) - self.assertEqual(response[0]['workflow_spec_id'], 'random_fact') - self.assertEqual(response[0]['location'], 'remote') - self.assertEqual(response[0]['new'], False) - - - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') - def test_remote_workflow_has_new(self, mock_get): - self.load_example_data() - othersys = get_all_spec_state() - othersys.append({'workflow_spec_id':'my_new_workflow', - 'date_created':str(datetime.utcnow()), - 'md5_hash': '12345'}) - mock_get.return_value = othersys - response = 
get_changed_workflows('localhost:0000') #endpoint is not used due to mock - self.assertIsNotNone(response) - self.assertEqual(len(response),1) - self.assertEqual(response[0]['workflow_spec_id'],'my_new_workflow') - self.assertEqual(response[0]['location'], 'remote') - self.assertEqual(response[0]['new'], True) - - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') - def test_local_workflow_has_new(self, mock_get): - self.load_example_data() - - othersys = get_all_spec_state() - mock_get.return_value = othersys - wf_spec = WorkflowSpecModel() - wf_spec.id = 'abcdefg' - wf_spec.display_name = 'New Workflow - Yum!!' - wf_spec.name = 'my_new_workflow' - wf_spec.description = 'yep - its a new workflow' - wf_spec.category_id = 0 - wf_spec.display_order = 0 - db.session.add(wf_spec) - db.session.commit() - FileService.add_workflow_spec_file(wf_spec,'dummyfile.txt','text',b'this is a test') - # after setting up the test - I realized that this doesn't return anything for - # a workflow that is new locally - it just returns nothing - response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock - self.assertIsNotNone(response) - self.assertEqual(response,[]) - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') - def test_file_differences_clean_slate(self, mock_get): - """ This test is basically for coverage""" - self.load_example_data() - othersys = get_workflow_spec_files('random_fact') - mock_get.return_value = othersys - self.delete_example_data() - response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock - self.assertIsNotNone(response) - self.assertEqual(len(response),2) - self.assertEqual(response[0]['location'], 'remote') - self.assertEqual(response[0]['new'], True) - - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') - def test_file_differences(self, mock_get): - self.load_example_data() - othersys = get_workflow_spec_files('random_fact') - rf2pos = get_random_fact_2_pos(othersys) - othersys[rf2pos]['date_created'] = str(datetime.utcnow()) - othersys[rf2pos]['md5_hash'] = '12345' - mock_get.return_value = othersys - response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock - self.assertIsNotNone(response) - self.assertEqual(len(response),1) - self.assertEqual(response[0]['filename'], 'random_fact2.bpmn') - self.assertEqual(response[0]['location'], 'remote') - self.assertEqual(response[0]['new'], False) - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash') - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec') - def test_workflow_differences(self, workflow_mock, spec_files_mock, file_data_mock): - self.load_example_data() - # make a remote workflow that is slightly different from local - remote_workflow = get_workflow_specification('random_fact') - self.assertEqual(remote_workflow['display_name'],'Random Fact') - remote_workflow['description'] = 'This Workflow came from Remote' - remote_workflow['display_name'] = 'Remote Workflow' - remote_workflow['library'] = True - workflow_mock.return_value = remote_workflow - # change the remote file date and hash - othersys = get_workflow_spec_files('random_fact') - rf2pos = get_random_fact_2_pos(othersys) - othersys[rf2pos]['date_created'] = str(datetime.utcnow()) - 
othersys[rf2pos]['md5_hash'] = '12345' - spec_files_mock.return_value = othersys - # actually go get a different file - file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn') - response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock - # now make sure that everything gets pulled over - self.assertIsNotNone(response) - self.assertEqual(len(response),1) - self.assertEqual(response[0], 'random_fact2.bpmn') - files = FileService.get_spec_data_files('random_fact') - md5sums = [str(f.md5_hash) for f in files] - self.assertEqual('21bb6f9e-0af7-0ab2-0fc7-ec0f94787e58' in md5sums, True) - new_local_workflow = get_workflow_specification('random_fact') - self.assertEqual(new_local_workflow['display_name'],'Remote Workflow') - self.assertTrue(new_local_workflow['library']) - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash') - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') - def test_workflow_sync_with_libraries(self, get_remote_workflow_spec_files_mock, get_remote_file_by_hash_mock): - self.load_example_data() - # make a remote workflow that is slightly different from local, and add a library to it. - remote_workflow = get_workflow_specification('random_fact') - remote_library = self.load_test_spec('two_forms') - remote_workflow['description'] = 'This Workflow came from Remote' - remote_workflow['libraries'] = [{'id': remote_library.id, 'name': 'two_forms', 'display_name': "Two Forms"}] - - random_workflow_remote_files = get_workflow_spec_files('random_fact') - rf2pos = get_random_fact_2_pos(random_workflow_remote_files) - random_workflow_remote_files[rf2pos]['date_created'] = str(datetime.utcnow()) - random_workflow_remote_files[rf2pos]['md5_hash'] = '12345' - get_remote_workflow_spec_files_mock.return_value = random_workflow_remote_files - get_remote_file_by_hash_mock.return_value = self.workflow_sync_response('random_fact2.bpmn') - - # more mock stuff, but we need to return different things depending on what is asked, so we use the side - # effect pattern rather than setting a single return_value through a patch. 
- def mock_workflow_spec(*args): - if args[1] == 'random_fact': - return remote_workflow - else: - return get_workflow_specification(args[1]) - - with mock.patch.object(WorkflowSyncService, 'get_remote_workflow_spec', side_effect=mock_workflow_spec): - response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock - - self.assertIsNotNone(response) - self.assertEqual(len(response),1) - self.assertEqual(response[0], 'random_fact2.bpmn') - files = FileService.get_spec_data_files('random_fact') - md5sums = [str(f.md5_hash) for f in files] - self.assertEqual('21bb6f9e-0af7-0ab2-0fc7-ec0f94787e58' in md5sums, True) - new_local_workflow = get_workflow_specification('random_fact') - self.assertEqual(new_local_workflow['display_name'],'Random Fact') - self.assertEqual(1, len(new_local_workflow['libraries'])) - - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash') - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') - def test_ref_file_differences(self, spec_files_mock, file_data_mock): - """ - Make sure we copy over a new reference file if it exists - """ - self.load_example_data() - # make a remote workflow that is slightly different from local - othersys = get_workflow_spec_files('REFERENCE_FILES') - newfile = {'file_model_id':9999, - 'workflow_spec_id': None, - 'filename':'test.txt', - 'type':'txt', - 'primary':False, - 'content_type':'text/text', - 'primary_process_id':None, - 'date_created':str(datetime.utcnow()), - 'md5_hash':'12345' - } - othersys.append(newfile) - spec_files_mock.return_value = othersys - # actually go get a different file - file_data_mock.return_value = self.workflow_sync_response('test.txt') - response = sync_changed_files('localhost:0000','REFERENCE_FILES') # endpoint not used due to mock - # now make sure that everything gets pulled over - self.assertIsNotNone(response) - self.assertEqual(len(response),1) - self.assertEqual(response[0], 'test.txt') - ref_file = FileService.get_reference_file_data('test.txt') - self.assertEqual('24a2ab0d-1138-a80a-0b98-ed38894f5a04',str(ref_file.md5_hash)) - - - - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec') - def test_file_deleted(self, workflow_mock, spec_files_mock): - self.load_example_data() - remote_workflow = get_workflow_specification('random_fact') - workflow_mock.return_value = remote_workflow - othersys = get_workflow_spec_files('random_fact') - rf2pos = get_random_fact_2_pos(othersys) - del(othersys[rf2pos]) - spec_files_mock.return_value = othersys - response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock - self.assertIsNotNone(response) - # when we delete a local file, we do not return that it was deleted - just - # a list of updated files. We may want to change this in the future. 
- self.assertEqual(len(response),0) - files = FileService.get_spec_data_files('random_fact') - self.assertEqual(len(files),1) - diff --git a/tests/workflow/test_duplicate_workflow_spec_file.py b/tests/workflow/test_duplicate_workflow_spec_file.py index 0e4d82fc..ef183a42 100644 --- a/tests/workflow/test_duplicate_workflow_spec_file.py +++ b/tests/workflow/test_duplicate_workflow_spec_file.py @@ -2,7 +2,7 @@ from tests.base_test import BaseTest from crc import session from crc.api.common import ApiError from crc.models.workflow import WorkflowSpecModel -from crc.services.file_service import FileService +from crc.services.spec_file_service import SpecFileService class TestDuplicateWorkflowSpecFile(BaseTest): @@ -15,7 +15,7 @@ class TestDuplicateWorkflowSpecFile(BaseTest): spec = session.query(WorkflowSpecModel).first() # Add a file - file_model = FileService.add_workflow_spec_file(spec, + file_model = SpecFileService.add_workflow_spec_file(spec, name="something.png", content_type="text", binary_data=b'1234') @@ -24,7 +24,7 @@ class TestDuplicateWorkflowSpecFile(BaseTest): # Try to add it again try: - FileService.add_workflow_spec_file(spec, + SpecFileService.add_workflow_spec_file(spec, name="something.png", content_type="text", binary_data=b'5678') diff --git a/tests/workflow/test_workflow_processor.py b/tests/workflow/test_workflow_processor.py index e82558a4..91a56094 100644 --- a/tests/workflow/test_workflow_processor.py +++ b/tests/workflow/test_workflow_processor.py @@ -14,7 +14,6 @@ from crc.models.file import FileModel, FileDataModel from crc.models.protocol_builder import ProtocolBuilderCreatorStudySchema from crc.models.study import StudyModel from crc.models.workflow import WorkflowSpecModel, WorkflowStatus -from crc.services.file_service import FileService from crc.services.study_service import StudyService from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_service import WorkflowService @@ -247,27 +246,6 @@ class TestWorkflowProcessor(BaseTest): self.assertIn("last_updated", task.data["StudyInfo"]["info"]) self.assertIn("sponsor", task.data["StudyInfo"]["info"]) - def test_spec_versioning(self): - self.load_example_data() - study = session.query(StudyModel).first() - workflow_spec_model = self.load_test_spec("decision_table") - processor = self.get_processor(study, workflow_spec_model) - self.assertTrue(processor.get_version_string().startswith('v1.1')) - file_service = FileService() - - file_service.add_workflow_spec_file(workflow_spec_model, "new_file.txt", "txt", b'blahblah') - processor = self.get_processor(study, workflow_spec_model) - self.assertTrue(processor.get_version_string().startswith('v1.1.1')) - - file_path = os.path.join(app.root_path, '..', 'tests', 'data', 'docx', 'docx.bpmn') - file = open(file_path, "rb") - data = file.read() - - file_model = db.session.query(FileModel).filter(FileModel.name == "decision_table.bpmn").first() - file_service.update_file(file_model, data, "txt") - processor = self.get_processor(study, workflow_spec_model) - self.assertTrue(processor.get_version_string().startswith('v2.1.1')) - def test_hard_reset(self): self.load_example_data() # Start the two_forms workflow, and enter some data in the first form. @@ -291,14 +269,14 @@ class TestWorkflowProcessor(BaseTest): db.session.add(processor.workflow_model) ## Assure this isn't transient, which was causing some errors. 
self.assertIsNotNone(processor.workflow_model.bpmn_workflow_json) processor2 = WorkflowProcessor(processor.workflow_model) - self.assertFalse(processor2.is_latest_spec) # Still at version 1. + # self.assertFalse(processor2.is_latest_spec) # Still at version 1. # Do a hard reset, which should bring us back to the beginning, but retain the data. processor2 = WorkflowProcessor.reset(processor2.workflow_model) processor3 = WorkflowProcessor(processor.workflow_model) processor3.do_engine_steps() self.assertEqual("Step 1", processor3.next_task().task_spec.description) - self.assertTrue(processor3.is_latest_spec) # Now at version 2. + # self.assertTrue(processor3.is_latest_spec) # Now at version 2. task = processor3.next_task() task.data = {"color": "blue"} processor3.complete_task(task)
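The services these tests now exercise are not shown in this diff, so a minimal sketch of the interface the call sites imply may help: get_spec_file_data(file_id) returns an object exposing .data, update_spec_file_data(...) overwrites the stored bytes, and an unknown file id raises an error whose message starts with 'No model found for file with file_id'. Everything else below — the SpecFileServiceSketch, FileRecord, and FileDataContainer names, the directory layout under SPEC_FILE_ROOT, and the use of KeyError in place of the project's ApiError — is an assumption for illustration, not the actual implementation.

import os
from dataclasses import dataclass
from typing import Dict

SPEC_FILE_ROOT = './SPECS'  # hypothetical default; the real root comes from app config


@dataclass
class FileRecord:
    """Stand-in for the FileModel metadata row that stays in the database."""
    id: int
    name: str
    workflow_spec_id: str


@dataclass
class FileDataContainer:
    """The tests only ever read `.data` from what the service returns."""
    data: bytes


class SpecFileServiceSketch:
    """Filesystem-backed spec files: metadata stays in the DB, bytes live
    under <root>/<workflow_spec_id>/<file name> (assumed layout)."""

    def __init__(self, records: Dict[int, FileRecord], root: str = SPEC_FILE_ROOT):
        self.records = records  # the real service queries the session instead
        self.root = root

    def _path(self, workflow_spec_id: str, file_name: str) -> str:
        return os.path.join(self.root, workflow_spec_id, file_name)

    def update_spec_file_data(self, workflow_spec_id: str, file_name: str,
                              data: bytes) -> FileDataContainer:
        # Overwrite in place: no FileDataModel version row and no version bump,
        # which is consistent with the latest_version assertions disappearing above.
        os.makedirs(os.path.join(self.root, workflow_spec_id), exist_ok=True)
        with open(self._path(workflow_spec_id, file_name), 'wb') as f:
            f.write(data)
        return FileDataContainer(data=data)

    def get_spec_file_data(self, file_id: int) -> FileDataContainer:
        record = self.records.get(file_id)
        if record is None:
            # The real service raises the project's ApiError with this message prefix.
            raise KeyError('No model found for file with file_id: %i' % file_id)
        with open(self._path(record.workflow_spec_id, record.name), 'rb') as f:
            return FileDataContainer(data=f.read())


# Round trip under the same assumptions, mirroring the reworked test_update_file_data:
records = {1: FileRecord(id=1, name='two_forms.bpmn', workflow_spec_id='two_forms')}
service = SpecFileServiceSketch(records, root='/tmp/specs')
service.update_spec_file_data('two_forms', 'two_forms.bpmn', b'<definitions/>')
assert service.get_spec_file_data(1).data == b'<definitions/>'

Under those assumptions the reworked tests reduce to a write-then-read round trip: push bytes through update_spec_file_data, read the same bytes back via get_spec_file_data(file_id).data, and compare content or length (80 bytes for minimal_bpmn("abcdef"), 81 for minimal_bpmn("efghijk")), rather than counting FileDataModel versions.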