diff --git a/crc/api.yml b/crc/api.yml index 980c4f23..f426c536 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -538,6 +538,67 @@ paths: format: binary example: '' # /v1.0/workflow/0 + /reference_file: + get: + operationId: crc.api.file.get_reference_files + summary: Provides a list of existing reference files that are available in the system. + tags: + - Files + responses: + '200': + description: An array of file descriptions (not the file content) + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/File" + /reference_file/{name}: + parameters: + - name: name + in: path + required: true + description: The special name of the reference file. + schema: + type: string + get: + operationId: crc.api.file.get_reference_file + summary: Reference files are called by name rather than by id. + tags: + - Files + responses: + '200': + description: Returns the actual file + content: + application/octet-stream: + schema: + type: string + format: binary + example: '' + put: + operationId: crc.api.file.set_reference_file + summary: Update the contents of a named reference file. 
def set_reference_file(name):
    """Create or replace the reference file called ``name`` from an upload.

    The upload is taken from the ``file`` part of the multipart request and
    its filename extension must equal the extension of ``name``. When
    ``FileService.get_files`` finds no reference file for ``name`` a new one
    is created with ``FileService.add_reference_file``; otherwise the first
    match is updated with ``FileService.update_file``.

    Returns the ``FileModelSchema`` dump of the stored file model.

    Raises ``ApiError`` with code ``invalid_file`` when the request has no
    ``file`` part, and with code ``invalid_file_type`` when the two
    extensions differ.
    """
    uploads = connexion.request.files
    if 'file' not in uploads:
        raise ApiError('invalid_file',
                       'Expected a file named "file" in the multipart form request', status_code=400)
    upload = uploads['file']

    # The reference name dictates the file type, so the upload must agree with it.
    expected_ext = FileService.get_extension(name)
    actual_ext = FileService.get_extension(upload.filename)
    if expected_ext != actual_ext:
        message = "The file you uploaded has an extension '%s', but it should have an extension of '%s' " % \
                  (actual_ext, expected_ext)
        raise ApiError('invalid_file_type', message)

    existing = FileService.get_files(name=name, is_reference=True)
    if existing:
        file_model = existing[0]
        FileService.update_file(file_model, upload.stream.read(), upload.content_type)
    else:
        file_model = FileService.add_reference_file(name, upload.content_type, upload.stream.read())

    return FileModelSchema().dump(file_model)
Which is specific to a study, workflow and task. + * An Uploaded file in a workflow. specific to a study, workflow, task, AND a field value. + """ __tablename__ = 'file' id = db.Column(db.Integer, primary_key=True) name = db.Column(db.String) @@ -73,6 +79,8 @@ class FileModel(db.Model): primary = db.Column(db.Boolean) is_status = db.Column(db.Boolean) content_type = db.Column(db.String) + is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file. + primary = db.Column(db.Boolean) # Is this the primary BPMN in a workflow? workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True) workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True) study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True) diff --git a/crc/services/file_service.py b/crc/services/file_service.py index aa0182d1..3042c12e 100644 --- a/crc/services/file_service.py +++ b/crc/services/file_service.py @@ -1,14 +1,13 @@ +import hashlib import os -from datetime import datetime from uuid import UUID from xml.etree import ElementTree from crc import session -from crc.api.common import ApiErrorSchema, ApiError -from crc.models.file import FileType, FileDataModel, FileModelSchema, FileModel, CONTENT_TYPES +from crc.api.common import ApiError +from crc.models.file import FileType, FileDataModel, FileModel from crc.models.workflow import WorkflowSpecModel from crc.services.workflow_processor import WorkflowProcessor -import hashlib class FileService(object): @@ -55,25 +54,39 @@ class FileService(object): ) return FileService.update_file(file_model, binary_data, content_type) + @staticmethod + def add_reference_file(name, content_type, binary_data): + """Create a file with the given name, but not associated with a spec or workflow. 
@staticmethod
def get_files(workflow_spec_id=None,
              study_id=None, workflow_id=None, task_id=None, form_field_key=None,
              name=None, is_reference=False):
    """Return the FileModel rows that match every filter supplied.

    (Static method of ``FileService``.)

    ``is_reference`` is always applied and defaults to ``False``, so
    reference files are only returned when the caller asks for them.
    Each of the other arguments narrows the query only when it is truthy;
    ``task_id`` is compared as a string.

    Returns a list of FileModel objects (the result of ``query.all()``).
    """
    query = session.query(FileModel).filter_by(is_reference=is_reference)
    if workflow_spec_id:
        query = query.filter_by(workflow_spec_id=workflow_spec_id)
    # NOTE(review): the study_id / workflow_id / task_id guards below sit in
    # unchanged lines that the reviewed diff elided between two hunks; they are
    # reconstructed from the visible pattern -- confirm against the file.
    if study_id:
        query = query.filter_by(study_id=study_id)
    if workflow_id:
        query = query.filter_by(workflow_id=workflow_id)
    if task_id:
        query = query.filter_by(task_id=str(task_id))
    if form_field_key:
        query = query.filter_by(form_field_key=form_field_key)
    if name:
        # Filter on the requested name. This previously compared the name
        # column against form_field_key, so lookups by name never matched on
        # the name itself.
        query = query.filter_by(name=name)

    results = query.all()
    return results
def upgrade():
    """Add the non-nullable boolean ``is_reference`` column to the ``file`` table."""
    # A NOT NULL column cannot be added to a table that already contains rows
    # unless the database is told what value to give those rows, so a
    # server-side default of false is supplied for the existing files.
    op.add_column('file', sa.Column('is_reference', sa.Boolean(), nullable=False,
                                    server_default=sa.false()))
def test_get_reference_file(self):
    """A reference file uploaded under a name can be downloaded again by that name."""
    file_name = "irb_document_types.xls"
    data = {'file': (io.BytesIO(b"abcdef"), "some crazy thing do not care.xls")}
    rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
                      content_type='multipart/form-data')
    # Check the setup request too, so a rejected upload fails here rather than
    # showing up as a confusing failure on the GET below.
    self.assert_success(rv)

    rv = self.app.get('/v1.0/reference_file/%s' % file_name)
    self.assert_success(rv)
    data_out = rv.get_data()
    self.assertEqual(b"abcdef", data_out)