Add the ability to upload and request general reference files by name. These will be used across workflows and will frequently contain lookup tables that can be referenced by various script tasks.

2025-02-20 11:48:16 +00:00 · 2020-03-13 15:03:57 -04:00 · 2020-03-13 15:03:57 -04:00 · 779674ab60
commit 779674ab60
parent 05b39df745
8 changed files with 234 additions and 23 deletions
--- a/crc/api.yml
+++ b/crc/api.yml
@ -458,6 +458,67 @@ paths:
                format: binary
                example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
  # /v1.0/workflow/0
+  /reference_file:
+    # Lists the reference files known to the system; handled by crc.api.file.get_reference_files.
+    get:
+      operationId: crc.api.file.get_reference_files
+      summary: Provides a list of existing reference files that are available in the system.
+      tags:
+        - Files
+      responses:
+        '200':
+          # Only file descriptions are returned here, never the binary content.
+          description: An array of file descriptions (not the file content)
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/File"
+  /reference_file/{name}:
+    # Reference files are addressed by their name (a required path parameter) rather than by id.
+    parameters:
+      - name: name
+        in: path
+        required: true
+        description: The special name of the reference file.
+        schema:
+          type: string
+    # Download of the stored file content; handled by crc.api.file.get_reference_file.
+    get:
+      operationId: crc.api.file.get_reference_file
+      summary: Reference files are called by name rather than by id.
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+                example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
+    # Upload handler: crc.api.file.set_reference_file creates the named reference file or
+    # replaces its content, and responds with the serialized file model (JSON), not the bytes.
+    put:
+      operationId: crc.api.file.set_reference_file
+      summary: Create or update the contents of a named reference file.
+      tags:
+        - Files
+      requestBody:
+        content:
+          multipart/form-data:
+            schema:
+              type: object
+              properties:
+                # The handler requires the upload to be sent in a form part named "file".
+                file:
+                  type: string
+                  format: binary
+      responses:
+        '200':
+          description: A description of the stored reference file (not the file content)
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/File"
+  # /v1.0/workflow/0
  /workflow/{workflow_id}:
    parameters:
        - name: workflow_id
--- a/crc/api/file.py
+++ b/crc/api/file.py
@ -1,3 +1,4 @@
+import enum
 import io
 import os
 from datetime import datetime
@ -21,6 +22,10 @@ def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=No
    results = FileService.get_files(workflow_spec_id, study_id, workflow_id, task_id, form_field_key)
    return FileModelSchema(many=True).dump(results)

+def get_reference_files():
+    """Return the serialized file models of every file flagged as a reference file."""
+    reference_files = FileService.get_files(is_reference=True)
+    schema = FileModelSchema(many=True)
+    return schema.dump(reference_files)
+

 def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None):
    all_none = all(v is None for v in [workflow_spec_id, study_id, workflow_id, task_id, form_field_key])
@ -43,6 +48,42 @@ def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=Non
    return FileModelSchema().dump(file_model)


+def get_reference_file(name):
+    """Send back the stored content of the reference file called `name`.
+
+    The data record is looked up through FileService.get_reference_file_data and
+    returned as a file response carrying the model's name and content type.
+    """
+    file_data = FileService.get_reference_file_data(name)
+    content = io.BytesIO(file_data.data)
+    model = file_data.file_model
+    return send_file(
+        content,
+        attachment_filename=model.name,
+        mimetype=model.content_type,
+        cache_timeout=-1,  # Don't cache these files on the browser.
+    )
+
+
+def set_reference_file(name):
+    """Create or replace the reference file stored under `name` from a multipart upload.
+
+    Reference files are managed by the file service and are meant to be used in script
+    tasks to compute values. The upload must arrive in a form part named "file", and its
+    file name must carry the same extension as `name`; otherwise an ApiError is raised.
+    Returns the serialized file model.
+    """
+    uploads = connexion.request.files
+    if 'file' not in uploads:
+        raise ApiError('invalid_file',
+                       'Expected a file named "file" in the multipart form request', status_code=400)
+
+    upload = uploads['file']
+
+    # The reference name decides the expected type; the uploaded file name must agree with it.
+    expected_extension = FileService.get_extension(name)
+    actual_extension = FileService.get_extension(upload.filename)
+    if expected_extension != actual_extension:
+        message = ("The file you uploaded has an extension '%s', but it should have an extension of '%s' " %
+                   (actual_extension, expected_extension))
+        raise ApiError('invalid_file_type', message)
+
+    existing = FileService.get_files(name=name, is_reference=True)
+    if existing:
+        # A reference file with this name is already stored: update its content.
+        file_model = existing[0]
+        FileService.update_file(file_model, upload.stream.read(), upload.content_type)
+    else:
+        file_model = FileService.add_reference_file(name, upload.content_type, upload.stream.read())
+
+    return FileModelSchema().dump(file_model)
+
+
 def update_file_data(file_id):
    file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first()
    file = connexion.request.files['file']
--- a/crc/models/api_models.py
+++ b/crc/models/api_models.py
@ -105,6 +105,7 @@ class WorkflowApi(object):
        self.spec_version = spec_version
        self.is_latest_spec = is_latest_spec

+
 class WorkflowApiSchema(ma.Schema):
    class Meta:
        model = WorkflowApi
--- a/crc/models/file.py
+++ b/crc/models/file.py
@ -66,12 +66,19 @@ class FileDataModel(db.Model):


 class FileModel(db.Model):
+    """A file model defines one of the following increasingly specific types:
+    * A Reference file.  Which just has a name and a reference flag set to true. These are global, and available everywhere.
+    * A Workflow Specification (such as BPMN or DMN model or a template)
+    * A Script generated file in a workflow. Which is specific to a study, workflow and task.
+    * An Uploaded file in a workflow. specific to a study, workflow, task, AND a field value.
+    """
    __tablename__ = 'file'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)
    type = db.Column(db.Enum(FileType))
-    primary = db.Column(db.Boolean)
    content_type = db.Column(db.String)
+    is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file.
+    primary = db.Column(db.Boolean) # Is this the primary BPMN in a workflow?
    workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
    workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
    study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True)
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@ -1,14 +1,13 @@
+import hashlib
 import os
-from datetime import datetime
 from uuid import UUID
 from xml.etree import ElementTree

 from crc import session
-from crc.api.common import ApiErrorSchema, ApiError
-from crc.models.file import FileType, FileDataModel, FileModelSchema, FileModel, CONTENT_TYPES
+from crc.api.common import ApiError
+from crc.models.file import FileType, FileDataModel, FileModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.workflow_processor import WorkflowProcessor
-import hashlib


 class FileService(object):
@ -54,25 +53,39 @@ class FileService(object):
        )
        return FileService.update_file(file_model, binary_data, content_type)

+    @staticmethod
+    def add_reference_file(name, content_type, binary_data):
+        """Store a new reference file called `name`, not tied to a spec or workflow.
+
+        The model is created with only its name and the reference flag set, and the
+        content is then saved through `update_file`. Only one file per reference name
+        is meant to exist; this method itself does not check for an existing one.
+        """
+        reference_model = FileModel(name=name, is_reference=True)
+        return FileService.update_file(reference_model, binary_data, content_type)
+
+    @staticmethod
+    def get_extension(file_name):
+        """Return the lower-cased extension of `file_name` without the leading dot.
+
+        A name with no extension yields an empty string.
+        """
+        suffix = os.path.splitext(file_name)[1]
+        normalized = suffix.lower().strip()
+        return normalized[1:]
+
    @staticmethod
    def update_file(file_model, binary_data, content_type):

-        file_data_model = session.query(FileDataModel).\
+        file_data_model = session.query(FileDataModel). \
            filter_by(file_model_id=file_model.id,
                      version=file_model.latest_version
                      ).with_for_update().first()
        md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
-        if(file_data_model is not None and md5_checksum == file_data_model.md5_hash):
+        if (file_data_model is not None and md5_checksum == file_data_model.md5_hash):
            # This file does not need to be updated, it's the same file.
            return file_model

        # Verify the extension
-        basename, file_extension = os.path.splitext(file_model.name)
-        file_extension = file_extension.lower().strip()[1:]
+        file_extension = FileService.get_extension(file_model.name)
        if file_extension not in FileType._member_names_:
-            return ApiErrorSchema().dump(ApiError('unknown_extension',
-                                                  'The file you provided does not have an accepted extension:' +
-                                                  file_extension)), 404
+            raise ApiError('unknown_extension',
+                           'The file you provided does not have an accepted extension:' +
+                           file_extension, status_code=404)
        else:
            file_model.type = FileType[file_extension]
            file_model.content_type = content_type
@ -92,8 +105,10 @@ class FileService(object):
        return file_model

    @staticmethod
-    def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None):
-        query = session.query(FileModel)
+    def get_files(workflow_spec_id=None,
+                  study_id=None, workflow_id=None, task_id=None, form_field_key=None,
+                  name=None, is_reference=False):
+        query = session.query(FileModel).filter_by(is_reference=is_reference)
        if workflow_spec_id:
            query = query.filter_by(workflow_spec_id=workflow_spec_id)
        if study_id:
@ -104,15 +119,28 @@ class FileService(object):
            query = query.filter_by(task_id=str(task_id))
        if form_field_key:
            query = query.filter_by(form_field_key=form_field_key)
+        if name:
+            # Filter on the requested file name itself (previously the form field key was
+            # used here by mistake, so lookups by name never matched the stored file).
+            query = query.filter_by(name=name)

        results = query.all()
        return results

    @staticmethod
-    def get_file_data(file_id):
-        """Returns the file_data that is associated with the file model id"""
-        file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
-        return session.query(FileDataModel)\
-            .filter(FileDataModel.file_model_id == file_id)\
-            .filter(FileDataModel.version == file_model.latest_version)\
+    def get_file_data(file_id, file_model=None):
+        """Returns the file_data that is associated with the file model id, if an actual file_model
+        is provided, uses that rather than looking it up again."""
+        if file_model is None:
+            file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
+        return session.query(FileDataModel) \
+            .filter(FileDataModel.file_model_id == file_id) \
+            .filter(FileDataModel.version == file_model.latest_version) \
            .first()
+
+    @staticmethod
+    def get_reference_file_data(file_name):
+        """Return the latest stored data record of the reference file named `file_name`.
+
+        Raises an ApiError with the code "file_not_found" and HTTP status 404 when no
+        file flagged as a reference file carries that name.
+        """
+        file_model = session.query(FileModel). \
+            filter(FileModel.is_reference == True). \
+            filter(FileModel.name == file_name).first()
+        if not file_model:
+            # A missing resource is reported as 404, matching the explicit status codes
+            # passed to the other ApiErrors raised by the file API and service.
+            raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name,
+                           status_code=404)
+        # Hand over the model that was just loaded so get_file_data does not look it up again.
+        return FileService.get_file_data(file_model.id, file_model)
--- a/migrations/versions/0c8a2f8db28c_.py
+++ b/migrations/versions/0c8a2f8db28c_.py
@ -0,0 +1,28 @@
+"""empty message
+
+Revision ID: 0c8a2f8db28c
+Revises: 8856126b6658
+Create Date: 2020-03-13 14:05:46.983484
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '0c8a2f8db28c'
+down_revision = '8856126b6658'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Add the non-nullable boolean `is_reference` column to the `file` table."""
+    # ### commands auto generated by Alembic - adjusted ###
+    # A server-side default of false is supplied so rows that already exist in `file`
+    # receive a value; adding a NOT NULL column without one fails on a populated table.
+    op.add_column('file', sa.Column('is_reference', sa.Boolean(), nullable=False,
+                                    server_default=sa.false()))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    """Revert this revision by dropping the `is_reference` column from the `file` table."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('file', 'is_reference')
+    # ### end Alembic commands ###
--- a/tests/data/reference/irb_documents.xlsx
+++ b/tests/data/reference/irb_documents.xlsx
--- a/tests/test_files_api.py
+++ b/tests/test_files_api.py
@ -1,9 +1,8 @@
 import io
 import json
-from datetime import datetime

 from crc import session
-from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel
+from crc.models.file import FileModel, FileType, FileModelSchema
 from crc.models.workflow import WorkflowSpecModel
 from tests.base_test import BaseTest

@ -58,6 +57,52 @@ class TestFilesApi(BaseTest):
        file2 = FileModelSchema().load(json_data, session=session)
        self.assertEqual(file, file2)

+    def test_set_reference_file(self):
+        """Uploading a reference file returns a file model that is typed and flagged as a reference."""
+        file_name = "irb_document_types.xls"
+        upload = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.xls")}
+        url = '/v1.0/reference_file/%s' % file_name
+        rv = self.app.put(url, data=upload, follow_redirects=True,
+                          content_type='multipart/form-data')
+        self.assert_success(rv)
+        self.assertIsNotNone(rv.get_data())
+        payload = json.loads(rv.get_data(as_text=True))
+        stored = FileModelSchema().load(payload, session=session)
+        self.assertEqual(FileType.xls, stored.type)
+        self.assertTrue(stored.is_reference)
+        self.assertEqual("application/vnd.ms-excel", stored.content_type)
+
+    def test_set_reference_file_bad_extension(self):
+        """An upload whose extension differs from the reference name's extension is rejected."""
+        file_name = "irb_document_types.xls"
+        upload = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.ppt")}
+        rv = self.app.put('/v1.0/reference_file/%s' % file_name,
+                          data=upload,
+                          follow_redirects=True,
+                          content_type='multipart/form-data')
+        self.assert_failure(rv, error_code="invalid_file_type")
+
+    def test_get_reference_file(self):
+        """A reference file that was uploaded can be fetched by name with its bytes intact."""
+        file_name = "irb_document_types.xls"
+        url = '/v1.0/reference_file/%s' % file_name
+        upload = {'file': (io.BytesIO(b"abcdef"), "some crazy thing do not care.xls")}
+        self.app.put(url, data=upload, follow_redirects=True,
+                     content_type='multipart/form-data')
+        rv = self.app.get(url)
+        self.assert_success(rv)
+        self.assertEqual(b"abcdef", rv.get_data())
+
+    def test_list_reference_files(self):
+        """The listing endpoint returns exactly the one reference file that was uploaded."""
+        file_name = "irb_document_types.xls"
+        upload = {'file': (io.BytesIO(b"abcdef"), file_name)}
+        self.app.put('/v1.0/reference_file/%s' % file_name, data=upload, follow_redirects=True,
+                     content_type='multipart/form-data')
+
+        rv = self.app.get('/v1.0/reference_file',
+                          follow_redirects=True,
+                          content_type="application/json")
+        self.assert_success(rv)
+        listing = json.loads(rv.get_data(as_text=True))
+        self.assertEqual(1, len(listing))
+        files = FileModelSchema(many=True).load(listing, session=session)
+        first = files[0]
+        self.assertEqual(file_name, first.name)
+        self.assertTrue(first.is_reference)
+
    def test_update_file_info(self):
        self.load_example_data()
        file: FileModel = session.query(FileModel).first()
@ -118,7 +163,6 @@ class TestFilesApi(BaseTest):
        file = FileModelSchema().load(json_data, session=session)
        self.assertEqual(1, file.latest_version)

-
    def test_get_file(self):
        self.load_example_data()
        spec = session.query(WorkflowSpecModel).first()
@ -137,3 +181,4 @@ class TestFilesApi(BaseTest):
        rv = self.app.delete('/v1.0/file/%i' % file.id)
        rv = self.app.get('/v1.0/file/%i' % file.id)
        self.assertEqual(404, rv.status_code)
+