Merge remote-tracking branch 'origin/feature/reference_files' into feature/workflow_spec_categories

2025-02-22 12:48:25 +00:00 · 2020-03-18 16:58:57 -04:00 · 2020-03-18 16:58:57 -04:00 · 9a26fc8e80
commit 9a26fc8e80
parent f0678b43d4 02be8ede75
8 changed files with 234 additions and 22 deletions
--- a/crc/api.yml
+++ b/crc/api.yml
@ -538,6 +538,67 @@ paths:
                format: binary
                example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
  # /v1.0/reference_file
  # Collection endpoint: lists the descriptions of all reference files.
  # The file bytes themselves are served by /reference_file/{name}.
  /reference_file:
    get:
      operationId: crc.api.file.get_reference_files
      summary: Provides a list of existing reference files that are available in the system.
      tags:
        - Files
      responses:
        '200':
          description: An array of file descriptions (not the file content)
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: "#/components/schemas/File"
  # Reference files are addressed by name rather than by numeric id.
  /reference_file/{name}:
    parameters:
      - name: name
        in: path
        required: true
        description: The special name of the reference file.
        schema:
          type: string
    get:
      operationId: crc.api.file.get_reference_file
      summary: Reference files are called by name rather than by id.
      tags:
        - Files
      responses:
        '200':
          description: Returns the actual file
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
                example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
    put:
      operationId: crc.api.file.set_reference_file
      summary: Update the contents of a named reference file.
      tags:
        - Files
      requestBody:
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
      responses:
        '200':
          # The handler returns the serialized file model (JSON), not the file bytes.
          description: Metadata about the reference file that was created or updated.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/File"
  # /v1.0/workflow/0
  /workflow/{workflow_id}:
    parameters:
        - name: workflow_id
--- a/crc/api/file.py
+++ b/crc/api/file.py
@ -1,3 +1,4 @@
 import enum
 import io
 import os
 from datetime import datetime
@ -21,6 +22,10 @@ def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=No
    results = FileService.get_files(workflow_spec_id, study_id, workflow_id, task_id, form_field_key)
    return FileModelSchema(many=True).dump(results)
 def get_reference_files():
    """Return the serialized file models of all reference files (no file content)."""
    reference_files = FileService.get_files(is_reference=True)
    schema = FileModelSchema(many=True)
    return schema.dump(reference_files)
 def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None):
    all_none = all(v is None for v in [workflow_spec_id, study_id, workflow_id, task_id, form_field_key])
@ -43,6 +48,42 @@ def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=Non
    return FileModelSchema().dump(file_model)
 def get_reference_file(name):
    """Send back the stored content of the reference file called `name`.

    The response uses the stored file name as the attachment name and the
    stored content type as the mime type.
    """
    reference_data = FileService.get_reference_file_data(name)
    payload = io.BytesIO(reference_data.data)
    model = reference_data.file_model
    # Don't cache these files on the browser.
    return send_file(payload,
                     attachment_filename=model.name,
                     mimetype=model.content_type,
                     cache_timeout=-1)
 def set_reference_file(name):
    """Create or replace the content of the reference file called `name`.

    Reference files are managed through the file service so that script tasks
    can use them to compute values. The upload must arrive as a multipart form
    part named "file" whose file name has the same extension as `name`.

    Returns the serialized file model. Raises ApiError when the "file" part is
    missing ('invalid_file') or the extensions differ ('invalid_file_type').
    """
    uploads = connexion.request.files
    if 'file' not in uploads:
        raise ApiError('invalid_file',
                       'Expected a file named "file" in the multipart form request', status_code=400)
    upload = uploads['file']

    expected_extension = FileService.get_extension(name)
    actual_extension = FileService.get_extension(upload.filename)
    if actual_extension != expected_extension:
        message = ("The file you uploaded has an extension '%s', but it should have an extension of '%s' " %
                   (actual_extension, expected_extension))
        raise ApiError('invalid_file_type', message)

    existing = FileService.get_files(name=name, is_reference=True)
    if existing:
        # A reference file with this name is already stored: push the new content into it.
        file_model = existing[0]
        FileService.update_file(file_model, upload.stream.read(), upload.content_type)
    else:
        file_model = FileService.add_reference_file(name, upload.content_type, upload.stream.read())
    return FileModelSchema().dump(file_model)
 def update_file_data(file_id):
    file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first()
    file = connexion.request.files['file']
--- a/crc/models/api_models.py
+++ b/crc/models/api_models.py
@ -107,6 +107,7 @@ class WorkflowApi(object):
        self.is_latest_spec = is_latest_spec
        self.is_active = is_active
 class WorkflowApiSchema(ma.Schema):
    class Meta:
        model = WorkflowApi
--- a/crc/models/file.py
+++ b/crc/models/file.py
@ -66,6 +66,12 @@ class FileDataModel(db.Model):
 class FileModel(db.Model):
    """A file model defines one of the following increasingly specific types:
    * A Reference file.  Which just has a name and a reference flag set to true. These are global, and available everywhere.
    * A Workflow Specification (such as BPMN or DMN model or a template)
    * A Script generated file in a workflow. Which is specific to a study, workflow and task.
    * An Uploaded file in a workflow. specific to a study, workflow, task, AND a field value.
    """
    __tablename__ = 'file'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)
@ -73,6 +79,8 @@ class FileModel(db.Model):
    primary = db.Column(db.Boolean)
    is_status = db.Column(db.Boolean)
    content_type = db.Column(db.String)
    is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file.
    primary = db.Column(db.Boolean) # Is this the primary BPMN in a workflow?
    workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
    workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
    study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True)
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@ -1,14 +1,13 @@
 import hashlib
 import os
 from datetime import datetime
 from uuid import UUID
 from xml.etree import ElementTree
 from crc import session
-from crc.api.common import ApiErrorSchema, ApiError
+from crc.api.common import ApiError
-from crc.models.file import FileType, FileDataModel, FileModelSchema, FileModel, CONTENT_TYPES
+from crc.models.file import FileType, FileDataModel, FileModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.workflow_processor import WorkflowProcessor
 import hashlib
 class FileService(object):
@ -55,25 +54,39 @@ class FileService(object):
        )
        return FileService.update_file(file_model, binary_data, content_type)
    @staticmethod
    def add_reference_file(name, content_type, binary_data):
        """Store a new reference file that belongs to no spec or workflow.

        Only one file should exist per reference name; this method does not
        check for an existing one, so callers must look it up first.
        Returns whatever FileService.update_file returns for the new model.
        """
        reference_model = FileModel(name=name, is_reference=True)
        return FileService.update_file(reference_model, binary_data, content_type)
    @staticmethod
    def get_extension(file_name):
        """Return the lower-cased extension of `file_name` without its leading dot.

        Surrounding whitespace is removed; a name without an extension yields ''.
        """
        _, dotted_extension = os.path.splitext(file_name)
        normalized = dotted_extension.lower().strip()
        return normalized[1:]
    @staticmethod
    def update_file(file_model, binary_data, content_type):
-        file_data_model = session.query(FileDataModel).\
+        file_data_model = session.query(FileDataModel). \
            filter_by(file_model_id=file_model.id,
                      version=file_model.latest_version
                      ).with_for_update().first()
        md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
-        if(file_data_model is not None and md5_checksum == file_data_model.md5_hash):
+        if (file_data_model is not None and md5_checksum == file_data_model.md5_hash):
            # This file does not need to be updated, it's the same file.
            return file_model
        # Verify the extension
-        basename, file_extension = os.path.splitext(file_model.name)
+        file_extension = FileService.get_extension(file_model.name)
        # The extension may already arrive lower-cased and without its dot;
        # slicing off one more character would turn "xls" into "ls". Remove
        # only a leading dot, if there is one.
        file_extension = file_extension.lower().strip().lstrip('.')
        if file_extension not in FileType._member_names_:
-            return ApiErrorSchema().dump(ApiError('unknown_extension',
+            raise ApiError('unknown_extension',
-                                                  'The file you provided does not have an accepted extension:' +
+                           'The file you provided does not have an accepted extension:' +
-                                                  file_extension)), 404
+                           file_extension, status_code=404)
        else:
            file_model.type = FileType[file_extension]
            file_model.content_type = content_type
@ -93,8 +106,10 @@ class FileService(object):
        return file_model
    @staticmethod
-    def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None):
+    def get_files(workflow_spec_id=None,
-        query = session.query(FileModel)
+                  study_id=None, workflow_id=None, task_id=None, form_field_key=None,
                  name=None, is_reference=False):
        query = session.query(FileModel).filter_by(is_reference=is_reference)
        if workflow_spec_id:
            query = query.filter_by(workflow_spec_id=workflow_spec_id)
        if study_id:
@ -105,15 +120,28 @@ class FileService(object):
            query = query.filter_by(task_id=str(task_id))
        if form_field_key:
            query = query.filter_by(form_field_key=form_field_key)
        if name:
            # Filter on the requested file name (not on the form field key).
            query = query.filter_by(name=name)
        results = query.all()
        return results
    @staticmethod
-    def get_file_data(file_id):
+    def get_file_data(file_id, file_model=None):
-        """Returns the file_data that is associated with the file model id"""
+        """Returns the file_data that is associated with the file model id, if an actual file_model
-        file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
+        is provided, uses that rather than looking it up again."""
-        return session.query(FileDataModel)\
+        if file_model is None:
-            .filter(FileDataModel.file_model_id == file_id)\
+            file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
-            .filter(FileDataModel.version == file_model.latest_version)\
+        return session.query(FileDataModel) \
            .filter(FileDataModel.file_model_id == file_id) \
            .filter(FileDataModel.version == file_model.latest_version) \
            .first()
    @staticmethod
    def get_reference_file_data(file_name):
        """Return the file data for the reference file called `file_name`.

        Raises ApiError ('file_not_found') when no reference file has that name.
        """
        reference_query = session.query(FileModel).filter(
            FileModel.is_reference == True,  # noqa: E712 - builds a SQL expression
            FileModel.name == file_name,
        )
        file_model = reference_query.first()
        if file_model:
            # Hand over the model so get_file_data does not look it up again.
            return FileService.get_file_data(file_model.id, file_model)
        raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name)
--- a/migrations/versions/0c8a2f8db28c_.py
+++ b/migrations/versions/0c8a2f8db28c_.py
@ -0,0 +1,28 @@
 """empty message
 Revision ID: 0c8a2f8db28c
 Revises: 8856126b6658
 Create Date: 2020-03-13 14:05:46.983484
 """
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = '0c8a2f8db28c'
 down_revision = '8856126b6658'
 branch_labels = None
 depends_on = None
 def upgrade():
    """Add the non-nullable boolean ``is_reference`` column to the ``file`` table."""
    # A NOT NULL column can only be added to a table that already holds rows
    # when those rows receive a value, so give the column a FALSE server
    # default (existing files are not reference files).
    op.add_column('file', sa.Column('is_reference', sa.Boolean(), nullable=False,
                                    server_default=sa.false()))
 def downgrade():
    """Undo the upgrade by dropping ``is_reference`` from the ``file`` table."""
    table_name, column_name = 'file', 'is_reference'
    op.drop_column(table_name, column_name)
--- a/tests/data/reference/irb_documents.xlsx
+++ b/tests/data/reference/irb_documents.xlsx
--- a/tests/test_files_api.py
+++ b/tests/test_files_api.py
@ -1,9 +1,8 @@
 import io
 import json
 from datetime import datetime
 from crc import session
-from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel
+from crc.models.file import FileModel, FileType, FileModelSchema
 from crc.models.workflow import WorkflowSpecModel
 from tests.base_test import BaseTest
@ -58,6 +57,52 @@ class TestFilesApi(BaseTest):
        file2 = FileModelSchema().load(json_data, session=session)
        self.assertEqual(file, file2)
    def test_set_reference_file(self):
        """A PUT to a reference file name stores an xls reference file with the Excel content type."""
        file_name = "irb_document_types.xls"
        upload = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.xls")}
        url = '/v1.0/reference_file/%s' % file_name
        rv = self.app.put(url, data=upload, follow_redirects=True,
                          content_type='multipart/form-data')
        self.assert_success(rv)
        self.assertIsNotNone(rv.get_data())

        # The response body is the serialized file model.
        payload = json.loads(rv.get_data(as_text=True))
        stored = FileModelSchema().load(payload, session=session)
        self.assertEqual(FileType.xls, stored.type)
        self.assertTrue(stored.is_reference)
        self.assertEqual("application/vnd.ms-excel", stored.content_type)
    def test_set_reference_file_bad_extension(self):
        """An upload whose extension differs from the reference name is rejected."""
        file_name = "irb_document_types.xls"
        # The uploaded file name ends in .ppt while the reference name ends in .xls.
        upload = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.ppt")}
        url = '/v1.0/reference_file/%s' % file_name
        rv = self.app.put(url, data=upload, follow_redirects=True,
                          content_type='multipart/form-data')
        self.assert_failure(rv, error_code="invalid_file_type")
    def test_get_reference_file(self):
        """A reference file that was uploaded can be downloaded again by name, byte for byte."""
        file_name = "irb_document_types.xls"
        data = {'file': (io.BytesIO(b"abcdef"), "some crazy thing do not care.xls")}
        rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
                          content_type='multipart/form-data')
        # Check the setup upload too, so a failed PUT is reported as such
        # instead of showing up as a confusing GET failure.
        self.assert_success(rv)

        rv = self.app.get('/v1.0/reference_file/%s' % file_name)
        self.assert_success(rv)
        data_out = rv.get_data()
        self.assertEqual(b"abcdef", data_out)
    def test_list_reference_files(self):
        """After one upload, the listing endpoint returns exactly that reference file."""
        file_name = "irb_document_types.xls"
        data = {'file': (io.BytesIO(b"abcdef"), file_name)}
        rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
                          content_type='multipart/form-data')
        # Check the setup upload so a failed PUT does not surface as a wrong list length.
        self.assert_success(rv)

        rv = self.app.get('/v1.0/reference_file',
                          follow_redirects=True,
                          content_type="application/json")
        self.assert_success(rv)
        json_data = json.loads(rv.get_data(as_text=True))
        self.assertEqual(1, len(json_data))
        files = FileModelSchema(many=True).load(json_data, session=session)
        self.assertEqual(file_name, files[0].name)
        self.assertTrue(files[0].is_reference)
    def test_update_file_info(self):
        self.load_example_data()
        file: FileModel = session.query(FileModel).first()
@ -118,7 +163,6 @@ class TestFilesApi(BaseTest):
        file = FileModelSchema().load(json_data, session=session)
        self.assertEqual(1, file.latest_version)
    def test_get_file(self):
        self.load_example_data()
        spec = session.query(WorkflowSpecModel).first()
@ -137,3 +181,4 @@ class TestFilesApi(BaseTest):
        rv = self.app.delete('/v1.0/file/%i' % file.id)
        rv = self.app.get('/v1.0/file/%i' % file.id)
        self.assertEqual(404, rv.status_code)