diff --git a/crc/api.yml b/crc/api.yml
index 980c4f23..f426c536 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -538,6 +538,67 @@ paths:
format: binary
example: ''
# /v1.0/workflow/0
+ /reference_file:
+ get:
+ operationId: crc.api.file.get_reference_files
+ summary: Provides a list of existing reference files that are available in the system.
+ tags:
+ - Files
+ responses:
+ '200':
+ description: An array of file descriptions (not the file content)
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: "#/components/schemas/File"
+ /reference_file/{name}:
+ parameters:
+ - name: name
+ in: path
+ required: true
+ description: The special name of the reference file.
+ schema:
+ type: string
+ get:
+ operationId: crc.api.file.get_reference_file
+ summary: Reference files are called by name rather than by id.
+ tags:
+ - Files
+ responses:
+ '200':
+ description: Returns the actual file
+ content:
+ application/octet-stream:
+ schema:
+ type: string
+ format: binary
+ example: ''
+    put:
+      operationId: crc.api.file.set_reference_file
+      summary: Create or update the contents of a named reference file.
+      tags:
+        - Files
+      requestBody:
+        content:
+          multipart/form-data:
+            schema:
+              type: object
+              properties:
+                file:
+                  type: string
+                  format: binary
+      responses:
+        '200':
+          description: Metadata describing the stored reference file (not the file content)
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/File"
+        '400':
+          description: The multipart request did not contain a file named "file"
+ # /v1.0/workflow/0
/workflow/{workflow_id}:
parameters:
- name: workflow_id
diff --git a/crc/api/file.py b/crc/api/file.py
index 5e4b7c48..ee2a2a82 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -1,3 +1,4 @@
+import enum
import io
import os
from datetime import datetime
@@ -21,6 +22,10 @@ def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=No
results = FileService.get_files(workflow_spec_id, study_id, workflow_id, task_id, form_field_key)
return FileModelSchema(many=True).dump(results)
+def get_reference_files():
+ results = FileService.get_files(is_reference=True)
+ return FileModelSchema(many=True).dump(results)
+
def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None):
all_none = all(v is None for v in [workflow_spec_id, study_id, workflow_id, task_id, form_field_key])
@@ -43,6 +48,42 @@ def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=Non
return FileModelSchema().dump(file_model)
+def get_reference_file(name):
+ file_data = FileService.get_reference_file_data(name)
+ return send_file(
+ io.BytesIO(file_data.data),
+ attachment_filename=file_data.file_model.name,
+ mimetype=file_data.file_model.content_type,
+ cache_timeout=-1 # Don't cache these files on the browser.
+ )
+
+
+def set_reference_file(name):
+ """Uses the file service to manage reference-files. They will be used in script tasks to compute values."""
+ if 'file' not in connexion.request.files:
+ raise ApiError('invalid_file',
+ 'Expected a file named "file" in the multipart form request', status_code=400)
+
+ file = connexion.request.files['file']
+
+ name_extension = FileService.get_extension(name)
+ file_extension = FileService.get_extension(file.filename)
+ if name_extension != file_extension:
+ raise ApiError('invalid_file_type',
+ "The file you uploaded has an extension '%s', but it should have an extension of '%s' " %
+ (file_extension, name_extension))
+
+
+ file_models = FileService.get_files(name=name, is_reference=True)
+ if len(file_models) == 0:
+ file_model = FileService.add_reference_file(name, file.content_type, file.stream.read())
+ else:
+ file_model = file_models[0]
+ FileService.update_file(file_models[0], file.stream.read(), file.content_type)
+
+ return FileModelSchema().dump(file_model)
+
+
def update_file_data(file_id):
file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first()
file = connexion.request.files['file']
diff --git a/crc/models/api_models.py b/crc/models/api_models.py
index 6c4c7b2d..aa8fd0fe 100644
--- a/crc/models/api_models.py
+++ b/crc/models/api_models.py
@@ -107,6 +107,7 @@ class WorkflowApi(object):
self.is_latest_spec = is_latest_spec
self.is_active = is_active
+
class WorkflowApiSchema(ma.Schema):
class Meta:
model = WorkflowApi
diff --git a/crc/models/file.py b/crc/models/file.py
index a98ca68e..77e895a6 100644
--- a/crc/models/file.py
+++ b/crc/models/file.py
@@ -66,6 +66,12 @@ class FileDataModel(db.Model):
class FileModel(db.Model):
+    """A file model defines one of the following increasingly specific types:
+    * A Reference file, which has just a name and a reference flag set to true. These are global and available everywhere.
+    * A Workflow Specification file (such as a BPMN or DMN model, or a template).
+    * A Script-generated file in a workflow, which is specific to a study, workflow and task.
+    * An Uploaded file in a workflow, which is specific to a study, workflow, task, AND a field value.
+    """
__tablename__ = 'file'
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
@@ -73,6 +79,8 @@ class FileModel(db.Model):
primary = db.Column(db.Boolean)
is_status = db.Column(db.Boolean)
content_type = db.Column(db.String)
+ is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file.
+ primary = db.Column(db.Boolean) # Is this the primary BPMN in a workflow?
workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True)
diff --git a/crc/services/file_service.py b/crc/services/file_service.py
index aa0182d1..3042c12e 100644
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@@ -1,14 +1,13 @@
+import hashlib
import os
-from datetime import datetime
from uuid import UUID
from xml.etree import ElementTree
from crc import session
-from crc.api.common import ApiErrorSchema, ApiError
-from crc.models.file import FileType, FileDataModel, FileModelSchema, FileModel, CONTENT_TYPES
+from crc.api.common import ApiError
+from crc.models.file import FileType, FileDataModel, FileModel
from crc.models.workflow import WorkflowSpecModel
from crc.services.workflow_processor import WorkflowProcessor
-import hashlib
class FileService(object):
@@ -55,25 +54,39 @@ class FileService(object):
)
return FileService.update_file(file_model, binary_data, content_type)
+ @staticmethod
+ def add_reference_file(name, content_type, binary_data):
+ """Create a file with the given name, but not associated with a spec or workflow.
+ Only one file with the given reference name can exist."""
+ file_model = FileModel(
+ name=name,
+ is_reference=True
+ )
+ return FileService.update_file(file_model, binary_data, content_type)
+
+ @staticmethod
+ def get_extension(file_name):
+ basename, file_extension = os.path.splitext(file_name)
+ return file_extension.lower().strip()[1:]
+
@staticmethod
def update_file(file_model, binary_data, content_type):
- file_data_model = session.query(FileDataModel).\
+ file_data_model = session.query(FileDataModel). \
filter_by(file_model_id=file_model.id,
version=file_model.latest_version
).with_for_update().first()
md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
- if(file_data_model is not None and md5_checksum == file_data_model.md5_hash):
+ if (file_data_model is not None and md5_checksum == file_data_model.md5_hash):
# This file does not need to be updated, it's the same file.
return file_model
# Verify the extension
- basename, file_extension = os.path.splitext(file_model.name)
- file_extension = file_extension.lower().strip()[1:]
+ file_extension = FileService.get_extension(file_model.name)
if file_extension not in FileType._member_names_:
- return ApiErrorSchema().dump(ApiError('unknown_extension',
- 'The file you provided does not have an accepted extension:' +
- file_extension)), 404
+ raise ApiError('unknown_extension',
+ 'The file you provided does not have an accepted extension:' +
+ file_extension, status_code=404)
else:
file_model.type = FileType[file_extension]
file_model.content_type = content_type
@@ -93,8 +106,10 @@ class FileService(object):
return file_model
@staticmethod
- def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None):
- query = session.query(FileModel)
+ def get_files(workflow_spec_id=None,
+ study_id=None, workflow_id=None, task_id=None, form_field_key=None,
+ name=None, is_reference=False):
+ query = session.query(FileModel).filter_by(is_reference=is_reference)
if workflow_spec_id:
query = query.filter_by(workflow_spec_id=workflow_spec_id)
if study_id:
@@ -105,15 +120,28 @@ class FileService(object):
query = query.filter_by(task_id=str(task_id))
if form_field_key:
query = query.filter_by(form_field_key=form_field_key)
+ if name:
+ query = query.filter_by(name=form_field_key)
results = query.all()
return results
@staticmethod
- def get_file_data(file_id):
- """Returns the file_data that is associated with the file model id"""
- file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
- return session.query(FileDataModel)\
- .filter(FileDataModel.file_model_id == file_id)\
- .filter(FileDataModel.version == file_model.latest_version)\
+ def get_file_data(file_id, file_model=None):
+        """Returns the file_data that is associated with the file model id. If an actual file_model
+        is provided, it is used rather than looking it up again."""
+ if file_model is None:
+ file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
+ return session.query(FileDataModel) \
+ .filter(FileDataModel.file_model_id == file_id) \
+ .filter(FileDataModel.version == file_model.latest_version) \
.first()
+
+ @staticmethod
+ def get_reference_file_data(file_name):
+ file_model = session.query(FileModel). \
+ filter(FileModel.is_reference == True). \
+ filter(FileModel.name == file_name).first()
+ if not file_model:
+ raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name)
+ return FileService.get_file_data(file_model.id, file_model)
diff --git a/migrations/versions/0c8a2f8db28c_.py b/migrations/versions/0c8a2f8db28c_.py
new file mode 100644
index 00000000..6a172260
--- /dev/null
+++ b/migrations/versions/0c8a2f8db28c_.py
@@ -0,0 +1,28 @@
+"""empty message
+
+Revision ID: 0c8a2f8db28c
+Revises: 8856126b6658
+Create Date: 2020-03-13 14:05:46.983484
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '0c8a2f8db28c'
+down_revision = '8856126b6658'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.add_column('file', sa.Column('is_reference', sa.Boolean(), nullable=False))
+ # ### end Alembic commands ###
+
+
+def downgrade():
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_column('file', 'is_reference')
+ # ### end Alembic commands ###
diff --git a/tests/data/reference/irb_documents.xlsx b/tests/data/reference/irb_documents.xlsx
new file mode 100644
index 00000000..21ecc9da
Binary files /dev/null and b/tests/data/reference/irb_documents.xlsx differ
diff --git a/tests/test_files_api.py b/tests/test_files_api.py
index 57dd1dff..e666ea1b 100644
--- a/tests/test_files_api.py
+++ b/tests/test_files_api.py
@@ -1,9 +1,8 @@
import io
import json
-from datetime import datetime
from crc import session
-from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel
+from crc.models.file import FileModel, FileType, FileModelSchema
from crc.models.workflow import WorkflowSpecModel
from tests.base_test import BaseTest
@@ -58,6 +57,52 @@ class TestFilesApi(BaseTest):
file2 = FileModelSchema().load(json_data, session=session)
self.assertEqual(file, file2)
+ def test_set_reference_file(self):
+ file_name = "irb_document_types.xls"
+ data = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.xls")}
+ rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
+ content_type='multipart/form-data')
+ self.assert_success(rv)
+ self.assertIsNotNone(rv.get_data())
+ json_data = json.loads(rv.get_data(as_text=True))
+ file = FileModelSchema().load(json_data, session=session)
+ self.assertEqual(FileType.xls, file.type)
+ self.assertTrue(file.is_reference)
+ self.assertEqual("application/vnd.ms-excel", file.content_type)
+
+ def test_set_reference_file_bad_extension(self):
+ file_name = "irb_document_types.xls"
+ data = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.ppt")}
+ rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
+ content_type='multipart/form-data')
+ self.assert_failure(rv, error_code="invalid_file_type")
+
+ def test_get_reference_file(self):
+ file_name = "irb_document_types.xls"
+ data = {'file': (io.BytesIO(b"abcdef"), "some crazy thing do not care.xls")}
+ rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
+ content_type='multipart/form-data')
+ rv = self.app.get('/v1.0/reference_file/%s' % file_name)
+ self.assert_success(rv)
+ data_out = rv.get_data()
+ self.assertEqual(b"abcdef", data_out)
+
+ def test_list_reference_files(self):
+ file_name = "irb_document_types.xls"
+ data = {'file': (io.BytesIO(b"abcdef"), file_name)}
+ rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
+ content_type='multipart/form-data')
+
+ rv = self.app.get('/v1.0/reference_file',
+ follow_redirects=True,
+ content_type="application/json")
+ self.assert_success(rv)
+ json_data = json.loads(rv.get_data(as_text=True))
+ self.assertEqual(1, len(json_data))
+ file = FileModelSchema(many=True).load(json_data, session=session)
+ self.assertEqual(file_name, file[0].name)
+ self.assertTrue(file[0].is_reference)
+
def test_update_file_info(self):
self.load_example_data()
file: FileModel = session.query(FileModel).first()
@@ -118,7 +163,6 @@ class TestFilesApi(BaseTest):
file = FileModelSchema().load(json_data, session=session)
self.assertEqual(1, file.latest_version)
-
def test_get_file(self):
self.load_example_data()
spec = session.query(WorkflowSpecModel).first()
@@ -137,3 +181,4 @@ class TestFilesApi(BaseTest):
rv = self.app.delete('/v1.0/file/%i' % file.id)
rv = self.app.get('/v1.0/file/%i' % file.id)
self.assertEqual(404, rv.status_code)
+